basebackup bench: debug-log basebackup size

WIP: implement tracing_chrome support for utils::logging
jwt support; debug spans in basebackup
2026-05-25 17:10:38 +00:00 · 2023-12-06 18:07:24 +00:00 · 2023-12-06 18:07:24 +00:00 · 2023-12-06 17:20:04 +00:00 · 2023-12-05 23:55:49 +00:00 · 2023-12-05 23:55:49 +00:00
134 changed files with 3462 additions and 4044 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -404,7 +404,7 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  regress-tests:
-    needs: [ check-permissions, build-neon, tag ]
+    needs: [ check-permissions, build-neon ]
    runs-on: [ self-hosted, gen3, large ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
@@ -436,7 +436,6 @@ jobs:
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
-          BUILD_TAG: ${{ needs.tag.outputs.build-tag }}

      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,7 +2,7 @@ name: Create Release Branch

 on:
  schedule:
-    - cron: '0 6 * * 1'
+    - cron: '0 7 * * 5'
  workflow_dispatch:

 jobs:
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -193,8 +193,6 @@ dependencies = [
 "memchr",
 "pin-project-lite",
 "tokio",
- "zstd",
- "zstd-safe",
 ]

 [[package]]
@@ -651,7 +649,7 @@ dependencies = [
 "async-trait",
 "axum-core",
 "base64 0.21.1",
- "bitflags 1.3.2",
+ "bitflags",
 "bytes",
 "futures-util",
 "http",
@@ -848,7 +846,7 @@ version = "0.65.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "cexpr",
 "clang-sys",
 "lazy_static",
@@ -871,12 +869,6 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

-[[package]]
-name = "bitflags"
-version = "2.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
-
 [[package]]
 name = "block-buffer"
 version = "0.10.4"
@@ -1060,7 +1052,7 @@ checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990"
 dependencies = [
 "anstream",
 "anstyle",
- "bitflags 1.3.2",
+ "bitflags",
 "clap_lex",
 "strsim",
 ]
@@ -1132,7 +1124,6 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-compression",
- "bytes",
 "cfg-if",
 "chrono",
 "clap",
@@ -1382,7 +1373,7 @@ version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "crossterm_winapi",
 "libc",
 "mio",
@@ -1964,6 +1955,20 @@ dependencies = [
 "hashbrown 0.13.2",
 ]

+[[package]]
+name = "hdrhistogram"
+version = "7.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d"
+dependencies = [
+ "base64 0.21.1",
+ "byteorder",
+ "crossbeam-channel",
+ "flate2",
+ "nom",
+ "num-traits",
+]
+
 [[package]]
 name = "heapless"
 version = "0.8.0"
@@ -2242,7 +2247,7 @@ version = "0.9.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "inotify-sys",
 "libc",
 ]
@@ -2253,7 +2258,7 @@ version = "0.10.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fdd168d97690d0b8c412d6b6c10360277f4d7ee495c5d0d5d5fe0854923255cc"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "futures-core",
 "inotify-sys",
 "libc",
@@ -2373,7 +2378,7 @@ version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "libc",
 ]

@@ -2587,7 +2592,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
 dependencies = [
 "autocfg",
- "bitflags 1.3.2",
+ "bitflags",
 "cfg-if",
 "libc",
 ]
@@ -2598,7 +2603,7 @@ version = "0.26.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "cfg-if",
 "libc",
 "memoffset 0.7.1",
@@ -2622,7 +2627,7 @@ version = "5.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "729f63e1ca555a43fe3efa4f3efdf4801c479da85b432242a7b726f353c88486"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "crossbeam-channel",
 "filetime",
 "fsevent-sys",
@@ -2643,6 +2648,16 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "nu-ansi-term"
+version = "0.46.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
+dependencies = [
+ "overload",
+ "winapi",
+]
+
 [[package]]
 name = "num-bigint"
 version = "0.4.3"
@@ -2743,11 +2758,11 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

 [[package]]
 name = "openssl"
-version = "0.10.60"
+version = "0.10.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800"
+checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags",
 "cfg-if",
 "foreign-types",
 "libc",
@@ -2775,9 +2790,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"

 [[package]]
 name = "openssl-sys"
-version = "0.9.96"
+version = "0.9.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f"
+checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6"
 dependencies = [
 "cc",
 "libc",
@@ -2903,6 +2918,32 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"

+[[package]]
+name = "overload"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
+
+[[package]]
+name = "pagebench"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "futures",
+ "hdrhistogram",
+ "humantime",
+ "humantime-serde",
+ "pageserver",
+ "rand 0.8.5",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tokio-util",
+ "tracing",
+ "utils",
+]
+
 [[package]]
 name = "pagectl"
 version = "0.1.0"
@@ -2914,8 +2955,6 @@ dependencies = [
 "git-version",
 "pageserver",
 "postgres_ffi",
- "serde",
- "serde_json",
 "svg_fmt",
 "tokio",
 "utils",
@@ -2990,10 +3029,13 @@ dependencies = [
 "tokio",
 "tokio-io-timeout",
 "tokio-postgres",
+ "tokio-stream",
 "tokio-tar",
 "tokio-util",
 "toml_edit",
 "tracing",
+ "tracing-chrome",
+ "tracing-subscriber",
 "url",
 "utils",
 "walkdir",
@@ -3017,7 +3059,6 @@ dependencies = [
 "serde_with",
 "strum",
 "strum_macros",
- "thiserror",
 "utils",
 "workspace_hack",
 ]
@@ -3402,7 +3443,7 @@ version = "0.14.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "byteorder",
 "hex",
 "lazy_static",
@@ -3512,7 +3553,6 @@ dependencies = [
 "pbkdf2",
 "pin-project-lite",
 "postgres-native-tls",
- "postgres-protocol",
 "postgres_backend",
 "pq_proto",
 "prometheus",
@@ -3684,7 +3724,7 @@ version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 ]

 [[package]]
@@ -3693,7 +3733,7 @@ version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 ]

 [[package]]
@@ -3987,7 +4027,7 @@ version = "0.36.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "errno",
 "io-lifetimes",
 "libc",
@@ -4001,7 +4041,7 @@ version = "0.37.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d4eb579851244c2c03e7c24f501c3432bed80b8f720af1d6e5b0e0f01555a035"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "errno",
 "io-lifetimes",
 "libc",
@@ -4214,7 +4254,7 @@ version = "2.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags",
 "core-foundation",
 "core-foundation-sys",
 "libc",
@@ -5231,6 +5271,17 @@ dependencies = [
 "syn 2.0.28",
 ]

+[[package]]
+name = "tracing-chrome"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "496b3cd5447f7ff527bbbf19b071ad542a000adf297d4127078b4dfdb931f41a"
+dependencies = [
+ "serde_json",
+ "tracing-core",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "tracing-core"
 version = "0.1.31"
@@ -5303,6 +5354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
 dependencies = [
 "matchers",
+ "nu-ansi-term",
 "once_cell",
 "regex",
 "serde",
@@ -5517,6 +5569,7 @@ dependencies = [
 "tokio-stream",
 "tokio-util",
 "tracing",
+ "tracing-chrome",
 "tracing-error",
 "tracing-subscriber",
 "url",
@@ -6044,9 +6097,6 @@ dependencies = [
 "tungstenite",
 "url",
 "uuid",
- "zstd",
- "zstd-safe",
- "zstd-sys",
 ]

 [[package]]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ members = [
    "control_plane",
    "pageserver",
    "pageserver/ctl",
+    "pageserver/pagebench",
    "proxy",
    "safekeeper",
    "storage_broker",
@@ -37,7 +38,7 @@ license = "Apache-2.0"
 [workspace.dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
-async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
+async-compression = { version = "0.4.0", features = ["tokio", "gzip"] }
 azure_core = "0.16"
 azure_identity = "0.16"
 azure_storage = "0.16"
@@ -79,6 +80,7 @@ futures-util = "0.3"
 git-version = "0.3"
 hashbrown = "0.13"
 hashlink = "0.8.1"
+hdrhistogram = "7.5.2"
 hex = "0.4"
 hex-literal = "0.4"
 hmac = "0.12.1"
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -714,24 +714,6 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -
    cargo pgrx install --release && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control

-#########################################################################################
-#
-# Layer "wal2json-build"
-# Compile "wal2json" extension
-#
-#########################################################################################
-
-FROM build-deps AS wal2json-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
-    echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
-    mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/wal2json.control
-
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -768,7 +750,6 @@ COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -38,4 +38,3 @@ toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
 vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
 zstd = "0.12.4"
-bytes = "1.0"
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -31,7 +31,7 @@
 //!             -C 'postgresql://cloud_admin@localhost/postgres' \
 //!             -S /var/db/postgres/specs/current.json \
 //!             -b /usr/local/bin/postgres \
-//!             -r http://pg-ext-s3-gateway
+//!             -r '{"bucket": "neon-dev-extensions-eu-central-1", "region": "eu-central-1"}'
 //! ```
 //!
 use std::collections::HashMap;
@@ -51,7 +51,7 @@ use compute_api::responses::ComputeStatus;

 use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
 use compute_tools::configurator::launch_configurator;
-use compute_tools::extension_server::get_pg_version;
+use compute_tools::extension_server::{get_pg_version, init_remote_storage};
 use compute_tools::http::api::launch_http_server;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
@@ -60,7 +60,7 @@ use compute_tools::spec::*;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
-const BUILD_TAG_DEFAULT: &str = "latest";
+const BUILD_TAG_DEFAULT: &str = "5670669815";

 fn main() -> Result<()> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
@@ -74,18 +74,10 @@ fn main() -> Result<()> {
    let pgbin_default = String::from("postgres");
    let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);

-    let ext_remote_storage = matches
-        .get_one::<String>("remote-ext-config")
-        // Compatibility hack: if the control plane specified any remote-ext-config
-        // use the default value for extension storage proxy gateway.
-        // Remove this once the control plane is updated to pass the gateway URL
-        .map(|conf| {
-            if conf.starts_with("http") {
-                conf.trim_end_matches('/')
-            } else {
-                "http://pg-ext-s3-gateway"
-            }
-        });
+    let remote_ext_config = matches.get_one::<String>("remote-ext-config");
+    let ext_remote_storage = remote_ext_config.map(|x| {
+        init_remote_storage(x).expect("cannot initialize remote extension storage from config")
+    });

    let http_port = *matches
        .get_one::<u16>("http-port")
@@ -206,7 +198,7 @@ fn main() -> Result<()> {
        live_config_allowed,
        state: Mutex::new(new_state),
        state_changed: Condvar::new(),
-        ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
+        ext_remote_storage,
        ext_download_progress: RwLock::new(HashMap::new()),
        build_tag,
    };
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -25,7 +25,7 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus};
 use compute_api::spec::{ComputeMode, ComputeSpec};
 use utils::measured_stream::MeasuredReader;

-use remote_storage::{DownloadError, RemotePath};
+use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};

 use crate::checker::create_availability_check_data;
 use crate::pg_helpers::*;
@@ -59,8 +59,8 @@ pub struct ComputeNode {
    pub state: Mutex<ComputeState>,
    /// `Condvar` to allow notifying waiters about state changes.
    pub state_changed: Condvar,
-    /// the address of extension storage proxy gateway
-    pub ext_remote_storage: Option<String>,
+    /// The S3 bucket that we search for extensions in
+    pub ext_remote_storage: Option<GenericRemoteStorage>,
    // key: ext_archive_name, value: started download time, download_completed?
    pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
    pub build_tag: String,
@@ -698,7 +698,6 @@ impl ComputeNode {
        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
        handle_grants(spec, &mut client, self.connstr.as_str())?;
        handle_extensions(spec, &mut client)?;
-        handle_extension_neon(&mut client)?;
        create_availability_check_data(&mut client)?;

        // 'Close' connection
@@ -743,7 +742,6 @@ impl ComputeNode {
            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
            handle_grants(&spec, &mut client, self.connstr.as_str())?;
            handle_extensions(&spec, &mut client)?;
-            handle_extension_neon(&mut client)?;
        }

        // 'Close' connection
@@ -957,12 +955,12 @@ LIMIT 100",
        real_ext_name: String,
        ext_path: RemotePath,
    ) -> Result<u64, DownloadError> {
-        let ext_remote_storage =
-            self.ext_remote_storage
-                .as_ref()
-                .ok_or(DownloadError::BadInput(anyhow::anyhow!(
-                    "Remote extensions storage is not configured",
-                )))?;
+        let remote_storage = self
+            .ext_remote_storage
+            .as_ref()
+            .ok_or(DownloadError::BadInput(anyhow::anyhow!(
+                "Remote extensions storage is not configured",
+            )))?;

        let ext_archive_name = ext_path.object_name().expect("bad path");

@@ -1018,7 +1016,7 @@ LIMIT 100",
        let download_size = extension_server::download_extension(
            &real_ext_name,
            &ext_path,
-            ext_remote_storage,
+            remote_storage,
            &self.pgbin,
        )
        .await
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -71,16 +71,18 @@ More specifically, here is an example ext_index.json
    }
 }
 */
+use anyhow::Context;
 use anyhow::{self, Result};
-use anyhow::{bail, Context};
-use bytes::Bytes;
 use compute_api::spec::RemoteExtSpec;
 use regex::Regex;
 use remote_storage::*;
-use reqwest::StatusCode;
+use serde_json;
+use std::io::Read;
+use std::num::NonZeroUsize;
 use std::path::Path;
 use std::str;
 use tar::Archive;
+use tokio::io::AsyncReadExt;
 use tracing::info;
 use tracing::log::warn;
 use zstd::stream::read::Decoder;
@@ -136,31 +138,23 @@ fn parse_pg_version(human_version: &str) -> &str {
 pub async fn download_extension(
    ext_name: &str,
    ext_path: &RemotePath,
-    ext_remote_storage: &str,
+    remote_storage: &GenericRemoteStorage,
    pgbin: &str,
 ) -> Result<u64> {
    info!("Download extension {:?} from {:?}", ext_name, ext_path);
-
-    // TODO add retry logic
-    let download_buffer =
-        match download_extension_tar(ext_remote_storage, &ext_path.to_string()).await {
-            Ok(buffer) => buffer,
-            Err(error_message) => {
-                return Err(anyhow::anyhow!(
-                    "error downloading extension {:?}: {:?}",
-                    ext_name,
-                    error_message
-                ));
-            }
-        };
-
+    let mut download = remote_storage.download(ext_path).await?;
+    let mut download_buffer = Vec::new();
+    download
+        .download_stream
+        .read_to_end(&mut download_buffer)
+        .await?;
    let download_size = download_buffer.len() as u64;
-    info!("Download size {:?}", download_size);
    // it's unclear whether it is more performant to decompress into memory or not
    // TODO: decompressing into memory can be avoided
-    let decoder = Decoder::new(download_buffer.as_ref())?;
-    let mut archive = Archive::new(decoder);
-
+    let mut decoder = Decoder::new(download_buffer.as_slice())?;
+    let mut decompress_buffer = Vec::new();
+    decoder.read_to_end(&mut decompress_buffer)?;
+    let mut archive = Archive::new(decompress_buffer.as_slice());
    let unzip_dest = pgbin
        .strip_suffix("/bin/postgres")
        .expect("bad pgbin")
@@ -228,32 +222,29 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
    }
 }

-// Do request to extension storage proxy, i.e.
-// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
-// using HHTP GET
-// and return the response body as bytes
-//
-async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
-    let uri = format!("{}/{}", ext_remote_storage, ext_path);
-
-    info!("Download extension {:?} from uri {:?}", ext_path, uri);
-
-    let resp = reqwest::get(uri).await?;
-
-    match resp.status() {
-        StatusCode::OK => match resp.bytes().await {
-            Ok(resp) => {
-                info!("Download extension {:?} completed successfully", ext_path);
-                Ok(resp)
-            }
-            Err(e) => bail!("could not deserialize remote extension response: {}", e),
-        },
-        StatusCode::SERVICE_UNAVAILABLE => bail!("remote extension is temporarily unavailable"),
-        _ => bail!(
-            "unexpected remote extension response status code: {}",
-            resp.status()
-        ),
+// Parse the JSON `remote_ext_config` (bucket, region, optional endpoint/prefix) and build an S3-backed `GenericRemoteStorage`.
+pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result<GenericRemoteStorage> {
+    #[derive(Debug, serde::Deserialize)]
+    struct RemoteExtJson {
+        bucket: String,
+        region: String,
+        endpoint: Option<String>,
+        prefix: Option<String>,
    }
+    let remote_ext_json = serde_json::from_str::<RemoteExtJson>(remote_ext_config)?;
+
+    let config = S3Config {
+        bucket_name: remote_ext_json.bucket,
+        bucket_region: remote_ext_json.region,
+        prefix_in_bucket: remote_ext_json.prefix,
+        endpoint: remote_ext_json.endpoint,
+        concurrency_limit: NonZeroUsize::new(100).expect("100 != 0"),
+        max_keys_per_list_response: None,
+    };
+    let config = RemoteStorageConfig {
+        storage: RemoteStorageKind::AwsS3(config),
+    };
+    GenericRemoteStorage::from_config(&config)
 }

 #[cfg(test)]
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -123,7 +123,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

-        // download extension files from remote extension storage on demand
+        // download extension files from S3 on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
            info!("req.uri {:?}", req.uri());
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -674,33 +674,3 @@ pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()>

    Ok(())
 }
-
-/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database
-#[instrument(skip_all)]
-pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
-    info!("handle extension neon");
-
-    let mut query = "CREATE SCHEMA IF NOT EXISTS neon";
-    client.simple_query(query)?;
-
-    query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon";
-    info!("create neon extension with query: {}", query);
-    client.simple_query(query)?;
-
-    query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'";
-    client.simple_query(query)?;
-
-    query = "ALTER EXTENSION neon SET SCHEMA neon";
-    info!("alter neon extension schema with query: {}", query);
-    client.simple_query(query)?;
-
-    // this will be a no-op if extension is already up to date,
-    // which may happen in two cases:
-    // - extension was just installed
-    // - extension was already installed and is up to date
-    let query = "ALTER EXTENSION neon UPDATE";
-    info!("update neon extension schema with query: {}", query);
-    client.simple_query(query)?;
-
-    Ok(())
-}
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -86,7 +86,10 @@ where
        .stdout(process_log_file)
        .stderr(same_file_for_stderr)
        .args(args);
-    let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
+
+    let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(
+        fill_rust_env_vars(background_command),
+    ));
    filled_cmd.envs(envs);

    let pid_file_to_check = match initial_pid_file {
@@ -253,6 +256,15 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
    cmd
 }

+fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
+    for (var, val) in std::env::vars() {
+        if var.starts_with("NEON_") {
+            cmd = cmd.env(var, val);
+        }
+    }
+    cmd
+}
+
 /// Add a `pre_exec` to the cmd that, inbetween fork() and exec(),
 /// 1. Claims a pidfile with a fcntl lock on it and
 /// 2. Sets up the pidfile's file descriptor so that it (and the lock)
--- a/control_plane/src/bin/attachment_service.rs
+++ b/control_plane/src/bin/attachment_service.rs
@@ -283,7 +283,7 @@ fn make_router(persistent_state: PersistentState) -> RouterBuilder<hyper::Body,

 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
-    logging::init(
+    let _guard = logging::init(
        LogFormat::Plain,
        logging::TracingErrorLayerEnablement::Disabled,
        logging::Output::Stdout,
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -487,15 +487,8 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

-            let new_timeline_id_opt = parse_timeline_id(create_match)?;
-
-            let timeline_info = pageserver.timeline_create(
-                tenant_id,
-                new_timeline_id_opt,
-                None,
-                None,
-                Some(pg_version),
-            )?;
+            let timeline_info =
+                pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
            let new_timeline_id = timeline_info.timeline_id;

            let last_record_lsn = timeline_info.last_record_lsn;
@@ -1252,7 +1245,7 @@ fn cli() -> Command {
    let remote_ext_config_args = Arg::new("remote-ext-config")
        .long("remote-ext-config")
        .num_args(1)
-        .help("Configure the remote extensions storage proxy gateway to request for extensions.")
+        .help("Configure the S3 bucket that we search for extensions in.")
        .required(false);

    let lsn_arg = Arg::new("lsn")
@@ -1315,7 +1308,6 @@ fn cli() -> Command {
            .subcommand(Command::new("create")
                .about("Create a new blank timeline")
                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
                .arg(branch_name_arg.clone())
                .arg(pg_version_arg.clone())
            )
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -45,7 +45,6 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{anyhow, bail, Context, Result};
-use compute_api::spec::RemoteExtSpec;
 use serde::{Deserialize, Serialize};
 use utils::id::{NodeId, TenantId, TimelineId};

@@ -477,18 +476,6 @@ impl Endpoint {
            }
        }

-        // check for file remote_extensions_spec.json
-        // if it is present, read it and pass to compute_ctl
-        let remote_extensions_spec_path = self.endpoint_path().join("remote_extensions_spec.json");
-        let remote_extensions_spec = std::fs::File::open(remote_extensions_spec_path);
-        let remote_extensions: Option<RemoteExtSpec>;
-
-        if let Ok(spec_file) = remote_extensions_spec {
-            remote_extensions = serde_json::from_reader(spec_file).ok();
-        } else {
-            remote_extensions = None;
-        };
-
        // Create spec file
        let spec = ComputeSpec {
            skip_pg_catalog_updates: self.skip_pg_catalog_updates,
@@ -510,7 +497,7 @@ impl Endpoint {
            pageserver_connstring: Some(pageserver_connstring),
            safekeeper_connstrings,
            storage_auth_token: auth_token.clone(),
-            remote_extensions,
+            remote_extensions: None,
        };
        let spec_path = self.endpoint_path().join("spec.json");
        std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
--- a/control_plane/src/tenant_migration.rs
+++ b/control_plane/src/tenant_migration.rs
@@ -14,6 +14,7 @@ use pageserver_api::models::{
 use std::collections::HashMap;
 use std::time::Duration;
 use utils::{
+    generation::Generation,
    id::{TenantId, TimelineId},
    lsn::Lsn,
 };
@@ -92,22 +93,6 @@ pub fn migrate_tenant(
    // Get a new generation
    let attachment_service = AttachmentService::from_env(env);

-    fn build_location_config(
-        mode: LocationConfigMode,
-        generation: Option<u32>,
-        secondary_conf: Option<LocationConfigSecondary>,
-    ) -> LocationConfig {
-        LocationConfig {
-            mode,
-            generation,
-            secondary_conf,
-            tenant_conf: TenantConfig::default(),
-            shard_number: 0,
-            shard_count: 0,
-            shard_stripe_size: 0,
-        }
-    }
-
    let previous = attachment_service.inspect(tenant_id)?;
    let mut baseline_lsns = None;
    if let Some((generation, origin_ps_id)) = &previous {
@@ -116,7 +101,12 @@ pub fn migrate_tenant(
        if origin_ps_id == &dest_ps.conf.id {
            println!("🔁 Already attached to {origin_ps_id}, freshening...");
            let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?;
-            let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
+            let dest_conf = LocationConfig {
+                mode: LocationConfigMode::AttachedSingle,
+                generation: gen.map(Generation::new),
+                secondary_conf: None,
+                tenant_conf: TenantConfig::default(),
+            };
            dest_ps.location_config(tenant_id, dest_conf)?;
            println!("✅ Migration complete");
            return Ok(());
@@ -124,15 +114,24 @@ pub fn migrate_tenant(

        println!("🔁 Switching origin pageserver {origin_ps_id} to stale mode");

-        let stale_conf =
-            build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None);
+        let stale_conf = LocationConfig {
+            mode: LocationConfigMode::AttachedStale,
+            generation: Some(Generation::new(*generation)),
+            secondary_conf: None,
+            tenant_conf: TenantConfig::default(),
+        };
        origin_ps.location_config(tenant_id, stale_conf)?;

        baseline_lsns = Some(get_lsns(tenant_id, &origin_ps)?);
    }

    let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?;
-    let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None);
+    let dest_conf = LocationConfig {
+        mode: LocationConfigMode::AttachedMulti,
+        generation: gen.map(Generation::new),
+        secondary_conf: None,
+        tenant_conf: TenantConfig::default(),
+    };

    println!("🔁 Attaching to pageserver {}", dest_ps.conf.id);
    dest_ps.location_config(tenant_id, dest_conf)?;
@@ -171,11 +170,12 @@ pub fn migrate_tenant(
        }

        // Downgrade to a secondary location
-        let secondary_conf = build_location_config(
-            LocationConfigMode::Secondary,
-            None,
-            Some(LocationConfigSecondary { warm: true }),
-        );
+        let secondary_conf = LocationConfig {
+            mode: LocationConfigMode::Secondary,
+            generation: None,
+            secondary_conf: Some(LocationConfigSecondary { warm: true }),
+            tenant_conf: TenantConfig::default(),
+        };

        println!(
            "💤 Switching to secondary mode on pageserver {}",
@@ -188,7 +188,12 @@ pub fn migrate_tenant(
        "🔁 Switching to AttachedSingle mode on pageserver {}",
        dest_ps.conf.id
    );
-    let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
+    let dest_conf = LocationConfig {
+        mode: LocationConfigMode::AttachedSingle,
+        generation: gen.map(Generation::new),
+        secondary_conf: None,
+        tenant_conf: TenantConfig::default(),
+    };
    dest_ps.location_config(tenant_id, dest_conf)?;

    println!("✅ Migration complete");
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -18,7 +18,6 @@ enum-map.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 hex.workspace = true
-thiserror.workspace = true

 workspace_hack.workspace = true

--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -140,3 +140,35 @@ impl Key {
        })
    }
 }
+
+impl std::str::FromStr for Key {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        Self::from_hex(s)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::str::FromStr;
+
+    use crate::key::Key;
+
+    #[test]
+    fn display_fromstr_bijection() {
+        let mut rng = rand::thread_rng();
+        use rand::Rng;
+
+        let key = Key {
+            field1: rng.gen(),
+            field2: rng.gen(),
+            field3: rng.gen(),
+            field4: rng.gen(),
+            field5: rng.gen(),
+            field6: rng.gen(),
+        };
+
+        assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
+    }
+}
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -10,6 +10,7 @@ use serde_with::serde_as;
 use strum_macros;
 use utils::{
    completion,
+    generation::Generation,
    history_buffer::HistoryBufferWithDropCounter,
    id::{NodeId, TenantId, TimelineId},
    lsn::Lsn,
@@ -17,7 +18,7 @@ use utils::{

 use crate::{reltag::RelTag, shard::TenantShardId};
 use anyhow::bail;
-use bytes::{BufMut, Bytes, BytesMut};
+use bytes::{Buf, BufMut, Bytes, BytesMut};

 /// The state of a tenant in this pageserver.
 ///
@@ -261,19 +262,10 @@ pub struct LocationConfig {
    pub mode: LocationConfigMode,
    /// If attaching, in what generation?
    #[serde(default)]
-    pub generation: Option<u32>,
+    pub generation: Option<Generation>,
    #[serde(default)]
    pub secondary_conf: Option<LocationConfigSecondary>,

-    // Shard parameters: if shard_count is nonzero, then other shard_* fields
-    // must be set accurately.
-    #[serde(default)]
-    pub shard_number: u8,
-    #[serde(default)]
-    pub shard_count: u8,
-    #[serde(default)]
-    pub shard_stripe_size: u32,
-
    // If requesting mode `Secondary`, configuration for that.
    // Custom storage configuration for the tenant, if any
    pub tenant_conf: TenantConfig,
@@ -379,6 +371,8 @@ pub struct TenantInfo {
    /// If a layer is present in both local FS and S3, it counts only once.
    pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
    pub attachment_status: TenantAttachmentStatus,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub generation: Option<u32>,
 }

 /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
@@ -523,6 +517,8 @@ pub enum HistoricLayerInfo {
        lsn_end: Lsn,
        remote: bool,
        access_stats: LayerAccessStats,
+
+        remote_path: Option<String>,
    },
    Image {
        layer_file_name: String,
@@ -531,6 +527,8 @@ pub enum HistoricLayerInfo {
        lsn_start: Lsn,
        remote: bool,
        access_stats: LayerAccessStats,
+
+        remote_path: Option<String>,
    },
 }

@@ -775,6 +773,48 @@ impl PagestreamBeMessage {

        bytes.into()
    }
+
+    /// Decodes one backend response message from `buf`.
+    ///
+    /// The first byte of `buf` is the message tag and the rest is the payload.
+    /// Only tag 102 (`GetPage`) and tag 103 (`Error`) are decoded so far.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the tag byte cannot be read, if a `GetPage` payload is not
+    /// exactly 8192 bytes long, if an `Error` payload is not a NUL-terminated
+    /// UTF-8 string, or if the tag is unknown or not implemented yet (100, 101, 104).
+    pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
+        let mut buf = buf.reader();
+        let msg_tag = buf.read_u8()?;
+        match msg_tag {
+            // Known tags without a decoder yet: return an error instead of
+            // panicking so callers can handle it like any other bad message.
+            100 | 101 | 104 => bail!("deserialization not implemented for tag: {:?}", msg_tag),
+            102 => {
+                // Reading the tag advanced the underlying `Bytes`, so what is
+                // left is the payload only.
+                let buf = buf.get_ref();
+                /* TODO use constant */
+                if buf.len() == 8192 {
+                    Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
+                        page: buf.clone(),
+                    }))
+                } else {
+                    anyhow::bail!("invalid page size: {}", buf.len());
+                }
+            }
+            103 => {
+                let buf = buf.get_ref();
+                let cstr = std::ffi::CStr::from_bytes_until_nul(buf)?;
+                let rust_str = cstr.to_str()?;
+                Ok(PagestreamBeMessage::Error(PagestreamErrorResponse {
+                    message: rust_str.to_owned(),
+                }))
+            }
+            _ => bail!("unknown tag: {:?}", msg_tag),
+        }
+    }
 }

 #[cfg(test)]
@@ -840,6 +868,7 @@ mod tests {
            state: TenantState::Active,
            current_physical_size: Some(42),
            attachment_status: TenantAttachmentStatus::Attached,
+            generation: None,
        };
        let expected_active = json!({
            "id": original_active.id.to_string(),
@@ -860,6 +889,7 @@ mod tests {
            },
            current_physical_size: Some(42),
            attachment_status: TenantAttachmentStatus::Attached,
+            generation: None,
        };
        let expected_broken = json!({
            "id": original_broken.id.to_string(),
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -2,7 +2,6 @@ use std::{ops::RangeInclusive, str::FromStr};

 use hex::FromHex;
 use serde::{Deserialize, Serialize};
-use thiserror;
 use utils::id::TenantId;

 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)]
@@ -140,89 +139,6 @@ impl From<[u8; 18]> for TenantShardId {
    }
 }

-/// For use within the context of a particular tenant, when we need to know which
-/// shard we're dealing with, but do not need to know the full ShardIdentity (because
-/// we won't be doing any page->shard mapping), and do not need to know the fully qualified
-/// TenantShardId.
-#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)]
-pub struct ShardIndex {
-    pub shard_number: ShardNumber,
-    pub shard_count: ShardCount,
-}
-
-impl ShardIndex {
-    pub fn new(number: ShardNumber, count: ShardCount) -> Self {
-        Self {
-            shard_number: number,
-            shard_count: count,
-        }
-    }
-    pub fn unsharded() -> Self {
-        Self {
-            shard_number: ShardNumber(0),
-            shard_count: ShardCount(0),
-        }
-    }
-
-    pub fn is_unsharded(&self) -> bool {
-        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
-    }
-
-    /// For use in constructing remote storage paths: concatenate this with a TenantId
-    /// to get a fully qualified TenantShardId.
-    ///
-    /// Backward compat: this function returns an empty string if Self::is_unsharded, such
-    /// that the legacy pre-sharding remote key format is preserved.
-    pub fn get_suffix(&self) -> String {
-        if self.is_unsharded() {
-            "".to_string()
-        } else {
-            format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
-        }
-    }
-}
-
-impl std::fmt::Display for ShardIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
-    }
-}
-
-impl std::fmt::Debug for ShardIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        // Debug is the same as Display: the compact hex representation
-        write!(f, "{}", self)
-    }
-}
-
-impl std::str::FromStr for ShardIndex {
-    type Err = hex::FromHexError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        // Expect format: 1 byte shard number, 1 byte shard count
-        if s.len() == 4 {
-            let bytes = s.as_bytes();
-            let mut shard_parts: [u8; 2] = [0u8; 2];
-            hex::decode_to_slice(bytes, &mut shard_parts)?;
-            Ok(Self {
-                shard_number: ShardNumber(shard_parts[0]),
-                shard_count: ShardCount(shard_parts[1]),
-            })
-        } else {
-            Err(hex::FromHexError::InvalidStringLength)
-        }
-    }
-}
-
-impl From<[u8; 2]> for ShardIndex {
-    fn from(b: [u8; 2]) -> Self {
-        Self {
-            shard_number: ShardNumber(b[0]),
-            shard_count: ShardCount(b[1]),
-        }
-    }
-}
-
 impl Serialize for TenantShardId {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
@@ -293,151 +209,6 @@ impl<'de> Deserialize<'de> for TenantShardId {
    }
 }

-/// Stripe size in number of pages
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardStripeSize(pub u32);
-
-/// Layout version: for future upgrades where we might change how the key->shard mapping works
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardLayout(u8);
-
-const LAYOUT_V1: ShardLayout = ShardLayout(1);
-
-/// Default stripe size in pages: 256MiB divided by 8kiB page size.
-const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);
-
-/// The ShardIdentity contains the information needed for one member of map
-/// to resolve a key to a shard, and then check whether that shard is ==self.
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardIdentity {
-    pub layout: ShardLayout,
-    pub number: ShardNumber,
-    pub count: ShardCount,
-    pub stripe_size: ShardStripeSize,
-}
-
-#[derive(thiserror::Error, Debug, PartialEq, Eq)]
-pub enum ShardConfigError {
-    #[error("Invalid shard count")]
-    InvalidCount,
-    #[error("Invalid shard number")]
-    InvalidNumber,
-    #[error("Invalid stripe size")]
-    InvalidStripeSize,
-}
-
-impl ShardIdentity {
-    /// An identity with number=0 count=0 is a "none" identity, which represents legacy
-    /// tenants.  Modern single-shard tenants should not use this: they should
-    /// have number=0 count=1.
-    pub fn unsharded() -> Self {
-        Self {
-            number: ShardNumber(0),
-            count: ShardCount(0),
-            layout: LAYOUT_V1,
-            stripe_size: DEFAULT_STRIPE_SIZE,
-        }
-    }
-
-    pub fn is_unsharded(&self) -> bool {
-        self.number == ShardNumber(0) && self.count == ShardCount(0)
-    }
-
-    /// Count must be nonzero, and number must be < count. To construct
-    /// the legacy case (count==0), use Self::unsharded instead.
-    pub fn new(
-        number: ShardNumber,
-        count: ShardCount,
-        stripe_size: ShardStripeSize,
-    ) -> Result<Self, ShardConfigError> {
-        if count.0 == 0 {
-            Err(ShardConfigError::InvalidCount)
-        } else if number.0 > count.0 - 1 {
-            Err(ShardConfigError::InvalidNumber)
-        } else if stripe_size.0 == 0 {
-            Err(ShardConfigError::InvalidStripeSize)
-        } else {
-            Ok(Self {
-                number,
-                count,
-                layout: LAYOUT_V1,
-                stripe_size,
-            })
-        }
-    }
-}
-
-impl Serialize for ShardIndex {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        if serializer.is_human_readable() {
-            serializer.collect_str(self)
-        } else {
-            // Binary encoding is not used in index_part.json, but is included in anticipation of
-            // switching various structures (e.g. inter-process communication, remote metadata) to more
-            // compact binary encodings in future.
-            let mut packed: [u8; 2] = [0; 2];
-            packed[0] = self.shard_number.0;
-            packed[1] = self.shard_count.0;
-            packed.serialize(serializer)
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for ShardIndex {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        struct IdVisitor {
-            is_human_readable_deserializer: bool,
-        }
-
-        impl<'de> serde::de::Visitor<'de> for IdVisitor {
-            type Value = ShardIndex;
-
-            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
-                if self.is_human_readable_deserializer {
-                    formatter.write_str("value in form of hex string")
-                } else {
-                    formatter.write_str("value in form of integer array([u8; 2])")
-                }
-            }
-
-            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
-            where
-                A: serde::de::SeqAccess<'de>,
-            {
-                let s = serde::de::value::SeqAccessDeserializer::new(seq);
-                let id: [u8; 2] = Deserialize::deserialize(s)?;
-                Ok(ShardIndex::from(id))
-            }
-
-            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
-            where
-                E: serde::de::Error,
-            {
-                ShardIndex::from_str(v).map_err(E::custom)
-            }
-        }
-
-        if deserializer.is_human_readable() {
-            deserializer.deserialize_str(IdVisitor {
-                is_human_readable_deserializer: true,
-            })
-        } else {
-            deserializer.deserialize_tuple(
-                2,
-                IdVisitor {
-                    is_human_readable_deserializer: false,
-                },
-            )
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use std::str::FromStr;
@@ -547,66 +318,4 @@ mod tests {

        Ok(())
    }
-
-    #[test]
-    fn shard_identity_validation() -> Result<(), ShardConfigError> {
-        // Happy cases
-        ShardIdentity::new(ShardNumber(0), ShardCount(1), DEFAULT_STRIPE_SIZE)?;
-        ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(1))?;
-        ShardIdentity::new(ShardNumber(254), ShardCount(255), ShardStripeSize(1))?;
-
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(0), ShardCount(0), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidCount)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(10), ShardCount(10), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidNumber)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(11), ShardCount(10), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidNumber)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(255), ShardCount(255), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidNumber)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(0)),
-            Err(ShardConfigError::InvalidStripeSize)
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn shard_index_human_encoding() -> Result<(), hex::FromHexError> {
-        let example = ShardIndex {
-            shard_number: ShardNumber(13),
-            shard_count: ShardCount(17),
-        };
-        let expected: String = "0d11".to_string();
-        let encoded = format!("{example}");
-        assert_eq!(&encoded, &expected);
-
-        let decoded = ShardIndex::from_str(&encoded)?;
-        assert_eq!(example, decoded);
-        Ok(())
-    }
-
-    #[test]
-    fn shard_index_binary_encoding() -> Result<(), hex::FromHexError> {
-        let example = ShardIndex {
-            shard_number: ShardNumber(13),
-            shard_count: ShardCount(17),
-        };
-        let expected: [u8; 2] = [0x0d, 0x11];
-
-        let encoded = bincode::serialize(&example).unwrap();
-        assert_eq!(Hex(&encoded), Hex(&expected));
-        let decoded = bincode::deserialize(&encoded).unwrap();
-        assert_eq!(example, decoded);
-
-        Ok(())
-    }
 }
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -81,6 +81,12 @@ impl std::fmt::Display for RemotePath {
    }
 }

+impl From<RemotePath> for String {
+    fn from(val: RemotePath) -> Self {
+        val.0.into()
+    }
+}
+
 impl RemotePath {
    pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {
        anyhow::ensure!(
@@ -102,7 +108,7 @@ impl RemotePath {
        self.0.file_name()
    }

-    pub fn join(&self, segment: &Utf8Path) -> Self {
+    pub fn join<P: AsRef<Utf8Path>>(&self, segment: P) -> Self {
        Self(self.0.join(segment))
    }

--- a/libs/remote_storage/tests/test_real_azure.rs
+++ b/libs/remote_storage/tests/test_real_azure.rs
@@ -278,7 +278,7 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res

 fn ensure_logging_ready() {
    LOGGING_DONE.get_or_init(|| {
-        utils::logging::init(
+        let _ = utils::logging::init(
            utils::logging::LogFormat::Test,
            utils::logging::TracingErrorLayerEnablement::Disabled,
            utils::logging::Output::Stdout,
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -207,7 +207,7 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>

 fn ensure_logging_ready() {
    LOGGING_DONE.get_or_init(|| {
-        utils::logging::init(
+        let _ = utils::logging::init(
            utils::logging::LogFormat::Test,
            utils::logging::TracingErrorLayerEnablement::Disabled,
            utils::logging::Output::Stdout,
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -49,6 +49,7 @@ const_format.workspace = true
 # to use tokio channels as streams, this is faster to compile than async_stream
 # why is it only here? no other crate should use it, streams are rarely needed.
 tokio-stream = { version = "0.1.14" }
+tracing-chrome = "0.7.1"

 [dev-dependencies]
 byteorder.workspace = true
--- a/libs/utils/scripts/restore_from_wal_initdb.sh
+++ b/libs/utils/scripts/restore_from_wal_initdb.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# like restore_from_wal.sh, but takes existing initdb.tar.zst
-
-set -euxo pipefail
-
-PG_BIN=$1
-WAL_PATH=$2
-DATA_DIR=$3
-PORT=$4
-echo "port=$PORT" >> "$DATA_DIR"/postgresql.conf
-echo "shared_preload_libraries='\$libdir/neon_rmgr.so'" >> "$DATA_DIR"/postgresql.conf
-REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
-declare -i WAL_SIZE=$REDO_POS+114
-"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
-"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
-cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
-cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
-for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
-dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
-rm -f 000000010000000000000001
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -73,11 +73,17 @@ pub enum Output {
    Stderr,
 }

+/// Guard returned by [`init`]. Keep it alive and drop it before the program terminates.
+#[must_use]
+pub struct FlushGuard {
+    _tracing_chrome_layer: Option<tracing_chrome::FlushGuard>, // `None` if tracing-chrome is off
+}
+
 pub fn init(
    log_format: LogFormat,
    tracing_error_layer_enablement: TracingErrorLayerEnablement,
    output: Output,
-) -> anyhow::Result<()> {
+) -> anyhow::Result<FlushGuard> {
    // We fall back to printing all spans at info-level or above if
    // the RUST_LOG environment variable is not set.
    let rust_log_env_filter = || {
@@ -85,11 +91,41 @@ pub fn init(
            .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"))
    };

+    // WIP: lift this switch up as an argument of `init`. Any value other than "0" enables it.
+    let enable_tracing_chrome = match std::env::var("NEON_PAGESERVER_ENABLE_TRACING_CHROME") {
+        Ok(s) if s != "0" => true,
+        Ok(_) | Err(std::env::VarError::NotPresent) => false,
+        Err(std::env::VarError::NotUnicode(_)) => {
+            // `init` returns a `Result`: report the bad value instead of panicking.
+            anyhow::bail!("env var NEON_PAGESERVER_ENABLE_TRACING_CHROME not unicode")
+        }
+    };
+
    // NB: the order of the with() calls does not matter.
    // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
    use tracing_subscriber::prelude::*;
-    let r = tracing_subscriber::registry();
-    let r = r.with({
+
+    // https://users.rust-lang.org/t/how-can-i-init-tracing-registry-dynamically-with-multiple-outputs/94307/6
+    #[derive(Default)]
+    struct LayerStack {
+        layers:
+            Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Sync + Send>>,
+    }
+    impl LayerStack {
+        fn add_layer<L>(&mut self, new_layer: L)
+        where
+            L: tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync,
+        {
+            let new = match self.layers.take() {
+                Some(layers) => Some(layers.and_then(new_layer).boxed()),
+                None => Some(new_layer.boxed()),
+            };
+            self.layers = new;
+        }
+    }
+    let mut layers = LayerStack::default();
+
+    layers.add_layer({
        let log_layer = tracing_subscriber::fmt::layer()
            .with_target(false)
            .with_ansi(false)
@@ -106,15 +142,31 @@ pub fn init(
        };
        log_layer.with_filter(rust_log_env_filter())
    });
-    let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
+
+    layers
+        .add_layer(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
+
+    let tracing_chrome_layer_flush_guard = if enable_tracing_chrome {
+        let (layer, guard) = tracing_chrome::ChromeLayerBuilder::new().build();
+        layers.add_layer(layer);
+        Some(guard)
+    } else {
+        None
+    };
+
    match tracing_error_layer_enablement {
-        TracingErrorLayerEnablement::EnableWithRustLogFilter => r
-            .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
-            .init(),
-        TracingErrorLayerEnablement::Disabled => r.init(),
+        TracingErrorLayerEnablement::EnableWithRustLogFilter => layers
+            .add_layer(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())),
+        TracingErrorLayerEnablement::Disabled => (),
    }

-    Ok(())
+    let r = tracing_subscriber::registry();
+    r.with(layers.layers.expect("we add at least one layer"))
+        .init();
+
+    Ok(FlushGuard {
+        _tracing_chrome_layer: tracing_chrome_layer_flush_guard,
+    })
 }

 /// Disable the default rust panic hook by using `set_hook`.
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -366,6 +366,47 @@ impl MonotonicCounter<Lsn> for RecordLsn {
    }
 }

+/// Implements [`rand::distributions::uniform::UniformSampler`] so we can sample [`Lsn`]s; wraps the `u64` sampler.
+pub struct LsnSampler(<u64 as rand::distributions::uniform::SampleUniform>::Sampler);
+
+impl rand::distributions::uniform::SampleUniform for Lsn {
+    type Sampler = LsnSampler;
+}
+
+impl rand::distributions::uniform::UniformSampler for LsnSampler {
+    type X = Lsn;
+
+    fn new<B1, B2>(low: B1, high: B2) -> Self
+    where
+        B1: rand::distributions::uniform::SampleBorrow<Self::X> + Sized,
+        B2: rand::distributions::uniform::SampleBorrow<Self::X> + Sized,
+    {
+        Self(
+            <u64 as rand::distributions::uniform::SampleUniform>::Sampler::new(
+                low.borrow().0,
+                high.borrow().0,
+            ),
+        )
+    }
+
+    fn new_inclusive<B1, B2>(low: B1, high: B2) -> Self
+    where
+        B1: rand::distributions::uniform::SampleBorrow<Self::X> + Sized,
+        B2: rand::distributions::uniform::SampleBorrow<Self::X> + Sized,
+    {
+        Self(
+            <u64 as rand::distributions::uniform::SampleUniform>::Sampler::new_inclusive(
+                low.borrow().0,
+                high.borrow().0,
+            ),
+        )
+    }
+
+    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
+        Lsn(self.0.sample(rng))
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use crate::bin_ser::BeSer;
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -82,6 +82,9 @@ enum-map.workspace = true
 enumset.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
+tokio-stream.workspace = true
+tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
+tracing-chrome = "0.7.1"

 [dev-dependencies]
 criterion.workspace = true
--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -18,5 +18,3 @@ tokio.workspace = true
 utils.workspace = true
 svg_fmt.workspace = true
 workspace_hack.workspace = true
-serde.workspace = true
-serde_json.workspace = true
--- a/pageserver/ctl/src/index_part.rs
+++ b/pageserver/ctl/src/index_part.rs
@@ -1,38 +0,0 @@
-use std::collections::HashMap;
-
-use anyhow::Context;
-use camino::Utf8PathBuf;
-use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
-use pageserver::tenant::storage_layer::LayerFileName;
-use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
-use utils::lsn::Lsn;
-
-#[derive(clap::Subcommand)]
-pub(crate) enum IndexPartCmd {
-    Dump { path: Utf8PathBuf },
-}
-
-pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
-    match cmd {
-        IndexPartCmd::Dump { path } => {
-            let bytes = tokio::fs::read(path).await.context("read file")?;
-            let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
-            #[derive(serde::Serialize)]
-            struct Output<'a> {
-                layer_metadata: &'a HashMap<LayerFileName, IndexLayerMetadata>,
-                disk_consistent_lsn: Lsn,
-                timeline_metadata: &'a TimelineMetadata,
-            }
-
-            let output = Output {
-                layer_metadata: &des.layer_metadata,
-                disk_consistent_lsn: des.get_disk_consistent_lsn(),
-                timeline_metadata: &des.metadata,
-            };
-
-            let output = serde_json::to_string_pretty(&output).context("serialize output")?;
-            println!("{output}");
-            Ok(())
-        }
-    }
-}
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -1,13 +1,15 @@
 use std::path::{Path, PathBuf};

 use anyhow::Result;
-use camino::Utf8Path;
+use camino::{Utf8Path, Utf8PathBuf};
 use clap::Subcommand;
 use pageserver::context::{DownloadBehavior, RequestContext};
 use pageserver::task_mgr::TaskKind;
 use pageserver::tenant::block_io::BlockCursor;
 use pageserver::tenant::disk_btree::DiskBtreeReader;
 use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
+use pageserver::tenant::storage_layer::{delta_layer, image_layer};
+use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
 use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
 use pageserver::{page_cache, virtual_file};
 use pageserver::{
@@ -20,6 +22,7 @@ use pageserver::{
 };
 use std::fs;
 use utils::bin_ser::BeSer;
+use utils::id::{TenantId, TimelineId};

 use crate::layer_map_analyzer::parse_filename;

@@ -45,6 +48,13 @@ pub(crate) enum LayerCmd {
        /// The id from list-layer command
        id: usize,
    },
+    RewriteSummary {
+        layer_file_path: Utf8PathBuf,
+        #[clap(long)]
+        new_tenant_id: Option<TenantId>,
+        #[clap(long)]
+        new_timeline_id: Option<TimelineId>,
+    },
 }

 async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
@@ -100,6 +110,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
                    println!("- timeline {}", timeline.file_name().to_string_lossy());
                }
            }
+            Ok(())
        }
        LayerCmd::ListLayer {
            path,
@@ -128,6 +139,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
                    idx += 1;
                }
            }
+            Ok(())
        }
        LayerCmd::DumpLayer {
            path,
@@ -168,7 +180,63 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
                    idx += 1;
                }
            }
+            Ok(())
+        }
+        LayerCmd::RewriteSummary {
+            layer_file_path,
+            new_tenant_id,
+            new_timeline_id,
+        } => {
+            pageserver::virtual_file::init(10);
+            pageserver::page_cache::init(100);
+
+            let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
+
+            macro_rules! rewrite_closure {
+                ($($summary_ty:tt)*) => {{
+                    |summary| $($summary_ty)* {
+                        tenant_id: new_tenant_id.unwrap_or(summary.tenant_id),
+                        timeline_id: new_timeline_id.unwrap_or(summary.timeline_id),
+                        ..summary
+                    }
+                }};
+            }
+
+            let res = ImageLayer::rewrite_summary(
+                layer_file_path,
+                rewrite_closure!(image_layer::Summary),
+                &ctx,
+            )
+            .await;
+            match res {
+                Ok(()) => {
+                    println!("Successfully rewrote summary of image layer {layer_file_path}");
+                    return Ok(());
+                }
+                Err(image_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough
+                Err(image_layer::RewriteSummaryError::Other(e)) => {
+                    return Err(e);
+                }
+            }
+
+            let res = DeltaLayer::rewrite_summary(
+                layer_file_path,
+                rewrite_closure!(delta_layer::Summary),
+                &ctx,
+            )
+            .await;
+            match res {
+                Ok(()) => {
+                    println!("Successfully rewrote summary of delta layer {layer_file_path}");
+                    return Ok(());
+                }
+                Err(delta_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough
+                Err(delta_layer::RewriteSummaryError::Other(e)) => {
+                    return Err(e);
+                }
+            }
+
+            anyhow::bail!("not an image or delta layer: {layer_file_path}");
        }
    }
-    Ok(())
 }
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -5,13 +5,11 @@
 //! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.

 mod draw_timeline_dir;
-mod index_part;
 mod layer_map_analyzer;
 mod layers;

 use camino::{Utf8Path, Utf8PathBuf};
 use clap::{Parser, Subcommand};
-use index_part::IndexPartCmd;
 use layers::LayerCmd;
 use pageserver::{
    context::{DownloadBehavior, RequestContext},
@@ -40,8 +38,6 @@ struct CliOpts {
 #[derive(Subcommand)]
 enum Commands {
    Metadata(MetadataCmd),
-    #[command(subcommand)]
-    IndexPart(IndexPartCmd),
    PrintLayerFile(PrintLayerFileCmd),
    DrawTimeline {},
    AnalyzeLayerMap(AnalyzeLayerMapCmd),
@@ -87,9 +83,6 @@ async fn main() -> anyhow::Result<()> {
        Commands::Metadata(cmd) => {
            handle_metadata(&cmd)?;
        }
-        Commands::IndexPart(cmd) => {
-            index_part::main(&cmd).await?;
-        }
        Commands::DrawTimeline {} => {
            draw_timeline_dir::main()?;
        }
--- a/pageserver/test_data/short_v14_redo.page
+++ b/pageserver/test_data/short_v14_redo.page
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "pagebench"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow.workspace = true
+clap.workspace = true
+futures.workspace = true
+hdrhistogram.workspace = true
+humantime.workspace = true
+humantime-serde.workspace = true
+rand.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+tracing.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+
+pageserver = { path = ".." }
+utils = { path = "../../libs/utils/" }
--- a/pageserver/pagebench/src/basebackup.rs
+++ b/pageserver/pagebench/src/basebackup.rs
@@ -0,0 +1,402 @@
+use anyhow::Context;
+use pageserver::client::page_service::BasebackupRequest;
+use utils::lsn::Lsn;
+
+use rand::prelude::*;
+use tokio::sync::Barrier;
+use tokio::task::JoinSet;
+use tracing::{debug, info, instrument};
+use utils::id::TenantId;
+use utils::logging;
+
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::num::NonZeroUsize;
+use std::ops::Range;
+use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::{Duration, Instant};
+
+use crate::util::tenant_timeline_id::TenantTimelineId;
+
+/// Basebackup at the last record LSN of a randomly chosen target timeline.
+#[derive(clap::Parser)]
+pub(crate) struct Args {
+    #[clap(long, default_value = "http://localhost:9898")]
+    mgmt_api_endpoint: String,
+    #[clap(long, default_value = "localhost:64000")]
+    page_service_host_port: String,
+    #[clap(long)]
+    pageserver_jwt: Option<String>,
+    #[clap(long, default_value = "1")]
+    num_clients: NonZeroUsize,
+    #[clap(long, default_value = "1.0")]
+    gzip_probability: f64,
+    #[clap(long)]
+    runtime: Option<humantime::Duration>,
+    targets: Option<Vec<TenantTimelineId>>,
+}
+
+#[derive(Debug, Default)]
+struct LiveStats {
+    completed_requests: AtomicU64,
+}
+
+impl LiveStats {
+    fn inc(&self) {
+        self.completed_requests.fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+#[derive(serde::Serialize)]
+struct Output {
+    total: PerTaskOutput,
+}
+
+const LATENCY_PERCENTILES: [f64; 4] = [95.0, 99.00, 99.90, 99.99];
+
+struct LatencyPercentiles {
+    latency_percentiles: [Duration; 4],
+}
+
+impl serde::Serialize for LatencyPercentiles {
+    /// Serializes as a map from `p<percentile>` to the human-readable latency.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeMap;
+        let mut ser = serializer.serialize_map(Some(LATENCY_PERCENTILES.len()))?;
+        // `latency_percentiles[i]` holds the value for `LATENCY_PERCENTILES[i]`, so walk
+        // both arrays in lockstep instead of reporting slot 0 for every percentile.
+        for (p, latency) in LATENCY_PERCENTILES.iter().zip(&self.latency_percentiles) {
+            ser.serialize_entry(
+                &format!("p{p}"),
+                &humantime::format_duration(*latency).to_string(),
+            )?;
+        }
+        ser.end()
+    }
+}
+
+#[derive(serde::Serialize)]
+struct PerTaskOutput {
+    request_count: u64,
+    #[serde(with = "humantime_serde")]
+    latency_mean: Duration,
+    latency_percentiles: LatencyPercentiles,
+}
+
+struct ThreadLocalStats {
+    latency_histo: hdrhistogram::Histogram<u64>,
+}
+
+impl ThreadLocalStats {
+    fn new() -> Self {
+        Self {
+            // Initialize with fixed bounds so that we panic at runtime instead of resizing the histogram,
+            // which would skew the benchmark results.
+            latency_histo: hdrhistogram::Histogram::new_with_bounds(1, 1_000_000_000, 3).unwrap(),
+        }
+    }
+    fn observe(&mut self, latency: Duration) -> anyhow::Result<()> {
+        let micros: u64 = latency
+            .as_micros()
+            .try_into()
+            .context("latency greater than u64")?;
+        self.latency_histo
+            .record(micros)
+            .context("add to histogram")?;
+        Ok(())
+    }
+    fn output(&self) -> PerTaskOutput {
+        let latency_percentiles = std::array::from_fn(|idx| {
+            let micros = self
+                .latency_histo
+                .value_at_percentile(LATENCY_PERCENTILES[idx]);
+            Duration::from_micros(micros)
+        });
+        PerTaskOutput {
+            request_count: self.latency_histo.len(),
+            latency_mean: Duration::from_micros(self.latency_histo.mean() as u64),
+            latency_percentiles: LatencyPercentiles {
+                latency_percentiles,
+            },
+        }
+    }
+
+    fn add(&mut self, other: &Self) {
+        let Self {
+            ref mut latency_histo,
+        } = self;
+        latency_histo.add(&other.latency_histo).unwrap();
+    }
+}
+
+thread_local! {
+    pub static STATS: RefCell<Arc<Mutex<ThreadLocalStats>>> = std::cell::RefCell::new(
+        Arc::new(Mutex::new(ThreadLocalStats::new()))
+    );
+}
+
+pub(crate) fn main(args: Args) -> anyhow::Result<()> {
+    let _guard = logging::init(
+        logging::LogFormat::Plain,
+        logging::TracingErrorLayerEnablement::Disabled,
+        logging::Output::Stderr,
+    )
+    .unwrap();
+
+    let thread_local_stats = Arc::new(Mutex::new(Vec::new()));
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .on_thread_start({
+            let thread_local_stats = Arc::clone(&thread_local_stats);
+            move || {
+                // pre-initialize the histograms
+                STATS.with(|stats| {
+                    let stats: Arc<_> = Arc::clone(&*stats.borrow());
+                    thread_local_stats.lock().unwrap().push(stats);
+                });
+            }
+        })
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let main_task = rt.spawn(main_impl(args, thread_local_stats));
+    rt.block_on(main_task).unwrap()
+}
+
+struct Target {
+    timeline: TenantTimelineId,
+    lsn_range: Option<Range<Lsn>>,
+}
+
+/// Discovers the target timelines, spawns one `client` task per timeline, feeds them
+/// randomly generated basebackup work (for `--runtime` if given, otherwise forever), and
+/// finally prints the latency statistics aggregated from `thread_local_stats` as JSON.
+async fn main_impl(
+    args: Args,
+    thread_local_stats: Arc<Mutex<Vec<Arc<Mutex<ThreadLocalStats>>>>>,
+) -> anyhow::Result<()> {
+    let args: &'static Args = Box::leak(Box::new(args));
+
+    let mgmt_api_client = Arc::new(pageserver::client::mgmt_api::Client::new(
+        args.mgmt_api_endpoint.clone(),
+        args.pageserver_jwt.as_deref(),
+    ));
+
+    // discover targets
+    let mut timelines: Vec<TenantTimelineId> = Vec::new();
+    if let Some(targets) = &args.targets {
+        timelines = targets.clone();
+    } else {
+        let tenants: Vec<TenantId> = mgmt_api_client
+            .list_tenants()
+            .await?
+            .into_iter()
+            .map(|ti| ti.id)
+            .collect();
+        let mut js = JoinSet::new();
+        for tenant_id in tenants {
+            js.spawn({
+                let mgmt_api_client = Arc::clone(&mgmt_api_client);
+                async move {
+                    (
+                        tenant_id,
+                        mgmt_api_client.list_timelines(tenant_id).await.unwrap(),
+                    )
+                }
+            });
+        }
+        while let Some(res) = js.join_next().await {
+            let (tenant_id, tl_infos) = res.unwrap();
+            for tl in tl_infos {
+                timelines.push(TenantTimelineId {
+                    tenant_id,
+                    timeline_id: tl.timeline_id,
+                });
+            }
+        }
+    }
+
+    info!("timelines:\n{:?}", timelines);
+
+    let mut js = JoinSet::new();
+    for timeline in &timelines {
+        js.spawn({
+            // Await inside the spawned future so the per-timeline lookups run concurrently.
+            let mgmt_api_client = Arc::clone(&mgmt_api_client);
+            let timeline = *timeline;
+            async move {
+                let info = mgmt_api_client
+                    .timeline_info(timeline.tenant_id, timeline.timeline_id)
+                    .await
+                    .unwrap();
+                let lsn_range = Some(info.last_record_lsn..(info.last_record_lsn + 1));
+                anyhow::Ok(Target { timeline, lsn_range })
+            }
+        });
+    }
+    let mut all_targets: Vec<Target> = Vec::new();
+    while let Some(res) = js.join_next().await {
+        all_targets.push(res.unwrap().unwrap());
+    }
+
+    let live_stats = Arc::new(LiveStats::default());
+
+    let num_client_tasks = timelines.len();
+    let num_live_stats_dump = 1;
+    let num_work_sender_tasks = 1;
+
+    let start_work_barrier = Arc::new(tokio::sync::Barrier::new(
+        num_client_tasks + num_live_stats_dump + num_work_sender_tasks,
+    ));
+    let all_work_done_barrier = Arc::new(tokio::sync::Barrier::new(num_client_tasks));
+
+    tokio::spawn({
+        let stats = Arc::clone(&live_stats);
+        let start_work_barrier = Arc::clone(&start_work_barrier);
+        async move {
+            start_work_barrier.wait().await;
+            loop {
+                let start = std::time::Instant::now();
+                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
+                let elapsed = start.elapsed();
+                info!("RPS: {:.0}", completed_requests as f64 / elapsed.as_secs_f64());
+            }
+        }
+    });
+
+    let mut work_senders = HashMap::new();
+    let mut tasks = Vec::new();
+    for tl in &timelines {
+        let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
+        work_senders.insert(tl, sender);
+        tasks.push(tokio::spawn(client(
+            args,
+            *tl,
+            Arc::clone(&start_work_barrier),
+            receiver,
+            Arc::clone(&all_work_done_barrier),
+            Arc::clone(&live_stats),
+        )));
+    }
+
+    let work_sender = async move {
+        start_work_barrier.wait().await;
+        loop {
+            let (timeline, work) = {
+                let mut rng = rand::thread_rng();
+                let target = all_targets.choose(&mut rng).unwrap();
+                let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
+                (
+                    target.timeline,
+                    Work {
+                        lsn,
+                        gzip: rng.gen_bool(args.gzip_probability),
+                    },
+                )
+            };
+            let sender = work_senders.get(&timeline).unwrap();
+            // TODO: what if this blocks?
+            sender.send(work).await.ok().unwrap();
+        }
+    };
+
+    if let Some(runtime) = args.runtime {
+        match tokio::time::timeout(runtime.into(), work_sender).await {
+            Ok(()) => unreachable!("work sender never terminates"),
+            Err(_timeout) => {
+                // this implicitly drops the work_senders, making all the clients exit
+            }
+        }
+    } else {
+        work_sender.await;
+        unreachable!("work sender never terminates");
+    }
+
+    for t in tasks {
+        t.await.unwrap();
+    }
+
+    let output = Output {
+        total: {
+            let mut agg_stats = ThreadLocalStats::new();
+            for stats in thread_local_stats.lock().unwrap().iter() {
+                let stats = stats.lock().unwrap();
+                agg_stats.add(&*stats);
+            }
+            agg_stats.output()
+        },
+    };
+
+    let output = serde_json::to_string_pretty(&output).unwrap();
+    println!("{output}");
+
+    anyhow::Ok(())
+}
+
+#[derive(Copy, Clone)]
+struct Work {
+    lsn: Option<Lsn>,
+    gzip: bool,
+}
+
+/// Benchmark worker for one timeline: runs the basebackup requests received on `work`
+/// and records the latency of each one until the channel is closed.
+#[instrument(skip_all)]
+async fn client(
+    args: &'static Args,
+    timeline: TenantTimelineId,
+    start_work_barrier: Arc<Barrier>,
+    mut work: tokio::sync::mpsc::Receiver<Work>,
+    all_work_done_barrier: Arc<Barrier>,
+    live_stats: Arc<LiveStats>,
+) {
+    start_work_barrier.wait().await;
+
+    let client =
+        pageserver::client::page_service::Client::new(crate::util::connstring::connstring(
+            &args.page_service_host_port,
+            args.pageserver_jwt.as_deref(),
+        ))
+        .await
+        .unwrap();
+
+    while let Some(Work { lsn, gzip }) = work.recv().await {
+        let start = Instant::now();
+        let copy_out_stream = client
+            .basebackup(&BasebackupRequest {
+                tenant_id: timeline.tenant_id,
+                timeline_id: timeline.timeline_id,
+                lsn,
+                gzip,
+            })
+            .await
+            .with_context(|| format!("start basebackup for {timeline}"))
+            .unwrap();
+
+        use futures::StreamExt;
+        let size = Arc::new(AtomicUsize::new(0));
+        copy_out_stream
+            .for_each(|r| {
+                let size = Arc::clone(&size);
+                async move {
+                    size.fetch_add(r.unwrap().len(), Ordering::Relaxed);
+                }
+            })
+            .await;
+        // Stop the clock before logging so the log call is not counted as request latency.
+        let elapsed = start.elapsed();
+        debug!("basebackup size is {} bytes", size.load(Ordering::Relaxed));
+        live_stats.inc();
+        STATS.with(|stats| {
+            stats.borrow().lock().unwrap().observe(elapsed).unwrap();
+        });
+    }
+
+    all_work_done_barrier.wait().await;
+}
--- a/pageserver/pagebench/src/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/getpage_latest_lsn.rs
@@ -0,0 +1,404 @@
+use anyhow::Context;
+use pageserver::client::page_service::RelTagBlockNo;
+use pageserver::pgdatadir_mapping::{is_rel_block_key, key_to_rel_block};
+use pageserver::repository;
+use utils::lsn::Lsn;
+
+use rand::prelude::*;
+use tokio::sync::Barrier;
+use tokio::task::JoinSet;
+use tracing::{info, instrument};
+use utils::id::TenantId;
+use utils::logging;
+
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::num::NonZeroUsize;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::{Duration, Instant};
+
+use crate::util::tenant_timeline_id::TenantTimelineId;
+
+/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
+#[derive(clap::Parser)]
+pub(crate) struct Args {
+    #[clap(long, default_value = "http://localhost:9898")]
+    mgmt_api_endpoint: String,
+    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
+    page_service_connstring: String,
+    #[clap(long, default_value = "1")]
+    num_clients: NonZeroUsize,
+    #[clap(long)]
+    runtime: Option<humantime::Duration>,
+    targets: Option<Vec<TenantTimelineId>>,
+}
+
+#[derive(Debug, Default)]
+struct LiveStats {
+    completed_requests: AtomicU64,
+}
+
+impl LiveStats {
+    fn inc(&self) {
+        self.completed_requests.fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+#[derive(serde::Serialize)]
+struct Output {
+    total: PerTaskOutput,
+}
+
+const LATENCY_PERCENTILES: [f64; 4] = [95.0, 99.00, 99.90, 99.99];
+
+struct LatencyPercentiles {
+    latency_percentiles: [Duration; 4],
+}
+
+impl serde::Serialize for LatencyPercentiles {
+    /// Serializes as a map from `p<percentile>` to the human-readable latency.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeMap;
+        let mut ser = serializer.serialize_map(Some(LATENCY_PERCENTILES.len()))?;
+        // `latency_percentiles[i]` holds the value for `LATENCY_PERCENTILES[i]`, so walk
+        // both arrays in lockstep instead of reporting slot 0 for every percentile.
+        for (p, latency) in LATENCY_PERCENTILES.iter().zip(&self.latency_percentiles) {
+            ser.serialize_entry(
+                &format!("p{p}"),
+                &humantime::format_duration(*latency).to_string(),
+            )?;
+        }
+        ser.end()
+    }
+}
+
+#[derive(serde::Serialize)]
+struct PerTaskOutput {
+    request_count: u64,
+    #[serde(with = "humantime_serde")]
+    latency_mean: Duration,
+    latency_percentiles: LatencyPercentiles,
+}
+
+struct ThreadLocalStats {
+    latency_histo: hdrhistogram::Histogram<u64>,
+}
+
+impl ThreadLocalStats {
+    fn new() -> Self {
+        Self {
+            // Initialize with fixed bounds so that we panic at runtime instead of resizing the histogram,
+            // which would skew the benchmark results.
+            latency_histo: hdrhistogram::Histogram::new_with_bounds(1, 1_000_000_000, 3).unwrap(),
+        }
+    }
+    fn observe(&mut self, latency: Duration) -> anyhow::Result<()> {
+        let micros: u64 = latency
+            .as_micros()
+            .try_into()
+            .context("latency greater than u64")?;
+        self.latency_histo
+            .record(micros)
+            .context("add to histogram")?;
+        Ok(())
+    }
+    fn output(&self) -> PerTaskOutput {
+        let latency_percentiles = std::array::from_fn(|idx| {
+            let micros = self
+                .latency_histo
+                .value_at_percentile(LATENCY_PERCENTILES[idx]);
+            Duration::from_micros(micros)
+        });
+        PerTaskOutput {
+            request_count: self.latency_histo.len(),
+            latency_mean: Duration::from_micros(self.latency_histo.mean() as u64),
+            latency_percentiles: LatencyPercentiles {
+                latency_percentiles,
+            },
+        }
+    }
+
+    fn add(&mut self, other: &Self) {
+        let Self {
+            ref mut latency_histo,
+        } = self;
+        latency_histo.add(&other.latency_histo).unwrap();
+    }
+}
+
+thread_local! {
+    pub static STATS: RefCell<Arc<Mutex<ThreadLocalStats>>> = std::cell::RefCell::new(
+        Arc::new(Mutex::new(ThreadLocalStats::new()))
+    );
+}
+
+pub(crate) fn main(args: Args) -> anyhow::Result<()> {
+    let _guard = logging::init(
+        logging::LogFormat::Plain,
+        logging::TracingErrorLayerEnablement::Disabled,
+        logging::Output::Stderr,
+    )
+    .unwrap();
+
+    let thread_local_stats = Arc::new(Mutex::new(Vec::new()));
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .on_thread_start({
+            let thread_local_stats = Arc::clone(&thread_local_stats);
+            move || {
+                // pre-initialize the histograms
+                STATS.with(|stats| {
+                    let stats: Arc<_> = Arc::clone(&*stats.borrow());
+                    thread_local_stats.lock().unwrap().push(stats);
+                });
+            }
+        })
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let main_task = rt.spawn(main_impl(args, thread_local_stats));
+    rt.block_on(main_task).unwrap()
+}
+
+struct KeyRange {
+    timeline: TenantTimelineId,
+    timeline_lsn: Lsn,
+    start: i128,
+    end: i128,
+}
+
+impl KeyRange {
+    fn len(&self) -> i128 {
+        self.end - self.start
+    }
+}
+
+async fn main_impl(
+    args: Args,
+    thread_local_stats: Arc<Mutex<Vec<Arc<Mutex<ThreadLocalStats>>>>>,
+) -> anyhow::Result<()> {
+    let args: &'static Args = Box::leak(Box::new(args));
+
+    let mgmt_api_client = Arc::new(pageserver::client::mgmt_api::Client::new(
+        args.mgmt_api_endpoint.clone(),
+        None, // TODO: support jwt in args
+    ));
+
+    // discover targets
+    let mut timelines: Vec<TenantTimelineId> = Vec::new();
+    if args.targets.is_some() {
+        timelines = args.targets.clone().unwrap();
+    } else {
+        let tenants: Vec<TenantId> = mgmt_api_client
+            .list_tenants()
+            .await?
+            .into_iter()
+            .map(|ti| ti.id)
+            .collect();
+        let mut js = JoinSet::new();
+        for tenant_id in tenants {
+            js.spawn({
+                let mgmt_api_client = Arc::clone(&mgmt_api_client);
+                async move {
+                    (
+                        tenant_id,
+                        mgmt_api_client.list_timelines(tenant_id).await.unwrap(),
+                    )
+                }
+            });
+        }
+        while let Some(res) = js.join_next().await {
+            let (tenant_id, tl_infos) = res.unwrap();
+            for tl in tl_infos {
+                timelines.push(TenantTimelineId {
+                    tenant_id,
+                    timeline_id: tl.timeline_id,
+                });
+            }
+        }
+    }
+
+    info!("timelines:\n{:?}", timelines);
+
+    let mut js = JoinSet::new();
+    for timeline in &timelines {
+        js.spawn({
+            let mgmt_api_client = Arc::clone(&mgmt_api_client);
+            let timeline = *timeline;
+            async move {
+                let partitioning = mgmt_api_client
+                    .keyspace(timeline.tenant_id, timeline.timeline_id)
+                    .await?;
+                let lsn = partitioning.at_lsn;
+
+                let ranges = partitioning
+                    .keys
+                    .ranges
+                    .iter()
+                    .filter_map(|r| {
+                        let start = r.start;
+                        let end = r.end;
+                        // filter out non-relblock keys
+                        match (is_rel_block_key(start), is_rel_block_key(end)) {
+                            (true, true) => Some(KeyRange {
+                                timeline,
+                                timeline_lsn: lsn,
+                                start: start.to_i128(),
+                                end: end.to_i128(),
+                            }),
+                            (true, false) | (false, true) => {
+                                unimplemented!("split up range")
+                            }
+                            (false, false) => None,
+                        }
+                    })
+                    .collect::<Vec<_>>();
+
+                anyhow::Ok(ranges)
+            }
+        });
+    }
+    let mut all_ranges: Vec<KeyRange> = Vec::new();
+    while let Some(res) = js.join_next().await {
+        all_ranges.extend(res.unwrap().unwrap());
+    }
+    let weights =
+        rand::distributions::weighted::WeightedIndex::new(all_ranges.iter().map(|v| v.len()))
+            .unwrap();
+
+    let live_stats = Arc::new(LiveStats::default());
+
+    let num_client_tasks = timelines.len();
+    let num_live_stats_dump = 1;
+    let num_work_sender_tasks = 1;
+
+    let start_work_barrier = Arc::new(tokio::sync::Barrier::new(
+        num_client_tasks + num_live_stats_dump + num_work_sender_tasks,
+    ));
+    let all_work_done_barrier = Arc::new(tokio::sync::Barrier::new(num_client_tasks));
+
+    tokio::spawn({
+        let stats = Arc::clone(&live_stats);
+        let start_work_barrier = Arc::clone(&start_work_barrier);
+        async move {
+            start_work_barrier.wait().await;
+            loop {
+                let start = std::time::Instant::now();
+                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
+                let elapsed = start.elapsed();
+                info!(
+                    "RPS: {:.0}",
+                    completed_requests as f64 / elapsed.as_secs_f64()
+                );
+            }
+        }
+    });
+
+    let mut work_senders = HashMap::new();
+    let mut tasks = Vec::new();
+    for tl in &timelines {
+        let (sender, receiver) = tokio::sync::mpsc::channel(10); // TODO: not sure what the implications of this are
+        work_senders.insert(tl, sender);
+        tasks.push(tokio::spawn(client(
+            args,
+            *tl,
+            Arc::clone(&start_work_barrier),
+            receiver,
+            Arc::clone(&all_work_done_barrier),
+            Arc::clone(&live_stats),
+        )));
+    }
+
+    let work_sender = async move {
+        start_work_barrier.wait().await;
+        loop {
+            let (range, key) = {
+                let mut rng = rand::thread_rng();
+                let r = &all_ranges[weights.sample(&mut rng)];
+                let key: i128 = rng.gen_range(r.start..r.end);
+                let key = repository::Key::from_i128(key);
+                let (rel_tag, block_no) =
+                    key_to_rel_block(key).expect("we filter non-rel-block keys out above");
+                (r, RelTagBlockNo { rel_tag, block_no })
+            };
+            let sender = work_senders.get(&range.timeline).unwrap();
+            // TODO: what if this blocks?
+            sender.send((key, range.timeline_lsn)).await.ok().unwrap();
+        }
+    };
+
+    if let Some(runtime) = args.runtime {
+        match tokio::time::timeout(runtime.into(), work_sender).await {
+            Ok(()) => unreachable!("work sender never terminates"),
+            Err(_timeout) => {
+                // this implicitly drops the work_senders, making all the clients exit
+            }
+        }
+    } else {
+        work_sender.await;
+        unreachable!("work sender never terminates");
+    }
+
+    for t in tasks {
+        t.await.unwrap();
+    }
+
+    let output = Output {
+        total: {
+            let mut agg_stats = ThreadLocalStats::new();
+            for stats in thread_local_stats.lock().unwrap().iter() {
+                let stats = stats.lock().unwrap();
+                agg_stats.add(&*stats);
+            }
+            agg_stats.output()
+        },
+    };
+
+    let output = serde_json::to_string_pretty(&output).unwrap();
+    println!("{output}");
+
+    anyhow::Ok(())
+}
+
+#[instrument(skip_all)]
+async fn client(
+    args: &'static Args,
+    timeline: TenantTimelineId,
+    start_work_barrier: Arc<Barrier>,
+    mut work: tokio::sync::mpsc::Receiver<(RelTagBlockNo, Lsn)>,
+    all_work_done_barrier: Arc<Barrier>,
+    live_stats: Arc<LiveStats>,
+) {
+    start_work_barrier.wait().await;
+
+    let client =
+        pageserver::client::page_service::Client::new(args.page_service_connstring.clone())
+            .await
+            .unwrap();
+    let mut client = client
+        .pagestream(timeline.tenant_id, timeline.timeline_id)
+        .await
+        .unwrap();
+
+    while let Some((key, lsn)) = work.recv().await {
+        let start = Instant::now();
+        client
+            .getpage(key, lsn)
+            .await
+            .with_context(|| format!("getpage for {timeline}"))
+            .unwrap();
+        let elapsed = start.elapsed();
+        live_stats.inc();
+        STATS.with(|stats| {
+            stats.borrow().lock().unwrap().observe(elapsed).unwrap();
+        });
+    }
+
+    all_work_done_barrier.wait().await;
+}
--- a/pageserver/pagebench/src/main.rs
+++ b/pageserver/pagebench/src/main.rs
@@ -0,0 +1,22 @@
+use clap::Parser;
+
+pub(crate) mod util;
+
+mod basebackup;
+mod getpage_latest_lsn;
+
+/// Component-level performance test for pageserver.
+#[derive(clap::Parser)]
+enum Args {
+    GetPageLatestLsn(getpage_latest_lsn::Args),
+    Basebackup(basebackup::Args),
+}
+
+fn main() {
+    let args = Args::parse();
+    match args {
+        Args::GetPageLatestLsn(args) => getpage_latest_lsn::main(args),
+        Args::Basebackup(args) => basebackup::main(args),
+    }
+    .unwrap()
+}
--- a/pageserver/pagebench/src/util.rs
+++ b/pageserver/pagebench/src/util.rs
@@ -0,0 +1,2 @@
+pub(crate) mod tenant_timeline_id;
+pub(crate) mod connstring;
--- a/pageserver/pagebench/src/util/connstring.rs
+++ b/pageserver/pagebench/src/util/connstring.rs
@@ -0,0 +1,8 @@
+/// Builds a `postgres://` connection string for `host_port`; `jwt`, if any, is the password.
+pub(crate) fn connstring(host_port: &str, jwt: Option<&str>) -> String {
+    let password_part = match jwt {
+        Some(token) => format!(":{token}"), // TODO: urlescape
+        None => String::new(),
+    };
+    format!("postgres://postgres{password_part}@{host_port}")
+}
--- a/pageserver/pagebench/src/util/tenant_timeline_id.rs
+++ b/pageserver/pagebench/src/util/tenant_timeline_id.rs
@@ -0,0 +1,36 @@
+use std::str::FromStr;
+
+use anyhow::Context;
+use utils::id::TimelineId;
+
+use utils::id::TenantId;
+
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
+pub(crate) struct TenantTimelineId {
+    pub(crate) tenant_id: TenantId,
+    pub(crate) timeline_id: TimelineId,
+}
+
+impl FromStr for TenantTimelineId {
+    type Err = anyhow::Error;
+
+    /// Parses `<tenant_id>/<timeline_id>`, splitting at the first `/`.
+    /// Fails if the separator is missing or either half does not parse as its id type.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let (tenant_part, timeline_part) = s
+            .split_once('/')
+            .context("tenant and timeline id must be separated by `/`")?;
+        Ok(Self {
+            tenant_id: TenantId::from_str(tenant_part)
+                .with_context(|| format!("invalid tenant id: {tenant_part:?}"))?,
+            timeline_id: TimelineId::from_str(timeline_part)
+                .with_context(|| format!("invalid timeline id: {timeline_part:?}"))?,
+        })
+    }
+}
+
+impl std::fmt::Display for TenantTimelineId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}/{}", self.tenant_id, self.timeline_id)
+    }
+}
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -166,71 +166,111 @@ where
            }
        }

-        // Gather non-relational files from object storage pages.
+        debug!("Gather non-relational files from object storage pages");
        for kind in [
            SlruKind::Clog,
            SlruKind::MultiXactOffsets,
            SlruKind::MultiXactMembers,
        ] {
-            for segno in self
-                .timeline
-                .list_slru_segments(kind, self.lsn, self.ctx)
-                .await?
-            {
-                self.add_slru_segment(kind, segno).await?;
+            async {
+                debug!("list slru segments");
+                for segno in self
+                    .timeline
+                    .list_slru_segments(kind, self.lsn, self.ctx)
+                    .await?
+                {
+                    async {
+                        debug!("add slru segment");
+                        self.add_slru_segment(kind, segno).await?;
+                        anyhow::Ok(())
+                    }
+                    .instrument(debug_span!("slru segment", ?segno))
+                    .await?;
+                }
+                anyhow::Ok(())
            }
+            .instrument(debug_span!("non-rel file", ?kind))
+            .await?;
        }

        let mut min_restart_lsn: Lsn = Lsn::MAX;
-        // Create tablespace directories
+        debug!("Create tablespace directories");
        for ((spcnode, dbnode), has_relmap_file) in
            self.timeline.list_dbdirs(self.lsn, self.ctx).await?
        {
-            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;
+            async {
+                debug!("iter");
+                self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;

-            // If full backup is requested, include all relation files.
-            // Otherwise only include init forks of unlogged relations.
-            let rels = self
-                .timeline
-                .list_rels(spcnode, dbnode, self.lsn, self.ctx)
-                .await?;
-            for &rel in rels.iter() {
-                // Send init fork as main fork to provide well formed empty
-                // contents of UNLOGGED relations. Postgres copies it in
-                // `reinit.c` during recovery.
-                if rel.forknum == INIT_FORKNUM {
-                    // I doubt we need _init fork itself, but having it at least
-                    // serves as a marker relation is unlogged.
-                    self.add_rel(rel, rel).await?;
-                    self.add_rel(rel, rel.with_forknum(MAIN_FORKNUM)).await?;
-                    continue;
-                }
+                // If full backup is requested, include all relation files.
+                // Otherwise only include init forks of unlogged relations.
+                debug!("list rels");
+                let rels = self
+                    .timeline
+                    .list_rels(spcnode, dbnode, self.lsn, self.ctx)
+                    .await?;
+                for &rel in rels.iter() {
+                    async {
+                        debug!("iter");
+                        // Send init fork as main fork to provide well formed empty
+                        // contents of UNLOGGED relations. Postgres copies it in
+                        // `reinit.c` during recovery.
+                        if rel.forknum == INIT_FORKNUM {
+                            // I doubt we need _init fork itself, but having it at least
+                            // serves as a marker relation is unlogged.
+                            self.add_rel(rel, rel).await?;
+                            self.add_rel(rel, rel.with_forknum(MAIN_FORKNUM)).await?;
+                            return Ok(());
+                        }

-                if self.full_backup {
-                    if rel.forknum == MAIN_FORKNUM && rels.contains(&rel.with_forknum(INIT_FORKNUM))
-                    {
-                        // skip this, will include it when we reach the init fork
-                        continue;
+                        if self.full_backup {
+                            if rel.forknum == MAIN_FORKNUM
+                                && rels.contains(&rel.with_forknum(INIT_FORKNUM))
+                            {
+                                // skip this, will include it when we reach the init fork
+                                return Ok(());
+                            }
+                            self.add_rel(rel, rel).await?;
+                        }
+                        anyhow::Ok(())
                    }
-                    self.add_rel(rel, rel).await?;
+                    .instrument(debug_span!("process rel", ?rel))
+                    .await?;
                }
-            }

-            for (path, content) in self.timeline.list_aux_files(self.lsn, self.ctx).await? {
-                if path.starts_with("pg_replslot") {
-                    let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
-                    let restart_lsn = Lsn(u64::from_le_bytes(
-                        content[offs..offs + 8].try_into().unwrap(),
-                    ));
-                    info!("Replication slot {} restart LSN={}", path, restart_lsn);
-                    min_restart_lsn = Lsn::min(min_restart_lsn, restart_lsn);
+                debug!("list aux files");
+                for (path, content) in self.timeline.list_aux_files(self.lsn, self.ctx).await? {
+                    async {
+                        debug!("iter");
+                        if path.starts_with("pg_replslot") {
+                            let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
+                            let restart_lsn = Lsn(u64::from_le_bytes(
+                                content[offs..offs + 8].try_into().unwrap(),
+                            ));
+                            info!("Replication slot {} restart LSN={}", path, restart_lsn);
+                            min_restart_lsn = Lsn::min(min_restart_lsn, restart_lsn);
+                        }
+                        let header = new_tar_header(&path, content.len() as u64)?;
+                        self.ar
+                            .append(&header, &*content)
+                            .await
+                            .context("could not add aux file to basebackup tarball")?;
+                        anyhow::Ok(())
+                    }
+                    .instrument(debug_span!("process aux file", ?path))
+                    .await?;
                }
-                let header = new_tar_header(&path, content.len() as u64)?;
-                self.ar
-                    .append(&header, &*content)
-                    .await
-                    .context("could not add aux file to basebackup tarball")?;
+
+                debug!("done");
+
+                anyhow::Ok(())
            }
+            .instrument(debug_span!(
+                "process tablespace directory",
+                ?spcnode,
+                ?dbnode
+            ))
+            .await?;
        }
        if min_restart_lsn != Lsn::MAX {
            info!(
@@ -244,19 +284,25 @@ where
                .await
                .context("could not add restart.lsn file to basebackup tarball")?;
        }
+        debug!("list twophase files");
        for xid in self
            .timeline
            .list_twophase_files(self.lsn, self.ctx)
            .await?
        {
-            self.add_twophase_file(xid).await?;
+            async {
+                self.add_twophase_file(xid).await?;
+                anyhow::Ok(())
+            }
+            .instrument(debug_span!("process twophase file", ?xid))
+            .await?;
        }

        fail_point!("basebackup-before-control-file", |_| {
            bail!("failpoint basebackup-before-control-file")
        });

-        // Generate pg_control and bootstrap WAL segment.
+        debug!("Generate pg_control and bootstrap WAL segment.");
        self.add_pgcontrol_file().await?;
        self.ar.finish().await?;
        debug!("all tarred up!");
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -103,7 +103,7 @@ fn main() -> anyhow::Result<()> {
    } else {
        TracingErrorLayerEnablement::Disabled
    };
-    logging::init(
+    let _guard = logging::init(
        conf.log_format,
        tracing_error_layer_enablement,
        logging::Output::Stdout,
@@ -625,7 +625,6 @@ fn start_pageserver(
                    conf.synthetic_size_calculation_interval,
                    conf.id,
                    local_disk_storage,
-                    cancel,
                    metrics_ctx,
                )
                .instrument(info_span!("metrics_collection"))
--- a/pageserver/src/client.rs
+++ b/pageserver/src/client.rs
@@ -0,0 +1,2 @@
+pub mod mgmt_api;
+pub mod page_service;
--- a/pageserver/src/client/mgmt_api.rs
+++ b/pageserver/src/client/mgmt_api.rs
@@ -0,0 +1,111 @@
+use anyhow::Context;
+
+use hyper::{client::HttpConnector, Uri};
+use utils::id::{TenantId, TimelineId};
+
+/// Minimal HTTP client for the pageserver management API.
+///
+/// Every request is a `GET` whose response body is parsed as JSON.
+pub struct Client {
+    /// Prefix that the `/v1/...` request paths are appended to.
+    mgmt_api_endpoint: String,
+    /// Pre-formatted `Authorization` header value (`Bearer <jwt>`), if a JWT was supplied.
+    authorization_header: Option<String>,
+    client: hyper::Client<HttpConnector, hyper::Body>,
+}
+
+impl Client {
+    /// Creates a client for `mgmt_api_endpoint`.
+    ///
+    /// If `jwt` is given, it is sent as a bearer token with every request.
+    pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
+        Self {
+            mgmt_api_endpoint,
+            authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
+            client: hyper::client::Client::new(),
+        }
+    }
+
+    /// `GET /v1/tenant`
+    pub async fn list_tenants(&self) -> anyhow::Result<Vec<pageserver_api::models::TenantInfo>> {
+        self.get_json(format!("{}/v1/tenant", self.mgmt_api_endpoint))
+            .await
+    }
+
+    /// `GET /v1/tenant/{tenant_id}/timeline`
+    pub async fn list_timelines(
+        &self,
+        tenant_id: TenantId,
+    ) -> anyhow::Result<Vec<pageserver_api::models::TimelineInfo>> {
+        self.get_json(format!(
+            "{}/v1/tenant/{tenant_id}/timeline",
+            self.mgmt_api_endpoint
+        ))
+        .await
+    }
+
+    /// `GET /v1/tenant/{tenant_id}/timeline/{timeline_id}`
+    pub async fn timeline_info(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> anyhow::Result<pageserver_api::models::TimelineInfo> {
+        self.get_json(format!(
+            "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}",
+            self.mgmt_api_endpoint
+        ))
+        .await
+    }
+
+    /// `GET /v1/tenant/{tenant_id}/timeline/{timeline_id}/keyspace`
+    ///
+    /// The request sets `check_serialization_roundtrip=true`.
+    pub async fn keyspace(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> anyhow::Result<crate::http::models::partitioning::Partitioning> {
+        self.get_json(format!(
+            "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}/keyspace?check_serialization_roundtrip=true",
+            self.mgmt_api_endpoint
+        ))
+        .await
+    }
+
+    /// Sends a `GET` request to `uri` and deserializes the JSON response body into `T`.
+    ///
+    /// Fails if `uri` is not a valid URI, the request cannot be sent, the response
+    /// status is not a success status, or the body does not deserialize into `T`.
+    async fn get_json<T: serde::de::DeserializeOwned>(&self, uri: String) -> anyhow::Result<T> {
+        let parsed =
+            Uri::try_from(uri.as_str()).with_context(|| format!("invalid uri: {uri:?}"))?;
+        let resp = self
+            .get(parsed)
+            .await
+            .with_context(|| format!("GET {uri}"))?;
+        let status = resp.status();
+        if !status.is_success() {
+            // Name the request and the status so that the failure is actionable.
+            anyhow::bail!("status error: GET {uri} returned {status}");
+        }
+        let body = hyper::body::to_bytes(resp)
+            .await
+            .with_context(|| format!("read response body of GET {uri}"))?;
+        serde_json::from_slice(&body)
+            .with_context(|| format!("deserialize response body of GET {uri}"))
+    }
+
+    /// Sends a body-less `GET` request to `uri`, adding the `Authorization` header if configured.
+    async fn get(&self, uri: Uri) -> anyhow::Result<hyper::Response<hyper::Body>> {
+        let mut req = hyper::Request::builder().uri(uri).method("GET");
+        if let Some(value) = &self.authorization_header {
+            req = req.header("Authorization", value);
+        }
+        // Building the request can fail, e.g. on a value that is not a legal header value;
+        // report that as an error instead of panicking.
+        let req = req
+            .body(hyper::Body::default())
+            .context("build request")?;
+        Ok(self.client.request(req).await?)
+    }
+}
--- a/pageserver/src/client/page_service.rs
+++ b/pageserver/src/client/page_service.rs
@@ -0,0 +1,173 @@
+use std::pin::Pin;
+
+use futures::SinkExt;
+use pageserver_api::{
+    models::{
+        PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
+        PagestreamGetPageResponse,
+    },
+    reltag::RelTag,
+};
+use tokio::task::JoinHandle;
+use tokio_postgres::CopyOutStream;
+use tokio_stream::StreamExt;
+use tokio_util::sync::CancellationToken;
+use utils::{
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
+};
+
+/// Client for the page service commands issued over a `tokio_postgres` connection.
+pub struct Client {
+    client: tokio_postgres::Client,
+    /// Dropping this guard cancels the token that `conn_task` selects on.
+    cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
+    /// Task that polls the `tokio_postgres` connection object.
+    conn_task: JoinHandle<()>,
+}
+
+/// Arguments of the `basebackup` command, see [`Client::basebackup`].
+pub struct BasebackupRequest {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    /// Appended to the command only if `Some`.
+    pub lsn: Option<Lsn>,
+    /// If `true`, `--gzip` is appended to the command.
+    pub gzip: bool,
+}
+
+impl Client {
+    /// Connects to `connstring` without TLS and spawns a task that polls the connection.
+    ///
+    /// The task finishes when the connection future completes or when the cancellation
+    /// token behind `cancel_on_client_drop` is cancelled. It panics if the connection
+    /// future completes with an error.
+    pub async fn new(connstring: String) -> anyhow::Result<Self> {
+        let (client, connection) = tokio_postgres::connect(&connstring, postgres::NoTls).await?;
+
+        let conn_task_cancel = CancellationToken::new();
+        let conn_task = tokio::spawn({
+            let conn_task_cancel = conn_task_cancel.clone();
+            async move {
+                tokio::select! {
+                    _ = conn_task_cancel.cancelled() => { }
+                    res = connection => {
+                        res.unwrap();
+                    }
+                }
+            }
+        });
+        Ok(Self {
+            cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),
+            conn_task,
+            client,
+        })
+    }
+
+    /// Sends `pagestream {tenant_id} {timeline_id}` and turns this client into a
+    /// [`PagestreamClient`] that owns the resulting copy-both stream.
+    pub async fn pagestream(
+        self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> anyhow::Result<PagestreamClient> {
+        let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = self
+            .client
+            .copy_both_simple(&format!("pagestream {tenant_id} {timeline_id}"))
+            .await?;
+        let Client {
+            cancel_on_client_drop,
+            conn_task,
+            client: _,
+        } = self;
+        Ok(PagestreamClient {
+            copy_both: Box::pin(copy_both),
+            conn_task,
+            cancel_on_client_drop,
+        })
+    }
+
+    /// Sends `basebackup {tenant_id} {timeline_id} [{lsn}] [--gzip]` and returns the
+    /// copy-out stream of the response.
+    pub async fn basebackup(&self, req: &BasebackupRequest) -> anyhow::Result<CopyOutStream> {
+        let BasebackupRequest {
+            tenant_id,
+            timeline_id,
+            lsn,
+            gzip,
+        } = req;
+        let mut args = Vec::with_capacity(5);
+        args.push("basebackup".to_string());
+        args.push(format!("{tenant_id}"));
+        args.push(format!("{timeline_id}"));
+        if let Some(lsn) = lsn {
+            args.push(format!("{lsn}"));
+        }
+        if *gzip {
+            args.push("--gzip".to_string());
+        }
+        Ok(self.client.copy_out(&args.join(" ")).await?)
+    }
+}
+
+/// Create using [`Client::pagestream`].
+pub struct PagestreamClient {
+    copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
+    cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
+    conn_task: JoinHandle<()>,
+}
+
+/// Identifies the page requested by [`PagestreamClient::getpage`]: a relation and a block number.
+pub struct RelTagBlockNo {
+    pub rel_tag: RelTag,
+    pub block_no: u32,
+}
+
+impl PagestreamClient {
+    /// Drops the cancellation guard and waits for the connection task to finish.
+    ///
+    /// Panics if joining the connection task fails (e.g. because that task panicked).
+    pub async fn shutdown(mut self) {
+        let _ = self.cancel_on_client_drop.take();
+        self.conn_task.await.unwrap();
+    }
+
+    /// Requests the page `key` at `lsn` (with `latest: false`) and waits for the response.
+    ///
+    /// Returns an error if sending fails, if the stream ends or fails before a response
+    /// arrives, if the response cannot be deserialized, or if the response is anything
+    /// other than a `GetPage` message.
+    pub async fn getpage(
+        &mut self,
+        key: RelTagBlockNo,
+        lsn: Lsn,
+    ) -> anyhow::Result<PagestreamGetPageResponse> {
+        let req = PagestreamGetPageRequest {
+            latest: false,
+            rel: key.rel_tag,
+            blkno: key.block_no,
+            lsn,
+        };
+        let req = PagestreamFeMessage::GetPage(req);
+        let req: bytes::Bytes = req.serialize();
+        let mut req = tokio_stream::once(Ok(req));
+
+        self.copy_both.send_all(&mut req).await?;
+
+        let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
+        // Surface a closed or failed stream as an error instead of panicking.
+        let next = next.ok_or_else(|| {
+            anyhow::anyhow!("pagestream ended before a response to the getpage request arrived")
+        })??;
+
+        match PagestreamBeMessage::deserialize(next)? {
+            PagestreamBeMessage::GetPage(p) => Ok(p),
+            PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
+            PagestreamBeMessage::Exists(_)
+            | PagestreamBeMessage::Nblocks(_)
+            | PagestreamBeMessage::DbSize(_) => {
+                anyhow::bail!("unexpected response message type for getpage request")
+            }
+        }
+    }
+}
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -3,7 +3,7 @@
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::tasks::BackgroundLoopKind;
-use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError};
+use crate::tenant::{mgr, LogicalSizeCalculationCause};
 use camino::Utf8PathBuf;
 use consumption_metrics::EventType;
 use pageserver_api::models::TenantState;
@@ -12,7 +12,6 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::{Duration, SystemTime};
 use tokio::time::Instant;
-use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::id::NodeId;

@@ -38,7 +37,6 @@ type RawMetric = (MetricsKey, (EventType, u64));
 type Cache = HashMap<MetricsKey, (EventType, u64)>;

 /// Main thread that serves metrics collection
-#[allow(clippy::too_many_arguments)]
 pub async fn collect_metrics(
    metric_collection_endpoint: &Url,
    metric_collection_interval: Duration,
@@ -46,7 +44,6 @@ pub async fn collect_metrics(
    synthetic_size_calculation_interval: Duration,
    node_id: NodeId,
    local_disk_storage: Utf8PathBuf,
-    cancel: CancellationToken,
    ctx: RequestContext,
 ) -> anyhow::Result<()> {
    if _cached_metric_collection_interval != Duration::ZERO {
@@ -66,13 +63,9 @@ pub async fn collect_metrics(
        "synthetic size calculation",
        false,
        async move {
-            calculate_synthetic_size_worker(
-                synthetic_size_calculation_interval,
-                &cancel,
-                &worker_ctx,
-            )
-            .instrument(info_span!("synthetic_size_worker"))
-            .await?;
+            calculate_synthetic_size_worker(synthetic_size_calculation_interval, &worker_ctx)
+                .instrument(info_span!("synthetic_size_worker"))
+                .await?;
            Ok(())
        },
    );
@@ -248,7 +241,6 @@ async fn reschedule(
 /// Caclculate synthetic size for each active tenant
 async fn calculate_synthetic_size_worker(
    synthetic_size_calculation_interval: Duration,
-    cancel: &CancellationToken,
    ctx: &RequestContext,
 ) -> anyhow::Result<()> {
    info!("starting calculate_synthetic_size_worker");
@@ -269,7 +261,7 @@ async fn calculate_synthetic_size_worker(
            }
        };

-        for (tenant_id, tenant_state) in tenants {
+        for (tenant_id, tenant_state, _gen) in tenants {
            if tenant_state != TenantState::Active {
                continue;
            }
@@ -280,12 +272,7 @@ async fn calculate_synthetic_size_worker(
                // Same for the loop that fetches computed metrics.
                // By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
                // which turns out is really handy to understand the system.
-                if let Err(e) = tenant.calculate_synthetic_size(cause, cancel, ctx).await {
-                    if let Some(PageReconstructError::Cancelled) =
-                        e.downcast_ref::<PageReconstructError>()
-                    {
-                        return Ok(());
-                    }
+                if let Err(e) = tenant.calculate_synthetic_size(cause, ctx).await {
                    error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
                }
            }
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -197,7 +197,7 @@ pub(super) async fn collect_all_metrics(
        }
    };

-    let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
+    let tenants = futures::stream::iter(tenants).filter_map(|(id, state, _)| async move {
        if state != TenantState::Active {
            None
        } else {
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -10,7 +10,6 @@ use crate::control_plane_client::ControlPlaneGenerationsApi;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
 use crate::tenant::remote_timeline_client::remote_timeline_path;
-use crate::tenant::remote_timeline_client::LayerFileMetadata;
 use crate::virtual_file::MaybeFatalIo;
 use crate::virtual_file::VirtualFile;
 use anyhow::Context;
@@ -346,7 +345,7 @@ impl DeletionList {
                result.extend(
                    timeline_layers
                        .into_iter()
-                        .map(|l| timeline_remote_path.join(&Utf8PathBuf::from(l))),
+                        .map(|l| timeline_remote_path.join(Utf8PathBuf::from(l))),
                );
            }
        }
@@ -510,19 +509,17 @@ impl DeletionQueueClient {
        tenant_id: TenantId,
        timeline_id: TimelineId,
        current_generation: Generation,
-        layers: Vec<(LayerFileName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, Generation)>,
    ) -> Result<(), DeletionQueueError> {
        if current_generation.is_none() {
            debug!("Enqueuing deletions in legacy mode, skipping queue");
-
            let mut layer_paths = Vec::new();
-            for (layer, meta) in layers {
+            for (layer, generation) in layers {
                layer_paths.push(remote_layer_path(
                    &tenant_id,
                    &timeline_id,
-                    meta.shard,
                    &layer,
-                    meta.generation,
+                    generation,
                ));
            }
            self.push_immediate(layer_paths).await?;
@@ -542,7 +539,7 @@ impl DeletionQueueClient {
        tenant_id: TenantId,
        timeline_id: TimelineId,
        current_generation: Generation,
-        layers: Vec<(LayerFileName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, Generation)>,
    ) -> Result<(), DeletionQueueError> {
        metrics::DELETION_QUEUE
            .keys_submitted
@@ -753,7 +750,6 @@ impl DeletionQueue {
 mod test {
    use camino::Utf8Path;
    use hex_literal::hex;
-    use pageserver_api::shard::ShardIndex;
    use std::{io::ErrorKind, time::Duration};
    use tracing::info;

@@ -993,8 +989,6 @@ mod test {
        // we delete, and the generation of the running Tenant.
        let layer_generation = Generation::new(0xdeadbeef);
        let now_generation = Generation::new(0xfeedbeef);
-        let layer_metadata =
-            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());

        let remote_layer_file_name_1 =
            format!("{}{}", layer_file_name_1, layer_generation.get_suffix());
@@ -1018,7 +1012,7 @@ mod test {
                tenant_id,
                TIMELINE_ID,
                now_generation,
-                [(layer_file_name_1.clone(), layer_metadata)].to_vec(),
+                [(layer_file_name_1.clone(), layer_generation)].to_vec(),
            )
            .await?;
        assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path);
@@ -1057,8 +1051,6 @@ mod test {
        let stale_generation = latest_generation.previous();
        // Generation that our example layer file was written with
        let layer_generation = stale_generation.previous();
-        let layer_metadata =
-            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());

        ctx.set_latest_generation(latest_generation);

@@ -1076,7 +1068,7 @@ mod test {
                tenant_id,
                TIMELINE_ID,
                stale_generation,
-                [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1091,7 +1083,7 @@ mod test {
                tenant_id,
                TIMELINE_ID,
                latest_generation,
-                [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1118,8 +1110,6 @@ mod test {

        let layer_generation = Generation::new(0xdeadbeef);
        let now_generation = Generation::new(0xfeedbeef);
-        let layer_metadata =
-            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());

        // Inject a deletion in the generation before generation_now: after restart,
        // this deletion should _not_ get executed (only the immediately previous
@@ -1131,7 +1121,7 @@ mod test {
                tenant_id,
                TIMELINE_ID,
                now_generation.previous(),
-                [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1145,7 +1135,7 @@ mod test {
                tenant_id,
                TIMELINE_ID,
                now_generation,
-                [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1235,13 +1225,12 @@ pub(crate) mod mock {
                match msg {
                    ListWriterQueueMessage::Delete(op) => {
                        let mut objects = op.objects;
-                        for (layer, meta) in op.layers {
+                        for (layer, generation) in op.layers {
                            objects.push(remote_layer_path(
                                &op.tenant_id,
                                &op.timeline_id,
-                                meta.shard,
                                &layer,
-                                meta.generation,
+                                generation,
                            ));
                        }

--- a/pageserver/src/deletion_queue/list_writer.rs
+++ b/pageserver/src/deletion_queue/list_writer.rs
@@ -33,7 +33,6 @@ use crate::config::PageServerConf;
 use crate::deletion_queue::TEMP_SUFFIX;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
-use crate::tenant::remote_timeline_client::LayerFileMetadata;
 use crate::tenant::storage_layer::LayerFileName;
 use crate::virtual_file::on_fatal_io_error;
 use crate::virtual_file::MaybeFatalIo;
@@ -59,7 +58,7 @@ pub(super) struct DeletionOp {
    // `layers` and `objects` are both just lists of objects.  `layers` is used if you do not
    // have a config object handy to project it to a remote key, and need the consuming worker
    // to do it for you.
-    pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>,
+    pub(super) layers: Vec<(LayerFileName, Generation)>,
    pub(super) objects: Vec<RemotePath>,

    /// The _current_ generation of the Tenant attachment in which we are enqueuing
@@ -388,13 +387,12 @@ impl ListWriter {
                    );

                    let mut layer_paths = Vec::new();
-                    for (layer, meta) in op.layers {
+                    for (layer, generation) in op.layers {
                        layer_paths.push(remote_layer_path(
                            &op.tenant_id,
                            &op.timeline_id,
-                            meta.shard,
                            &layer,
-                            meta.generation,
+                            generation,
                        ));
                    }
                    layer_paths.extend(op.objects);
--- a/pageserver/src/deletion_queue/validator.rs
+++ b/pageserver/src/deletion_queue/validator.rs
@@ -178,14 +178,7 @@ where
                .unwrap_or(false);

            if valid && *validated_generation == tenant_lsn_state.generation {
-                for (timeline_id, pending_lsn) in tenant_lsn_state.timelines {
-                    tracing::debug!(
-                        %tenant_id,
-                        %timeline_id,
-                        current = %pending_lsn.result_slot.load(),
-                        projected = %pending_lsn.projected,
-                        "advancing validated remote_consistent_lsn",
-                    );
+                for (_timeline_id, pending_lsn) in tenant_lsn_state.timelines {
                    pending_lsn.result_slot.store(pending_lsn.projected);
                }
            } else {
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -541,7 +541,7 @@ async fn collect_eviction_candidates(

    let mut candidates = Vec::new();

-    for (tenant_id, _state) in &tenants {
+    for (tenant_id, _state, _gen) in &tenants {
        if cancel.is_cancelled() {
            return Ok(EvictionCandidates::Cancelled);
        }
--- a/pageserver/src/http/mod.rs
+++ b/pageserver/src/http/mod.rs
@@ -1,4 +1,4 @@
 pub mod routes;
 pub use routes::make_router;

-pub use pageserver_api::models;
+pub mod models;
--- a/pageserver/src/http/models.rs
+++ b/pageserver/src/http/models.rs
@@ -0,0 +1,3 @@
+//! If possible, use `::pageserver_api::models` instead.
+
+pub mod partitioning;
--- a/pageserver/src/http/models/partitioning.rs
+++ b/pageserver/src/http/models/partitioning.rs
@@ -0,0 +1,121 @@
+use utils::lsn::Lsn;
+
+/// A [`crate::keyspace::KeySpace`] together with an [`Lsn`].
+///
+/// Serialized as a map with two entries: `keys`, a sequence of two-element
+/// `(start, end)` tuples, and `at_lsn`. Keys and the LSN are written via their
+/// `Display` implementations and read back via `serde_with::DisplayFromStr`.
+#[derive(Debug, PartialEq, Eq)]
+pub struct Partitioning {
+    pub keys: crate::keyspace::KeySpace,
+
+    pub at_lsn: Lsn,
+}
+
+impl serde::Serialize for Partitioning {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        /// Serializes the ranges of the wrapped key space as a sequence of [`KeyRange`]s.
+        struct KeySpace<'a>(&'a crate::keyspace::KeySpace);
+
+        impl<'a> serde::Serialize for KeySpace<'a> {
+            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+            where
+                S: serde::Serializer,
+            {
+                use serde::ser::SerializeSeq;
+                let mut seq = serializer.serialize_seq(Some(self.0.ranges.len()))?;
+                for kr in &self.0.ranges {
+                    seq.serialize_element(&KeyRange(kr))?;
+                }
+                seq.end()
+            }
+        }
+
+        use serde::ser::SerializeMap;
+        let mut map = serializer.serialize_map(Some(2))?;
+        map.serialize_key("keys")?;
+        map.serialize_value(&KeySpace(&self.keys))?;
+        map.serialize_key("at_lsn")?;
+        map.serialize_value(&WithDisplay(&self.at_lsn))?;
+        map.end()
+    }
+}
+
+/// Serializes the referenced value as the string produced by its `Display` implementation.
+pub struct WithDisplay<'a, T>(&'a T);
+
+impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.collect_str(self.0)
+    }
+}
+
+/// Serializes a key range as a two-element tuple `(start, end)` of `Display`-formatted keys.
+pub struct KeyRange<'a>(&'a std::ops::Range<crate::repository::Key>);
+
+impl<'a> serde::Serialize for KeyRange<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeTuple;
+        let mut t = serializer.serialize_tuple(2)?;
+        t.serialize_element(&WithDisplay(&self.0.start))?;
+        t.serialize_element(&WithDisplay(&self.0.end))?;
+        t.end()
+    }
+}
+
+impl<'a> serde::Deserialize<'a> for Partitioning {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'a>,
+    {
+        /// Deserializes a sequence of `(start, end)` pairs into a key space.
+        struct KeySpace(crate::keyspace::KeySpace);
+
+        impl<'de> serde::Deserialize<'de> for KeySpace {
+            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+            where
+                D: serde::Deserializer<'de>,
+            {
+                #[serde_with::serde_as]
+                #[derive(serde::Deserialize)]
+                #[serde(transparent)]
+                struct Key(#[serde_as(as = "serde_with::DisplayFromStr")] crate::repository::Key);
+
+                // No `serde_as` here: none of the fields carries a `#[serde_as(..)]` attribute.
+                #[derive(serde::Deserialize)]
+                struct Range(Key, Key);
+
+                let ranges: Vec<Range> = serde::Deserialize::deserialize(deserializer)?;
+                Ok(Self(crate::keyspace::KeySpace {
+                    ranges: ranges
+                        .into_iter()
+                        .map(|Range(start, end)| start.0..end.0)
+                        .collect(),
+                }))
+            }
+        }
+
+        #[serde_with::serde_as]
+        #[derive(serde::Deserialize)]
+        struct De {
+            keys: KeySpace,
+            #[serde_as(as = "serde_with::DisplayFromStr")]
+            at_lsn: Lsn,
+        }
+
+        let de: De = serde::Deserialize::deserialize(deserializer)?;
+        Ok(Self {
+            at_lsn: de.at_lsn,
+            keys: de.keys.0,
+        })
+    }
+}
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -6,7 +6,6 @@ use std::str::FromStr;
 use std::sync::Arc;

 use anyhow::{anyhow, Context, Result};
-use enumset::EnumSet;
 use futures::TryFutureExt;
 use humantime::format_rfc3339;
 use hyper::header;
@@ -27,10 +26,6 @@ use utils::http::endpoint::request_span;
 use utils::http::json::json_request_or_empty_body;
 use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};

-use super::models::{
-    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
-    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
-};
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::deletion_queue::DeletionQueueClient;
 use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
@@ -43,11 +38,14 @@ use crate::tenant::mgr::{
 };
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
-use crate::tenant::timeline::CompactFlags;
 use crate::tenant::timeline::Timeline;
 use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, TenantSharedResources};
 use crate::{config::PageServerConf, tenant::mgr};
 use crate::{disk_usage_eviction_task, tenant};
+use pageserver_api::models::{
+    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
+    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
+};
 use utils::{
    auth::SwappableJwtAuth,
    generation::Generation,
@@ -63,7 +61,7 @@ use utils::{
 };

 // Imports only used for testing APIs
-use super::models::ConfigureFailpointsRequest;
+use pageserver_api::models::ConfigureFailpointsRequest;

 pub struct State {
    conf: &'static PageServerConf,
@@ -550,7 +548,7 @@ async fn timeline_detail_handler(

 async fn get_lsn_by_timestamp_handler(
    request: Request<Body>,
-    cancel: CancellationToken,
+    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
@@ -566,9 +564,7 @@ async fn get_lsn_by_timestamp_handler(

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-    let result = timeline
-        .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
-        .await?;
+    let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?;

    if version.unwrap_or(0) > 1 {
        #[derive(serde::Serialize)]
@@ -768,11 +764,12 @@ async fn tenant_list_handler(
            ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into())
        })?
        .iter()
-        .map(|(id, state)| TenantInfo {
+        .map(|(id, state, gen)| TenantInfo {
            id: *id,
            state: state.clone(),
            current_physical_size: None,
            attachment_status: state.attachment_status(),
+            generation: (*gen).into(),
        })
        .collect::<Vec<TenantInfo>>();

@@ -801,6 +798,7 @@ async fn tenant_status(
            state: state.clone(),
            current_physical_size: Some(current_physical_size),
            attachment_status: state.attachment_status(),
+            generation: tenant.generation().into(),
        })
    }
    .instrument(info_span!("tenant_status_handler", %tenant_id))
@@ -844,7 +842,7 @@ async fn tenant_delete_handler(
 /// without modifying anything anyway.
 async fn tenant_size_handler(
    request: Request<Body>,
-    cancel: CancellationToken,
+    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
@@ -860,7 +858,6 @@ async fn tenant_size_handler(
        .gather_size_inputs(
            retention_period,
            LogicalSizeCalculationCause::TenantSizeHandler,
-            &cancel,
            &ctx,
        )
        .await
@@ -1245,7 +1242,7 @@ async fn failpoints_handler(
 // Run GC immediately on given timeline.
 async fn timeline_gc_handler(
    mut request: Request<Body>,
-    cancel: CancellationToken,
+    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -1254,7 +1251,7 @@ async fn timeline_gc_handler(
    let gc_req: TimelineGcRequest = json_request(&mut request).await?;

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
+    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, &ctx).await?;
    let gc_result = wait_task_done
        .await
        .context("wait for gc task")
@@ -1273,15 +1270,11 @@ async fn timeline_compact_handler(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let mut flags = EnumSet::empty();
-    if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
-        flags |= CompactFlags::ForceRepartition;
-    }
    async {
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
        timeline
-            .compact(&cancel, flags, &ctx)
+            .compact(&cancel, &ctx)
            .await
            .map_err(|e| ApiError::InternalServerError(e.into()))?;
        json_response(StatusCode::OK, ())
@@ -1298,11 +1291,6 @@ async fn timeline_checkpoint_handler(
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;
-
-    let mut flags = EnumSet::empty();
-    if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
-        flags |= CompactFlags::ForceRepartition;
-    }
    async {
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
@@ -1311,7 +1299,7 @@ async fn timeline_checkpoint_handler(
            .await
            .map_err(ApiError::InternalServerError)?;
        timeline
-            .compact(&cancel, flags, &ctx)
+            .compact(&cancel, &ctx)
            .await
            .map_err(|e| ApiError::InternalServerError(e.into()))?;

@@ -1436,71 +1424,11 @@ async fn timeline_collect_keyspace(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    struct Partitioning {
-        keys: crate::keyspace::KeySpace,
-
-        at_lsn: Lsn,
-    }
-
-    impl serde::Serialize for Partitioning {
-        fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            use serde::ser::SerializeMap;
-            let mut map = serializer.serialize_map(Some(2))?;
-            map.serialize_key("keys")?;
-            map.serialize_value(&KeySpace(&self.keys))?;
-            map.serialize_key("at_lsn")?;
-            map.serialize_value(&WithDisplay(&self.at_lsn))?;
-            map.end()
-        }
-    }
-
-    struct WithDisplay<'a, T>(&'a T);
-
-    impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
-        fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            serializer.collect_str(&self.0)
-        }
-    }
-
-    struct KeySpace<'a>(&'a crate::keyspace::KeySpace);
-
-    impl<'a> serde::Serialize for KeySpace<'a> {
-        fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            use serde::ser::SerializeSeq;
-            let mut seq = serializer.serialize_seq(Some(self.0.ranges.len()))?;
-            for kr in &self.0.ranges {
-                seq.serialize_element(&KeyRange(kr))?;
-            }
-            seq.end()
-        }
-    }
-
-    struct KeyRange<'a>(&'a std::ops::Range<crate::repository::Key>);
-
-    impl<'a> serde::Serialize for KeyRange<'a> {
-        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            use serde::ser::SerializeTuple;
-            let mut t = serializer.serialize_tuple(2)?;
-            t.serialize_element(&WithDisplay(&self.0.start))?;
-            t.serialize_element(&WithDisplay(&self.0.end))?;
-            t.end()
-        }
-    }
-
    let at_lsn: Option<Lsn> = parse_query_param(&request, "at_lsn")?;

+    let check_serialization_roundtrip: bool =
+        parse_query_param(&request, "check_serialization_roundtrip")?.unwrap_or(false);
+
    async {
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
@@ -1510,7 +1438,20 @@ async fn timeline_collect_keyspace(
            .await
            .map_err(|e| ApiError::InternalServerError(e.into()))?;

-        json_response(StatusCode::OK, Partitioning { keys, at_lsn })
+        let res = crate::http::models::partitioning::Partitioning { keys, at_lsn };
+        if check_serialization_roundtrip {
+            (|| {
+                let ser = serde_json::ser::to_vec(&res).context("serialize")?;
+                let de: crate::http::models::partitioning::Partitioning =
+                    serde_json::from_slice(&ser).context("deserialize")?;
+                anyhow::ensure!(de == res, "not equal");
+                info!("passed serialization rountrip check");
+                Ok(())
+            })()
+            .context("serialization rountrip")
+            .map_err(ApiError::InternalServerError)?;
+        }
+        json_response(StatusCode::OK, res)
    }
    .instrument(info_span!("timeline_collect_keyspace", %tenant_id, %timeline_id))
    .await
@@ -1689,24 +1630,8 @@ where
                let token_cloned = token.clone();
                let result = handler(r, token).await;
                if token_cloned.is_cancelled() {
-                    // dropguard has executed: we will never turn this result into response.
-                    //
-                    // at least temporarily do {:?} logging; these failures are rare enough but
-                    // could hide difficult errors.
-                    match &result {
-                        Ok(response) => {
-                            let status = response.status();
-                            info!(%status, "Cancelled request finished successfully")
-                        }
-                        Err(e) => error!("Cancelled request finished with an error: {e:?}"),
-                    }
+                    info!("Cancelled request finished");
                }
-                // only logging for cancelled panicked request handlers is the tracing_panic_hook,
-                // which should suffice.
-                //
-                // there is still a chance to lose the result due to race between
-                // returning from here and the actual connection closing happening
-                // before outer task gets to execute. leaving that up for #5815.
                result
            }
            .in_current_span(),
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -3,25 +3,18 @@
 //! a neon Timeline.
 //!
 use std::path::{Path, PathBuf};
-use std::pin::Pin;
-use std::task::{self, Poll};

 use anyhow::{bail, ensure, Context, Result};
-use async_compression::{tokio::write::ZstdEncoder, zstd::CParameter, Level};
 use bytes::Bytes;
 use camino::Utf8Path;
 use futures::StreamExt;
-use nix::NixPath;
-use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
+use tokio::io::{AsyncRead, AsyncReadExt};
 use tokio_tar::Archive;
-use tokio_tar::Builder;
-use tokio_tar::HeaderMode;
 use tracing::*;
 use walkdir::WalkDir;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::*;
-use crate::tenant::remote_timeline_client::INITDB_PATH;
 use crate::tenant::Timeline;
 use crate::walingest::WalIngest;
 use crate::walrecord::DecodedWALRecord;
@@ -40,9 +33,7 @@ use utils::lsn::Lsn;
 pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result<Lsn> {
    // Read control file to extract the LSN
    let controlfile_path = path.join("global").join("pg_control");
-    let controlfile_buf = std::fs::read(&controlfile_path)
-        .with_context(|| format!("reading controlfile: {controlfile_path}"))?;
-    let controlfile = ControlFileData::decode(&controlfile_buf)?;
+    let controlfile = ControlFileData::decode(&std::fs::read(controlfile_path)?)?;
    let lsn = controlfile.checkPoint;

    Ok(Lsn(lsn))
@@ -627,108 +618,3 @@ async fn read_all_bytes(reader: &mut (impl AsyncRead + Unpin)) -> Result<Bytes>
    reader.read_to_end(&mut buf).await?;
    Ok(Bytes::from(buf))
 }
-
-/// An in-memory buffer implementing `AsyncWrite`, inserting yields every now and then
-///
-/// The number of yields is bounded by above by the number of times poll_write is called,
-/// so calling it with 8 KB chunks and 8 MB chunks gives the same number of yields in total.
-/// This is an explicit choice as the `YieldingVec` is meant to give the async executor
-/// breathing room between units of CPU intensive preparation of buffers to be written.
-/// Once a write call is issued, the whole buffer has been prepared already, so there is no
-/// gain in splitting up the memcopy further.
-struct YieldingVec {
-    yield_budget: usize,
-    // the buffer written into
-    buf: Vec<u8>,
-}
-
-impl YieldingVec {
-    fn new() -> Self {
-        Self {
-            yield_budget: 0,
-            buf: Vec::new(),
-        }
-    }
-    // Whether we should yield for a read operation of given size
-    fn should_yield(&mut self, add_buf_len: usize) -> bool {
-        // Set this limit to a small value so that we are a
-        // good async citizen and yield repeatedly (but not
-        // too often for many small writes to cause many yields)
-        const YIELD_DIST: usize = 1024;
-
-        let target_buf_len = self.buf.len() + add_buf_len;
-        let ret = self.yield_budget / YIELD_DIST < target_buf_len / YIELD_DIST;
-        if self.yield_budget < target_buf_len {
-            self.yield_budget += add_buf_len;
-        }
-        ret
-    }
-}
-
-impl AsyncWrite for YieldingVec {
-    fn poll_write(
-        mut self: Pin<&mut Self>,
-        cx: &mut task::Context<'_>,
-        buf: &[u8],
-    ) -> Poll<std::io::Result<usize>> {
-        if self.should_yield(buf.len()) {
-            cx.waker().wake_by_ref();
-            return Poll::Pending;
-        }
-        self.get_mut().buf.extend_from_slice(buf);
-        Poll::Ready(Ok(buf.len()))
-    }
-
-    fn poll_flush(self: Pin<&mut Self>, _cx: &mut task::Context<'_>) -> Poll<std::io::Result<()>> {
-        Poll::Ready(Ok(()))
-    }
-
-    fn poll_shutdown(
-        self: Pin<&mut Self>,
-        _cx: &mut task::Context<'_>,
-    ) -> Poll<std::io::Result<()>> {
-        Poll::Ready(Ok(()))
-    }
-}
-
-pub async fn create_tar_zst(pgdata_path: &Utf8Path) -> Result<Vec<u8>> {
-    let mut paths = Vec::new();
-    for entry in WalkDir::new(pgdata_path) {
-        let entry = entry?;
-        let metadata = entry.metadata().expect("error getting dir entry metadata");
-        // Also allow directories so that we also get empty directories
-        if !(metadata.is_file() || metadata.is_dir()) {
-            continue;
-        }
-        let path = entry.into_path();
-        paths.push(path);
-    }
-    // Do a sort to get a more consistent listing
-    paths.sort_unstable();
-    let zstd = ZstdEncoder::with_quality_and_params(
-        YieldingVec::new(),
-        Level::Default,
-        &[CParameter::enable_long_distance_matching(true)],
-    );
-    let mut builder = Builder::new(zstd);
-    // Use reproducible header mode
-    builder.mode(HeaderMode::Deterministic);
-    for path in paths {
-        let rel_path = path.strip_prefix(pgdata_path)?;
-        if rel_path.is_empty() {
-            // The top directory should not be compressed,
-            // the tar crate doesn't like that
-            continue;
-        }
-        builder.append_path_with_name(&path, rel_path).await?;
-    }
-    let mut zstd = builder.into_inner().await?;
-    zstd.shutdown().await?;
-    let compressed = zstd.into_inner();
-    let compressed_len = compressed.buf.len();
-    const INITDB_TAR_ZST_WARN_LIMIT: usize = 2_000_000;
-    if compressed_len > INITDB_TAR_ZST_WARN_LIMIT {
-        warn!("compressed {INITDB_PATH} size of {compressed_len} is above limit {INITDB_TAR_ZST_WARN_LIMIT}.");
-    }
-    Ok(compressed.buf)
-}
--- a/pageserver/src/keyspace.rs
+++ b/pageserver/src/keyspace.rs
@@ -5,7 +5,7 @@ use std::ops::Range;
 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct KeySpace {
    /// Contiguous ranges of keys that belong to the key space. In key order,
    /// and with no overlap.
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -25,6 +25,7 @@ pub mod walingest;
 pub mod walrecord;
 pub mod walredo;

+pub mod client;
 pub mod failpoint_support;

 use crate::task_mgr::TaskKind;
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -21,7 +21,6 @@ use serde::{Deserialize, Serialize};
 use std::collections::{hash_map, HashMap, HashSet};
 use std::ops::ControlFlow;
 use std::ops::Range;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, trace, warn};
 use utils::bin_ser::DeserializeError;
 use utils::{bin_ser::BeSer, lsn::Lsn};
@@ -366,7 +365,6 @@ impl Timeline {
    pub async fn find_lsn_for_timestamp(
        &self,
        search_timestamp: TimestampTz,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> Result<LsnForTimestamp, PageReconstructError> {
        let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
@@ -385,9 +383,6 @@ impl Timeline {
        let mut found_smaller = false;
        let mut found_larger = false;
        while low < high {
-            if cancel.is_cancelled() {
-                return Err(PageReconstructError::Cancelled);
-            }
            // cannot overflow, high and low are both smaller than u64::MAX / 2
            let mid = (high + low) / 2;

@@ -1754,6 +1749,7 @@ const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.

+/// Guaranteed to return `Ok()` if [`is_rel_block_key`] returns `true` for `key`.
 pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
    Ok(match key.field1 {
        0x00 => (
@@ -1769,7 +1765,8 @@ pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
    })
 }

-fn is_rel_block_key(key: Key) -> bool {
+/// See [`key_to_rel_block`].
+pub fn is_rel_block_key(key: Key) -> bool {
    key.field1 == 0x00 && key.field4 != 0
 }

--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -138,14 +138,6 @@ pub struct GcResult {

    #[serde(serialize_with = "serialize_duration_as_millis")]
    pub elapsed: Duration,
-
-    /// The layers which were garbage collected.
-    ///
-    /// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be
-    /// dropped in tests.
-    #[cfg(feature = "testing")]
-    #[serde(skip)]
-    pub(crate) doomed_layers: Vec<crate::tenant::storage_layer::Layer>,
 }

 // helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds
@@ -166,11 +158,5 @@ impl AddAssign for GcResult {
        self.layers_removed += other.layers_removed;

        self.elapsed += other.elapsed;
-
-        #[cfg(feature = "testing")]
-        {
-            let mut other = other;
-            self.doomed_layers.append(&mut other.doomed_layers);
-        }
    }
 }
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -12,26 +12,42 @@
 //!

 use anyhow::{bail, Context};
-use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
-use enumset::EnumSet;
 use futures::FutureExt;
 use pageserver_api::models::TimelineState;
 use remote_storage::DownloadError;
 use remote_storage::GenericRemoteStorage;
-use std::fmt;
 use storage_broker::BrokerClientChannel;
 use tokio::runtime::Handle;
 use tokio::sync::watch;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
-use utils::backoff;
 use utils::completion;
 use utils::crashsafe::path_with_suffix_extension;
 use utils::fs_ext;
 use utils::sync::gate::Gate;

+use std::cmp::min;
+use std::collections::hash_map::Entry;
+use std::collections::BTreeSet;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::fmt::Debug;
+use std::fmt::Display;
+use std::fs;
+use std::fs::File;
+use std::io;
+use std::ops::Bound::Included;
+use std::process::Command;
+use std::process::Stdio;
+use std::sync::atomic::AtomicU64;
+use std::sync::atomic::Ordering;
+use std::sync::Arc;
+use std::sync::MutexGuard;
+use std::sync::{Mutex, RwLock};
+use std::time::{Duration, Instant};
+
 use self::config::AttachedLocationConfig;
 use self::config::AttachmentMode;
 use self::config::LocationConf;
@@ -65,35 +81,14 @@ use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
 use crate::tenant::storage_layer::DeltaLayer;
 use crate::tenant::storage_layer::ImageLayer;
 use crate::InitializationOrder;
-use std::cmp::min;
-use std::collections::hash_map::Entry;
-use std::collections::BTreeSet;
-use std::collections::HashMap;
-use std::collections::HashSet;
-use std::fmt::Debug;
-use std::fmt::Display;
-use std::fs;
-use std::fs::File;
-use std::io;
-use std::ops::Bound::Included;
-use std::process::Stdio;
-use std::sync::atomic::AtomicU64;
-use std::sync::atomic::Ordering;
-use std::sync::Arc;
-use std::sync::MutexGuard;
-use std::sync::{Mutex, RwLock};
-use std::time::{Duration, Instant};

 use crate::tenant::timeline::delete::DeleteTimelineFlow;
 use crate::tenant::timeline::uninit::cleanup_timeline_directory;
 use crate::virtual_file::VirtualFile;
 use crate::walredo::PostgresRedoManager;
 use crate::TEMP_FILE_SUFFIX;
-use once_cell::sync::Lazy;
 pub use pageserver_api::models::TenantState;
-use tokio::sync::Semaphore;

-static INIT_DB_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(8));
 use toml_edit;
 use utils::{
    crashsafe,
@@ -405,36 +400,6 @@ pub enum CreateTimelineError {
    Other(#[from] anyhow::Error),
 }

-#[derive(thiserror::Error, Debug)]
-enum InitdbError {
-    Other(anyhow::Error),
-    Cancelled,
-    Spawn(std::io::Result<()>),
-    Failed(std::process::ExitStatus, Vec<u8>),
-}
-
-impl fmt::Display for InitdbError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            InitdbError::Cancelled => write!(f, "Operation was cancelled"),
-            InitdbError::Spawn(e) => write!(f, "Spawn error: {:?}", e),
-            InitdbError::Failed(status, stderr) => write!(
-                f,
-                "Command failed with status {:?}: {}",
-                status,
-                String::from_utf8_lossy(stderr)
-            ),
-            InitdbError::Other(e) => write!(f, "Error: {:?}", e),
-        }
-    }
-}
-
-impl From<std::io::Error> for InitdbError {
-    fn from(error: std::io::Error) -> Self {
-        InitdbError::Spawn(Err(error))
-    }
-}
-
 struct TenantDirectoryScan {
    sorted_timelines_to_load: Vec<(TimelineId, TimelineMetadata)>,
    timelines_to_resume_deletion: Vec<(TimelineId, Option<TimelineMetadata>)>,
@@ -765,7 +730,7 @@ impl Tenant {
    ///
    async fn attach(
        self: &Arc<Tenant>,
-        init_order: Option<InitializationOrder>,
+        mut init_order: Option<InitializationOrder>,
        preload: Option<TenantPreload>,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
@@ -782,6 +747,11 @@ impl Tenant {
            }
        };

+        // Signal that we have completed remote phase
+        init_order
+            .as_mut()
+            .and_then(|x| x.initial_tenant_load_remote.take());
+
        let mut timelines_to_resume_deletions = vec![];

        let mut remote_index_and_client = HashMap::new();
@@ -1659,7 +1629,6 @@ impl Tenant {
        target_timeline_id: Option<TimelineId>,
        horizon: u64,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<GcResult> {
        // Don't start doing work during shutdown
@@ -1682,7 +1651,7 @@ impl Tenant {
            }
        }

-        self.gc_iteration_internal(target_timeline_id, horizon, pitr, cancel, ctx)
+        self.gc_iteration_internal(target_timeline_id, horizon, pitr, ctx)
            .await
    }

@@ -1730,7 +1699,7 @@ impl Tenant {

        for (timeline_id, timeline) in &timelines_to_compact {
            timeline
-                .compact(cancel, EnumSet::empty(), ctx)
+                .compact(cancel, ctx)
                .instrument(info_span!("compact_timeline", %timeline_id))
                .await?;
        }
@@ -1746,6 +1715,10 @@ impl Tenant {
        self.current_state() == TenantState::Active
    }

+    pub fn generation(&self) -> Generation {
+        self.generation
+    }
+
    /// Changes tenant status to active, unless shutdown was already requested.
    ///
    /// `background_jobs_can_start` is an optional barrier set to a value during pageserver startup
@@ -1885,7 +1858,6 @@ impl Tenant {
                });
            })
        };
-        // test_long_timeline_create_then_tenant_delete is leaning on this message
        tracing::info!("Waiting for timelines...");
        while let Some(res) = js.join_next().await {
            match res {
@@ -2600,30 +2572,14 @@ impl Tenant {
        target_timeline_id: Option<TimelineId>,
        horizon: u64,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<GcResult> {
        let mut totals: GcResult = Default::default();
        let now = Instant::now();

-        let gc_timelines = match self
-            .refresh_gc_info_internal(target_timeline_id, horizon, pitr, cancel, ctx)
-            .await
-        {
-            Ok(result) => result,
-            Err(e) => {
-                if let Some(PageReconstructError::Cancelled) =
-                    e.downcast_ref::<PageReconstructError>()
-                {
-                    // Handle cancellation
-                    totals.elapsed = now.elapsed();
-                    return Ok(totals);
-                } else {
-                    // Propagate other errors
-                    return Err(e);
-                }
-            }
-        };
+        let gc_timelines = self
+            .refresh_gc_info_internal(target_timeline_id, horizon, pitr, ctx)
+            .await?;

        crate::failpoint_support::sleep_millis_async!(
            "gc_iteration_internal_after_getting_gc_timelines"
@@ -2638,14 +2594,16 @@ impl Tenant {

        // Perform GC for each timeline.
        //
-        // Note that we don't hold the `Tenant::gc_cs` lock here because we don't want to delay the
-        // branch creation task, which requires the GC lock. A GC iteration can run concurrently
-        // with branch creation.
+        // Note that we don't hold the GC lock here because we don't want
+        // to delay the branch creation task, which requires the GC lock.
+        // A timeline GC iteration can be slow because it may need to wait for
+        // compaction (both require `layer_removal_cs` lock),
+        // but the GC iteration can run concurrently with branch creation.
        //
-        // See comments in [`Tenant::branch_timeline`] for more information about why branch
-        // creation task can run concurrently with timeline's GC iteration.
+        // See comments in [`Tenant::branch_timeline`] for more information
+        // about why branch creation task can run concurrently with timeline's GC iteration.
        for timeline in gc_timelines {
-            if task_mgr::is_shutdown_requested() || cancel.is_cancelled() {
+            if task_mgr::is_shutdown_requested() {
                // We were requested to shut down. Stop and return with the progress we
                // made.
                break;
@@ -2665,7 +2623,6 @@ impl Tenant {
    /// This is usually executed as part of periodic gc, but can now be triggered more often.
    pub async fn refresh_gc_info(
        &self,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Vec<Arc<Timeline>>> {
        // since this method can now be called at different rates than the configured gc loop, it
@@ -2677,7 +2634,7 @@ impl Tenant {
        // refresh all timelines
        let target_timeline_id = None;

-        self.refresh_gc_info_internal(target_timeline_id, horizon, pitr, cancel, ctx)
+        self.refresh_gc_info_internal(target_timeline_id, horizon, pitr, ctx)
            .await
    }

@@ -2686,7 +2643,6 @@ impl Tenant {
        target_timeline_id: Option<TimelineId>,
        horizon: u64,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Vec<Arc<Timeline>>> {
        // grab mutex to prevent new timelines from being created here.
@@ -2760,7 +2716,7 @@ impl Tenant {
                    .map(|&x| x.1)
                    .collect();
                timeline
-                    .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
+                    .update_gc_info(branchpoints, cutoff, pitr, ctx)
                    .await?;

                gc_timelines.push(timeline);
@@ -2923,10 +2879,10 @@ impl Tenant {
    }

    /// - run initdb to init temporary instance and get bootstrap data
-    /// - after initialization completes, tar up the temp dir and upload it to S3.
+    /// - after initialization completes, remove the temp dir.
    ///
    /// The caller is responsible for activating the returned timeline.
-    pub(crate) async fn bootstrap_timeline(
+    async fn bootstrap_timeline(
        &self,
        timeline_id: TimelineId,
        pg_version: u32,
@@ -2938,7 +2894,7 @@ impl Tenant {
        };
        // create a `tenant/{tenant_id}/timelines/basebackup-{timeline_id}.{TEMP_FILE_SUFFIX}/`
        // temporary directory for basebackup files for the given timeline.
-        let pgdata_path = path_with_suffix_extension(
+        let initdb_path = path_with_suffix_extension(
            self.conf
                .timelines_path(&self.tenant_id)
                .join(format!("basebackup-{timeline_id}")),
@@ -2947,45 +2903,22 @@ impl Tenant {

        // an uninit mark was placed before, nothing else can access this timeline files
        // current initdb was not run yet, so remove whatever was left from the previous runs
-        if pgdata_path.exists() {
-            fs::remove_dir_all(&pgdata_path).with_context(|| {
-                format!("Failed to remove already existing initdb directory: {pgdata_path}")
+        if initdb_path.exists() {
+            fs::remove_dir_all(&initdb_path).with_context(|| {
+                format!("Failed to remove already existing initdb directory: {initdb_path}")
            })?;
        }
        // Init temporarily repo to get bootstrap data, this creates a directory in the `initdb_path` path
-        run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?;
+        run_initdb(self.conf, &initdb_path, pg_version)?;
        // this new directory is very temporary, set to remove it immediately after bootstrap, we don't need it
        scopeguard::defer! {
-            if let Err(e) = fs::remove_dir_all(&pgdata_path) {
+            if let Err(e) = fs::remove_dir_all(&initdb_path) {
                // this is unlikely, but we will remove the directory on pageserver restart or another bootstrap call
-                error!("Failed to remove temporary initdb directory '{pgdata_path}': {e}");
+                error!("Failed to remove temporary initdb directory '{initdb_path}': {e}");
            }
        }
-        let pgdata_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
-
-        // Upload the created data dir to S3
-        if let Some(storage) = &self.remote_storage {
-            let pgdata_zstd = import_datadir::create_tar_zst(&pgdata_path).await?;
-            let pgdata_zstd = Bytes::from(pgdata_zstd);
-            backoff::retry(
-                || async {
-                    self::remote_timeline_client::upload_initdb_dir(
-                        storage,
-                        &self.tenant_id,
-                        &timeline_id,
-                        pgdata_zstd.clone(),
-                    )
-                    .await
-                },
-                |_| false,
-                3,
-                u32::MAX,
-                "persist_initdb_tar_zst",
-                // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-                backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
-            )
-            .await?;
-        }
+        let pgdata_path = &initdb_path;
+        let pgdata_lsn = import_datadir::get_lsn_from_controlfile(pgdata_path)?.align();

        // Import the contents of the data directory at the initial checkpoint
        // LSN, and any WAL after that.
@@ -3015,7 +2948,7 @@ impl Tenant {

        import_datadir::import_timeline_from_postgres_datadir(
            unfinished_timeline,
-            &pgdata_path,
+            pgdata_path,
            pgdata_lsn,
            ctx,
        )
@@ -3196,7 +3129,6 @@ impl Tenant {
        // (only if it is shorter than the real cutoff).
        max_retention_period: Option<u64>,
        cause: LogicalSizeCalculationCause,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<size::ModelInputs> {
        let logical_sizes_at_once = self
@@ -3219,7 +3151,6 @@ impl Tenant {
            max_retention_period,
            &mut shared_cache,
            cause,
-            cancel,
            ctx,
        )
        .await
@@ -3232,10 +3163,9 @@ impl Tenant {
    pub async fn calculate_synthetic_size(
        &self,
        cause: LogicalSizeCalculationCause,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<u64> {
-        let inputs = self.gather_size_inputs(None, cause, cancel, ctx).await?;
+        let inputs = self.gather_size_inputs(None, cause, ctx).await?;

        let size = inputs.calculate()?;

@@ -3417,54 +3347,42 @@ fn rebase_directory(

 /// Create the cluster temporarily in 'initdbpath' directory inside the repository
 /// to get bootstrap data for timeline initialization.
-async fn run_initdb(
+fn run_initdb(
    conf: &'static PageServerConf,
    initdb_target_dir: &Utf8Path,
    pg_version: u32,
-    cancel: &CancellationToken,
-) -> Result<(), InitdbError> {
-    let initdb_bin_path = conf
-        .pg_bin_dir(pg_version)
-        .map_err(InitdbError::Other)?
-        .join("initdb");
-    let initdb_lib_dir = conf.pg_lib_dir(pg_version).map_err(InitdbError::Other)?;
+) -> anyhow::Result<()> {
+    let initdb_bin_path = conf.pg_bin_dir(pg_version)?.join("initdb");
+    let initdb_lib_dir = conf.pg_lib_dir(pg_version)?;
    info!(
        "running {} in {}, libdir: {}",
        initdb_bin_path, initdb_target_dir, initdb_lib_dir,
    );

-    let _permit = INIT_DB_SEMAPHORE.acquire().await;
-
-    let initdb_command = tokio::process::Command::new(&initdb_bin_path)
+    let initdb_output = Command::new(&initdb_bin_path)
        .args(["-D", initdb_target_dir.as_ref()])
        .args(["-U", &conf.superuser])
        .args(["-E", "utf8"])
        .arg("--no-instructions")
+        // This is only used for a temporary installation that is deleted shortly after,
+        // so no need to fsync it
        .arg("--no-sync")
        .env_clear()
        .env("LD_LIBRARY_PATH", &initdb_lib_dir)
        .env("DYLD_LIBRARY_PATH", &initdb_lib_dir)
-        .stdout(Stdio::piped())
-        .stderr(Stdio::piped())
-        // If the `select!` below doesn't finish the `wait_with_output`,
-        // let the task get `wait()`ed for asynchronously by tokio.
-        // This means there is a slim chance we can go over the INIT_DB_SEMAPHORE.
-        // TODO: fix for this is non-trivial, see
-        // https://github.com/neondatabase/neon/pull/5921#pullrequestreview-1750858021
-        //
-        .kill_on_drop(true)
-        .spawn()?;
-
-    tokio::select! {
-        initdb_output = initdb_command.wait_with_output() => {
-            let initdb_output = initdb_output?;
-            if !initdb_output.status.success() {
-                return Err(InitdbError::Failed(initdb_output.status, initdb_output.stderr));
-            }
-        }
-        _ = cancel.cancelled() => {
-            return Err(InitdbError::Cancelled);
-        }
+        .stdout(Stdio::null())
+        .output()
+        .with_context(|| {
+            format!(
+                "failed to execute {} at target dir {}",
+                initdb_bin_path, initdb_target_dir,
+            )
+        })?;
+    if !initdb_output.status.success() {
+        bail!(
+            "initdb failed: '{}'",
+            String::from_utf8_lossy(&initdb_output.stderr)
+        );
    }

    Ok(())
@@ -3510,7 +3428,6 @@ pub async fn dump_layerfile_from_path(
 pub(crate) mod harness {
    use bytes::{Bytes, BytesMut};
    use once_cell::sync::OnceCell;
-    use pageserver_api::shard::ShardIndex;
    use std::fs;
    use std::sync::Arc;
    use utils::logging;
@@ -3577,7 +3494,6 @@ pub(crate) mod harness {
        pub tenant_conf: TenantConf,
        pub tenant_id: TenantId,
        pub generation: Generation,
-        pub shard: ShardIndex,
        pub remote_storage: GenericRemoteStorage,
        pub remote_fs_dir: Utf8PathBuf,
        pub deletion_queue: MockDeletionQueue,
@@ -3637,7 +3553,6 @@ pub(crate) mod harness {
                tenant_conf,
                tenant_id,
                generation: Generation::new(0xdeadbeef),
-                shard: ShardIndex::unsharded(),
                remote_storage,
                remote_fs_dir,
                deletion_queue,
@@ -4022,13 +3937,7 @@ mod tests {
        // and compaction works. But it does set the 'cutoff' point so that the cross check
        // below should fail.
        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

        // try to branch at lsn 25, should fail because we already garbage collected the data
@@ -4131,13 +4040,7 @@ mod tests {
        tline.set_broken("test".to_owned());

        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

        // The branchpoints should contain all timelines, even ones marked
@@ -4183,13 +4086,7 @@ mod tests {
            .expect("Should have a local timeline");
        // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50
        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;
        assert!(newtline.get(*TEST_KEY, Lsn(0x25), &ctx).await.is_ok());

@@ -4217,13 +4114,7 @@ mod tests {

        // run gc on parent
        tenant
-            .gc_iteration(
-                Some(TIMELINE_ID),
-                0x10,
-                Duration::ZERO,
-                &CancellationToken::new(),
-                &ctx,
-            )
+            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

        // Check that the data is still accessible on the branch.
@@ -4412,9 +4303,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        let writer = tline.writer().await;
        writer
@@ -4429,9 +4318,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        let writer = tline.writer().await;
        writer
@@ -4446,9 +4333,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        let writer = tline.writer().await;
        writer
@@ -4463,9 +4348,7 @@ mod tests {
        drop(writer);

        tline.freeze_and_flush().await?;
-        tline
-            .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-            .await?;
+        tline.compact(&CancellationToken::new(), &ctx).await?;

        assert_eq!(
            tline.get(*TEST_KEY, Lsn(0x10), &ctx).await?,
@@ -4533,18 +4416,10 @@ mod tests {
            let cutoff = tline.get_last_record_lsn();

            tline
-                .update_gc_info(
-                    Vec::new(),
-                    cutoff,
-                    Duration::ZERO,
-                    &CancellationToken::new(),
-                    &ctx,
-                )
+                .update_gc_info(Vec::new(), cutoff, Duration::ZERO, &ctx)
                .await?;
            tline.freeze_and_flush().await?;
-            tline
-                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-                .await?;
+            tline.compact(&CancellationToken::new(), &ctx).await?;
            tline.gc().await?;
        }

@@ -4621,18 +4496,10 @@ mod tests {
            // Perform a cycle of flush, compact, and GC
            let cutoff = tline.get_last_record_lsn();
            tline
-                .update_gc_info(
-                    Vec::new(),
-                    cutoff,
-                    Duration::ZERO,
-                    &CancellationToken::new(),
-                    &ctx,
-                )
+                .update_gc_info(Vec::new(), cutoff, Duration::ZERO, &ctx)
                .await?;
            tline.freeze_and_flush().await?;
-            tline
-                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-                .await?;
+            tline.compact(&CancellationToken::new(), &ctx).await?;
            tline.gc().await?;
        }

@@ -4719,18 +4586,10 @@ mod tests {
            // Perform a cycle of flush, compact, and GC
            let cutoff = tline.get_last_record_lsn();
            tline
-                .update_gc_info(
-                    Vec::new(),
-                    cutoff,
-                    Duration::ZERO,
-                    &CancellationToken::new(),
-                    &ctx,
-                )
+                .update_gc_info(Vec::new(), cutoff, Duration::ZERO, &ctx)
                .await?;
            tline.freeze_and_flush().await?;
-            tline
-                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
-                .await?;
+            tline.compact(&CancellationToken::new(), &ctx).await?;
            tline.gc().await?;
        }

--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -10,7 +10,6 @@
 //!
 use anyhow::Context;
 use pageserver_api::models;
-use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
 use serde::{Deserialize, Serialize};
 use std::num::NonZeroU64;
 use std::time::Duration;
@@ -89,14 +88,6 @@ pub(crate) struct LocationConf {
    /// The location-specific part of the configuration, describes the operating
    /// mode of this pageserver for this tenant.
    pub(crate) mode: LocationMode,
-
-    /// The detailed shard identity.  This structure is already scoped within
-    /// a TenantShardId, but we need the full ShardIdentity to enable calculating
-    /// key->shard mappings.
-    #[serde(default = "ShardIdentity::unsharded")]
-    #[serde(skip_serializing_if = "ShardIdentity::is_unsharded")]
-    pub(crate) shard: ShardIdentity,
-
    /// The pan-cluster tenant configuration, the same on all locations
    pub(crate) tenant_conf: TenantConfOpt,
 }
@@ -169,8 +160,6 @@ impl LocationConf {
                generation,
                attach_mode: AttachmentMode::Single,
            }),
-            // Legacy configuration loads are always from tenants created before sharding existed.
-            shard: ShardIdentity::unsharded(),
            tenant_conf,
        }
    }
@@ -198,7 +187,6 @@ impl LocationConf {

        fn get_generation(conf: &'_ models::LocationConfig) -> Result<Generation, anyhow::Error> {
            conf.generation
-                .map(Generation::new)
                .ok_or_else(|| anyhow::anyhow!("Generation must be set when attaching"))
        }

@@ -238,21 +226,7 @@ impl LocationConf {
            }
        };

-        let shard = if conf.shard_count == 0 {
-            ShardIdentity::unsharded()
-        } else {
-            ShardIdentity::new(
-                ShardNumber(conf.shard_number),
-                ShardCount(conf.shard_count),
-                ShardStripeSize(conf.shard_stripe_size),
-            )?
-        };
-
-        Ok(Self {
-            shard,
-            mode,
-            tenant_conf,
-        })
+        Ok(Self { mode, tenant_conf })
    }
 }

@@ -267,7 +241,6 @@ impl Default for LocationConf {
                attach_mode: AttachmentMode::Single,
            }),
            tenant_conf: TenantConfOpt::default(),
-            shard: ShardIdentity::unsharded(),
        }
    }
 }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -792,6 +792,8 @@ pub(crate) async fn set_new_tenant_config(
 impl TenantManager {
    /// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or is not fitting to the query.
    /// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
+    ///
+    /// This method is cancel-safe.
    pub(crate) fn get_attached_tenant_shard(
        &self,
        tenant_shard_id: TenantShardId,
@@ -1395,7 +1397,8 @@ pub(crate) enum TenantMapListError {
 ///
 /// Get list of tenants, for the mgmt API
 ///
-pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapListError> {
+pub(crate) async fn list_tenants(
+) -> Result<Vec<(TenantId, TenantState, Generation)>, TenantMapListError> {
    let tenants = TENANTS.read().unwrap();
    let m = match &*tenants {
        TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
@@ -1403,12 +1406,12 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, Tenan
    };
    Ok(m.iter()
        .filter_map(|(id, tenant)| match tenant {
-            TenantSlot::Attached(tenant) => Some((id, tenant.current_state())),
+            TenantSlot::Attached(tenant) => Some((id, tenant.current_state(), tenant.generation())),
            TenantSlot::Secondary => None,
            TenantSlot::InProgress(_) => None,
        })
        // TODO(sharding): make callers of this function shard-aware
-        .map(|(k, v)| (k.tenant_id, v))
+        .map(|(a, b, c)| (a.tenant_id, b, c))
        .collect())
 }

@@ -1942,7 +1945,6 @@ pub(crate) async fn immediate_gc(
    tenant_id: TenantId,
    timeline_id: TimelineId,
    gc_req: TimelineGcRequest,
-    cancel: CancellationToken,
    ctx: &RequestContext,
 ) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
    let guard = TENANTS.read().unwrap();
@@ -1959,7 +1961,6 @@ pub(crate) async fn immediate_gc(
    // Run in task_mgr to avoid race with tenant_detach operation
    let ctx = ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
    let (task_done, wait_task_done) = tokio::sync::oneshot::channel();
-    // TODO: spawning is redundant now, need to hold the gate
    task_mgr::spawn(
        &tokio::runtime::Handle::current(),
        TaskKind::GarbageCollector,
@@ -1969,40 +1970,12 @@ pub(crate) async fn immediate_gc(
        false,
        async move {
            fail::fail_point!("immediate_gc_task_pre");
-
-            #[allow(unused_mut)]
-            let mut result = tenant
-                .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
+            let result = tenant
+                .gc_iteration(Some(timeline_id), gc_horizon, pitr, &ctx)
                .instrument(info_span!("manual_gc", %tenant_id, %timeline_id))
                .await;
                // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
                // better once the types support it.
-
-            #[cfg(feature = "testing")]
-            {
-                if let Ok(result) = result.as_mut() {
-                    // why not futures unordered? it seems it needs very much the same task structure
-                    // but would only run on single task.
-                    let mut js = tokio::task::JoinSet::new();
-                    for layer in std::mem::take(&mut result.doomed_layers) {
-                        js.spawn(layer.wait_drop());
-                    }
-                    tracing::info!(total = js.len(), "starting to wait for the gc'd layers to be dropped");
-                    while let Some(res) = js.join_next().await {
-                        res.expect("wait_drop should not panic");
-                    }
-                }
-
-                let timeline = tenant.get_timeline(timeline_id, false).ok();
-                let rtc = timeline.as_ref().and_then(|x| x.remote_client.as_ref());
-
-                if let Some(rtc) = rtc {
-                    // layer drops schedule actions on remote timeline client to actually do the
-                    // deletions; don't care just exit fast about the shutdown error
-                    drop(rtc.wait_completion().await);
-                }
-            }
-
            match task_done.send(result) {
                Ok(_) => (),
                Err(result) => error!("failed to send gc result: {result:?}"),
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -188,10 +188,8 @@ use anyhow::Context;
 use camino::Utf8Path;
 use chrono::{NaiveDateTime, Utc};

-use pageserver_api::shard::ShardIndex;
 use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;
-pub(crate) use upload::upload_initdb_dir;
 use utils::backoff::{
    self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
 };
@@ -251,8 +249,6 @@ pub(crate) const FAILED_REMOTE_OP_RETRIES: u32 = 10;
 // retries. Uploads and deletions are retried forever, though.
 pub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;

-pub(crate) const INITDB_PATH: &str = "initdb.tar.zst";
-
 pub enum MaybeDeletedIndexPart {
    IndexPart(IndexPart),
    Deleted(IndexPart),
@@ -403,11 +399,6 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    pub(crate) fn get_shard_index(&self) -> ShardIndex {
-        // TODO: carry this on the struct
-        ShardIndex::unsharded()
-    }
-
    pub fn remote_consistent_lsn_projected(&self) -> Option<Lsn> {
        match &mut *self.upload_queue.lock().unwrap() {
            UploadQueue::Uninitialized => None,
@@ -471,7 +462,6 @@ impl RemoteTimelineClient {
            &self.storage_impl,
            &self.tenant_id,
            &self.timeline_id,
-            self.get_shard_index(),
            self.generation,
            cancel,
        )
@@ -664,10 +654,10 @@ impl RemoteTimelineClient {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;

-        let with_metadata =
+        let with_generations =
            self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned());

-        self.schedule_deletion_of_unlinked0(upload_queue, with_metadata);
+        self.schedule_deletion_of_unlinked0(upload_queue, with_generations);

        // Launch the tasks immediately, if possible
        self.launch_queued_tasks(upload_queue);
@@ -702,7 +692,7 @@ impl RemoteTimelineClient {
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
        names: I,
-    ) -> Vec<(LayerFileName, LayerFileMetadata)>
+    ) -> Vec<(LayerFileName, Generation)>
    where
        I: IntoIterator<Item = LayerFileName>,
    {
@@ -710,17 +700,16 @@ impl RemoteTimelineClient {
        // so we don't need update it. Just serialize it.
        let metadata = upload_queue.latest_metadata.clone();

-        // Decorate our list of names with each name's metadata, dropping
-        // names that are unexpectedly missing from our metadata.  This metadata
-        // is later used when physically deleting layers, to construct key paths.
-        let with_metadata: Vec<_> = names
+        // Decorate our list of names with each name's generation, dropping
+        // names that are unexpectedly missing from our metadata.
+        let with_generations: Vec<_> = names
            .into_iter()
            .filter_map(|name| {
                let meta = upload_queue.latest_files.remove(&name);

                if let Some(meta) = meta {
                    upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
-                    Some((name, meta))
+                    Some((name, meta.generation))
                } else {
                    // This can only happen if we forgot to to schedule the file upload
                    // before scheduling the delete. Log it because it is a rare/strange
@@ -733,10 +722,9 @@ impl RemoteTimelineClient {
            .collect();

        #[cfg(feature = "testing")]
-        for (name, metadata) in &with_metadata {
-            let gen = metadata.generation;
-            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) {
-                if unexpected == gen {
+        for (name, gen) in &with_generations {
+            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), *gen) {
+                if &unexpected == gen {
                    tracing::error!("{name} was unlinked twice with same generation");
                } else {
                    tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}");
@@ -751,14 +739,14 @@ impl RemoteTimelineClient {
            self.schedule_index_upload(upload_queue, metadata);
        }

-        with_metadata
+        with_generations
    }

    /// Schedules deletion for layer files which have previously been unlinked from the
    /// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`].
    pub(crate) fn schedule_deletion_of_unlinked(
        self: &Arc<Self>,
-        layers: Vec<(LayerFileName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, Generation)>,
    ) -> anyhow::Result<()> {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;
@@ -771,22 +759,16 @@ impl RemoteTimelineClient {
    fn schedule_deletion_of_unlinked0(
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
-        with_metadata: Vec<(LayerFileName, LayerFileMetadata)>,
+        with_generations: Vec<(LayerFileName, Generation)>,
    ) {
-        for (name, meta) in &with_metadata {
-            info!(
-                "scheduling deletion of layer {}{} (shard {})",
-                name,
-                meta.generation.get_suffix(),
-                meta.shard
-            );
+        for (name, gen) in &with_generations {
+            info!("scheduling deletion of layer {}{}", name, gen.get_suffix());
        }

        #[cfg(feature = "testing")]
-        for (name, meta) in &with_metadata {
-            let gen = meta.generation;
+        for (name, gen) in &with_generations {
            match upload_queue.dangling_files.remove(name) {
-                Some(same) if same == gen => { /* expected */ }
+                Some(same) if &same == gen => { /* expected */ }
                Some(other) => {
                    tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}");
                }
@@ -798,7 +780,7 @@ impl RemoteTimelineClient {

        // schedule the actual deletions
        let op = UploadOp::Delete(Delete {
-            layers: with_metadata,
+            layers: with_generations,
        });
        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
@@ -827,29 +809,23 @@ impl RemoteTimelineClient {
        Ok(())
    }

+    ///
    /// Wait for all previously scheduled uploads/deletions to complete
-    pub(crate) async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
+    ///
+    pub async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
        let mut receiver = {
            let mut guard = self.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut()?;
-            self.schedule_barrier0(upload_queue)
+            self.schedule_barrier(upload_queue)
        };

        if receiver.changed().await.is_err() {
            anyhow::bail!("wait_completion aborted because upload queue was stopped");
        }
-
        Ok(())
    }

-    pub(crate) fn schedule_barrier(self: &Arc<Self>) -> anyhow::Result<()> {
-        let mut guard = self.upload_queue.lock().unwrap();
-        let upload_queue = guard.initialized_mut()?;
-        self.schedule_barrier0(upload_queue);
-        Ok(())
-    }
-
-    fn schedule_barrier0(
+    fn schedule_barrier(
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
    ) -> tokio::sync::watch::Receiver<()> {
@@ -865,56 +841,6 @@ impl RemoteTimelineClient {
        receiver
    }

-    /// Wait for all previously scheduled operations to complete, and then stop.
-    ///
-    /// Not cancellation safe
-    pub(crate) async fn shutdown(self: &Arc<Self>) -> Result<(), StopError> {
-        // On cancellation the queue is left in ackward state of refusing new operations but
-        // proper stop is yet to be called. On cancel the original or some later task must call
-        // `stop` or `shutdown`.
-        let sg = scopeguard::guard((), |_| {
-            tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error")
-        });
-
-        let fut = {
-            let mut guard = self.upload_queue.lock().unwrap();
-            let upload_queue = match &mut *guard {
-                UploadQueue::Stopped(_) => return Ok(()),
-                UploadQueue::Uninitialized => return Err(StopError::QueueUninitialized),
-                UploadQueue::Initialized(ref mut init) => init,
-            };
-
-            // if the queue is already stuck due to a shutdown operation which was cancelled, then
-            // just don't add more of these as they would never complete.
-            //
-            // TODO: if launch_queued_tasks were to be refactored to accept a &mut UploadQueue
-            // in every place we would not have to jump through this hoop, and this method could be
-            // made cancellable.
-            if !upload_queue.shutting_down {
-                upload_queue.shutting_down = true;
-                upload_queue.queued_operations.push_back(UploadOp::Shutdown);
-                // this operation is not counted similar to Barrier
-
-                self.launch_queued_tasks(upload_queue);
-            }
-
-            upload_queue.shutdown_ready.clone().acquire_owned()
-        };
-
-        let res = fut.await;
-
-        scopeguard::ScopeGuard::into_inner(sg);
-
-        match res {
-            Ok(_permit) => unreachable!("shutdown_ready should not have been added permits"),
-            Err(_closed) => {
-                // expected
-            }
-        }
-
-        self.stop()
-    }
-
    /// Set the deleted_at field in the remote index file.
    ///
    /// This fails if the upload queue has not been `stop()`ed.
@@ -968,7 +894,6 @@ impl RemoteTimelineClient {
                    &self.storage_impl,
                    &self.tenant_id,
                    &self.timeline_id,
-                    self.get_shard_index(),
                    self.generation,
                    &index_part_with_deleted_at,
                )
@@ -1027,7 +952,6 @@ impl RemoteTimelineClient {
                    remote_layer_path(
                        &self.tenant_id,
                        &self.timeline_id,
-                        meta.shard,
                        &file_name,
                        meta.generation,
                    )
@@ -1076,12 +1000,7 @@ impl RemoteTimelineClient {
            .unwrap_or(
                // No generation-suffixed indices, assume we are dealing with
                // a legacy index.
-                remote_index_path(
-                    &self.tenant_id,
-                    &self.timeline_id,
-                    self.get_shard_index(),
-                    Generation::none(),
-                ),
+                remote_index_path(&self.tenant_id, &self.timeline_id, Generation::none()),
            );

        let remaining_layers: Vec<RemotePath> = remaining
@@ -1152,9 +1071,7 @@ impl RemoteTimelineClient {
                    upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
                }

-                UploadOp::Barrier(_) | UploadOp::Shutdown => {
-                    upload_queue.inprogress_tasks.is_empty()
-                }
+                UploadOp::Barrier(_) => upload_queue.inprogress_tasks.is_empty(),
            };

            // If we cannot launch this task, don't look any further.
@@ -1167,13 +1084,6 @@ impl RemoteTimelineClient {
                break;
            }

-            if let UploadOp::Shutdown = next_op {
-                // leave the op in the queue but do not start more tasks; it will be dropped when
-                // the stop is called.
-                upload_queue.shutdown_ready.close();
-                break;
-            }
-
            // We can launch this task. Remove it from the queue first.
            let next_op = upload_queue.queued_operations.pop_front().unwrap();

@@ -1194,7 +1104,6 @@ impl RemoteTimelineClient {
                    sender.send_replace(());
                    continue;
                }
-                UploadOp::Shutdown => unreachable!("shutdown is intentionally never popped off"),
            };

            // Assign unique ID to this task
@@ -1300,7 +1209,6 @@ impl RemoteTimelineClient {
                        &self.storage_impl,
                        &self.tenant_id,
                        &self.timeline_id,
-                        self.get_shard_index(),
                        self.generation,
                        index_part,
                    )
@@ -1321,22 +1229,20 @@ impl RemoteTimelineClient {
                    }
                    res
                }
-                UploadOp::Delete(delete) => {
-                    pausable_failpoint!("before-delete-layer-pausable");
-                    self.deletion_queue_client
-                        .push_layers(
-                            self.tenant_id,
-                            self.timeline_id,
-                            self.generation,
-                            delete.layers.clone(),
-                        )
-                        .await
-                        .map_err(|e| anyhow::anyhow!(e))
-                }
-                unexpected @ UploadOp::Barrier(_) | unexpected @ UploadOp::Shutdown => {
+                UploadOp::Delete(delete) => self
+                    .deletion_queue_client
+                    .push_layers(
+                        self.tenant_id,
+                        self.timeline_id,
+                        self.generation,
+                        delete.layers.clone(),
+                    )
+                    .await
+                    .map_err(|e| anyhow::anyhow!(e)),
+                UploadOp::Barrier(_) => {
                    // unreachable. Barrier operations are handled synchronously in
                    // launch_queued_tasks
-                    warn!("unexpected {unexpected:?} operation in perform_upload_task");
+                    warn!("unexpected Barrier operation in perform_upload_task");
                    break;
                }
            };
@@ -1430,7 +1336,7 @@ impl RemoteTimelineClient {
                    upload_queue.num_inprogress_deletions -= 1;
                    None
                }
-                UploadOp::Barrier(..) | UploadOp::Shutdown => unreachable!(),
+                UploadOp::Barrier(_) => unreachable!(),
            };

            // Launch any queued tasks that were unblocked by this one.
@@ -1485,7 +1391,7 @@ impl RemoteTimelineClient {
                    reason: "should we track deletes? positive or negative sign?",
                },
            ),
-            UploadOp::Barrier(..) | UploadOp::Shutdown => {
+            UploadOp::Barrier(_) => {
                // we do not account these
                return None;
            }
@@ -1511,13 +1417,10 @@ impl RemoteTimelineClient {
    }

    /// Close the upload queue for new operations and cancel queued operations.
-    ///
-    /// Use [`RemoteTimelineClient::shutdown`] for graceful stop.
-    ///
    /// In-progress operations will still be running after this function returns.
    /// Use `task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(timeline_id))`
    /// to wait for them to complete, after calling this function.
-    pub(crate) fn stop(&self) -> Result<(), StopError> {
+    pub fn stop(&self) -> Result<(), StopError> {
        // Whichever *task* for this RemoteTimelineClient grabs the mutex first will transition the queue
        // into stopped state, thereby dropping all off the queued *ops* which haven't become *tasks* yet.
        // The other *tasks* will come here and observe an already shut down queue and hence simply wrap up their business.
@@ -1555,8 +1458,6 @@ impl RemoteTimelineClient {
                        queued_operations: VecDeque::default(),
                        #[cfg(feature = "testing")]
                        dangling_files: HashMap::default(),
-                        shutting_down: false,
-                        shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
                    };

                    let upload_queue = std::mem::replace(
@@ -1614,14 +1515,12 @@ pub fn remote_timeline_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> R
 pub fn remote_layer_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
-    shard: ShardIndex,
    layer_file_name: &LayerFileName,
    generation: Generation,
 ) -> RemotePath {
    // Generation-aware key format
    let path = format!(
-        "tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}",
-        shard.get_suffix(),
+        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
        layer_file_name.file_name(),
        generation.get_suffix()
    );
@@ -1629,22 +1528,13 @@ pub fn remote_layer_path(
    RemotePath::from_string(&path).expect("Failed to construct path")
 }

-pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
-    RemotePath::from_string(&format!(
-        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
-    ))
-    .expect("Failed to construct path")
-}
-
 pub fn remote_index_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
-    shard: ShardIndex,
    generation: Generation,
 ) -> RemotePath {
    RemotePath::from_string(&format!(
-        "tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}",
-        shard.get_suffix(),
+        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
        IndexPart::FILE_NAME,
        generation.get_suffix()
    ))
@@ -1869,7 +1759,6 @@ mod tests {
        println!("remote_timeline_dir: {remote_timeline_dir}");

        let generation = harness.generation;
-        let shard = harness.shard;

        // Create a couple of dummy files,  schedule upload for them

@@ -1886,7 +1775,7 @@ mod tests {
                harness.conf,
                &timeline,
                name,
-                LayerFileMetadata::new(contents.len() as u64, generation, shard),
+                LayerFileMetadata::new(contents.len() as u64, generation),
            )
        }).collect::<Vec<_>>();

@@ -2035,7 +1924,7 @@ mod tests {
            harness.conf,
            &timeline,
            layer_file_name_1.clone(),
-            LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard),
+            LayerFileMetadata::new(content_1.len() as u64, harness.generation),
        );

        #[derive(Debug, PartialEq, Clone, Copy)]
@@ -2100,11 +1989,7 @@ mod tests {
        assert_eq!(actual_c, expected_c);
    }

-    async fn inject_index_part(
-        test_state: &TestSetup,
-        generation: Generation,
-        shard: ShardIndex,
-    ) -> IndexPart {
+    async fn inject_index_part(test_state: &TestSetup, generation: Generation) -> IndexPart {
        // An empty IndexPart, just sufficient to ensure deserialization will succeed
        let example_metadata = TimelineMetadata::example();
        let example_index_part = IndexPart::new(
@@ -2125,13 +2010,7 @@ mod tests {
        std::fs::create_dir_all(remote_timeline_dir).expect("creating test dir should work");

        let index_path = test_state.harness.remote_fs_dir.join(
-            remote_index_path(
-                &test_state.harness.tenant_id,
-                &TIMELINE_ID,
-                shard,
-                generation,
-            )
-            .get_path(),
+            remote_index_path(&test_state.harness.tenant_id, &TIMELINE_ID, generation).get_path(),
        );
        eprintln!("Writing {index_path}");
        std::fs::write(&index_path, index_part_bytes).unwrap();
@@ -2168,12 +2047,7 @@ mod tests {

        // Simple case: we are in generation N, load the index from generation N - 1
        let generation_n = 5;
-        let injected = inject_index_part(
-            &test_state,
-            Generation::new(generation_n - 1),
-            ShardIndex::unsharded(),
-        )
-        .await;
+        let injected = inject_index_part(&test_state, Generation::new(generation_n - 1)).await;

        assert_got_index_part(&test_state, Generation::new(generation_n), &injected).await;

@@ -2191,34 +2065,22 @@ mod tests {

        // A generation-less IndexPart exists in the bucket, we should find it
        let generation_n = 5;
-        let injected_none =
-            inject_index_part(&test_state, Generation::none(), ShardIndex::unsharded()).await;
+        let injected_none = inject_index_part(&test_state, Generation::none()).await;
        assert_got_index_part(&test_state, Generation::new(generation_n), &injected_none).await;

        // If a more recent-than-none generation exists, we should prefer to load that
-        let injected_1 =
-            inject_index_part(&test_state, Generation::new(1), ShardIndex::unsharded()).await;
+        let injected_1 = inject_index_part(&test_state, Generation::new(1)).await;
        assert_got_index_part(&test_state, Generation::new(generation_n), &injected_1).await;

        // If a more-recent-than-me generation exists, we should ignore it.
-        let _injected_10 =
-            inject_index_part(&test_state, Generation::new(10), ShardIndex::unsharded()).await;
+        let _injected_10 = inject_index_part(&test_state, Generation::new(10)).await;
        assert_got_index_part(&test_state, Generation::new(generation_n), &injected_1).await;

        // If a directly previous generation exists, _and_ an index exists in my own
        // generation, I should prefer my own generation.
-        let _injected_prev = inject_index_part(
-            &test_state,
-            Generation::new(generation_n - 1),
-            ShardIndex::unsharded(),
-        )
-        .await;
-        let injected_current = inject_index_part(
-            &test_state,
-            Generation::new(generation_n),
-            ShardIndex::unsharded(),
-        )
-        .await;
+        let _injected_prev =
+            inject_index_part(&test_state, Generation::new(generation_n - 1)).await;
+        let injected_current = inject_index_part(&test_state, Generation::new(generation_n)).await;
        assert_got_index_part(
            &test_state,
            Generation::new(generation_n),
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -9,7 +9,6 @@ use std::time::Duration;

 use anyhow::{anyhow, Context};
 use camino::Utf8Path;
-use pageserver_api::shard::ShardIndex;
 use tokio::fs;
 use tokio::io::AsyncWriteExt;
 use tokio_util::sync::CancellationToken;
@@ -54,7 +53,6 @@ pub async fn download_layer_file<'a>(
    let remote_path = remote_layer_path(
        &tenant_id,
        &timeline_id,
-        layer_metadata.shard,
        layer_file_name,
        layer_metadata.generation,
    );
@@ -215,11 +213,10 @@ async fn do_download_index_part(
    storage: &GenericRemoteStorage,
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
-    shard: ShardIndex,
    index_generation: Generation,
    cancel: CancellationToken,
 ) -> Result<IndexPart, DownloadError> {
-    let remote_path = remote_index_path(tenant_id, timeline_id, shard, index_generation);
+    let remote_path = remote_index_path(tenant_id, timeline_id, index_generation);

    let index_part_bytes = download_retry_forever(
        || async {
@@ -257,7 +254,6 @@ pub(super) async fn download_index_part(
    storage: &GenericRemoteStorage,
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
-    shard: ShardIndex,
    my_generation: Generation,
    cancel: CancellationToken,
 ) -> Result<IndexPart, DownloadError> {
@@ -265,15 +261,8 @@ pub(super) async fn download_index_part(

    if my_generation.is_none() {
        // Operating without generations: just fetch the generation-less path
-        return do_download_index_part(
-            storage,
-            tenant_id,
-            timeline_id,
-            shard,
-            my_generation,
-            cancel,
-        )
-        .await;
+        return do_download_index_part(storage, tenant_id, timeline_id, my_generation, cancel)
+            .await;
    }

    // Stale case: If we were intentionally attached in a stale generation, there may already be a remote
@@ -284,7 +273,6 @@ pub(super) async fn download_index_part(
        storage,
        tenant_id,
        timeline_id,
-        shard,
        my_generation,
        cancel.clone(),
    )
@@ -312,7 +300,6 @@ pub(super) async fn download_index_part(
        storage,
        tenant_id,
        timeline_id,
-        shard,
        my_generation.previous(),
        cancel.clone(),
    )
@@ -333,9 +320,8 @@ pub(super) async fn download_index_part(
    }

    // General case/fallback: if there is no index at my_generation or prev_generation, then list all index_part.json
-    // objects, and select the highest one with a generation <= my_generation.  Constructing the prefix is equivalent
-    // to constructing a full index path with no generation, because the generation is a suffix.
-    let index_prefix = remote_index_path(tenant_id, timeline_id, shard, Generation::none());
+    // objects, and select the highest one with a generation <= my_generation.
+    let index_prefix = remote_index_path(tenant_id, timeline_id, Generation::none());
    let indices = backoff::retry(
        || async { storage.list_files(Some(&index_prefix)).await },
        |_| false,
@@ -361,21 +347,14 @@ pub(super) async fn download_index_part(
    match max_previous_generation {
        Some(g) => {
            tracing::debug!("Found index_part in generation {g:?}");
-            do_download_index_part(storage, tenant_id, timeline_id, shard, g, cancel).await
+            do_download_index_part(storage, tenant_id, timeline_id, g, cancel).await
        }
        None => {
            // Migration from legacy pre-generation state: we have a generation but no prior
            // attached pageservers did.  Try to load from a no-generation path.
            tracing::info!("No index_part.json* found");
-            do_download_index_part(
-                storage,
-                tenant_id,
-                timeline_id,
-                shard,
-                Generation::none(),
-                cancel,
-            )
-            .await
+            do_download_index_part(storage, tenant_id, timeline_id, Generation::none(), cancel)
+                .await
        }
    }
 }
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -12,7 +12,6 @@ use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::upload_queue::UploadQueueInitialized;
 use crate::tenant::Generation;
-use pageserver_api::shard::ShardIndex;

 use utils::lsn::Lsn;

@@ -26,8 +25,6 @@ pub struct LayerFileMetadata {
    file_size: u64,

    pub(crate) generation: Generation,
-
-    pub(crate) shard: ShardIndex,
 }

 impl From<&'_ IndexLayerMetadata> for LayerFileMetadata {
@@ -35,17 +32,15 @@ impl From<&'_ IndexLayerMetadata> for LayerFileMetadata {
        LayerFileMetadata {
            file_size: other.file_size,
            generation: other.generation,
-            shard: other.shard,
        }
    }
 }

 impl LayerFileMetadata {
-    pub fn new(file_size: u64, generation: Generation, shard: ShardIndex) -> Self {
+    pub fn new(file_size: u64, generation: Generation) -> Self {
        LayerFileMetadata {
            file_size,
            generation,
-            shard,
        }
    }

@@ -133,14 +128,6 @@ impl IndexPart {
    pub fn get_disk_consistent_lsn(&self) -> Lsn {
        self.disk_consistent_lsn
    }
-
-    pub fn from_s3_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
-        serde_json::from_slice::<IndexPart>(bytes)
-    }
-
-    pub fn to_s3_bytes(&self) -> serde_json::Result<Vec<u8>> {
-        serde_json::to_vec(self)
-    }
 }

 impl TryFrom<&UploadQueueInitialized> for IndexPart {
@@ -166,10 +153,6 @@ pub struct IndexLayerMetadata {
    #[serde(default = "Generation::none")]
    #[serde(skip_serializing_if = "Generation::is_none")]
    pub generation: Generation,
-
-    #[serde(default = "ShardIndex::unsharded")]
-    #[serde(skip_serializing_if = "ShardIndex::is_unsharded")]
-    pub shard: ShardIndex,
 }

 impl From<LayerFileMetadata> for IndexLayerMetadata {
@@ -177,7 +160,6 @@ impl From<LayerFileMetadata> for IndexLayerMetadata {
        IndexLayerMetadata {
            file_size: other.file_size,
            generation: other.generation,
-            shard: other.shard,
        }
    }
 }
@@ -205,15 +187,13 @@ mod tests {
            layer_metadata: HashMap::from([
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: 25600000,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                }),
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
                    // serde_json should always parse this but this might be a double with jq for
                    // example.
                    file_size: 9007199254741001,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                })
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
@@ -221,7 +201,7 @@ mod tests {
            deleted_at: None,
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }

@@ -245,15 +225,13 @@ mod tests {
            layer_metadata: HashMap::from([
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: 25600000,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                }),
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
                    // serde_json should always parse this but this might be a double with jq for
                    // example.
                    file_size: 9007199254741001,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                })
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
@@ -261,7 +239,7 @@ mod tests {
            deleted_at: None,
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }

@@ -286,15 +264,13 @@ mod tests {
            layer_metadata: HashMap::from([
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: 25600000,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                }),
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
                    // serde_json should always parse this but this might be a double with jq for
                    // example.
                    file_size: 9007199254741001,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                })
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
@@ -303,7 +279,7 @@ mod tests {
                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }

@@ -347,7 +323,7 @@ mod tests {
            deleted_at: None,
        };

-        let empty_layers_parsed = IndexPart::from_s3_bytes(empty_layers_json.as_bytes()).unwrap();
+        let empty_layers_parsed = serde_json::from_str::<IndexPart>(empty_layers_json).unwrap();

        assert_eq!(empty_layers_parsed, expected);
    }
@@ -370,24 +346,22 @@ mod tests {
            layer_metadata: HashMap::from([
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: 25600000,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                }),
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
                    // serde_json should always parse this but this might be a double with jq for
                    // example.
                    file_size: 9007199254741001,
-                    generation: Generation::none(),
-                    shard: ShardIndex::unsharded()
+                    generation: Generation::none()
                })
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
            deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
-                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
+                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
        };

-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
+        let part = serde_json::from_str::<IndexPart>(example).unwrap();
        assert_eq!(part, expected);
    }
 }
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -1,19 +1,15 @@
 //! Helper functions to upload files to remote storage with a RemoteStorage

 use anyhow::{bail, Context};
-use bytes::Bytes;
 use camino::Utf8Path;
 use fail::fail_point;
-use pageserver_api::shard::ShardIndex;
 use std::io::ErrorKind;
 use tokio::fs;

 use super::Generation;
 use crate::{
    config::PageServerConf,
-    tenant::remote_timeline_client::{
-        index::IndexPart, remote_index_path, remote_initdb_archive_path, remote_path,
-    },
+    tenant::remote_timeline_client::{index::IndexPart, remote_index_path, remote_path},
 };
 use remote_storage::GenericRemoteStorage;
 use utils::id::{TenantId, TimelineId};
@@ -27,7 +23,6 @@ pub(super) async fn upload_index_part<'a>(
    storage: &'a GenericRemoteStorage,
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
-    shard: ShardIndex,
    generation: Generation,
    index_part: &'a IndexPart,
 ) -> anyhow::Result<()> {
@@ -38,13 +33,12 @@ pub(super) async fn upload_index_part<'a>(
    });
    pausable_failpoint!("before-upload-index-pausable");

-    let index_part_bytes = index_part
-        .to_s3_bytes()
-        .context("serialize index part file into bytes")?;
+    let index_part_bytes =
+        serde_json::to_vec(&index_part).context("serialize index part file into bytes")?;
    let index_part_size = index_part_bytes.len();
    let index_part_bytes = tokio::io::BufReader::new(std::io::Cursor::new(index_part_bytes));

-    let remote_path = remote_index_path(tenant_id, timeline_id, shard, generation);
+    let remote_path = remote_index_path(tenant_id, timeline_id, generation);
    storage
        .upload_storage_object(Box::new(index_part_bytes), index_part_size, &remote_path)
        .await
@@ -109,22 +103,3 @@ pub(super) async fn upload_timeline_layer<'a>(

    Ok(())
 }
-
-/// Uploads the given `initdb` data to the remote storage.
-pub(crate) async fn upload_initdb_dir(
-    storage: &GenericRemoteStorage,
-    tenant_id: &TenantId,
-    timeline_id: &TimelineId,
-    initdb_dir: Bytes,
-) -> anyhow::Result<()> {
-    tracing::trace!("uploading initdb dir");
-
-    let size = initdb_dir.len();
-    let bytes = tokio::io::BufReader::new(std::io::Cursor::new(initdb_dir));
-
-    let remote_path = remote_initdb_archive_path(tenant_id, timeline_id);
-    storage
-        .upload_storage_object(bytes, size, &remote_path)
-        .await
-        .with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
-}
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -6,7 +6,6 @@ use std::sync::Arc;
 use anyhow::{bail, Context};
 use tokio::sync::oneshot::error::RecvError;
 use tokio::sync::Semaphore;
-use tokio_util::sync::CancellationToken;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;
@@ -114,12 +113,11 @@ pub(super) async fn gather_inputs(
    max_retention_period: Option<u64>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
    cause: LogicalSizeCalculationCause,
-    cancel: &CancellationToken,
    ctx: &RequestContext,
 ) -> anyhow::Result<ModelInputs> {
    // refresh is needed to update gc related pitr_cutoff and horizon_cutoff
    tenant
-        .refresh_gc_info(cancel, ctx)
+        .refresh_gc_info(ctx)
        .await
        .context("Failed to refresh gc_info before gathering inputs")?;

--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -2,7 +2,7 @@

 pub mod delta_layer;
 mod filename;
-mod image_layer;
+pub mod image_layer;
 mod inmemory_layer;
 mod layer;
 mod layer_desc;
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -69,13 +69,13 @@ use super::{AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer};
 #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct Summary {
    /// Magic value to identify this as a neon delta file. Always DELTA_FILE_MAGIC.
-    magic: u16,
-    format_version: u16,
+    pub magic: u16,
+    pub format_version: u16,

-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    key_range: Range<Key>,
-    lsn_range: Range<Lsn>,
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub key_range: Range<Key>,
+    pub lsn_range: Range<Lsn>,

    /// Block number where the 'index' part of the file begins.
    pub index_start_blk: u32,
@@ -611,6 +611,80 @@ impl Drop for DeltaLayerWriter {
    }
 }

+/// Error returned by [`DeltaLayer::rewrite_summary`].
+#[derive(thiserror::Error, Debug)]
+pub enum RewriteSummaryError {
+    /// The summary read from block 0 does not carry `DELTA_FILE_MAGIC`.
+    #[error("magic mismatch")]
+    MagicMismatch,
+    /// Any other failure: opening, reading, (de)serializing, seeking or writing.
+    #[error(transparent)]
+    Other(#[from] anyhow::Error),
+}
+
+/// Wraps an I/O error in `Other`, so `?` can be applied to `std::io::Result` values.
+impl From<std::io::Error> for RewriteSummaryError {
+    fn from(e: std::io::Error) -> Self {
+        Self::Other(anyhow::anyhow!(e))
+    }
+}
+
+impl DeltaLayer {
+    /// Rewrites, in place, the summary stored in block 0 of the delta layer file at `path`.
+    ///
+    /// The existing summary is deserialized, passed through `rewrite`, and the result is
+    /// serialized and written back at offset 0. Nothing is written unless the existing
+    /// magic equals `DELTA_FILE_MAGIC` and the new summary serializes within `PAGE_SZ` bytes.
+    ///
+    /// `rewrite` is called exactly once, hence the `FnOnce` bound.
+    ///
+    /// # Errors
+    ///
+    /// - [`RewriteSummaryError::MagicMismatch`]: the existing magic is not `DELTA_FILE_MAGIC`.
+    /// - [`RewriteSummaryError::Other`]: open/read/seek/write failure, (de)serialization
+    ///   failure, or a serialized summary that does not fit in `PAGE_SZ` bytes.
+    pub async fn rewrite_summary<F>(
+        path: &Utf8Path,
+        rewrite: F,
+        ctx: &RequestContext,
+    ) -> Result<(), RewriteSummaryError>
+    where
+        F: FnOnce(Summary) -> Summary,
+    {
+        let file = VirtualFile::open_with_options(
+            path,
+            &*std::fs::OpenOptions::new().read(true).write(true),
+        )
+        .await
+        .with_context(|| format!("Failed to open file '{}'", path))?;
+        let file = FileBlockReader::new(file);
+        let summary_blk = file.read_blk(0, ctx).await?;
+        let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context("deserialize")?;
+        // Take the file back out of the block reader; it is written to below.
+        let mut file = file.file;
+        if actual_summary.magic != DELTA_FILE_MAGIC {
+            return Err(RewriteSummaryError::MagicMismatch);
+        }
+
+        let new_summary = rewrite(actual_summary);
+
+        let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
+        Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
+        if buf.spilled() {
+            // The code in DeltaLayerWriterInner just warn!()s for this.
+            // It should probably error out as well.
+            return Err(RewriteSummaryError::Other(anyhow::anyhow!(
+                "Used more than one page size for summary buffer: {}",
+                buf.len()
+            )));
+        }
+        // Overwrite the start of block 0 with the serialized bytes held in `buf`.
+        file.seek(SeekFrom::Start(0)).await?;
+        file.write_all(&buf).await?;
+        Ok(())
+    }
+}
+
 impl DeltaLayerInner {
    /// Returns nested result following Result<Result<_, OpErr>, Critical>:
    /// - inner has the success or transient failure
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -67,20 +67,20 @@ use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer};
 /// the 'index' starts at the block indicated by 'index_start_blk'
 ///
 #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
-pub(super) struct Summary {
+pub struct Summary {
    /// Magic value to identify this as a neon image file. Always IMAGE_FILE_MAGIC.
-    magic: u16,
-    format_version: u16,
+    pub magic: u16,
+    pub format_version: u16,

-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    key_range: Range<Key>,
-    lsn: Lsn,
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub key_range: Range<Key>,
+    pub lsn: Lsn,

    /// Block number where the 'index' part of the file begins.
-    index_start_blk: u32,
+    pub index_start_blk: u32,
    /// Block within the 'index', where the B-tree root page is stored
-    index_root_blk: u32,
+    pub index_root_blk: u32,
    // the 'values' part starts after the summary header, on block 1.
 }

@@ -296,6 +296,61 @@ impl ImageLayer {
    }
 }

+#[derive(thiserror::Error, Debug)]
+pub enum RewriteSummaryError {
+    #[error("magic mismatch")]
+    MagicMismatch,
+    #[error(transparent)]
+    Other(#[from] anyhow::Error),
+}
+
+impl From<std::io::Error> for RewriteSummaryError {
+    fn from(e: std::io::Error) -> Self {
+        Self::Other(anyhow::anyhow!(e))
+    }
+}
+
+impl ImageLayer {
+    pub async fn rewrite_summary<F>(
+        path: &Utf8Path,
+        rewrite: F,
+        ctx: &RequestContext,
+    ) -> Result<(), RewriteSummaryError>
+    where
+        F: Fn(Summary) -> Summary,
+    {
+        let file = VirtualFile::open_with_options(
+            path,
+            &*std::fs::OpenOptions::new().read(true).write(true),
+        )
+        .await
+        .with_context(|| format!("Failed to open file '{}'", path))?;
+        let file = FileBlockReader::new(file);
+        let summary_blk = file.read_blk(0, ctx).await?;
+        let actual_summary = Summary::des_prefix(summary_blk.as_ref()).context("deserialize")?;
+        let mut file = file.file;
+        if actual_summary.magic != IMAGE_FILE_MAGIC {
+            return Err(RewriteSummaryError::MagicMismatch);
+        }
+
+        let new_summary = rewrite(actual_summary);
+
+        let mut buf = smallvec::SmallVec::<[u8; PAGE_SZ]>::new();
+        Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
+        if buf.spilled() {
+            // The serialized summary no longer fits in one page (PAGE_SZ bytes), so refuse to
+            // write it. ImageLayerWriterInner just warn!()s for this; it should probably error too.
+            return Err(RewriteSummaryError::Other(anyhow::anyhow!(
+                "Used more than one page size for summary buffer: {}",
+                buf.len()
+            )));
+        }
+        file.seek(SeekFrom::Start(0)).await?;
+        file.write_all(&buf).await?;
+        Ok(())
+    }
+}
+
 impl ImageLayerInner {
    /// Returns nested result following Result<Result<_, OpErr>, Critical>:
    /// - inner has the success or transient failure
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -3,7 +3,7 @@ use camino::{Utf8Path, Utf8PathBuf};
 use pageserver_api::models::{
    HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
 };
-use pageserver_api::shard::ShardIndex;
+use remote_storage::RemotePath;
 use std::ops::Range;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::{Arc, Weak};
@@ -97,7 +97,6 @@ impl Layer {
            desc,
            None,
            metadata.generation,
-            metadata.shard,
        )));

        debug_assert!(owner.0.needs_download_blocking().unwrap().is_some());
@@ -138,7 +137,6 @@ impl Layer {
                desc,
                Some(inner),
                metadata.generation,
-                metadata.shard,
            )
        }));

@@ -182,7 +180,6 @@ impl Layer {
                desc,
                Some(inner),
                timeline.generation,
-                timeline.get_shard_index(),
            )
        }));

@@ -309,6 +306,12 @@ impl Layer {
        &self.0.path
    }

+    /// Delegates to the inner `remote_path()`; can be None in some edge cases where it should be Some.
+    #[allow(unused)]
+    pub(crate) fn remote_path(&self) -> Option<RemotePath> {
+        self.0.remote_path()
+    }
+
    pub(crate) fn metadata(&self) -> LayerFileMetadata {
        self.0.metadata()
    }
@@ -326,24 +329,6 @@ impl Layer {

        Ok(())
    }
-
-    /// Waits until this layer has been dropped (and if needed, local garbage collection and remote
-    /// deletion scheduling has completed).
-    ///
-    /// Does not start garbage collection, use [`Self::garbage_collect_on_drop`] for that
-    /// separatedly.
-    #[cfg(feature = "testing")]
-    pub(crate) fn wait_drop(&self) -> impl std::future::Future<Output = ()> + 'static {
-        let mut rx = self.0.status.subscribe();
-
-        async move {
-            loop {
-                if let Err(tokio::sync::broadcast::error::RecvError::Closed) = rx.recv().await {
-                    break;
-                }
-            }
-        }
-    }
 }

 /// The download-ness ([`DownloadedLayer`]) can be either resident or wanted evicted.
@@ -448,15 +433,6 @@ struct LayerInner {
    /// For loaded layers (resident or evicted) this comes from [`LayerFileMetadata::generation`],
    /// for created layers from [`Timeline::generation`].
    generation: Generation,
-
-    /// The shard of this Layer.
-    ///
-    /// For layers created in this process, this will always be the [`ShardIndex`] of the
-    /// current `ShardIdentity`` (TODO: add link once it's introduced).
-    ///
-    /// For loaded layers, this may be some other value if the tenant has undergone
-    /// a shard split since the layer was originally written.
-    shard: ShardIndex,
 }

 impl std::fmt::Display for LayerInner {
@@ -490,17 +466,13 @@ impl Drop for LayerInner {

        let path = std::mem::take(&mut self.path);
        let file_name = self.layer_desc().filename();
+        let gen = self.generation;
        let file_size = self.layer_desc().file_size;
        let timeline = self.timeline.clone();
-        let meta = self.metadata();
-        let status = self.status.clone();

        crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(move || {
            let _g = span.entered();

-            // carry this until we are finished for [`Layer::wait_drop`] support
-            let _status = status;
-
            let removed = match std::fs::remove_file(path) {
                Ok(()) => true,
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
@@ -524,7 +496,7 @@ impl Drop for LayerInner {
                    timeline.metrics.resident_physical_size_sub(file_size);
                }
                if let Some(remote_client) = timeline.remote_client.as_ref() {
-                    let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, meta)]);
+                    let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, gen)]);

                    if let Err(e) = res {
                        // test_timeline_deletion_with_files_stuck_in_upload_queue is good at
@@ -558,7 +530,6 @@ impl LayerInner {
        desc: PersistentLayerDesc,
        downloaded: Option<Arc<DownloadedLayer>>,
        generation: Generation,
-        shard: ShardIndex,
    ) -> Self {
        let path = conf
            .timeline_path(&timeline.tenant_id, &timeline.timeline_id)
@@ -586,7 +557,6 @@ impl LayerInner {
            status: tokio::sync::broadcast::channel(1).0,
            consecutive_failures: AtomicUsize::new(0),
            generation,
-            shard,
        }
    }

@@ -955,6 +925,17 @@ impl LayerInner {
        }
    }

+    /// Remote path of this layer file. Returns None if `self.timeline` cannot be upgraded, even though it should return Some.
+    fn remote_path(&self) -> Option<RemotePath> {
+        let tl = self.timeline.upgrade()?; // TODO: should distinguish this case, but accuracy doesn't matter for this field.
+        Some(crate::tenant::remote_timeline_client::remote_layer_path(
+            &tl.tenant_id,
+            &tl.timeline_id,
+            &self.desc.filename(),
+            self.generation,
+        ))
+    }
+
    fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
        let layer_file_name = self.desc.filename().file_name();

@@ -974,6 +955,7 @@ impl LayerInner {
                lsn_end: lsn_range.end,
                remote,
                access_stats,
+                remote_path: self.remote_path().map(|p| p.into()),
            }
        } else {
            let lsn = self.desc.image_layer_lsn();
@@ -984,6 +966,7 @@ impl LayerInner {
                lsn_start: lsn,
                remote,
                access_stats,
+                remote_path: self.remote_path().map(|p| p.into()),
            }
        }
    }
@@ -1114,7 +1097,7 @@ impl LayerInner {
    }

    fn metadata(&self) -> LayerFileMetadata {
-        LayerFileMetadata::new(self.desc.file_size, self.generation, self.shard)
+        LayerFileMetadata::new(self.desc.file_size, self.generation)
    }
 }

@@ -1438,7 +1421,6 @@ impl Default for LayerImplMetrics {
        )
        .unwrap();

-        // reminder: this will be pageserver_layer_gcs_count_total with "_total" suffix
        let gcs = metrics::register_int_counter_vec!(
            "pageserver_layer_gcs_count",
            "Garbage collections started and completed in the Layer implementation",
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -261,7 +261,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
            } else {
                // Run gc
                let res = tenant
-                    .gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &cancel, &ctx)
+                    .gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx)
                    .await;
                if let Err(e) = res {
                    let wait_duration = backoff::exponential_backoff_duration_seconds(
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -10,7 +10,6 @@ mod walreceiver;
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
-use enumset::EnumSet;
 use fail::fail_point;
 use itertools::Itertools;
 use pageserver_api::models::{
@@ -62,7 +61,6 @@ use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
 use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
 use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
 use pageserver_api::reltag::RelTag;
-use pageserver_api::shard::ShardIndex;

 use postgres_connection::PgConnectionConfig;
 use postgres_ffi::to_pg_timestamp;
@@ -251,6 +249,14 @@ pub struct Timeline {
    /// to be notified when layer flushing has finished, subscribe to the layer_flush_done channel
    layer_flush_done_tx: tokio::sync::watch::Sender<(u64, Result<(), FlushLayerError>)>,

+    /// Layer removal lock.
+    /// A lock to ensure that no layer of the timeline is removed concurrently by other tasks.
+    /// This lock is acquired in [`Timeline::gc`] and [`Timeline::compact`].
+    /// This is an `Arc<Mutex>` lock because we need an owned
+    /// lock guard in functions that will be spawned to tokio I/O pool (which requires `'static`).
+    /// Note that [`DeleteTimelineFlow`] uses `delete_progress` field.
+    pub(super) layer_removal_cs: Arc<tokio::sync::Mutex<()>>,
+
    // Needed to ensure that we can't create a branch at a point that was already garbage collected
    pub latest_gc_cutoff_lsn: Rcu<Lsn>,

@@ -311,24 +317,6 @@ pub struct Timeline {
    /// Cancellation token scoped to this timeline: anything doing long-running work relating
    /// to the timeline should drop out when this token fires.
    pub(crate) cancel: CancellationToken,
-
-    /// Make sure we only have one running compaction at a time in tests.
-    ///
-    /// Must only be taken in two places:
-    /// - [`Timeline::compact`] (this file)
-    /// - [`delete::delete_local_layer_files`]
-    ///
-    /// Timeline deletion will acquire both compaction and gc locks in whatever order.
-    compaction_lock: tokio::sync::Mutex<()>,
-
-    /// Make sure we only have one running gc at a time.
-    ///
-    /// Must only be taken in two places:
-    /// - [`Timeline::gc`] (this file)
-    /// - [`delete::delete_local_layer_files`]
-    ///
-    /// Timeline deletion will acquire both compaction and gc locks in whatever order.
-    gc_lock: tokio::sync::Mutex<()>,
 }

 pub struct WalReceiverInfo {
@@ -449,11 +437,6 @@ pub enum LogicalSizeCalculationCause {
    TenantSizeHandler,
 }

-#[derive(enumset::EnumSetType)]
-pub(crate) enum CompactFlags {
-    ForceRepartition,
-}
-
 /// Public interface functions
 impl Timeline {
    /// Get the LSN where this branch was created
@@ -711,11 +694,8 @@ impl Timeline {
    pub(crate) async fn compact(
        self: &Arc<Self>,
        cancel: &CancellationToken,
-        flags: EnumSet<CompactFlags>,
        ctx: &RequestContext,
    ) -> Result<(), CompactionError> {
-        let _g = self.compaction_lock.lock().await;
-
        // this wait probably never needs any "long time spent" logging, because we already nag if
        // compaction task goes over it's period (20s) which is quite often in production.
        let _permit = match super::tasks::concurrent_background_tasks_rate_limit(
@@ -770,7 +750,7 @@ impl Timeline {
        // Below are functions compact_level0() and create_image_layers()
        // but they are a bit ad hoc and don't quite work like it's explained
        // above. Rewrite it.
-
+        let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await);
        // Is the timeline being deleted?
        if self.is_stopping() {
            trace!("Dropping out of compaction on timeline shutdown");
@@ -786,7 +766,6 @@ impl Timeline {
            .repartition(
                self.get_last_record_lsn(),
                self.get_compaction_target_size(),
-                flags,
                ctx,
            )
            .await
@@ -811,7 +790,8 @@ impl Timeline {

                // 3. Compact
                let timer = self.metrics.compact_time_histo.start_timer();
-                self.compact_level0(target_file_size, ctx).await?;
+                self.compact_level0(layer_removal_cs.clone(), target_file_size, ctx)
+                    .await?;
                timer.stop_and_record();

                if let Some(remote_client) = &self.remote_client {
@@ -957,7 +937,7 @@ impl Timeline {
                    // what is problematic is the shutting down of RemoteTimelineClient, because
                    // obviously it does not make sense to stop while we wait for it, but what
                    // about corner cases like s3 suddenly hanging up?
-                    if let Err(e) = client.shutdown().await {
+                    if let Err(e) = client.wait_completion().await {
                        // Non-fatal.  Shutdown is infallible.  Failures to flush just mean that
                        // we have some extra WAL replay to do next time the timeline starts.
                        warn!("failed to flush to remote storage: {e:#}");
@@ -1212,6 +1192,16 @@ impl Timeline {
        remote_client: &Arc<RemoteTimelineClient>,
        layers_to_evict: &[Layer],
    ) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
+        // ensure that the layers have finished uploading
+        // (don't hold the layer_removal_cs while we do it, we're not removing anything yet)
+        remote_client
+            .wait_completion()
+            .await
+            .context("wait for layer upload ops to complete")?;
+
+        // now lock out layer removal (compaction, gc, timeline deletion)
+        let _layer_removal_guard = self.layer_removal_cs.lock().await;
+
        {
            // to avoid racing with detach and delete_timeline
            let state = self.current_state();
@@ -1422,6 +1412,7 @@ impl Timeline {
                layer_flush_done_tx,

                write_lock: tokio::sync::Mutex::new(()),
+                layer_removal_cs: Default::default(),

                gc_info: std::sync::RwLock::new(GcInfo {
                    retain_lsns: Vec::new(),
@@ -1460,9 +1451,6 @@ impl Timeline {
                initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt),
                cancel,
                gate: Gate::new(format!("Timeline<{tenant_id}/{timeline_id}>")),
-
-                compaction_lock: tokio::sync::Mutex::default(),
-                gc_lock: tokio::sync::Mutex::default(),
            };
            result.repartition_threshold =
                result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;
@@ -1601,7 +1589,6 @@ impl Timeline {

        // Copy to move into the task we're about to spawn
        let generation = self.generation;
-        let shard = self.get_shard_index();
        let this = self.myself.upgrade().expect("&self method holds the arc");

        let (loaded_layers, needs_cleanup, total_physical_size) = tokio::task::spawn_blocking({
@@ -1650,7 +1637,6 @@ impl Timeline {
                    index_part.as_ref(),
                    disk_consistent_lsn,
                    generation,
-                    shard,
                );

                let mut loaded_layers = Vec::new();
@@ -1725,30 +1711,6 @@ impl Timeline {
        if let Some(rtc) = self.remote_client.as_ref() {
            rtc.schedule_layer_file_deletion(&needs_cleanup)?;
            rtc.schedule_index_upload_for_file_changes()?;
-            // This barrier orders above DELETEs before any later operations.
-            // This is critical because code executing after the barrier might
-            // create again objects with the same key that we just scheduled for deletion.
-            // For example, if we just scheduled deletion of an image layer "from the future",
-            // later compaction might run again and re-create the same image layer.
-            // "from the future" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.
-            // "same" here means same key range and LSN.
-            //
-            // Without a barrier between above DELETEs and the re-creation's PUTs,
-            // the upload queue may execute the PUT first, then the DELETE.
-            // In our example, we will end up with an IndexPart referencing a non-existent object.
-            //
-            // 1. a future image layer is created and uploaded
-            // 2. ps restart
-            // 3. the future layer from (1) is deleted during load layer map
-            // 4. image layer is re-created and uploaded
-            // 5. deletion queue would like to delete (1) but actually deletes (4)
-            // 6. delete by name works as expected, but it now deletes the wrong (later) version
-            //
-            // See https://github.com/neondatabase/neon/issues/5878
-            //
-            // NB: generation numbers naturally protect against this because they disambiguate
-            //     (1) and (4)
-            rtc.schedule_barrier()?;
            // Tenant::create_timeline will wait for these uploads to happen before returning, or
            // on retry.
        }
@@ -2563,12 +2525,7 @@ impl Timeline {
                // Note: The 'ctx' in use here has DownloadBehavior::Error. We should not
                // require downloading anything during initial import.
                let (partitioning, _lsn) = self
-                    .repartition(
-                        self.initdb_lsn,
-                        self.get_compaction_target_size(),
-                        EnumSet::empty(),
-                        ctx,
-                    )
+                    .repartition(self.initdb_lsn, self.get_compaction_target_size(), ctx)
                    .await?;

                if self.cancel.is_cancelled() {
@@ -2606,8 +2563,6 @@ impl Timeline {
                )
            };

-        pausable_failpoint!("flush-layer-cancel-after-writing-layer-out-pausable");
-
        if self.cancel.is_cancelled() {
            return Err(FlushLayerError::Cancelled);
        }
@@ -2789,16 +2744,12 @@ impl Timeline {
        &self,
        lsn: Lsn,
        partition_size: u64,
-        flags: EnumSet<CompactFlags>,
        ctx: &RequestContext,
    ) -> anyhow::Result<(KeyPartitioning, Lsn)> {
        {
            let partitioning_guard = self.partitioning.lock().unwrap();
            let distance = lsn.0 - partitioning_guard.1 .0;
-            if partitioning_guard.1 != Lsn(0)
-                && distance <= self.repartition_threshold
-                && !flags.contains(CompactFlags::ForceRepartition)
-            {
+            if partitioning_guard.1 != Lsn(0) && distance <= self.repartition_threshold {
                debug!(
                    distance,
                    threshold = self.repartition_threshold,
@@ -3153,8 +3104,13 @@ impl TryFrom<CompactLevel0Phase1StatsBuilder> for CompactLevel0Phase1Stats {

 impl Timeline {
    /// Level0 files first phase of compaction, explained in the [`Self::compact`] comment.
+    ///
+    /// This method takes the `_layer_removal_cs` guard to highlight that required downloads are
+    /// returned as an error. If the `layer_removal_cs` boundary is changed not to be taken at the
+    /// start of level0 files compaction, the on-demand download should be revisited as well.
    async fn compact_level0_phase1(
        self: &Arc<Self>,
+        _layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
        guard: tokio::sync::OwnedRwLockReadGuard<LayerManager>,
        mut stats: CompactLevel0Phase1StatsBuilder,
        target_file_size: u64,
@@ -3241,6 +3197,8 @@ impl Timeline {
        let mut prev_lsn_end = first_level0_delta.layer_desc().lsn_range.end;
        let mut deltas_to_compact = Vec::with_capacity(level0_deltas.len());

+        // FIXME: downloading while holding layer_removal_cs is not great, but we will remove that
+        // soon
        deltas_to_compact.push(first_level0_delta.download_and_keep_resident().await?);
        for l in level0_deltas_iter {
            let lsn_range = &l.layer_desc().lsn_range;
@@ -3590,6 +3548,7 @@ impl Timeline {
    ///
    async fn compact_level0(
        self: &Arc<Self>,
+        layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
        target_file_size: u64,
        ctx: &RequestContext,
    ) -> Result<(), CompactionError> {
@@ -3611,9 +3570,16 @@ impl Timeline {
            let now = tokio::time::Instant::now();
            stats.read_lock_acquisition_micros =
                DurationRecorder::Recorded(RecordedDuration(now - begin), now);
-            self.compact_level0_phase1(phase1_layers_locked, stats, target_file_size, &ctx)
-                .instrument(phase1_span)
-                .await?
+            let layer_removal_cs = layer_removal_cs.clone();
+            self.compact_level0_phase1(
+                layer_removal_cs,
+                phase1_layers_locked,
+                stats,
+                target_file_size,
+                &ctx,
+            )
+            .instrument(phase1_span)
+            .await?
        };

        if new_layers.is_empty() && deltas_to_compact.is_empty() {
@@ -3621,6 +3587,17 @@ impl Timeline {
            return Ok(());
        }

+        // Before deleting any layers, we need to wait for their upload ops to finish.
+        // See remote_timeline_client module level comment on consistency.
+        // Do it here because we don't want to hold self.layers.write() while waiting.
+        if let Some(remote_client) = &self.remote_client {
+            debug!("waiting for upload ops to complete");
+            remote_client
+                .wait_completion()
+                .await
+                .context("wait for layer upload ops to complete")?;
+        }
+
        let mut guard = self.layers.write().await;

        let mut duplicated_layers = HashSet::new();
@@ -3652,7 +3629,12 @@ impl Timeline {
        };

        // deletion will happen later, the layer file manager calls garbage_collect_on_drop
-        guard.finish_compact_l0(&remove_layers, &insert_layers, &self.metrics);
+        guard.finish_compact_l0(
+            &layer_removal_cs,
+            &remove_layers,
+            &insert_layers,
+            &self.metrics,
+        );

        if let Some(remote_client) = self.remote_client.as_ref() {
            remote_client.schedule_compaction_update(&remove_layers, &new_layers)?;
@@ -3703,7 +3685,6 @@ impl Timeline {
        retain_lsns: Vec<Lsn>,
        cutoff_horizon: Lsn,
        pitr: Duration,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // First, calculate pitr_cutoff_timestamp and then convert it to LSN.
@@ -3717,10 +3698,7 @@ impl Timeline {
            if let Some(pitr_cutoff_timestamp) = now.checked_sub(pitr) {
                let pitr_timestamp = to_pg_timestamp(pitr_cutoff_timestamp);

-                match self
-                    .find_lsn_for_timestamp(pitr_timestamp, cancel, ctx)
-                    .await?
-                {
+                match self.find_lsn_for_timestamp(pitr_timestamp, ctx).await? {
                    LsnForTimestamp::Present(lsn) => lsn,
                    LsnForTimestamp::Future(lsn) => {
                        // The timestamp is in the future. That sounds impossible,
@@ -3763,17 +3741,19 @@ impl Timeline {
        Ok(())
    }

+    ///
    /// Garbage collect layer files on a timeline that are no longer needed.
    ///
    /// Currently, we don't make any attempt at removing unneeded page versions
    /// within a layer file. We can only remove the whole file if it's fully
    /// obsolete.
+    ///
    pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
-        let _g = self.gc_lock.lock().await;
        let timer = self.metrics.garbage_collect_histo.start_timer();

        fail_point!("before-timeline-gc");

+        let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await);
        // Is the timeline being deleted?
        if self.is_stopping() {
            anyhow::bail!("timeline is Stopping");
@@ -3791,7 +3771,13 @@ impl Timeline {
        let new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff);

        let res = self
-            .gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff)
+            .gc_timeline(
+                layer_removal_cs.clone(),
+                horizon_cutoff,
+                pitr_cutoff,
+                retain_lsns,
+                new_gc_cutoff,
+            )
            .instrument(
                info_span!("gc_timeline", timeline_id = %self.timeline_id, cutoff = %new_gc_cutoff),
            )
@@ -3805,6 +3791,7 @@ impl Timeline {

    async fn gc_timeline(
        &self,
+        layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
        horizon_cutoff: Lsn,
        pitr_cutoff: Lsn,
        retain_lsns: Vec<Lsn>,
@@ -3842,6 +3829,17 @@ impl Timeline {

        debug!("retain_lsns: {:?}", retain_lsns);

+        // Before deleting any layers, we need to wait for their upload ops to finish.
+        // See remote_timeline_client module level comment on consistency.
+        // Do it here because we don't want to hold self.layers.write() while waiting.
+        if let Some(remote_client) = &self.remote_client {
+            debug!("waiting for upload ops to complete");
+            remote_client
+                .wait_completion()
+                .await
+                .context("wait for layer upload ops to complete")?;
+        }
+
        let mut layers_to_remove = Vec::new();
        let mut wanted_image_layers = KeySpaceRandomAccum::default();

@@ -3957,11 +3955,6 @@ impl Timeline {
            //
            // This does not in fact have any effect as we no longer consider local metadata unless
            // running without remote storage.
-            //
-            // This unconditionally schedules also an index_part.json update, even though, we will
-            // be doing one a bit later with the unlinked gc'd layers.
-            //
-            // TODO: remove when implementing <https://github.com/neondatabase/neon/issues/4099>.
            self.update_metadata_file(self.disk_consistent_lsn.load(), None)
                .await?;

@@ -3976,16 +3969,11 @@ impl Timeline {
                remote_client.schedule_gc_update(&gc_layers)?;
            }

-            guard.finish_gc_timeline(&gc_layers);
+            guard.finish_gc_timeline(&layer_removal_cs, gc_layers);

            if result.layers_removed != 0 {
                fail_point!("after-timeline-gc-removed-layers");
            }
-
-            #[cfg(feature = "testing")]
-            {
-                result.doomed_layers = gc_layers;
-            }
        }

        info!(
@@ -3997,7 +3985,9 @@ impl Timeline {
        Ok(result)
    }

+    ///
    /// Reconstruct a value, using the given base image and WAL records in 'data'.
+    ///
    async fn reconstruct_value(
        &self,
        key: Key,
@@ -4327,11 +4317,6 @@ impl Timeline {
            resident_layers,
        }
    }
-
-    pub(crate) fn get_shard_index(&self) -> ShardIndex {
-        // TODO: carry this on the struct
-        ShardIndex::unsharded()
-    }
 }

 type TraversalPathItem = (
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -110,11 +110,40 @@ async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTi
    Ok(())
 }

-/// Grab the compaction and gc locks, and actually perform the deletion.
+// We delete local files first, so if the pageserver restarts after local file deletion, remote deletion is not continued.
+// This could be solved by inverting these steps. But even with the steps inverted, once index_part.json
+// is deleted there is no way to distinguish between "this timeline is good, we just didn't upload it to remote"
+// and "this timeline is deleted, we should continue with removal of local state". To avoid the ambiguity we use a mark file.
+// After the index part is deleted, the presence of this mark file identifies that deletion was intended,
+// so we can just remove the mark file.
+async fn create_delete_mark(
+    conf: &PageServerConf,
+    tenant_id: TenantId,
+    timeline_id: TimelineId,
+) -> Result<(), DeleteTimelineError> {
+    fail::fail_point!("timeline-delete-before-delete-mark", |_| {
+        Err(anyhow::anyhow!(
+            "failpoint: timeline-delete-before-delete-mark"
+        ))?
+    });
+    let marker_path = conf.timeline_delete_mark_file_path(tenant_id, timeline_id);
+
+    // Note: it is fine if the marker file already exists; it is opened as-is, not truncated.
+    let _ = std::fs::OpenOptions::new()
+        .write(true)
+        .create(true)
+        .open(&marker_path)
+        .with_context(|| format!("could not create delete marker file {marker_path:?}"))?;
+
+    crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?;
+    Ok(())
+}
+
+/// Grab the layer_removal_cs lock, and actually perform the deletion.
 ///
-/// The locks prevent GC or compaction from running at the same time. The background tasks do not
-/// register themselves with the timeline it's operating on, so it might still be running even
-/// though we called `shutdown_tasks`.
+/// This lock prevents GC or compaction from running at the same time.
+/// The GC task doesn't register itself with the timeline it's operating on,
+/// so it might still be running even though we called `shutdown_tasks`.
 ///
 /// Note that there are still other race conditions between
 /// GC, compaction and timeline deletion. See
@@ -122,19 +151,14 @@ async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTi
 ///
 /// No timeout here, GC & Compaction should be responsive to the
 /// `TimelineState::Stopping` change.
-// pub(super): documentation link
-pub(super) async fn delete_local_layer_files(
+async fn delete_local_layer_files(
    conf: &PageServerConf,
    tenant_id: TenantId,
    timeline: &Timeline,
 ) -> anyhow::Result<()> {
-    let guards = async { tokio::join!(timeline.gc_lock.lock(), timeline.compaction_lock.lock()) };
-    let guards = crate::timed(
-        guards,
-        "acquire gc and compaction locks",
-        std::time::Duration::from_secs(5),
-    )
-    .await;
+    info!("waiting for layer_removal_cs.lock()");
+    let layer_removal_guard = timeline.layer_removal_cs.lock().await;
+    info!("got layer_removal_cs.lock(), deleting layer files");

    // NB: storage_sync upload tasks that reference these layers have been cancelled
    //     by the caller.
@@ -155,8 +179,8 @@ pub(super) async fn delete_local_layer_files(
    // because of a previous failure/cancellation at/after
    // failpoint timeline-delete-after-rm.
    //
-    // ErrorKind::NotFound can also happen if we race with tenant detach, because,
-    // no locks are shared.
+    // It can also happen if we race with tenant detach, because
+    // tenant detach doesn't grab the layer_removal_cs lock.
    //
    // For now, log and continue.
    // warn! level is technically not appropriate for the
@@ -224,8 +248,8 @@ pub(super) async fn delete_local_layer_files(
        .with_context(|| format!("Failed to remove: {}", entry.path().display()))?;
    }

-    info!("finished deleting layer files, releasing locks");
-    drop(guards);
+    info!("finished deleting layer files, releasing layer_removal_cs.lock()");
+    drop(layer_removal_guard);

    fail::fail_point!("timeline-delete-after-rm", |_| {
        Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))?
@@ -287,8 +311,6 @@ async fn cleanup_remaining_timeline_fs_traces(
        .context("fsync_pre_mark_remove")?;

    // Remove delete mark
-    // TODO: once we are confident that no more exist in the field, remove this
-    // line.  It cleans up a legacy marker file that might in rare cases be present.
    tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_id, timeline_id))
        .await
        .or_else(fs_ext::ignore_not_found)
@@ -369,6 +391,8 @@ impl DeleteTimelineFlow {

        set_deleted_in_remote_index(&timeline).await?;

+        create_delete_mark(tenant.conf, timeline.tenant_id, timeline.timeline_id).await?;
+
        fail::fail_point!("timeline-delete-before-schedule", |_| {
            Err(anyhow::anyhow!(
                "failpoint: timeline-delete-before-schedule"
@@ -440,6 +464,10 @@ impl DeleteTimelineFlow {

        guard.mark_in_progress()?;

+        // Note that delete mark can be missing on resume
+        // because we create delete mark after we set deleted_at in the index part.
+        create_delete_mark(tenant.conf, tenant.tenant_id, timeline_id).await?;
+
        Self::schedule_background(guard, tenant.conf, tenant, timeline);

        Ok(())
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -296,6 +296,7 @@ impl Timeline {
                    stats.evicted += 1;
                }
                Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
+                    // compaction/gc removed the file while we were waiting on layer_removal_cs
                    stats.not_evictable += 1;
                }
            }
@@ -350,7 +351,7 @@ impl Timeline {
        match state.last_layer_access_imitation {
            Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }
            _ => {
-                self.imitate_synthetic_size_calculation_worker(&tenant, cancel, ctx)
+                self.imitate_synthetic_size_calculation_worker(&tenant, ctx, cancel)
                    .await;
                state.last_layer_access_imitation = Some(tokio::time::Instant::now());
            }
@@ -416,8 +417,8 @@ impl Timeline {
    async fn imitate_synthetic_size_calculation_worker(
        &self,
        tenant: &Arc<Tenant>,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
+        cancel: &CancellationToken,
    ) {
        if self.conf.metric_collection_endpoint.is_none() {
            // We don't start the consumption metrics task if this is not set in the config.
@@ -456,7 +457,6 @@ impl Timeline {
            None,
            &mut throwaway_cache,
            LogicalSizeCalculationCause::EvictionTaskImitation,
-            cancel,
            ctx,
        )
        .instrument(info_span!("gather_inputs"));
--- a/pageserver/src/tenant/timeline/init.rs
+++ b/pageserver/src/tenant/timeline/init.rs
@@ -13,7 +13,6 @@ use crate::{
 };
 use anyhow::Context;
 use camino::Utf8Path;
-use pageserver_api::shard::ShardIndex;
 use std::{collections::HashMap, str::FromStr};
 use utils::lsn::Lsn;

@@ -108,7 +107,6 @@ pub(super) fn reconcile(
    index_part: Option<&IndexPart>,
    disk_consistent_lsn: Lsn,
    generation: Generation,
-    shard: ShardIndex,
 ) -> Vec<(LayerFileName, Result<Decision, DismissedLayer>)> {
    use Decision::*;

@@ -120,13 +118,10 @@ pub(super) fn reconcile(
        .map(|(name, file_size)| {
            (
                name,
-                // The generation and shard here will be corrected to match IndexPart in the merge below, unless
+                // The generation here will be corrected to match IndexPart in the merge below, unless
                // it is not in IndexPart, in which case using our current generation makes sense
                // because it will be uploaded in this generation.
-                (
-                    Some(LayerFileMetadata::new(file_size, generation, shard)),
-                    None,
-                ),
+                (Some(LayerFileMetadata::new(file_size, generation)), None),
            )
        })
        .collect::<Collected>();
--- a/pageserver/src/tenant/timeline/layer_manager.rs
+++ b/pageserver/src/tenant/timeline/layer_manager.rs
@@ -190,6 +190,7 @@ impl LayerManager {
    /// Called when compaction is completed.
    pub(crate) fn finish_compact_l0(
        &mut self,
+        layer_removal_cs: &Arc<tokio::sync::OwnedMutexGuard<()>>,
        compact_from: &[Layer],
        compact_to: &[ResidentLayer],
        metrics: &TimelineMetrics,
@@ -200,16 +201,25 @@ impl LayerManager {
            metrics.record_new_file_metrics(l.layer_desc().file_size);
        }
        for l in compact_from {
-            Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);
+            Self::delete_historic_layer(layer_removal_cs, l, &mut updates, &mut self.layer_fmgr);
        }
        updates.flush();
    }

-    /// Called when garbage collect has selected the layers to be removed.
-    pub(crate) fn finish_gc_timeline(&mut self, gc_layers: &[Layer]) {
+    /// Called when garbage collecting the timeline. Applies the removal of `gc_layers` as one batched layer map update.
+    pub(crate) fn finish_gc_timeline(
+        &mut self,
+        layer_removal_cs: &Arc<tokio::sync::OwnedMutexGuard<()>>,
+        gc_layers: Vec<Layer>,
+    ) {
        let mut updates = self.layer_map.batch_update();
        for doomed_layer in gc_layers {
-            Self::delete_historic_layer(doomed_layer, &mut updates, &mut self.layer_fmgr);
+            Self::delete_historic_layer(
+                layer_removal_cs,
+                &doomed_layer,
+                &mut updates,
+                &mut self.layer_fmgr,
+            );
        }
        updates.flush()
    }
@@ -228,6 +238,7 @@ impl LayerManager {
    /// Remote storage is not affected by this operation.
    fn delete_historic_layer(
        // we cannot remove layers otherwise, since gc and compaction will race
+        _layer_removal_cs: &Arc<tokio::sync::OwnedMutexGuard<()>>,
        layer: &Layer,
        updates: &mut BatchedUpdates<'_>,
        mapping: &mut LayerFileManager<Layer>,
--- a/pageserver/src/tenant/timeline/uninit.rs
+++ b/pageserver/src/tenant/timeline/uninit.rs
@@ -45,20 +45,12 @@ impl<'t> UninitializedTimeline<'t> {
        let timeline_id = self.timeline_id;
        let tenant_id = self.owning_tenant.tenant_id;

-        if self.raw_timeline.is_none() {
-            return Err(anyhow::anyhow!(
-                "No timeline for initialization found for {tenant_id}/{timeline_id}"
-            ));
-        }
+        let (new_timeline, uninit_mark) = self.raw_timeline.take().with_context(|| {
+            format!("No timeline for initalization found for {tenant_id}/{timeline_id}")
+        })?;

        // Check that the caller initialized disk_consistent_lsn
-        let new_disk_consistent_lsn = self
-            .raw_timeline
-            .as_ref()
-            .expect("checked above")
-            .0
-            .get_disk_consistent_lsn();
-
+        let new_disk_consistent_lsn = new_timeline.get_disk_consistent_lsn();
        anyhow::ensure!(
            new_disk_consistent_lsn.is_valid(),
            "new timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn"
@@ -70,13 +62,6 @@ impl<'t> UninitializedTimeline<'t> {
                "Found freshly initialized timeline {tenant_id}/{timeline_id} in the tenant map"
            ),
            Entry::Vacant(v) => {
-                // after taking here should be no fallible operations, because the drop guard will not
-                // cleanup after and would block for example the tenant deletion
-                let (new_timeline, uninit_mark) =
-                    self.raw_timeline.take().expect("already checked");
-
-                // this is the mutual exclusion between different retries to create the timeline;
-                // this should be an assertion.
                uninit_mark.remove_uninit_mark().with_context(|| {
                    format!(
                        "Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}"
@@ -85,10 +70,10 @@ impl<'t> UninitializedTimeline<'t> {
                v.insert(Arc::clone(&new_timeline));

                new_timeline.maybe_spawn_flush_loop();
-
-                Ok(new_timeline)
            }
        }
+
+        Ok(new_timeline)
    }

    /// Prepares timeline data by loading it from the basebackup archive.
--- a/pageserver/src/tenant/upload_queue.rs
+++ b/pageserver/src/tenant/upload_queue.rs
@@ -1,5 +1,6 @@
 use super::storage_layer::LayerFileName;
 use super::storage_layer::ResidentLayer;
+use super::Generation;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::remote_timeline_client::index::IndexPart;
 use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
@@ -14,9 +15,6 @@ use utils::lsn::AtomicLsn;
 use std::sync::atomic::AtomicU32;
 use utils::lsn::Lsn;

-#[cfg(feature = "testing")]
-use utils::generation::Generation;
-
 // clippy warns that Uninitialized is much smaller than Initialized, which wastes
 // memory for Uninitialized variants. Doesn't matter in practice, there are not
 // that many upload queues in a running pageserver, and most of them are initialized
@@ -90,14 +88,6 @@ pub(crate) struct UploadQueueInitialized {
    /// bug causing leaks, then it's better to not leave this enabled for production builds.
    #[cfg(feature = "testing")]
    pub(crate) dangling_files: HashMap<LayerFileName, Generation>,
-
-    /// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`.
-    pub(crate) shutting_down: bool,
-
-    /// Permitless semaphore on which any number of `RemoteTimelineClient::shutdown` futures can
-    /// wait on until one of them stops the queue. The semaphore is closed when
-    /// `RemoteTimelineClient::launch_queued_tasks` encounters `UploadOp::Shutdown`.
-    pub(crate) shutdown_ready: Arc<tokio::sync::Semaphore>,
 }

 impl UploadQueueInitialized {
@@ -156,8 +146,6 @@ impl UploadQueue {
            queued_operations: VecDeque::new(),
            #[cfg(feature = "testing")]
            dangling_files: HashMap::new(),
-            shutting_down: false,
-            shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
        };

        *self = UploadQueue::Initialized(state);
@@ -205,8 +193,6 @@ impl UploadQueue {
            queued_operations: VecDeque::new(),
            #[cfg(feature = "testing")]
            dangling_files: HashMap::new(),
-            shutting_down: false,
-            shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
        };

        *self = UploadQueue::Initialized(state);
@@ -218,13 +204,7 @@ impl UploadQueue {
            UploadQueue::Uninitialized | UploadQueue::Stopped(_) => {
                anyhow::bail!("queue is in state {}", self.as_str())
            }
-            UploadQueue::Initialized(x) => {
-                if !x.shutting_down {
-                    Ok(x)
-                } else {
-                    anyhow::bail!("queue is shutting down")
-                }
-            }
+            UploadQueue::Initialized(x) => Ok(x),
        }
    }

@@ -252,7 +232,7 @@ pub(crate) struct UploadTask {
 /// for timeline deletion, which skips this queue and goes directly to DeletionQueue.
 #[derive(Debug)]
 pub(crate) struct Delete {
-    pub(crate) layers: Vec<(LayerFileName, LayerFileMetadata)>,
+    pub(crate) layers: Vec<(LayerFileName, Generation)>,
 }

 #[derive(Debug)]
@@ -268,10 +248,6 @@ pub(crate) enum UploadOp {

    /// Barrier. When the barrier operation is reached,
    Barrier(tokio::sync::watch::Sender<()>),
-
-    /// Shutdown; upon encountering this operation no new operations will be spawned, otherwise
-    /// this is the same as a Barrier.
-    Shutdown,
 }

 impl std::fmt::Display for UploadOp {
@@ -293,7 +269,6 @@ impl std::fmt::Display for UploadOp {
                write!(f, "Delete({} layers)", delete.layers.len())
            }
            UploadOp::Barrier(_) => write!(f, "Barrier"),
-            UploadOp::Shutdown => write!(f, "Shutdown"),
        }
    }
 }
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -2079,88 +2079,4 @@ mod tests {

        Ok(())
    }
-
-    /// Replay a wal segment file taken directly from safekeepers.
-    ///
-    /// This test is useful for benchmarking since it allows us to profile only
-    /// the walingest code in a single-threaded executor, and iterate more quickly
-    /// without waiting for unrelated steps.
-    #[tokio::test]
-    async fn test_ingest_real_wal() {
-        use crate::tenant::harness::*;
-        use postgres_ffi::waldecoder::WalStreamDecoder;
-        use postgres_ffi::WAL_SEGMENT_SIZE;
-
-        // Define test data path and constants.
-        //
-        // Steps to reconstruct the data, if needed:
-        // 1. Run the pgbench python test
-        // 2. Take the first wal segment file from safekeeper
-        // 3. Compress it using `zstd --long input_file`
-        // 4. Copy initdb.tar.zst from local_fs_remote_storage
-        // 5. Grep sk logs for "restart decoder" to get startpoint
-        // 6. Run just the decoder from this test to get the endpoint.
-        //    It's the last LSN the decoder will output.
-        let pg_version = 15; // The test data was generated by pg15
-        let path = "test_data/sk_wal_segment_from_pgbench";
-        let wal_segment_path = format!("{path}/000000010000000000000001.zst");
-        let startpoint = Lsn::from_hex("14AEC08").unwrap();
-        let endpoint = Lsn::from_hex("1FFFF98").unwrap();
-
-        // Bootstrap a real timeline. We can't use create_test_timeline because
-        // it doesn't create a real checkpoint, and Walingest::new tries to parse
-        // the garbage data.
-        //
-        // TODO use the initdb.tar.zst file stored with the test data to avoid
-        //      problems with inconsistent initdb results after pg minor version bumps.
-        let (tenant, ctx) = TenantHarness::create("test_ingest_real_wal")
-            .unwrap()
-            .load()
-            .await;
-        let tline = tenant
-            .bootstrap_timeline(TIMELINE_ID, pg_version, &ctx)
-            .await
-            .unwrap();
-
-        // We fully read and decompress this into memory before decoding
-        // to get a more accurate perf profile of the decoder.
-        let bytes = {
-            use async_compression::tokio::bufread::ZstdDecoder;
-            let file = tokio::fs::File::open(wal_segment_path).await.unwrap();
-            let reader = tokio::io::BufReader::new(file);
-            let decoder = ZstdDecoder::new(reader);
-            let mut reader = tokio::io::BufReader::new(decoder);
-            let mut buffer = Vec::new();
-            tokio::io::copy_buf(&mut reader, &mut buffer).await.unwrap();
-            buffer
-        };
-
-        // TODO start a profiler too
-        let started_at = std::time::Instant::now();
-
-        // Initialize walingest
-        let xlogoff: usize = startpoint.segment_offset(WAL_SEGMENT_SIZE);
-        let mut decoder = WalStreamDecoder::new(startpoint, pg_version);
-        let mut walingest = WalIngest::new(tline.as_ref(), startpoint, &ctx)
-            .await
-            .unwrap();
-        let mut modification = tline.begin_modification(endpoint);
-        let mut decoded = DecodedWALRecord::default();
-        println!("decoding {} bytes", bytes.len() - xlogoff);
-
-        // Decode and ingest wal. We process the wal in chunks because
-        // that's what happens when we get bytes from safekeepers.
-        for chunk in bytes[xlogoff..].chunks(50) {
-            decoder.feed_bytes(chunk);
-            while let Some((lsn, recdata)) = decoder.poll_decode().unwrap() {
-                walingest
-                    .ingest_record(recdata, lsn, &mut modification, &mut decoded, &ctx)
-                    .await
-                    .unwrap();
-            }
-        }
-
-        let duration = started_at.elapsed();
-        println!("done in {:?}", duration);
-    }
 }
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -1182,7 +1182,7 @@ mod tests {

    #[tokio::test]
    async fn short_v14_redo() {
-        let expected = std::fs::read("test_data/short_v14_redo.page").unwrap();
+        let expected = std::fs::read("fixtures/short_v14_redo.page").unwrap();

        let h = RedoHarness::new().unwrap();

--- a/pageserver/test_data/sk_wal_segment_from_pgbench/000000010000000000000001.zst
+++ b/pageserver/test_data/sk_wal_segment_from_pgbench/000000010000000000000001.zst
--- a/pageserver/test_data/sk_wal_segment_from_pgbench/initdb.tar.zst
+++ b/pageserver/test_data/sk_wal_segment_from_pgbench/initdb.tar.zst
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -20,7 +20,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl

 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql
+DATA = neon--1.0.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"

 EXTRA_CLEAN = \
--- a/pgxn/neon/README.md
+++ b/pgxn/neon/README.md
@@ -1,20 +0,0 @@
-neon extension consists of several parts:
-
-### shared preload library `neon.so`
-
- implements storage manager API and network communications with remote page server.
-
- walproposer: implements broadcast protocol between postgres and WAL safekeepers.
-
- control plane connector:  Captures updates to roles/databases using ProcessUtility_hook and sends them to the control ProcessUtility_hook.
-
- remote extension server: Request compute_ctl to download extension files.
-
- file_cache: Local file cache is used to temporary store relations pages in local file system for better performance.
-
- relsize_cache: Relation size cache for better neon performance.
-
-### SQL functions in `neon--*.sql`
-
-Utility functions to expose neon specific information to user and metrics collection.
-This extension is created in all databases in the cluster by default.
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -32,13 +32,11 @@
 #include "storage/latch.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
-#include "utils/builtins.h"
 #include "utils/dynahash.h"
 #include "utils/guc.h"
 #include "storage/fd.h"
 #include "storage/pg_shmem.h"
 #include "storage/buf_internals.h"
-#include "pgstat.h"

 /*
 * Local file cache is used to temporary store relations pages in local file system.
@@ -67,7 +65,6 @@
 typedef struct FileCacheEntry
 {
 	BufferTag	key;
-	uint32      hash;
 	uint32		offset;
 	uint32		access_count;
 	uint32		bitmap[BLOCKS_PER_CHUNK/32];
@@ -79,10 +76,6 @@ typedef struct FileCacheControl
 	uint64 generation; /* generation is needed to handle correct hash reenabling */
 	uint32 size; /* size of cache file in chunks */
 	uint32 used; /* number of used chunks */
-	uint32 limit; /* shared copy of lfc_size_limit */
-	uint64 hits;
-	uint64 misses;
-	uint64 writes;
 	dlist_head lru; /* double linked list for LRU replacement algorithm */
 } FileCacheControl;

@@ -98,12 +91,10 @@ static shmem_startup_hook_type prev_shmem_startup_hook;
 static shmem_request_hook_type prev_shmem_request_hook;
 #endif

-#define LFC_ENABLED() (lfc_ctl->limit != 0)
-
-void PGDLLEXPORT FileCacheMonitorMain(Datum main_arg);
+void FileCacheMonitorMain(Datum main_arg);

 /*
- * Local file cache is optional and Neon can work without it.
+ * Local file cache is not mandatory: Neon can work without it.
 * In case of any any errors with this cache, we should disable it but to not throw error.
 * Also we should allow  re-enable it if source of failure (lack of disk space, permissions,...) is fixed.
 * All cache content should be invalidated to avoid reading of stale or corrupted data
@@ -111,77 +102,49 @@ void PGDLLEXPORT FileCacheMonitorMain(Datum main_arg);
 static void
 lfc_disable(char const* op)
 {
-	int fd;
+	HASH_SEQ_STATUS status;
+	FileCacheEntry* entry;
+
 	elog(WARNING, "Failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path);

-	/* Invalidate hash */
-	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (LFC_ENABLED())
-	{
-		HASH_SEQ_STATUS status;
-		FileCacheEntry* entry;
-
-		hash_seq_init(&status, lfc_hash);
-		while ((entry = hash_seq_search(&status)) != NULL)
-		{
-			hash_search_with_hash_value(lfc_hash, &entry->key, entry->hash, HASH_REMOVE, NULL);
-		}
-		lfc_ctl->generation += 1;
-		lfc_ctl->size = 0;
-		lfc_ctl->used = 0;
-		lfc_ctl->limit = 0;
-		dlist_init(&lfc_ctl->lru);
-
-		if (lfc_desc > 0)
-		{
-			/* If the reason of error is ENOSPC, then truncation of file may help to reclaim some space */
-			int rc = ftruncate(lfc_desc, 0);
-			if (rc < 0)
-				elog(WARNING, "Failed to truncate local file cache %s: %m", lfc_path);
-		}
-	}
-	/* We need to use unlink to to avoid races in LFC write, because it is not protectedby */
-	unlink(lfc_path);
-
-	fd = BasicOpenFile(lfc_path, O_RDWR|O_CREAT|O_TRUNC);
-	if (fd < 0)
-		elog(WARNING, "Failed to recreate local file cache %s: %m", lfc_path);
-	else
-		close(fd);
-
-	LWLockRelease(lfc_lock);
-
 	if (lfc_desc > 0)
 		close(lfc_desc);

 	lfc_desc = -1;
-}
+	lfc_size_limit = 0;

-/*
- * This check is done without obtaining lfc_lock, so it is unreliable
- */
-static bool
-lfc_maybe_disabled(void)
-{
-	return !lfc_ctl || !LFC_ENABLED();
+	/* Invalidate hash */
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	hash_seq_init(&status, lfc_hash);
+	while ((entry = hash_seq_search(&status)) != NULL)
+	{
+		hash_search(lfc_hash, &entry->key, HASH_REMOVE, NULL);
+		memset(entry->bitmap, 0, sizeof entry->bitmap);
+	}
+	hash_seq_term(&status);
+	lfc_ctl->generation += 1;
+	lfc_ctl->size = 0;
+	lfc_ctl->used = 0;
+	dlist_init(&lfc_ctl->lru);
+
+	LWLockRelease(lfc_lock);
 }

 static bool
 lfc_ensure_opened(void)
 {
-	bool enabled = !lfc_maybe_disabled();
 	/* Open cache file if not done yet */
-	if (lfc_desc <= 0 && enabled)
+	if (lfc_desc <= 0)
 	{
-		lfc_desc = BasicOpenFile(lfc_path, O_RDWR);
+		lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);

 		if (lfc_desc < 0) {
 			lfc_disable("open");
 			return false;
 		}
 	}
-	return enabled;
+	return true;
 }

 static void
@@ -200,7 +163,6 @@ lfc_shmem_startup(void)
 	lfc_ctl = (FileCacheControl*)ShmemInitStruct("lfc", sizeof(FileCacheControl), &found);
 	if (!found)
 	{
-		int fd;
 		uint32 lfc_size = SIZE_MB_TO_CHUNKS(lfc_max_size);
 		lfc_lock = (LWLockId)GetNamedLWLockTranche("lfc_lock");
 		info.keysize = sizeof(BufferTag);
@@ -213,23 +175,10 @@ lfc_shmem_startup(void)
 		lfc_ctl->generation = 0;
 		lfc_ctl->size = 0;
 		lfc_ctl->used = 0;
-		lfc_ctl->hits = 0;
-		lfc_ctl->misses = 0;
-		lfc_ctl->writes = 0;
 		dlist_init(&lfc_ctl->lru);

-		/* Recreate file cache on restart */
-		fd = BasicOpenFile(lfc_path, O_RDWR|O_CREAT|O_TRUNC);
-		if (fd < 0)
-		{
-			elog(WARNING, "Failed to create local file cache %s: %m", lfc_path);
-			lfc_ctl->limit = 0;
-		}
-		else
-		{
-			close(fd);
-			lfc_ctl->limit = SIZE_MB_TO_CHUNKS(lfc_size_limit);
-		}
+		/* Remove file cache on restart */
+		(void)unlink(lfc_path);
 	}
 	LWLockRelease(AddinShmemInitLock);
 }
@@ -246,17 +195,6 @@ lfc_shmem_request(void)
 	RequestNamedLWLockTranche("lfc_lock", 1);
 }

-static bool
-is_normal_backend(void)
-{
-	/*
-	 * Stats collector detach shared memory, so we should not try to access shared memory here.
-	 * Parallel workers first assign default value (0), so not perform truncation in parallel workers.
-	 * The Postmaster can handle SIGHUP and it has access to shared memory (UsedShmemSegAddr != NULL), but has no PGPROC.
-	 */
-	return lfc_ctl && MyProc && UsedShmemSegAddr && !IsParallelWorker();
-}
-
 static bool
 lfc_check_limit_hook(int *newval, void **extra, GucSource source)
 {
@@ -272,15 +210,25 @@ static void
 lfc_change_limit_hook(int newval, void *extra)
 {
 	uint32 new_size = SIZE_MB_TO_CHUNKS(newval);
-
-	if (!is_normal_backend())
-		return;
-
-	if (!lfc_ensure_opened())
+	/*
+	 * The stats collector detaches shared memory, so we should not try to access shared memory here.
+	 * Parallel workers first assign the default value (0), so do not perform truncation in parallel workers.
+	 * The postmaster can handle SIGHUP and has access to shared memory (UsedShmemSegAddr != NULL), but has no PGPROC.
+	 */
+	if (!lfc_ctl || !MyProc || !UsedShmemSegAddr || IsParallelWorker())
 		return;

+	/* Open cache file if not done yet */
+	if (lfc_desc <= 0)
+	{
+		lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT);
+		if (lfc_desc < 0) {
+			elog(WARNING, "Failed to open file cache %s: %m, disabling file cache", lfc_path);
+			lfc_size_limit = 0; /* disable file cache */
+			return;
+		}
+	}
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
 	while (new_size < lfc_ctl->used && !dlist_is_empty(&lfc_ctl->lru))
 	{
 		/* Shrink cache by throwing away least recently accessed chunks and returning their space to file system */
@@ -290,12 +238,10 @@ lfc_change_limit_hook(int newval, void *extra)
 		if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, (off_t)victim->offset*BLOCKS_PER_CHUNK*BLCKSZ, BLOCKS_PER_CHUNK*BLCKSZ) < 0)
 			elog(LOG, "Failed to punch hole in file: %m");
 #endif
-		hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+		hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
 		lfc_ctl->used -= 1;
 	}
-	lfc_ctl->limit = new_size;
 	elog(DEBUG1, "set local file cache limit to %d", new_size);
-
 	LWLockRelease(lfc_lock);
 }

@@ -309,7 +255,6 @@ lfc_init(void)
 	if (!process_shared_preload_libraries_in_progress)
 		elog(ERROR, "Neon module should be loaded via shared_preload_libraries");

-
 	DefineCustomIntVariable("neon.max_file_cache_size",
 							"Maximal size of Neon local file cache",
 							NULL,
@@ -370,10 +315,10 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	BufferTag tag;
 	FileCacheEntry* entry;
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
-	bool found = false;
+	bool found;
 	uint32 hash;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return false;

 	CopyNRelFileInfoToBufTag(tag, rinfo);
@@ -382,11 +327,8 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_SHARED);
-	if (LFC_ENABLED())
-	{
-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
-		found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) != 0;
-	}
+	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+	found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) != 0;
 	LWLockRelease(lfc_lock);
 	return found;
 }
@@ -403,7 +345,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
 	uint32 hash;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return;

 	CopyNRelFileInfoToBufTag(tag, rinfo);
@@ -413,13 +355,6 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
-	{
-		LWLockRelease(lfc_lock);
-		return;
-	}
-
 	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, &found);

 	if (!found)
@@ -470,7 +405,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 /*
 * Try to read page from local cache.
 * Returns true if page is found in local cache.
- * In case of error local file cache is disabled (lfc->limit is set to zero).
+ * In case of error lfc_size_limit is set to zero to disable any further operations with the cache.
 */
 bool
 lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
@@ -485,7 +420,7 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	uint64 generation;
 	uint32 entry_offset;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return false;

 	if (!lfc_ensure_opened())
@@ -497,18 +432,10 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
-	{
-		LWLockRelease(lfc_lock);
-		return false;
-	}
-
 	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
 	if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
 	{
 		/* Page is not cached */
-		lfc_ctl->misses += 1;
 		LWLockRelease(lfc_lock);
 		return false;
 	}
@@ -529,11 +456,8 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	/* Place entry to the head of LRU list */
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
 	if (lfc_ctl->generation == generation)
 	{
-		Assert(LFC_ENABLED());
-		lfc_ctl->hits += 1;
 		Assert(entry->access_count > 0);
 		if (--entry->access_count == 0)
 			dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
@@ -564,10 +488,8 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	bool found;
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
 	uint32 hash;
-	uint64 generation;
-	uint32 entry_offset;

-	if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
+	if (lfc_size_limit == 0) /* fast exit if file cache is disabled */
 		return;

 	if (!lfc_ensure_opened())
@@ -575,17 +497,12 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	tag.forkNum = forkNum;
 	tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK-1);
+	
 	CopyNRelFileInfoToBufTag(tag, rinfo);
+	
 	hash = get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
-	{
-		LWLockRelease(lfc_lock);
-		return;
-	}
-
 	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);

 	if (found)
@@ -604,13 +521,13 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		 * there are should be very large number of concurrent IO operations and them are limited by max_connections,
 		 * we prefer not to complicate code and use second approach.
 		 */
-		if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
+		if (lfc_ctl->used >= SIZE_MB_TO_CHUNKS(lfc_size_limit) && !dlist_is_empty(&lfc_ctl->lru))
 		{
 			/* Cache overflow: evict least recently used chunk */
 			FileCacheEntry* victim = dlist_container(FileCacheEntry, lru_node, dlist_pop_head_node(&lfc_ctl->lru));
 			Assert(victim->access_count == 0);
 			entry->offset = victim->offset; /* grab victim's chunk */
-			hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+			hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
 			elog(DEBUG2, "Swap file cache page");
 		}
 		else
@@ -619,140 +536,27 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			entry->offset = lfc_ctl->size++; /* allocate new chunk at end of file */
 		}
 		entry->access_count = 1;
-		entry->hash = hash;
 		memset(entry->bitmap, 0, sizeof entry->bitmap);
 	}

-	generation = lfc_ctl->generation;
-	entry_offset = entry->offset;
-	lfc_ctl->writes += 1;
-	LWLockRelease(lfc_lock);
-
-	rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry_offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
+	rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ);
 	if (rc != BLCKSZ)
 	{
+		LWLockRelease(lfc_lock);
 		lfc_disable("write");
 	}
 	else
 	{
-		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-		if (lfc_ctl->generation == generation)
-		{
-			Assert(LFC_ENABLED());
-			/* Place entry to the head of LRU list */
-			Assert(entry->access_count > 0);
-			if (--entry->access_count == 0)
-				dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);
-
-			entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
-		}
+		/* Unpin the entry; when no users remain, append it to the tail (most recently used end) of the LRU list */
+		Assert(entry->access_count > 0);
+		if (--entry->access_count == 0)
+			dlist_push_tail(&lfc_ctl->lru, &entry->lru_node);

+		entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
 		LWLockRelease(lfc_lock);
 	}
 }

-typedef struct
-{
-	TupleDesc	tupdesc;
-} NeonGetStatsCtx;
-
-#define NUM_NEON_GET_STATS_COLS	2
-#define NUM_NEON_GET_STATS_ROWS	3
-
-PG_FUNCTION_INFO_V1(neon_get_lfc_stats);
-Datum
-neon_get_lfc_stats(PG_FUNCTION_ARGS)
-{
-	FuncCallContext *funcctx;
-	NeonGetStatsCtx* fctx;
-	MemoryContext oldcontext;
-	TupleDesc	tupledesc;
-	Datum		result;
-	HeapTuple	tuple;
-	char const* key;
-	uint64      value;
-	Datum		values[NUM_NEON_GET_STATS_COLS];
-	bool		nulls[NUM_NEON_GET_STATS_COLS];
-
-	if (SRF_IS_FIRSTCALL())
-	{
-		funcctx = SRF_FIRSTCALL_INIT();
-
-		/* Switch context when allocating stuff to be used in later calls */
-		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
-
-		/* Create a user function context for cross-call persistence */
-		fctx = (NeonGetStatsCtx*) palloc(sizeof(NeonGetStatsCtx));
-
-		/* Construct a tuple descriptor for the result rows. */
-		tupledesc = CreateTemplateTupleDesc(NUM_NEON_GET_STATS_COLS);
-
-		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "lfc_key",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "lfc_value",
-						   INT8OID, -1, 0);
-
-		fctx->tupdesc = BlessTupleDesc(tupledesc);
-		funcctx->max_calls = NUM_NEON_GET_STATS_ROWS;
-		funcctx->user_fctx = fctx;
-
-		/* Return to original context when allocating transient memory */
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	funcctx = SRF_PERCALL_SETUP();
-
-	/* Get the saved state */
-	fctx = (NeonGetStatsCtx*) funcctx->user_fctx;
-
-	switch (funcctx->call_cntr)
-	{
-		case 0:
-			key = "file_cache_misses";
-			if (lfc_ctl)
-				value = lfc_ctl->misses;
-			break;
-		case 1:
-			key = "file_cache_hits";
-			if (lfc_ctl)
-				value = lfc_ctl->hits;
-			break;
-		case 2:
-			key = "file_cache_used";
-			if (lfc_ctl)
-				value = lfc_ctl->used;
-			break;
-		case 3:
-			key = "file_cache_writes";
-			if (lfc_ctl)
-				value = lfc_ctl->writes;
-			break;
-		default:
-			SRF_RETURN_DONE(funcctx);
-	}
-	values[0] = PointerGetDatum(cstring_to_text(key));
-	nulls[0] = false;
-	if (lfc_ctl)
-	{
-		nulls[1] = false;
-		values[1] = Int64GetDatum(value);
-	}
-	else
-		nulls[1] = true;
-
-	tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
-	result = HeapTupleGetDatum(tuple);
-	SRF_RETURN_NEXT(funcctx, result);
-}
-
-
-/*
- * Function returning data from the local file cache
- * relation node/tablespace/database/blocknum and access_counter
- */
-PG_FUNCTION_INFO_V1(local_cache_pages);
-
 /*
 * Record structure holding the to be exposed cache data.
 */
@@ -776,6 +580,11 @@ typedef struct
 	LocalCachePagesRec *record;
 } LocalCachePagesContext;

+/*
+ * Function returning data from the local file cache
+ * relation node/tablespace/database/blocknum and access_counter
+ */
+PG_FUNCTION_INFO_V1(local_cache_pages);

 #define NUM_LOCALCACHE_PAGES_ELEM	7

@@ -842,20 +651,15 @@ local_cache_pages(PG_FUNCTION_ARGS)

 		fctx->tupdesc = BlessTupleDesc(tupledesc);

-		if (lfc_ctl)
-		{
-			LWLockAcquire(lfc_lock, LW_SHARED);
+		LWLockAcquire(lfc_lock, LW_SHARED);

-			if (LFC_ENABLED())
-			{
-				hash_seq_init(&status, lfc_hash);
-				while ((entry = hash_seq_search(&status)) != NULL)
-				{
-					for (int i = 0; i < BLOCKS_PER_CHUNK/32; i++)
-						n_pages += pg_popcount32(entry->bitmap[i]);
-				}
-			}
+        hash_seq_init(&status, lfc_hash);
+        while ((entry = hash_seq_search(&status)) != NULL)
+		{
+			for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
+				n_pages += (entry->bitmap[i >> 5] & (1 << (i & 31))) != 0;
 		}
+		hash_seq_term(&status);
 		fctx->record = (LocalCachePagesRec *)
 			MemoryContextAllocHuge(CurrentMemoryContext,
 								   sizeof(LocalCachePagesRec) * n_pages);
@@ -867,35 +671,36 @@ local_cache_pages(PG_FUNCTION_ARGS)
 		/* Return to original context when allocating transient memory */
 		MemoryContextSwitchTo(oldcontext);

-		if (n_pages != 0)
+		/*
+		 * Scan through all the cache entries, saving the relevant fields in
+		 * the fctx->record structure.
+		 *
+		 * lfc_lock is held in shared mode across both passes over the hash
+		 * table, so the page count computed above still matches the set of
+		 * pages recorded here.
+		 */
+		n_pages = 0;
+        hash_seq_init(&status, lfc_hash);
+        while ((entry = hash_seq_search(&status)) != NULL)
 		{
-			/*
-			 * Scan through all the cache entries, saving the relevant fields in the
-			 * fctx->record structure.
-			 */
-			uint32 n = 0;
-			hash_seq_init(&status, lfc_hash);
-			while ((entry = hash_seq_search(&status)) != NULL)
+			for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 			{
-				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
+				if (entry->bitmap[i >> 5] & (1 << (i & 31)))
 				{
-					if (entry->bitmap[i >> 5] & (1 << (i & 31)))
-					{
-						fctx->record[n].pageoffs = entry->offset*BLOCKS_PER_CHUNK + i;
-						fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
-						fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
-						fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
-						fctx->record[n].forknum = entry->key.forkNum;
-						fctx->record[n].blocknum = entry->key.blockNum + i;
-						fctx->record[n].accesscount = entry->access_count;
-						n += 1;
-					}
+					fctx->record[n_pages].pageoffs = entry->offset*BLOCKS_PER_CHUNK + i;
+					fctx->record[n_pages].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
+					fctx->record[n_pages].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
+					fctx->record[n_pages].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
+					fctx->record[n_pages].forknum = entry->key.forkNum;
+					fctx->record[n_pages].blocknum = entry->key.blockNum + i;
+					fctx->record[n_pages].accesscount = entry->access_count;
+					n_pages += 1;
 				}
 			}
-			Assert(n_pages == n);
 		}
-		if (lfc_ctl)
-			LWLockRelease(lfc_lock);
+		hash_seq_term(&status);
+		Assert(n_pages == funcctx->max_calls);
+		LWLockRelease(lfc_lock);
 	}

 	funcctx = SRF_PERCALL_SETUP();
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -21,7 +21,6 @@
 #include "storage/buf_internals.h"
 #include "storage/lwlock.h"
 #include "storage/ipc.h"
-#include "storage/pg_shmem.h"
 #include "c.h"
 #include "postmaster/interrupt.h"

@@ -88,12 +87,6 @@ bool	(*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) =
 static bool pageserver_flush(void);
 static void pageserver_disconnect(void);

-static bool
-PagestoreShmemIsValid()
-{
-    return pagestore_shared && UsedShmemSegAddr;
-}
-
 static bool
 CheckPageserverConnstring(char **newval, void **extra, GucSource source)
 {
@@ -103,7 +96,7 @@ CheckPageserverConnstring(char **newval, void **extra, GucSource source)
 static void
 AssignPageserverConnstring(const char *newval, void *extra)
 {
-    if(!PagestoreShmemIsValid())
+    if(!pagestore_shared)
        return;
    LWLockAcquire(pagestore_shared->lock, LW_EXCLUSIVE);
    strlcpy(pagestore_shared->pageserver_connstring, newval, MAX_PAGESERVER_CONNSTRING_SIZE);
@@ -114,7 +107,7 @@ AssignPageserverConnstring(const char *newval, void *extra)
 static bool
 CheckConnstringUpdated()
 {
-    if(!PagestoreShmemIsValid())
+    if(!pagestore_shared)
        return false;
    return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->update_counter);
 }
@@ -122,7 +115,7 @@ CheckConnstringUpdated()
 static void
 ReloadConnstring()
 {
-    if(!PagestoreShmemIsValid())
+    if(!pagestore_shared)
        return;
    LWLockAcquire(pagestore_shared->lock, LW_SHARED);
    strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
--- a/pgxn/neon/neon--1.0--1.1.sql
+++ b/pgxn/neon/neon--1.0--1.1.sql
@@ -1,10 +0,0 @@
-\echo Use "ALTER EXTENSION neon UPDATE TO '1.1'" to load this file. \quit
-
-CREATE FUNCTION neon_get_lfc_stats()
-RETURNS SETOF RECORD
-AS 'MODULE_PATHNAME', 'neon_get_lfc_stats'
-LANGUAGE C PARALLEL SAFE;
-
-- Create a view for convenient access.
-CREATE VIEW neon_lfc_stats AS
-	SELECT P.* FROM neon_get_lfc_stats() AS P (lfc_key text, lfc_value bigint);
--- a/pgxn/neon/neon.control
+++ b/pgxn/neon/neon.control
@@ -1,5 +1,4 @@
 # neon extension
 comment = 'cloud storage for PostgreSQL'
-default_version = '1.1'
+default_version = '1.0'
 module_pathname = '$libdir/neon'
-relocatable = true
--- a/poetry.lock
+++ b/poetry.lock
@@ -2,99 +2,111 @@

 [[package]]
 name = "aiohttp"
-version = "3.9.0"
+version = "3.8.6"
 description = "Async http client/server framework (asyncio)"
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.6"
 files = [
-    {file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6896b8416be9ada4d22cd359d7cb98955576ce863eadad5596b7cdfbf3e17c6c"},
-    {file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1736d87dad8ef46a8ec9cddd349fa9f7bd3a064c47dd6469c0d6763d3d49a4fc"},
-    {file = "aiohttp-3.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c9e5f4d7208cda1a2bb600e29069eecf857e6980d0ccc922ccf9d1372c16f4b"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8488519aa05e636c5997719fe543c8daf19f538f4fa044f3ce94bee608817cff"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ab16c254e2312efeb799bc3c06897f65a133b38b69682bf75d1f1ee1a9c43a9"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a94bde005a8f926d0fa38b88092a03dea4b4875a61fbcd9ac6f4351df1b57cd"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b777c9286b6c6a94f50ddb3a6e730deec327e9e2256cb08b5530db0f7d40fd8"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:571760ad7736b34d05597a1fd38cbc7d47f7b65deb722cb8e86fd827404d1f6b"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:deac0a32aec29608eb25d730f4bc5a261a65b6c48ded1ed861d2a1852577c932"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4ee1b4152bc3190cc40ddd6a14715e3004944263ea208229ab4c297712aa3075"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:3607375053df58ed6f23903aa10cf3112b1240e8c799d243bbad0f7be0666986"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:65b0a70a25456d329a5e1426702dde67be0fb7a4ead718005ba2ca582d023a94"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a2eb5311a37fe105aa35f62f75a078537e1a9e4e1d78c86ec9893a3c97d7a30"},
-    {file = "aiohttp-3.9.0-cp310-cp310-win32.whl", hash = "sha256:2cbc14a13fb6b42d344e4f27746a4b03a2cb0c1c3c5b932b0d6ad8881aa390e3"},
-    {file = "aiohttp-3.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac9669990e2016d644ba8ae4758688534aabde8dbbc81f9af129c3f5f01ca9cd"},
-    {file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f8e05f5163528962ce1d1806fce763ab893b1c5b7ace0a3538cd81a90622f844"},
-    {file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4afa8f71dba3a5a2e1e1282a51cba7341ae76585345c43d8f0e624882b622218"},
-    {file = "aiohttp-3.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f929f4c9b9a00f3e6cc0587abb95ab9c05681f8b14e0fe1daecfa83ea90f8318"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28185e36a78d247c55e9fbea2332d16aefa14c5276a582ce7a896231c6b1c208"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a486ddf57ab98b6d19ad36458b9f09e6022de0381674fe00228ca7b741aacb2f"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70e851f596c00f40a2f00a46126c95c2e04e146015af05a9da3e4867cfc55911"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5b7bf8fe4d39886adc34311a233a2e01bc10eb4e842220235ed1de57541a896"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c67a51ea415192c2e53e4e048c78bab82d21955b4281d297f517707dc836bf3d"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:694df243f394629bcae2d8ed94c589a181e8ba8604159e6e45e7b22e58291113"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3dd8119752dd30dd7bca7d4bc2a92a59be6a003e4e5c2cf7e248b89751b8f4b7"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:eb6dfd52063186ac97b4caa25764cdbcdb4b10d97f5c5f66b0fa95052e744eb7"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d97c3e286d0ac9af6223bc132dc4bad6540b37c8d6c0a15fe1e70fb34f9ec411"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:816f4db40555026e4cdda604a1088577c1fb957d02f3f1292e0221353403f192"},
-    {file = "aiohttp-3.9.0-cp311-cp311-win32.whl", hash = "sha256:3abf0551874fecf95f93b58f25ef4fc9a250669a2257753f38f8f592db85ddea"},
-    {file = "aiohttp-3.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:e18d92c3e9e22553a73e33784fcb0ed484c9874e9a3e96c16a8d6a1e74a0217b"},
-    {file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:99ae01fb13a618b9942376df77a1f50c20a281390dad3c56a6ec2942e266220d"},
-    {file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:05857848da443c8c12110d99285d499b4e84d59918a21132e45c3f0804876994"},
-    {file = "aiohttp-3.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:317719d7f824eba55857fe0729363af58e27c066c731bc62cd97bc9c3d9c7ea4"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e3b3c107ccb0e537f309f719994a55621acd2c8fdf6d5ce5152aed788fb940"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45820ddbb276113ead8d4907a7802adb77548087ff5465d5c554f9aa3928ae7d"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:05a183f1978802588711aed0dea31e697d760ce9055292db9dc1604daa9a8ded"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a4cd44788ea0b5e6bb8fa704597af3a30be75503a7ed1098bc5b8ffdf6c982"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673343fbc0c1ac44d0d2640addc56e97a052504beacd7ade0dc5e76d3a4c16e8"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e8a3b79b6d186a9c99761fd4a5e8dd575a48d96021f220ac5b5fa856e5dd029"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6777a390e41e78e7c45dab43a4a0196c55c3b8c30eebe017b152939372a83253"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7ae5f99a32c53731c93ac3075abd3e1e5cfbe72fc3eaac4c27c9dd64ba3b19fe"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f1e4f254e9c35d8965d377e065c4a8a55d396fe87c8e7e8429bcfdeeb229bfb3"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11ca808f9a6b63485059f5f6e164ef7ec826483c1212a44f268b3653c91237d8"},
-    {file = "aiohttp-3.9.0-cp312-cp312-win32.whl", hash = "sha256:de3cc86f4ea8b4c34a6e43a7306c40c1275e52bfa9748d869c6b7d54aa6dad80"},
-    {file = "aiohttp-3.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca4fddf84ac7d8a7d0866664936f93318ff01ee33e32381a115b19fb5a4d1202"},
-    {file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f09960b5bb1017d16c0f9e9f7fc42160a5a49fa1e87a175fd4a2b1a1833ea0af"},
-    {file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8303531e2c17b1a494ffaeba48f2da655fe932c4e9a2626c8718403c83e5dd2b"},
-    {file = "aiohttp-3.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4790e44f46a4aa07b64504089def5744d3b6780468c4ec3a1a36eb7f2cae9814"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1d7edf74a36de0e5ca50787e83a77cf352f5504eb0ffa3f07000a911ba353fb"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94697c7293199c2a2551e3e3e18438b4cba293e79c6bc2319f5fd652fccb7456"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1b66dbb8a7d5f50e9e2ea3804b01e766308331d0cac76eb30c563ac89c95985"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9623cfd9e85b76b83ef88519d98326d4731f8d71869867e47a0b979ffec61c73"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f32c86dc967ab8c719fd229ce71917caad13cc1e8356ee997bf02c5b368799bf"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f50b4663c3e0262c3a361faf440761fbef60ccdde5fe8545689a4b3a3c149fb4"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dcf71c55ec853826cd70eadb2b6ac62ec577416442ca1e0a97ad875a1b3a0305"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:42fe4fd9f0dfcc7be4248c162d8056f1d51a04c60e53366b0098d1267c4c9da8"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76a86a9989ebf82ee61e06e2bab408aec4ea367dc6da35145c3352b60a112d11"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f9e09a1c83521d770d170b3801eea19b89f41ccaa61d53026ed111cb6f088887"},
-    {file = "aiohttp-3.9.0-cp38-cp38-win32.whl", hash = "sha256:a00ce44c21612d185c5275c5cba4bab8d7c1590f248638b667ed8a782fa8cd6f"},
-    {file = "aiohttp-3.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:d5b9345ab92ebe6003ae11d8092ce822a0242146e6fa270889b9ba965457ca40"},
-    {file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98d21092bf2637c5fa724a428a69e8f5955f2182bff61f8036827cf6ce1157bf"},
-    {file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:35a68cd63ca6aaef5707888f17a70c36efe62b099a4e853d33dc2e9872125be8"},
-    {file = "aiohttp-3.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7f6235c7475658acfc1769d968e07ab585c79f6ca438ddfecaa9a08006aee2"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db04d1de548f7a62d1dd7e7cdf7c22893ee168e22701895067a28a8ed51b3735"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:536b01513d67d10baf6f71c72decdf492fb7433c5f2f133e9a9087379d4b6f31"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c8b0a6487e8109427ccf638580865b54e2e3db4a6e0e11c02639231b41fc0f"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7276fe0017664414fdc3618fca411630405f1aaf0cc3be69def650eb50441787"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23170247ef89ffa842a02bbfdc425028574d9e010611659abeb24d890bc53bb8"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1a2ea8252cacc7fd51df5a56d7a2bb1986ed39be9397b51a08015727dfb69bd"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2d71abc15ff7047412ef26bf812dfc8d0d1020d664617f4913df2df469f26b76"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:2d820162c8c2bdbe97d328cd4f417c955ca370027dce593345e437b2e9ffdc4d"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:2779f5e7c70f7b421915fd47db332c81de365678180a9f3ab404088f87ba5ff9"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:366bc870d7ac61726f32a489fbe3d1d8876e87506870be66b01aeb84389e967e"},
-    {file = "aiohttp-3.9.0-cp39-cp39-win32.whl", hash = "sha256:1df43596b826022b14998f0460926ce261544fedefe0d2f653e1b20f49e96454"},
-    {file = "aiohttp-3.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:9c196b30f1b1aa3363a69dd69079ae9bec96c2965c4707eaa6914ba099fb7d4f"},
-    {file = "aiohttp-3.9.0.tar.gz", hash = "sha256:09f23292d29135025e19e8ff4f0a68df078fe4ee013bca0105b2e803989de92d"},
+    {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"},
+    {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"},
+    {file = "aiohttp-3.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:253bf92b744b3170eb4c4ca2fa58f9c4b87aeb1df42f71d4e78815e6e8b73c9e"},
+    {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fd194939b1f764d6bb05490987bfe104287bbf51b8d862261ccf66f48fb4096"},
+    {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c5f938d199a6fdbdc10bbb9447496561c3a9a565b43be564648d81e1102ac22"},
+    {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2817b2f66ca82ee699acd90e05c95e79bbf1dc986abb62b61ec8aaf851e81c93"},
+    {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa375b3d34e71ccccf172cab401cd94a72de7a8cc01847a7b3386204093bb47"},
+    {file = "aiohttp-3.8.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9de50a199b7710fa2904be5a4a9b51af587ab24c8e540a7243ab737b45844543"},
+    {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e1d8cb0b56b3587c5c01de3bf2f600f186da7e7b5f7353d1bf26a8ddca57f965"},
+    {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8e31e9db1bee8b4f407b77fd2507337a0a80665ad7b6c749d08df595d88f1cf5"},
+    {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7bc88fc494b1f0311d67f29fee6fd636606f4697e8cc793a2d912ac5b19aa38d"},
+    {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ec00c3305788e04bf6d29d42e504560e159ccaf0be30c09203b468a6c1ccd3b2"},
+    {file = "aiohttp-3.8.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad1407db8f2f49329729564f71685557157bfa42b48f4b93e53721a16eb813ed"},
+    {file = "aiohttp-3.8.6-cp310-cp310-win32.whl", hash = "sha256:ccc360e87341ad47c777f5723f68adbb52b37ab450c8bc3ca9ca1f3e849e5fe2"},
+    {file = "aiohttp-3.8.6-cp310-cp310-win_amd64.whl", hash = "sha256:93c15c8e48e5e7b89d5cb4613479d144fda8344e2d886cf694fd36db4cc86865"},
+    {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e2f9cc8e5328f829f6e1fb74a0a3a939b14e67e80832975e01929e320386b34"},
+    {file = "aiohttp-3.8.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e6a00ffcc173e765e200ceefb06399ba09c06db97f401f920513a10c803604ca"},
+    {file = "aiohttp-3.8.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:41bdc2ba359032e36c0e9de5a3bd00d6fb7ea558a6ce6b70acedf0da86458321"},
+    {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14cd52ccf40006c7a6cd34a0f8663734e5363fd981807173faf3a017e202fec9"},
+    {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2d5b785c792802e7b275c420d84f3397668e9d49ab1cb52bd916b3b3ffcf09ad"},
+    {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1bed815f3dc3d915c5c1e556c397c8667826fbc1b935d95b0ad680787896a358"},
+    {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96603a562b546632441926cd1293cfcb5b69f0b4159e6077f7c7dbdfb686af4d"},
+    {file = "aiohttp-3.8.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d76e8b13161a202d14c9584590c4df4d068c9567c99506497bdd67eaedf36403"},
+    {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e3f1e3f1a1751bb62b4a1b7f4e435afcdade6c17a4fd9b9d43607cebd242924a"},
+    {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:76b36b3124f0223903609944a3c8bf28a599b2cc0ce0be60b45211c8e9be97f8"},
+    {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:a2ece4af1f3c967a4390c284797ab595a9f1bc1130ef8b01828915a05a6ae684"},
+    {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:16d330b3b9db87c3883e565340d292638a878236418b23cc8b9b11a054aaa887"},
+    {file = "aiohttp-3.8.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42c89579f82e49db436b69c938ab3e1559e5a4409eb8639eb4143989bc390f2f"},
+    {file = "aiohttp-3.8.6-cp311-cp311-win32.whl", hash = "sha256:efd2fcf7e7b9d7ab16e6b7d54205beded0a9c8566cb30f09c1abe42b4e22bdcb"},
+    {file = "aiohttp-3.8.6-cp311-cp311-win_amd64.whl", hash = "sha256:3b2ab182fc28e7a81f6c70bfbd829045d9480063f5ab06f6e601a3eddbbd49a0"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fdee8405931b0615220e5ddf8cd7edd8592c606a8e4ca2a00704883c396e4479"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d25036d161c4fe2225d1abff2bd52c34ed0b1099f02c208cd34d8c05729882f0"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d791245a894be071d5ab04bbb4850534261a7d4fd363b094a7b9963e8cdbd31"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cccd1de239afa866e4ce5c789b3032442f19c261c7d8a01183fd956b1935349"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f13f60d78224f0dace220d8ab4ef1dbc37115eeeab8c06804fec11bec2bbd07"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a9b5a0606faca4f6cc0d338359d6fa137104c337f489cd135bb7fbdbccb1e39"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:13da35c9ceb847732bf5c6c5781dcf4780e14392e5d3b3c689f6d22f8e15ae31"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:4d4cbe4ffa9d05f46a28252efc5941e0462792930caa370a6efaf491f412bc66"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:229852e147f44da0241954fc6cb910ba074e597f06789c867cb7fb0621e0ba7a"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:713103a8bdde61d13490adf47171a1039fd880113981e55401a0f7b42c37d071"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:45ad816b2c8e3b60b510f30dbd37fe74fd4a772248a52bb021f6fd65dff809b6"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-win32.whl", hash = "sha256:2b8d4e166e600dcfbff51919c7a3789ff6ca8b3ecce16e1d9c96d95dd569eb4c"},
+    {file = "aiohttp-3.8.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0912ed87fee967940aacc5306d3aa8ba3a459fcd12add0b407081fbefc931e53"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2a988a0c673c2e12084f5e6ba3392d76c75ddb8ebc6c7e9ead68248101cd446"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebf3fd9f141700b510d4b190094db0ce37ac6361a6806c153c161dc6c041ccda"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3161ce82ab85acd267c8f4b14aa226047a6bee1e4e6adb74b798bd42c6ae1f80"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d95fc1bf33a9a81469aa760617b5971331cdd74370d1214f0b3109272c0e1e3c"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c43ecfef7deaf0617cee936836518e7424ee12cb709883f2c9a1adda63cc460"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca80e1b90a05a4f476547f904992ae81eda5c2c85c66ee4195bb8f9c5fb47f28"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:90c72ebb7cb3a08a7f40061079817133f502a160561d0675b0a6adf231382c92"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb54c54510e47a8c7c8e63454a6acc817519337b2b78606c4e840871a3e15349"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:de6a1c9f6803b90e20869e6b99c2c18cef5cc691363954c93cb9adeb26d9f3ae"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:a3628b6c7b880b181a3ae0a0683698513874df63783fd89de99b7b7539e3e8a8"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fc37e9aef10a696a5a4474802930079ccfc14d9f9c10b4662169671ff034b7df"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-win32.whl", hash = "sha256:f8ef51e459eb2ad8e7a66c1d6440c808485840ad55ecc3cafefadea47d1b1ba2"},
+    {file = "aiohttp-3.8.6-cp37-cp37m-win_amd64.whl", hash = "sha256:b2fe42e523be344124c6c8ef32a011444e869dc5f883c591ed87f84339de5976"},
+    {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9e2ee0ac5a1f5c7dd3197de309adfb99ac4617ff02b0603fd1e65b07dc772e4b"},
+    {file = "aiohttp-3.8.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01770d8c04bd8db568abb636c1fdd4f7140b284b8b3e0b4584f070180c1e5c62"},
+    {file = "aiohttp-3.8.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c68330a59506254b556b99a91857428cab98b2f84061260a67865f7f52899f5"},
+    {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89341b2c19fb5eac30c341133ae2cc3544d40d9b1892749cdd25892bbc6ac951"},
+    {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71783b0b6455ac8f34b5ec99d83e686892c50498d5d00b8e56d47f41b38fbe04"},
+    {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f628dbf3c91e12f4d6c8b3f092069567d8eb17814aebba3d7d60c149391aee3a"},
+    {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b04691bc6601ef47c88f0255043df6f570ada1a9ebef99c34bd0b72866c217ae"},
+    {file = "aiohttp-3.8.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee912f7e78287516df155f69da575a0ba33b02dd7c1d6614dbc9463f43066e3"},
+    {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9c19b26acdd08dd239e0d3669a3dddafd600902e37881f13fbd8a53943079dbc"},
+    {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:99c5ac4ad492b4a19fc132306cd57075c28446ec2ed970973bbf036bcda1bcc6"},
+    {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f0f03211fd14a6a0aed2997d4b1c013d49fb7b50eeb9ffdf5e51f23cfe2c77fa"},
+    {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:8d399dade330c53b4106160f75f55407e9ae7505263ea86f2ccca6bfcbdb4921"},
+    {file = "aiohttp-3.8.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ec4fd86658c6a8964d75426517dc01cbf840bbf32d055ce64a9e63a40fd7b771"},
+    {file = "aiohttp-3.8.6-cp38-cp38-win32.whl", hash = "sha256:33164093be11fcef3ce2571a0dccd9041c9a93fa3bde86569d7b03120d276c6f"},
+    {file = "aiohttp-3.8.6-cp38-cp38-win_amd64.whl", hash = "sha256:bdf70bfe5a1414ba9afb9d49f0c912dc524cf60141102f3a11143ba3d291870f"},
+    {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d52d5dc7c6682b720280f9d9db41d36ebe4791622c842e258c9206232251ab2b"},
+    {file = "aiohttp-3.8.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4ac39027011414dbd3d87f7edb31680e1f430834c8cef029f11c66dad0670aa5"},
+    {file = "aiohttp-3.8.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f5c7ce535a1d2429a634310e308fb7d718905487257060e5d4598e29dc17f0b"},
+    {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b30e963f9e0d52c28f284d554a9469af073030030cef8693106d918b2ca92f54"},
+    {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:918810ef188f84152af6b938254911055a72e0f935b5fbc4c1a4ed0b0584aed1"},
+    {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:002f23e6ea8d3dd8d149e569fd580c999232b5fbc601c48d55398fbc2e582e8c"},
+    {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fcf3eabd3fd1a5e6092d1242295fa37d0354b2eb2077e6eb670accad78e40e1"},
+    {file = "aiohttp-3.8.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:255ba9d6d5ff1a382bb9a578cd563605aa69bec845680e21c44afc2670607a95"},
+    {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d67f8baed00870aa390ea2590798766256f31dc5ed3ecc737debb6e97e2ede78"},
+    {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:86f20cee0f0a317c76573b627b954c412ea766d6ada1a9fcf1b805763ae7feeb"},
+    {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:39a312d0e991690ccc1a61f1e9e42daa519dcc34ad03eb6f826d94c1190190dd"},
+    {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e827d48cf802de06d9c935088c2924e3c7e7533377d66b6f31ed175c1620e05e"},
+    {file = "aiohttp-3.8.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd111d7fc5591ddf377a408ed9067045259ff2770f37e2d94e6478d0f3fc0c17"},
+    {file = "aiohttp-3.8.6-cp39-cp39-win32.whl", hash = "sha256:caf486ac1e689dda3502567eb89ffe02876546599bbf915ec94b1fa424eeffd4"},
+    {file = "aiohttp-3.8.6-cp39-cp39-win_amd64.whl", hash = "sha256:3f0e27e5b733803333bb2371249f41cf42bae8884863e8e8965ec69bebe53132"},
+    {file = "aiohttp-3.8.6.tar.gz", hash = "sha256:b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"},
 ]

 [package.dependencies]
 aiosignal = ">=1.1.2"
-async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""}
+async-timeout = ">=4.0.0a3,<5.0"
 attrs = ">=17.3.0"
+charset-normalizer = ">=2.0,<4.0"
 frozenlist = ">=1.1.1"
 multidict = ">=4.5,<7.0"
 yarl = ">=1.0,<2.0"

 [package.extras]
-speedups = ["Brotli", "aiodns", "brotlicffi"]
+speedups = ["Brotli", "aiodns", "cchardet"]

 [[package]]
 name = "aiopg"
@@ -2707,4 +2719,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "25ffa9ed98d890a3b85e6036792296a60bb705e8f9eaa1f07336501116a58756"
+content-hash = "0834e5cb69e5457741d4f476c3e49a4dc83598b5730685c8755da651b96ad3ec"
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -76,4 +76,3 @@ tokio-util.workspace = true
 rcgen.workspace = true
 rstest.workspace = true
 tokio-postgres-rustls.workspace = true
-postgres-protocol.workspace = true
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -6,7 +6,6 @@ pub use link::LinkAuthError;
 use tokio_postgres::config::AuthKeys;

 use crate::proxy::{handle_try_wake, retry_after, LatencyTimer};
-use crate::stream::Stream;
 use crate::{
    auth::{self, ClientCredentials},
    config::AuthenticationConfig,
@@ -132,7 +131,7 @@ async fn auth_quirks_creds(
    api: &impl console::Api,
    extra: &ConsoleReqExtra<'_>,
    creds: &mut ClientCredentials<'_>,
-    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+    client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
    allow_cleartext: bool,
    config: &'static AuthenticationConfig,
    latency_timer: &mut LatencyTimer,
@@ -166,7 +165,7 @@ async fn auth_quirks(
    api: &impl console::Api,
    extra: &ConsoleReqExtra<'_>,
    creds: &mut ClientCredentials<'_>,
-    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+    client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
    allow_cleartext: bool,
    config: &'static AuthenticationConfig,
    latency_timer: &mut LatencyTimer,
@@ -242,7 +241,7 @@ impl BackendType<'_, ClientCredentials<'_>> {
    pub async fn authenticate(
        &mut self,
        extra: &ConsoleReqExtra<'_>,
-        client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+        client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
        allow_cleartext: bool,
        config: &'static AuthenticationConfig,
        latency_timer: &mut LatencyTimer,
--- a/proxy/src/auth/backend/classic.rs
+++ b/proxy/src/auth/backend/classic.rs
@@ -6,7 +6,7 @@ use crate::{
    console::{self, AuthInfo, ConsoleReqExtra},
    proxy::LatencyTimer,
    sasl, scram,
-    stream::{PqStream, Stream},
+    stream::PqStream,
 };
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{info, warn};
@@ -15,7 +15,7 @@ pub(super) async fn authenticate(
    api: &impl console::Api,
    extra: &ConsoleReqExtra<'_>,
    creds: &ClientCredentials<'_>,
-    client: &mut PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
    config: &'static AuthenticationConfig,
    latency_timer: &mut LatencyTimer,
 ) -> auth::Result<AuthSuccess<ComputeCredentials>> {
--- a/proxy/src/auth/backend/hacks.rs
+++ b/proxy/src/auth/backend/hacks.rs
@@ -2,7 +2,7 @@ use super::{AuthSuccess, ComputeCredentials};
 use crate::{
    auth::{self, AuthFlow, ClientCredentials},
    proxy::LatencyTimer,
-    stream::{self, Stream},
+    stream,
 };
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{info, warn};
@@ -12,7 +12,7 @@ use tracing::{info, warn};
 /// These properties are benefical for serverless JS workers, so we
 /// use this mechanism for websocket connections.
 pub async fn cleartext_hack(
-    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+    client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
    latency_timer: &mut LatencyTimer,
 ) -> auth::Result<AuthSuccess<ComputeCredentials>> {
    warn!("cleartext auth flow override is enabled, proceeding");
@@ -37,7 +37,7 @@ pub async fn cleartext_hack(
 /// Very similar to [`cleartext_hack`], but there's a specific password format.
 pub async fn password_hack(
    creds: &mut ClientCredentials<'_>,
-    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+    client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin>,
    latency_timer: &mut LatencyTimer,
 ) -> auth::Result<AuthSuccess<ComputeCredentials>> {
    warn!("project not specified, resorting to the password hack auth flow");
--- a/proxy/src/auth/flow.rs
+++ b/proxy/src/auth/flow.rs
@@ -1,21 +1,16 @@
 //! Main authentication flow.

 use super::{AuthErrorImpl, PasswordHackPayload};
-use crate::{
-    config::TlsServerEndPoint,
-    sasl, scram,
-    stream::{PqStream, Stream},
-};
+use crate::{sasl, scram, stream::PqStream};
 use pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be};
 use std::io;
 use tokio::io::{AsyncRead, AsyncWrite};
-use tracing::info;

 /// Every authentication selector is supposed to implement this trait.
 pub trait AuthMethod {
    /// Any authentication selector should provide initial backend message
    /// containing auth method name and parameters, e.g. md5 salt.
-    fn first_message(&self, channel_binding: bool) -> BeMessage<'_>;
+    fn first_message(&self) -> BeMessage<'_>;
 }

 /// Initial state of [`AuthFlow`].
@@ -26,14 +21,8 @@ pub struct Scram<'a>(pub &'a scram::ServerSecret);

 impl AuthMethod for Scram<'_> {
    #[inline(always)]
-    fn first_message(&self, channel_binding: bool) -> BeMessage<'_> {
-        if channel_binding {
-            Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))
-        } else {
-            Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(
-                scram::METHODS_WITHOUT_PLUS,
-            ))
-        }
+    fn first_message(&self) -> BeMessage<'_> {
+        Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))
    }
 }

@@ -43,7 +32,7 @@ pub struct PasswordHack;

 impl AuthMethod for PasswordHack {
    #[inline(always)]
-    fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> {
+    fn first_message(&self) -> BeMessage<'_> {
        Be::AuthenticationCleartextPassword
    }
 }
@@ -54,44 +43,37 @@ pub struct CleartextPassword;

 impl AuthMethod for CleartextPassword {
    #[inline(always)]
-    fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> {
+    fn first_message(&self) -> BeMessage<'_> {
        Be::AuthenticationCleartextPassword
    }
 }

 /// This wrapper for [`PqStream`] performs client authentication.
 #[must_use]
-pub struct AuthFlow<'a, S, State> {
+pub struct AuthFlow<'a, Stream, State> {
    /// The underlying stream which implements libpq's protocol.
-    stream: &'a mut PqStream<Stream<S>>,
+    stream: &'a mut PqStream<Stream>,
    /// State might contain ancillary data (see [`Self::begin`]).
    state: State,
-    tls_server_end_point: TlsServerEndPoint,
 }

 /// Initial state of the stream wrapper.
-impl<'a, S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'a, S, Begin> {
+impl<'a, S: AsyncWrite + Unpin> AuthFlow<'a, S, Begin> {
    /// Create a new wrapper for client authentication.
-    pub fn new(stream: &'a mut PqStream<Stream<S>>) -> Self {
-        let tls_server_end_point = stream.get_ref().tls_server_end_point();
-
+    pub fn new(stream: &'a mut PqStream<S>) -> Self {
        Self {
            stream,
            state: Begin,
-            tls_server_end_point,
        }
    }

    /// Move to the next step by sending auth method's name & params to client.
    pub async fn begin<M: AuthMethod>(self, method: M) -> io::Result<AuthFlow<'a, S, M>> {
-        self.stream
-            .write_message(&method.first_message(self.tls_server_end_point.supported()))
-            .await?;
+        self.stream.write_message(&method.first_message()).await?;

        Ok(AuthFlow {
            stream: self.stream,
            state: method,
-            tls_server_end_point: self.tls_server_end_point,
        })
    }
 }
@@ -141,15 +123,9 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
            return Err(super::AuthError::bad_auth_method(sasl.method));
        }

-        info!("client chooses {}", sasl.method);
-
        let secret = self.state.0;
        let outcome = sasl::SaslStream::new(self.stream, sasl.message)
-            .authenticate(scram::Exchange::new(
-                secret,
-                rand::random,
-                self.tls_server_end_point,
-            ))
+            .authenticate(scram::Exchange::new(secret, rand::random, None))
            .await?;

        Ok(outcome)
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -6,8 +6,6 @@
 use std::{net::SocketAddr, sync::Arc};

 use futures::future::Either;
-use itertools::Itertools;
-use proxy::config::TlsServerEndPoint;
 use tokio::net::TcpListener;

 use anyhow::{anyhow, bail, ensure, Context};
@@ -67,7 +65,7 @@ async fn main() -> anyhow::Result<()> {
    let destination: String = args.get_one::<String>("dest").unwrap().parse()?;

    // Configure TLS
-    let (tls_config, tls_server_end_point): (Arc<rustls::ServerConfig>, TlsServerEndPoint) = match (
+    let tls_config: Arc<rustls::ServerConfig> = match (
        args.get_one::<String>("tls-key"),
        args.get_one::<String>("tls-cert"),
    ) {
@@ -91,22 +89,16 @@ async fn main() -> anyhow::Result<()> {
                    ))?
                    .into_iter()
                    .map(rustls::Certificate)
-                    .collect_vec()
+                    .collect()
            };

-            // needed for channel bindings
-            let first_cert = cert_chain.first().context("missing certificate")?;
-            let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
-
-            let tls_config = rustls::ServerConfig::builder()
+            rustls::ServerConfig::builder()
                .with_safe_default_cipher_suites()
                .with_safe_default_kx_groups()
                .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])?
                .with_no_client_auth()
                .with_single_cert(cert_chain, key)?
-                .into();
-
-            (tls_config, tls_server_end_point)
+                .into()
        }
        _ => bail!("tls-key and tls-cert must be specified"),
    };
@@ -121,7 +113,6 @@ async fn main() -> anyhow::Result<()> {
    let main = tokio::spawn(task_main(
        Arc::new(destination),
        tls_config,
-        tls_server_end_point,
        proxy_listener,
        cancellation_token.clone(),
    ));
@@ -143,7 +134,6 @@ async fn main() -> anyhow::Result<()> {
 async fn task_main(
    dest_suffix: Arc<String>,
    tls_config: Arc<rustls::ServerConfig>,
-    tls_server_end_point: TlsServerEndPoint,
    listener: tokio::net::TcpListener,
    cancellation_token: CancellationToken,
 ) -> anyhow::Result<()> {
@@ -169,7 +159,7 @@ async fn task_main(
                            .context("failed to set socket option")?;

                        info!(%peer_addr, "serving");
-                        handle_client(dest_suffix, tls_config, tls_server_end_point, socket).await
+                        handle_client(dest_suffix, tls_config, socket).await
                    }
                    .unwrap_or_else(|e| {
                        // Acknowledge that the task has finished with an error.
@@ -178,18 +168,9 @@ async fn task_main(
                    .instrument(tracing::info_span!("handle_client", ?session_id))
                );
            }
-            // Don't modify this unless you read https://docs.rs/tokio/latest/tokio/macro.select.html carefully.
-            // If this future completes and the pattern doesn't match, this branch is disabled for this call to `select!`.
-            // This only counts for this loop and it will be enabled again on next `select!`.
-            //
-            // Prior code had this as `Some(Err(e))` which _looks_ equivalent to the current setup, but it's not.
-            // When `connections.join_next()` returned `Some(Ok(()))` (which we expect), it would disable the join_next and it would
-            // not get called again, even if there are more connections to remove.
-            Some(res) = connections.join_next() => {
-                if let Err(e) = res {
-                    if !e.is_panic() && !e.is_cancelled() {
-                        warn!("unexpected error from joined connection task: {e:?}");
-                    }
+            Some(Err(e)) = connections.join_next(), if !connections.is_empty() => {
+                if !e.is_panic() && !e.is_cancelled() {
+                    warn!("unexpected error from joined connection task: {e:?}");
                }
            }
            _ = cancellation_token.cancelled() => {
@@ -217,7 +198,6 @@ const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmod
 async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
    raw_stream: S,
    tls_config: Arc<rustls::ServerConfig>,
-    tls_server_end_point: TlsServerEndPoint,
 ) -> anyhow::Result<Stream<S>> {
    let mut stream = PqStream::new(Stream::from_raw(raw_stream));

@@ -242,11 +222,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
            if !read_buf.is_empty() {
                bail!("data is sent before server replied with EncryptionResponse");
            }
-
-            Ok(Stream::Tls {
-                tls: Box::new(raw.upgrade(tls_config).await?),
-                tls_server_end_point,
-            })
+            Ok(raw.upgrade(tls_config).await?)
        }
        unexpected => {
            info!(
@@ -261,10 +237,9 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
 async fn handle_client(
    dest_suffix: Arc<String>,
    tls_config: Arc<rustls::ServerConfig>,
-    tls_server_end_point: TlsServerEndPoint,
    stream: impl AsyncRead + AsyncWrite + Unpin,
 ) -> anyhow::Result<()> {
-    let tls_stream = ssl_handshake(stream, tls_config, tls_server_end_point).await?;
+    let tls_stream = ssl_handshake(stream, tls_config).await?;

    // Cut off first part of the SNI domain
    // We receive required destination details in the format of
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,15 +1,12 @@
 use crate::auth;
 use anyhow::{bail, ensure, Context, Ok};
-use rustls::{sign, Certificate, PrivateKey};
-use sha2::{Digest, Sha256};
+use rustls::sign;
 use std::{
    collections::{HashMap, HashSet},
    str::FromStr,
    sync::Arc,
    time::Duration,
 };
-use tracing::{error, info};
-use x509_parser::oid_registry;

 pub struct ProxyConfig {
    pub tls_config: Option<TlsConfig>,
@@ -30,7 +27,6 @@ pub struct MetricCollectionConfig {
 pub struct TlsConfig {
    pub config: Arc<rustls::ServerConfig>,
    pub common_names: Option<HashSet<String>>,
-    pub cert_resolver: Arc<CertResolver>,
 }

 pub struct HttpConfig {
@@ -56,7 +52,7 @@ pub fn configure_tls(
    let mut cert_resolver = CertResolver::new();

    // add default certificate
-    cert_resolver.add_cert_path(key_path, cert_path, true)?;
+    cert_resolver.add_cert(key_path, cert_path, true)?;

    // add extra certificates
    if let Some(certs_dir) = certs_dir {
@@ -68,7 +64,7 @@ pub fn configure_tls(
                let key_path = path.join("tls.key");
                let cert_path = path.join("tls.crt");
                if key_path.exists() && cert_path.exists() {
-                    cert_resolver.add_cert_path(
+                    cert_resolver.add_cert(
                        &key_path.to_string_lossy(),
                        &cert_path.to_string_lossy(),
                        false,
@@ -80,97 +76,35 @@ pub fn configure_tls(

    let common_names = cert_resolver.get_common_names();

-    let cert_resolver = Arc::new(cert_resolver);
-
    let config = rustls::ServerConfig::builder()
        .with_safe_default_cipher_suites()
        .with_safe_default_kx_groups()
        // allow TLS 1.2 to be compatible with older client libraries
        .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])?
        .with_no_client_auth()
-        .with_cert_resolver(cert_resolver.clone())
+        .with_cert_resolver(Arc::new(cert_resolver))
        .into();

    Ok(TlsConfig {
        config,
        common_names: Some(common_names),
-        cert_resolver,
    })
 }

-/// Channel binding parameter
-///
-/// <https://www.rfc-editor.org/rfc/rfc5929#section-4>
-/// Description: The hash of the TLS server's certificate as it
-/// appears, octet for octet, in the server's Certificate message.  Note
-/// that the Certificate message contains a certificate_list, in which
-/// the first element is the server's certificate.
-///
-/// The hash function is to be selected as follows:
-///
-/// * if the certificate's signatureAlgorithm uses a single hash
-///   function, and that hash function is either MD5 or SHA-1, then use SHA-256;
-///
-/// * if the certificate's signatureAlgorithm uses a single hash
-///   function and that hash function neither MD5 nor SHA-1, then use
-///   the hash function associated with the certificate's
-///   signatureAlgorithm;
-///
-/// * if the certificate's signatureAlgorithm uses no hash functions or
-///   uses multiple hash functions, then this channel binding type's
-///   channel bindings are undefined at this time (updates to is channel
-///   binding type may occur to address this issue if it ever arises).
-#[derive(Debug, Clone, Copy)]
-pub enum TlsServerEndPoint {
-    Sha256([u8; 32]),
-    Undefined,
-}
-
-impl TlsServerEndPoint {
-    pub fn new(cert: &Certificate) -> anyhow::Result<Self> {
-        let sha256_oids = [
-            // I'm explicitly not adding MD5 or SHA1 here... They're bad.
-            oid_registry::OID_SIG_ECDSA_WITH_SHA256,
-            oid_registry::OID_PKCS1_SHA256WITHRSA,
-        ];
-
-        let pem = x509_parser::parse_x509_certificate(&cert.0)
-            .context("Failed to parse PEM object from cerficiate")?
-            .1;
-
-        info!(subject = %pem.subject, "parsing TLS certificate");
-
-        let reg = oid_registry::OidRegistry::default().with_all_crypto();
-        let oid = pem.signature_algorithm.oid();
-        let alg = reg.get(oid);
-        if sha256_oids.contains(oid) {
-            let tls_server_end_point: [u8; 32] =
-                Sha256::new().chain_update(&cert.0).finalize().into();
-            info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
-            Ok(Self::Sha256(tls_server_end_point))
-        } else {
-            error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
-            Ok(Self::Undefined)
-        }
-    }
-
-    pub fn supported(&self) -> bool {
-        !matches!(self, TlsServerEndPoint::Undefined)
-    }
-}
-
-#[derive(Default)]
-pub struct CertResolver {
-    certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
-    default: Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
+struct CertResolver {
+    certs: HashMap<String, Arc<rustls::sign::CertifiedKey>>,
+    default: Option<Arc<rustls::sign::CertifiedKey>>,
 }

 impl CertResolver {
-    pub fn new() -> Self {
-        Self::default()
+    fn new() -> Self {
+        Self {
+            certs: HashMap::new(),
+            default: None,
+        }
    }

-    fn add_cert_path(
+    fn add_cert(
        &mut self,
        key_path: &str,
        cert_path: &str,
@@ -186,65 +120,57 @@ impl CertResolver {
            keys.pop().map(rustls::PrivateKey).unwrap()
        };

+        let key = sign::any_supported_type(&priv_key).context("invalid private key")?;
+
        let cert_chain_bytes = std::fs::read(cert_path)
            .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?;

        let cert_chain = {
            rustls_pemfile::certs(&mut &cert_chain_bytes[..])
-                .with_context(|| {
-                    format!(
+                .context(format!(
                    "Failed to read TLS certificate chain from bytes from file at '{cert_path}'."
-                )
-                })?
+                ))?
                .into_iter()
                .map(rustls::Certificate)
                .collect()
        };

-        self.add_cert(priv_key, cert_chain, is_default)
-    }
+        let common_name = {
+            let pem = x509_parser::pem::parse_x509_pem(&cert_chain_bytes)
+                .context(format!(
+                    "Failed to parse PEM object from bytes from file at '{cert_path}'."
+                ))?
+                .1;
+            let common_name = pem.parse_x509()?.subject().to_string();

-    pub fn add_cert(
-        &mut self,
-        priv_key: PrivateKey,
-        cert_chain: Vec<Certificate>,
-        is_default: bool,
-    ) -> anyhow::Result<()> {
-        let key = sign::any_supported_type(&priv_key).context("invalid private key")?;
-
-        let first_cert = &cert_chain[0];
-        let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
-        let pem = x509_parser::parse_x509_certificate(&first_cert.0)
-            .context("Failed to parse PEM object from cerficiate")?
-            .1;
-
-        let common_name = pem.subject().to_string();
-
-        // We only use non-wildcard certificates in link proxy so it seems okay to treat them the same as
-        // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so
-        // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names
-        // and passed None instead, which blows up number of cases downstream code should handle. Proper coding
-        // here should better avoid Option for common_names, and do wildcard-based certificate selection instead
-        // of cutting off '*.' parts.
-        let common_name = if common_name.starts_with("CN=*.") {
-            common_name.strip_prefix("CN=*.").map(|s| s.to_string())
-        } else {
-            common_name.strip_prefix("CN=").map(|s| s.to_string())
+            // We only use non-wildcard certificates in link proxy so it seems okay to treat them the same as
+            // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so
+            // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names
+            // and passed None instead, which blows up number of cases downstream code should handle. Proper coding
+            // here should better avoid Option for common_names, and do wildcard-based certificate selection instead
+            // of cutting off '*.' parts.
+            if common_name.starts_with("CN=*.") {
+                common_name.strip_prefix("CN=*.").map(|s| s.to_string())
+            } else {
+                common_name.strip_prefix("CN=").map(|s| s.to_string())
+            }
        }
-        .context("Failed to parse common name from certificate")?;
+        .context(format!(
+            "Failed to parse common name from certificate at '{cert_path}'."
+        ))?;

        let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));

        if is_default {
-            self.default = Some((cert.clone(), tls_server_end_point));
+            self.default = Some(cert.clone());
        }

-        self.certs.insert(common_name, (cert, tls_server_end_point));
+        self.certs.insert(common_name, cert);

        Ok(())
    }

-    pub fn get_common_names(&self) -> HashSet<String> {
+    fn get_common_names(&self) -> HashSet<String> {
        self.certs.keys().map(|s| s.to_string()).collect()
    }
 }
@@ -252,24 +178,15 @@ impl CertResolver {
 impl rustls::server::ResolvesServerCert for CertResolver {
    fn resolve(
        &self,
-        client_hello: rustls::server::ClientHello,
+        _client_hello: rustls::server::ClientHello,
    ) -> Option<Arc<rustls::sign::CertifiedKey>> {
-        self.resolve(client_hello.server_name()).map(|x| x.0)
-    }
-}
-
-impl CertResolver {
-    pub fn resolve(
-        &self,
-        server_name: Option<&str>,
-    ) -> Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)> {
        // loop here and cut off more and more subdomains until we find
        // a match to get a proper wildcard support. OTOH, we now do not
        // use nested domains, so keep this simple for now.
        //
        // With the current coding foo.com will match *.foo.com and that
        // repeats behavior of the old code.
-        if let Some(mut sni_name) = server_name {
+        if let Some(mut sni_name) = _client_hello.server_name() {
            loop {
                if let Some(cert) = self.certs.get(sni_name) {
                    return Some(cert.clone());
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -294,18 +294,9 @@ pub async fn task_main(
                    }),
                );
            }
-            // Don't modify this unless you read https://docs.rs/tokio/latest/tokio/macro.select.html carefully.
-            // If this future completes and the pattern doesn't match, this branch is disabled for this call to `select!`.
-            // This only counts for this loop and it will be enabled again on next `select!`.
-            //
-            // Prior code had this as `Some(Err(e))` which _looks_ equivalent to the current setup, but it's not.
-            // When `connections.join_next()` returned `Some(Ok(()))` (which we expect), it would disable the join_next and it would
-            // not get called again, even if there are more connections to remove.
-            Some(res) = connections.join_next() => {
-                if let Err(e) = res {
-                    if !e.is_panic() && !e.is_cancelled() {
-                        warn!("unexpected error from joined connection task: {e:?}");
-                    }
+            Some(Err(e)) = connections.join_next(), if !connections.is_empty() => {
+                if !e.is_panic() && !e.is_cancelled() {
+                    warn!("unexpected error from joined connection task: {e:?}");
                }
            }
            _ = cancellation_token.cancelled() => {
@@ -470,17 +461,7 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                        if !read_buf.is_empty() {
                            bail!("data is sent before server replied with EncryptionResponse");
                        }
-                        let tls_stream = raw.upgrade(tls.to_server_config()).await?;
-
-                        let (_, tls_server_end_point) = tls
-                            .cert_resolver
-                            .resolve(tls_stream.get_ref().1.server_name())
-                            .context("missing certificate")?;
-
-                        stream = PqStream::new(Stream::Tls {
-                            tls: Box::new(tls_stream),
-                            tls_server_end_point,
-                        });
+                        stream = PqStream::new(raw.upgrade(tls.to_server_config()).await?);
                    }
                }
                _ => bail!(ERR_PROTO_VIOLATION),
@@ -885,7 +866,7 @@ pub async fn proxy_pass(
 /// Thin connection context.
 struct Client<'a, S> {
    /// The underlying libpq protocol stream.
-    stream: PqStream<Stream<S>>,
+    stream: PqStream<S>,
    /// Client credentials that we care about.
    creds: auth::BackendType<'a, auth::ClientCredentials<'a>>,
    /// KV-dictionary with PostgreSQL connection params.
@@ -899,7 +880,7 @@ struct Client<'a, S> {
 impl<'a, S> Client<'a, S> {
    /// Construct a new connection context.
    fn new(
-        stream: PqStream<Stream<S>>,
+        stream: PqStream<S>,
        creds: auth::BackendType<'a, auth::ClientCredentials<'a>>,
        params: &'a StartupMessageParams,
        session_id: uuid::Uuid,
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -1,23 +1,19 @@
 //! A group of high-level tests for connection establishing logic and auth.
-
-mod mitm;
-
+//!
 use super::*;
 use crate::auth::backend::TestBackend;
 use crate::auth::ClientCredentials;
-use crate::config::CertResolver;
 use crate::console::{CachedNodeInfo, NodeInfo};
 use crate::{auth, http, sasl, scram};
 use async_trait::async_trait;
 use rstest::rstest;
 use tokio_postgres::config::SslMode;
 use tokio_postgres::tls::{MakeTlsConnect, NoTls};
-use tokio_postgres_rustls::{MakeRustlsConnect, RustlsStream};
+use tokio_postgres_rustls::MakeRustlsConnect;

 /// Generate a set of TLS certificates: CA + server.
 fn generate_certs(
    hostname: &str,
-    common_name: &str,
 ) -> anyhow::Result<(rustls::Certificate, rustls::Certificate, rustls::PrivateKey)> {
    let ca = rcgen::Certificate::from_params({
        let mut params = rcgen::CertificateParams::default();
@@ -25,15 +21,7 @@ fn generate_certs(
        params
    })?;

-    let cert = rcgen::Certificate::from_params({
-        let mut params = rcgen::CertificateParams::new(vec![hostname.into()]);
-        params.distinguished_name = rcgen::DistinguishedName::new();
-        params
-            .distinguished_name
-            .push(rcgen::DnType::CommonName, common_name);
-        params
-    })?;
-
+    let cert = rcgen::generate_simple_self_signed(vec![hostname.into()])?;
    Ok((
        rustls::Certificate(ca.serialize_der()?),
        rustls::Certificate(cert.serialize_der_with_signer(&ca)?),
@@ -49,14 +37,7 @@ struct ClientConfig<'a> {
 impl ClientConfig<'_> {
    fn make_tls_connect<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
        self,
-    ) -> anyhow::Result<
-        impl tokio_postgres::tls::TlsConnect<
-            S,
-            Error = impl std::fmt::Debug,
-            Future = impl Send,
-            Stream = RustlsStream<S>,
-        >,
-    > {
+    ) -> anyhow::Result<impl tokio_postgres::tls::TlsConnect<S>> {
        let mut mk = MakeRustlsConnect::new(self.config);
        let tls = MakeTlsConnect::<S>::make_tls_connect(&mut mk, self.hostname)?;
        Ok(tls)
@@ -68,24 +49,20 @@ fn generate_tls_config<'a>(
    hostname: &'a str,
    common_name: &'a str,
 ) -> anyhow::Result<(ClientConfig<'a>, TlsConfig)> {
-    let (ca, cert, key) = generate_certs(hostname, common_name)?;
+    let (ca, cert, key) = generate_certs(hostname)?;

    let tls_config = {
        let config = rustls::ServerConfig::builder()
            .with_safe_defaults()
            .with_no_client_auth()
-            .with_single_cert(vec![cert.clone()], key.clone())?
+            .with_single_cert(vec![cert], key)?
            .into();

-        let mut cert_resolver = CertResolver::new();
-        cert_resolver.add_cert(key, vec![cert], true)?;
-
-        let common_names = Some(cert_resolver.get_common_names());
+        let common_names = Some([common_name.to_owned()].iter().cloned().collect());

        TlsConfig {
            config,
            common_names,
-            cert_resolver: Arc::new(cert_resolver),
        }
    };

@@ -276,7 +253,6 @@ async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> {
    ));

    let (_client, _conn) = tokio_postgres::Config::new()
-        .channel_binding(tokio_postgres::config::ChannelBinding::Require)
        .user("user")
        .dbname("db")
        .password(password)
@@ -287,30 +263,6 @@ async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> {
    proxy.await?
 }

-#[tokio::test]
-async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> {
-    let (client, server) = tokio::io::duplex(1024);
-
-    let (client_config, server_config) =
-        generate_tls_config("generic-project-name.localhost", "localhost")?;
-    let proxy = tokio::spawn(dummy_proxy(
-        client,
-        Some(server_config),
-        Scram::new("password")?,
-    ));
-
-    let (_client, _conn) = tokio_postgres::Config::new()
-        .channel_binding(tokio_postgres::config::ChannelBinding::Disable)
-        .user("user")
-        .dbname("db")
-        .password("password")
-        .ssl_mode(SslMode::Require)
-        .connect_raw(server, client_config.make_tls_connect()?)
-        .await?;
-
-    proxy.await?
-}
-
 #[tokio::test]
 async fn scram_auth_mock() -> anyhow::Result<()> {
    let (client, server) = tokio::io::duplex(1024);
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Christian Schwarz	c4f7cab042	basebackup bench: debug-log basebackup size	2023-12-06 18:07:24 +00:00
Christian Schwarz	cfff331da3	WIP: implement tracing_chrome support for utils::logging	2023-12-06 18:07:24 +00:00
Christian Schwarz	658c20bea4	jwt support; debug spans in basebackup	2023-12-06 17:20:04 +00:00
Christian Schwarz	8a555f1cf3	basebackup bench: fixup copy-pasta of wip	2023-12-05 23:55:49 +00:00
Christian Schwarz	4f79b6d140	pagebench: fixup some accidental WIP thing from last week	2023-12-05 23:55:49 +00:00
Christian Schwarz	d6b7bc2abc	implement a basebackup benchmark	2023-12-05 19:59:51 +00:00
Christian Schwarz	4fc3596677	client & getpage bench: distinguish between page_service client and client in pagestream mode	2023-12-05 19:59:51 +00:00
Christian Schwarz	60cc3a3397	pagebench: restructure dir a bit	2023-12-05 19:59:51 +00:00
Christian Schwarz	cb3dcb06cf	cargo fmt	2023-12-05 19:59:40 +00:00
Christian Schwarz	d75470280f	fixup: scale factors in the python benchmark	2023-11-24 18:16:58 +00:00
Christian Schwarz	687678c4ff	a mode where one task picks which work to do & dispatches it to per-timeline clients	2023-11-24 18:01:55 +00:00
Christian Schwarz	59c8a29569	WIP: failed attempt to have fixed number of clients going over all the key ranges of all tenants The problem is that the connections are stateful, need to implement a client pool => sucks	2023-11-24 17:12:42 +00:00
Christian Schwarz	044e96ce50	fixup: few more perecentiles	2023-11-24 16:00:59 +00:00
Christian Schwarz	12a60cd914	parameters for i3en.3xlarge (need to add more modes to the benchmark, e.g., time based)	2023-11-24 15:40:29 +00:00
Christian Schwarz	9f36d19383	few more percentiles for the benchmark	2023-11-24 15:39:02 +00:00
Christian Schwarz	a0909a2b80	make the benchmarking script work again	2023-11-24 14:58:21 +00:00
Christian Schwarz	bd06672cdd	have one HdrHistogram per thread instead of one per task	2023-11-24 14:27:52 +00:00
Christian Schwarz	f1a714e465	Revert "WIP: figure out overhead of linear histogram" This reverts commit `dc914ef368`.	2023-11-24 14:01:07 +00:00
Christian Schwarz	dc914ef368	WIP: figure out overhead of linear histogram	2023-11-24 14:00:54 +00:00
Christian Schwarz	568f6ae332	per-task & global mean + percentiles using hdrhistogram known problem is: one hdrhistogram per task => too much memory usage	2023-11-24 12:35:22 +00:00
Christian Schwarz	857150dcee	CLI structure	2023-11-24 11:19:21 +00:00
Christian Schwarz	9d13d0015f	perftest: use new binary name	2023-11-24 11:06:03 +00:00
Christian Schwarz	281f05398e	further break up	2023-11-24 11:05:55 +00:00
Christian Schwarz	0bd5e3aedc	remove unnucessary return impl Future	2023-11-24 10:56:52 +00:00
Christian Schwarz	4f1197311e	break up client into library & cli	2023-11-24 10:55:54 +00:00
Christian Schwarz	dd5792e488	WIP use results	2023-11-24 10:18:05 +00:00
Christian Schwarz	135e37e5b2	implement the performance test in the Python test suite	2023-11-24 10:17:49 +00:00
Christian Schwarz	ccb9fe9b33	find a way to duplicate a tenant in local_fs Use the script like so, against the tenant to duplicate: poetry run python3 ./test_runner/duplicate_tenant.py 7ea51af32d42bfe7fb93bf5f28114d09 200 8 backup of pageserver.toml d =1 pg_distrib_dir ='/home/admin/neon-main/pg_install' http_auth_type ='Trust' pg_auth_type ='Trust' listen_http_addr ='127.0.0.1:9898' listen_pg_addr ='127.0.0.1:64000' broker_endpoint ='http://127.0.0.1:50051/' #control_plane_api ='http://127.0.0.1:1234/' # Initial configuration file created by 'pageserver --init' #listen_pg_addr = '127.0.0.1:64000' #listen_http_addr = '127.0.0.1:9898' #wait_lsn_timeout = '60 s' #wal_redo_timeout = '60 s' #max_file_descriptors = 10000 #page_cache_size = 160000 # initial superuser role name to use when creating a new tenant #initial_superuser_name = 'cloud_admin' #broker_endpoint = 'http://127.0.0.1:50051' #log_format = 'plain' #concurrent_tenant_size_logical_size_queries = '1' #metric_collection_interval = '10 min' #cached_metric_collection_interval = '0s' #synthetic_size_calculation_interval = '10 min' #disk_usage_based_eviction = { max_usage_pct = .., min_avail_bytes = .., period = "10s"} #background_task_maximum_delay = '10s' [tenant_config] #checkpoint_distance = 268435456 # in bytes #checkpoint_timeout = 10 m #compaction_target_size = 134217728 # in bytes #compaction_period = '20 s' #compaction_threshold = 10 #gc_period = '1 hr' #gc_horizon = 67108864 #image_creation_threshold = 3 #pitr_interval = '7 days' #min_resident_size_override = .. 
# in bytes #evictions_low_residence_duration_metric_threshold = '24 hour' #gc_feedback = false # make it determinsitic gc_period = '0s' checkpoint_timeout = '3650 day' compaction_period = '20 s' compaction_threshold = 10 compaction_target_size = 134217728 checkpoint_distance = 268435456 image_creation_threshold = 3 [remote_storage] local_path = '/home/admin/neon-main/bench_repo_dir/repo/remote_storage_local_fs' remove http handler switch to generalized rewrite_summary & impl page_ctl subcommand to use it WIP: change duplicate_tenant.py script to use the pagectl command The script works but at restart, we detach the created tenants because they're not known to the attachment service: Detaching tenant, control plane omitted it in re-attach response tenant_id=1e399d390e3aee6b11c701cbc716bb6c => figure out how to further integrate this	2023-11-24 10:17:49 +00:00
Christian Schwarz	1b81640290	random getpage benchmark	2023-11-24 10:17:49 +00:00