Compare commits


2 Commits

Author      SHA1        Message                                                                            Date
John Spray  579efc4b34  tests: explicit wait for pageserver LSN in check_restored_datadir_content          2024-02-14 10:57:37 +00:00
John Spray  c55d3674f2  Reapply "tests: try to make restored-datadir comparison tests not flaky (#6666)"   2024-02-14 10:57:37 +00:00
                        (This reverts commit 250686de08.)
92 changed files with 2517 additions and 3553 deletions

View File

@@ -253,7 +253,7 @@ jobs:
done
if [ "${FAILED}" = "true" ]; then
echo >&2 "Please update vendor/revisions.json if these changes are intentional"
echo >&2 "Please update vendors/revisions.json if these changes are intentional"
exit 1
fi
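For context, the FAILED flag tested above is presumably set by an earlier part of this workflow step (outside the hunk) that compares each vendored Postgres submodule's checked-out commit with the revision pinned in revisions.json. A hypothetical shell sketch of such a check, with assumed paths and JSON layout:

# hypothetical sketch only; the real loop is outside this hunk
FAILED=false
for d in vendor/postgres-v14 vendor/postgres-v15 vendor/postgres-v16; do
  actual=$(git -C "$d" rev-parse HEAD)                          # commit the submodule is actually at
  pinned=$(jq -r --arg d "$d" '.[$d]' vendor/revisions.json)    # pinned revision; key layout is an assumption
  if [ "$actual" != "$pinned" ]; then
    echo >&2 "$d is at $actual, but revisions.json pins $pinned"
    FAILED=true
  fi
done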

Cargo.lock (generated)
View File

@@ -4436,7 +4436,6 @@ dependencies = [
"futures",
"futures-util",
"http-types",
"humantime",
"hyper",
"itertools",
"metrics",
@@ -4448,7 +4447,6 @@ dependencies = [
"serde_json",
"test-context",
"tokio",
"tokio-stream",
"tokio-util",
"toml_edit",
"tracing",

View File

@@ -47,7 +47,7 @@ COPY --chown=nonroot . .
# Show build caching stats to check whether the cache was used in the end.
# This has to be part of the same RUN, since the cachepot daemon is killed at the end of this RUN, losing the compilation stats.
RUN set -e \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
&& mold -run cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
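The two build invocations in the hunk above differ only in how mold is selected as the linker: the RUSTFLAGS form points rustc's clang driver at mold explicitly, while `mold -run` wraps the whole command so mold is picked up for every link step without touching RUSTFLAGS. A minimal standalone illustration (hypothetical invocations, not part of this Dockerfile):

# either of these links a release build with mold
RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold" cargo build --release
mold -run cargo build --release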

View File

@@ -72,6 +72,703 @@ RUN cd postgres && \
fi; \
done
#########################################################################################
#
# Layer "postgis-build"
# Build PostGIS from the upstream PostGIS mirror.
#
#########################################################################################
FROM build-deps AS postgis-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
protobuf-c-compiler xsltproc
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* /
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control && \
mkdir -p /extensions/postgis && \
cp /usr/local/pgsql/share/extension/postgis.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/postgis_raster.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/postgis_sfcgal.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/postgis_topology.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/postgis.tar.zst -T -
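The before.txt/after.txt bookkeeping above (repeated for the kq_imcx and anon layers below) relies on `comm -13` printing only the lines unique to the second sorted file, i.e. the paths that appeared during the install; piping that list to `tar -T -` then archives exactly the files the extensions added. A standalone sketch with made-up paths:

printf 'bin/pg_config\nlib/libpq.so\n' | sort > /tmp/before.txt
printf 'bin/pg_config\nlib/libpq.so\nshare/extension/foo.control\n' | sort > /tmp/after.txt
comm -13 /tmp/before.txt /tmp/after.txt    # prints only share/extension/foo.control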
#########################################################################################
#
# Layer "plv8-build"
# Build plv8
#
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
# generate and copy upgrade scripts
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
cp upgrade/* /usr/local/pgsql/share/extension/ && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
# don't break computes that still have an old version of plv8 installed
cd /usr/local/pgsql/lib/ && \
ln -s plv8-3.1.10.so plv8-3.1.5.so && \
ln -s plv8-3.1.10.so plv8-3.1.8.so && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
#########################################################################################
#
# Layer "h3-pg-build"
# Build h3_pg
#
#########################################################################################
FROM build-deps AS h3-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "$(uname -m)" in \
"x86_64") \
export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
;; \
"aarch64") \
export CMAKE_CHECKSUM=281b42627c9a1beed03e29706574d04c6c53fae4994472e90985ef018dd29c02 \
;; \
*) \
echo "Unsupported architecture '$(uname -m)'. Supported are x86_64 and aarch64" && exit 1 \
;; \
esac && \
wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-$(uname -m).sh \
-q -O /tmp/cmake-install.sh \
&& echo "${CMAKE_CHECKSUM} /tmp/cmake-install.sh" | sha256sum --check \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/h3 make install && \
cp -R /h3/usr / && \
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3_postgis.control
#########################################################################################
#
# Layer "unit-pg-build"
# compile unit extension
#
#########################################################################################
FROM build-deps AS unit-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# The unit extension's "CREATE EXTENSION" script relies on the absolute install path to fill some reference tables.
# We move the extension from '/usr/local/pgsql/' to '/usr/local/' after it is built, so we need to adjust the path.
# This one-liner removes the pgsql/ part of the path.
# NOTE: other extensions that rely on the MODULEDIR variable after the build phase will need the same fix.
find /usr/local/pgsql/share/extension/ -name "unit*.sql" -print0 | xargs -0 sed -i "s|pgsql/||g" && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control
#########################################################################################
#
# Layer "vector-pg-build"
# compile pgvector extension
#
#########################################################################################
FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.5.1.tar.gz -O pgvector.tar.gz && \
echo "cc7a8e034a96e30a819911ac79d32f6bc47bdd1aa2de4d7d4904e26b83209dc8 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
#########################################################################################
#
# Layer "pgjwt-pg-build"
# compile pgjwt extension
#
#########################################################################################
FROM build-deps AS pgjwt-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
#########################################################################################
#
# Layer "hypopg-pg-build"
# compile hypopg extension
#
#########################################################################################
FROM build-deps AS hypopg-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
#########################################################################################
#
# Layer "pg-hashids-pg-build"
# compile pg_hashids extension
#
#########################################################################################
FROM build-deps AS pg-hashids-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
#########################################################################################
#
# Layer "rum-pg-build"
# compile rum extension
#
#########################################################################################
FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
#########################################################################################
#
# Layer "pgtap-pg-build"
# compile pgTAP extension
#
#########################################################################################
FROM build-deps AS pgtap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
#########################################################################################
#
# Layer "ip4r-pg-build"
# compile ip4r extension
#
#########################################################################################
FROM build-deps AS ip4r-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
#########################################################################################
#
# Layer "prefix-pg-build"
# compile Prefix extension
#
#########################################################################################
FROM build-deps AS prefix-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
#########################################################################################
#
# Layer "hll-pg-build"
# compile hll extension
#
#########################################################################################
FROM build-deps AS hll-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
#########################################################################################
#
# Layer "plpgsql-check-pg-build"
# compile plpgsql_check extension
#
#########################################################################################
FROM build-deps AS plpgsql-check-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
#########################################################################################
#
# Layer "timescaledb-pg-build"
# compile timescaledb extension
#
#########################################################################################
FROM build-deps AS timescaledb-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
export TIMESCALEDB_VERSION=2.10.1 \
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
;; \
*) \
export TIMESCALEDB_VERSION=2.13.0 \
export TIMESCALEDB_CHECKSUM=584a351c7775f0e067eaa0e7277ea88cab9077cc4c455cbbf09a5d9723dce95d \
;; \
esac && \
apt-get update && \
apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
cd build && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control
#########################################################################################
#
# Layer "pg-hint-plan-pg-build"
# compile pg_hint_plan extension
#
#########################################################################################
FROM build-deps AS pg-hint-plan-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
;; \
"v15") \
export PG_HINT_PLAN_VERSION=15_1_5_0 \
export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \
;; \
"v16") \
export PG_HINT_PLAN_VERSION=16_1_6_0 \
export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \
esac && \
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "kq-imcx-pg-build"
# compile kq_imcx extension
#
#########################################################################################
FROM build-deps AS kq-imcx-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN apt-get update && \
apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
mkdir -p /extensions/kq_imcx && cp /usr/local/pgsql/share/extension/kq_imcx.control /extensions/kq_imcx && \
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/kq_imcx.tar.zst -T -
#########################################################################################
#
# Layer "pg-cron-pg-build"
# compile pg_cron extension
#
#########################################################################################
FROM build-deps AS pg-cron-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
#########################################################################################
#
# Layer "rdkit-pg-build"
# compile rdkit extension
#
#########################################################################################
FROM build-deps AS rdkit-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
apt-get install -y \
cmake \
libboost-iostreams1.74-dev \
libboost-regex1.74-dev \
libboost-serialization1.74-dev \
libboost-system1.74-dev \
libeigen3-dev
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
-D RDK_BUILD_INCHI_SUPPORT=ON \
-D RDK_BUILD_AVALON_SUPPORT=ON \
-D RDK_BUILD_PYTHON_WRAPPERS=OFF \
-D RDK_BUILD_DESCRIPTORS3D=OFF \
-D RDK_BUILD_FREESASA_SUPPORT=OFF \
-D RDK_BUILD_COORDGEN_SUPPORT=ON \
-D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \
-D RDK_BUILD_YAEHMOP_SUPPORT=OFF \
-D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \
-D RDK_USE_URF=OFF \
-D RDK_BUILD_PGSQL=ON \
-D RDK_PGSQL_STATIC=ON \
-D PostgreSQL_CONFIG=pg_config \
-D PostgreSQL_INCLUDE_DIR=`pg_config --includedir` \
-D PostgreSQL_TYPE_INCLUDE_DIR=`pg_config --includedir-server` \
-D PostgreSQL_LIBRARY_DIR=`pg_config --libdir` \
-D RDK_INSTALL_INTREE=OFF \
-D RDK_INSTALL_COMIC_FONTS=OFF \
-D RDK_BUILD_FREETYPE_SUPPORT=OFF \
-D CMAKE_BUILD_TYPE=Release \
. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control
#########################################################################################
#
# Layer "pg-uuidv7-pg-build"
# compile pg_uuidv7 extension
#
#########################################################################################
FROM build-deps AS pg-uuidv7-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
#########################################################################################
#
# Layer "pg-roaringbitmap-pg-build"
# compile pg_roaringbitmap extension
#
#########################################################################################
FROM build-deps AS pg-roaringbitmap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
#########################################################################################
#
# Layer "pg-semver-pg-build"
# compile pg_semver extension
#
#########################################################################################
FROM build-deps AS pg-semver-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
#########################################################################################
#
# Layer "pg-embedding-pg-build"
# compile pg_embedding extension
#
#########################################################################################
FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
export PG_EMBEDDING_VERSION=0.3.5 \
export PG_EMBEDDING_CHECKSUM=0e95b27b8b6196e2cf0a0c9ec143fe2219b82e54c5bb4ee064e76398cbe69ae9 \
;; \
*) \
echo "pg_embedding not supported on this PostgreSQL version. Use pgvector instead." && exit 0;; \
esac && \
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install
#########################################################################################
#
# Layer "pg-anon-pg-build"
# compile anon extension
#
#########################################################################################
FROM build-deps AS pg-anon-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
mkdir -p /extensions/anon && cp /usr/local/pgsql/share/extension/anon.control /extensions/anon && \
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/anon.tar.zst -T -
#########################################################################################
#
# Layer "rust extensions"
# This layer is used to build `pgrx` deps
#
#########################################################################################
FROM build-deps AS rust-extensions-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
apt-get install -y curl libclang-dev cmake && \
useradd -ms /bin/bash nonroot -b /home
ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
USER nonroot
WORKDIR /home/nonroot
ARG PG_VERSION
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \
cargo install --locked --version 0.10.2 cargo-pgrx && \
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
#
# Layer "pg-jsonschema-pg-build"
# Compile "pg_jsonschema" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-jsonschema-pg-build
ARG PG_VERSION
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
#########################################################################################
#
# Layer "pg-graphql-pg-build"
# Compile "pg_graphql" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-graphql-pg-build
ARG PG_VERSION
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
# needed to enable the extension, because it uses the untrusted C language
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control
#########################################################################################
#
# Layer "pg-tiktoken-build"
# Compile "pg_tiktoken" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-tiktoken-pg-build
ARG PG_VERSION
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
#########################################################################################
#
# Layer "pg-pgx-ulid-build"
# Compile "pgx_ulid" extension
#
#########################################################################################
FROM rust-extensions-build AS pg-pgx-ulid-build
ARG PG_VERSION
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
echo "********************************************************************************************************" && \
sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "=0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
#########################################################################################
#
# Layer "wal2json-build"
# Compile "wal2json" extension
#
#########################################################################################
FROM build-deps AS wal2json-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install
#########################################################################################
#
# Layer "neon-pg-ext-build"
@@ -81,6 +778,38 @@ RUN cd postgres && \
FROM build-deps AS neon-pg-ext-build
ARG PG_VERSION
# Public extensions
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=postgis-build /sfcgal/* /
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=h3-pg-build /h3/usr /
COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-semver-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql
COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -91,10 +820,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_test_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_rmgr \
@@ -166,7 +891,7 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
# create folder for file cache
mkdir -p -m 777 /neon/cache
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local/pgsql-$PG_VERSION
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:

View File

@@ -1,179 +0,0 @@
ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
COPY "vendor/postgres-v14" /postgres-v14
COPY "vendor/postgres-v15" /postgres-v15
COPY "vendor/postgres-v16" /postgres-v16
RUN for pg_version in v14 v15 v16; do \
install_dir="/postgres-$pg_version"; \
cd "$install_dir"; \
prefix="/usr/local/pgsql-${pg_version}"; \
export CONFIGURE_CMD="./configure --prefix ${prefix} CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
--with-icu --with-libxml --with-libxslt --with-lz4" && \
if [ "${pg_version}" != "v14" ]; then \
# zstd is available only from PG15
export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \
fi && \
eval $CONFIGURE_CMD && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
extension_dir="${prefix}/share/extension" && \
# Enable some of the contrib extensions
echo 'trusted = true' >> $extension_dir/autoinc.control && \
echo 'trusted = true' >> $extension_dir/bloom.control && \
echo 'trusted = true' >> $extension_dir/earthdistance.control && \
echo 'trusted = true' >> $extension_dir/insert_username.control && \
echo 'trusted = true' >> $extension_dir/intagg.control && \
echo 'trusted = true' >> $extension_dir/moddatetime.control && \
echo 'trusted = true' >> $extension_dir/pg_stat_statements.control && \
echo 'trusted = true' >> $extension_dir/pgrowlocks.control && \
echo 'trusted = true' >> $extension_dir/pgstattuple.control && \
echo 'trusted = true' >> $extension_dir/refint.control && \
echo 'trusted = true' >> $extension_dir/xml2.control && \
# We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
# In vanilla Postgres this function is limited to the superuser role.
# In Neon we have the neon_superuser role, which is not a superuser but replaces superuser in some cases.
# We could add the additional GRANT statements to the postgres repository, but that would be hard to maintain
# whenever we pick up a new Postgres version, and we want to limit the changes in our Postgres fork,
# so we do it here.
old_list="pg_stat_statements--1.0--1.1.sql pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.4.sql pg_stat_statements--1.5--1.6.sql"; \
# the first loop is for pg_stat_statements extension versions <= 1.6
for file in $prefix/share/extension/pg_stat_statements--*.sql; do \
filename=$(basename "$file"); \
if echo "$old_list" | grep -q -F "$filename"; then \
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
fi; \
done; \
# the second loop is for pg_stat_statements extension versions >= 1.7,
# where pg_stat_statements_reset() gained 3 additional arguments
for file in $prefix/share/extension/pg_stat_statements--*.sql; do \
filename=$(basename "$file"); \
if ! echo "$old_list" | grep -q -F "$filename"; then \
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
fi; \
done; \
# Go back to root dir from `/postgres-v<version>` dir
cd ..; \
done
# #########################################################################################
# #
# # Compile and run the Neon-specific `compute_ctl` binary
# #
# #########################################################################################
# FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
# ARG BUILD_TAG
# ENV BUILD_TAG=$BUILD_TAG
#
# USER nonroot
# # Copy entire project to get Cargo.* files with proper dependencies for the whole project
# COPY --chown=nonroot . .
# RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#
# #########################################################################################
# #
# # Clean up postgres folder before inclusion
# #
# #########################################################################################
# FROM pg-build AS postgres-cleanup-layer
# # COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
#
# RUN for pg_version in v14 v15 v16; do \
# prefix="/usr/local/pgsql-${pg_version}"; \
# # Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
# cd "${prefix}/bin" && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp; \
# cd ..; \
# # Remove headers that we won't need anymore - we've completed installation of all extensions
# rm -r "${prefix}/include"; \
# # Remove static postgresql libraries - all compilation is finished, so we
# # can now remove these files - they must be included in other binaries by now
# # if they were to be used by other libraries.
# rm ${prefix}/lib/lib*.a; \
# done
#
# #########################################################################################
# #
# # Final layer
# # Put it all together into the final image
# #
# #########################################################################################
# FROM debian:bullseye-slim
# # Add user postgres
# RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
# echo "postgres:test_console_pass" | chpasswd && \
# mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
# mkdir /var/db/postgres/pgbouncer && \
# chown -R postgres:postgres /var/db/postgres && \
# chmod 0750 /var/db/postgres/compute && \
# chmod 0750 /var/db/postgres/pgbouncer && \
# echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# # create folder for file cache
# mkdir -p -m 777 /neon/cache
#
# COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql-v14 /usr/local/pgsql-v14
# COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql-v15 /usr/local/pgsql-v15
# COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql-v16 /usr/local/pgsql-v16
# COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
#
# # Install:
# # libreadline8 for psql
# # libicu67, locales for collations (including ICU and plpgsql_check)
# # liblz4-1 for lz4
# # libossp-uuid16 for extension ossp-uuid
# # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# # libxml2, libxslt1.1 for xml2
# # libzstd1 for zstd
# # libboost* for rdkit
# # ca-certificates for communicating with s3 by compute_ctl
# RUN apt update && \
# apt install --no-install-recommends -y \
# gdb \
# libicu67 \
# liblz4-1 \
# libreadline8 \
# libboost-iostreams1.74.0 \
# libboost-regex1.74.0 \
# libboost-serialization1.74.0 \
# libboost-system1.74.0 \
# libossp-uuid16 \
# libgeos-c1v5 \
# libgdal28 \
# libproj19 \
# libprotobuf-c1 \
# libsfcgal1 \
# libxml2 \
# libxslt1.1 \
# libzstd1 \
# libcurl4-openssl-dev \
# locales \
# procps \
# ca-certificates && \
# rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
# localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
#
# ENV LANG en_US.utf8
# USER postgres
# ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -1,65 +0,0 @@
FROM debian:bullseye-slim
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
mkdir /var/db/postgres/pgbouncer && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
chmod 0750 /var/db/postgres/pgbouncer && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# create folder for file cache
mkdir -p -m 777 /neon/cache
COPY --from=base:v14 --chown=postgres /usr/local/pgsql /usr/local/pgsql-v14
COPY --from=base:v15 --chown=postgres /usr/local/pgsql /usr/local/pgsql-v15
COPY --from=base:v16 --chown=postgres /usr/local/pgsql /usr/local/pgsql-v16
COPY --from=tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)
# liblz4-1 for lz4
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2
# libzstd1 for zstd
# libboost* for rdkit
# ca-certificates for communicating with s3 by compute_ctl
RUN apt update && \
apt install --no-install-recommends -y \
gdb \
libicu67 \
liblz4-1 \
libreadline8 \
libboost-iostreams1.74.0 \
libboost-regex1.74.0 \
libboost-serialization1.74.0 \
libboost-system1.74.0 \
libossp-uuid16 \
libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 \
libsfcgal1 \
libxml2 \
libxslt1.1 \
libzstd1 \
libcurl4-openssl-dev \
locales \
procps \
ca-certificates && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -1,159 +0,0 @@
ARG PG_VERSION
ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
#########################################################################################
#
# Layer "build-deps"
#
#########################################################################################
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd
#########################################################################################
#
# Layer "pg-build"
# Build Postgres from the neon postgres repository.
#
#########################################################################################
FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION} postgres
RUN cd postgres && \
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
--with-icu --with-libxml --with-libxslt --with-lz4" && \
if [ "${PG_VERSION}" != "v14" ]; then \
# zstd is available only from PG15
export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \
fi && \
eval $CONFIGURE_CMD && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
# Enable some of the contrib extensions
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_stat_statements.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
# We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
# In vanilla Postgres this function is limited to the superuser role.
# In Neon we have the neon_superuser role, which is not a superuser but replaces superuser in some cases.
# We could add the additional GRANT statements to the postgres repository, but that would be hard to maintain
# whenever we pick up a new Postgres version, and we want to limit the changes in our Postgres fork,
# so we do it here.
old_list="pg_stat_statements--1.0--1.1.sql pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.4.sql pg_stat_statements--1.5--1.6.sql"; \
# the first loop is for pg_stat_statements extension versions <= 1.6
for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
filename=$(basename "$file"); \
if echo "$old_list" | grep -q -F "$filename"; then \
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
fi; \
done; \
# the second loop is for pg_stat_statements extension versions >= 1.7,
# where pg_stat_statements_reset() gained 3 additional arguments
for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
filename=$(basename "$file"); \
if ! echo "$old_list" | grep -q -F "$filename"; then \
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
fi; \
done
#########################################################################################
#
# Layer "neon-pg-ext-build"
# compile neon extensions
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_test_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/neon_rmgr \
-s install && \
case "${PG_VERSION}" in \
"v14" | "v15") \
;; \
"v16") \
echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
make -j $(getconf _NPROCESSORS_ONLN) \
PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-C pgxn/hnsw \
-s install
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
#########################################################################################
#
# Clean up postgres folder before inclusion
#
#########################################################################################
FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm ecpg
# Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include
# Remove static postgresql libraries - all compilation is finished, so we
# can now remove these files - they must be included in other binaries by now
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Final layer
# Put it all together into the final image
#
#########################################################################################
FROM debian:bullseye-slim
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local/pgsql

View File

@@ -1,18 +0,0 @@
ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto

View File

@@ -159,8 +159,8 @@ neon-pg-ext-%: postgres-%
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
.PHONY: neon-pg-clean-ext-%
neon-pg-clean-ext-%:
.PHONY: neon-pg-ext-clean-%
neon-pg-ext-clean-%:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
@@ -216,11 +216,11 @@ neon-pg-ext: \
neon-pg-ext-v15 \
neon-pg-ext-v16
.PHONY: neon-pg-clean-ext
neon-pg-clean-ext: \
neon-pg-clean-ext-v14 \
neon-pg-clean-ext-v15 \
neon-pg-clean-ext-v16
.PHONY: neon-pg-ext-clean
neon-pg-ext-clean: \
neon-pg-ext-clean-v14 \
neon-pg-ext-clean-v15 \
neon-pg-ext-clean-v16
# shorthand to build all Postgres versions
.PHONY: postgres
@@ -249,7 +249,7 @@ postgres-check: \
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean: postgres-clean neon-pg-clean-ext
clean: postgres-clean neon-pg-ext-clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything

View File

@@ -249,16 +249,6 @@ testing locally, it is convenient to run just one set of permutations, like this
DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
```
## Flamegraphs
You may find yourself in need of flamegraphs for software in this repository.
You can use [`flamegraph-rs`](https://github.com/flamegraph-rs/flamegraph) or the original [`flamegraph.pl`](https://github.com/brendangregg/FlameGraph). Your choice!
>[!IMPORTANT]
> If you're using `lld` or `mold`, you need the `--no-rosegment` linker argument.
> It's a [general thing with Rust / lld / mold](https://crbug.com/919499#c16), not specific to this repository.
> See [this PR for further instructions](https://github.com/neondatabase/neon/pull/6764).
## Documentation
[docs](/docs) Contains a top-level overview of all available markdown documentation.

View File

@@ -324,8 +324,7 @@ impl ComputeNode {
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Instant::now();
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
let mut config = postgres::Config::from_str(shard0_connstr)?;
let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.

View File

@@ -4,11 +4,6 @@ version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
# Enables test-only APIs and behaviors
testing = []
[dependencies]
anyhow.workspace = true
aws-config.workspace = true

View File

@@ -3,7 +3,7 @@ use std::{collections::HashMap, time::Duration};
use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
use control_plane::local_env::LocalEnv;
use hyper::{Method, StatusCode};
use pageserver_api::shard::{ShardIndex, ShardNumber, TenantShardId};
use pageserver_api::shard::{ShardCount, ShardIndex, ShardNumber, TenantShardId};
use postgres_connection::parse_host_port;
use serde::{Deserialize, Serialize};
use tokio_util::sync::CancellationToken;
@@ -77,7 +77,7 @@ impl ComputeHookTenant {
self.shards
.sort_by_key(|(shard, _node_id)| shard.shard_number);
if self.shards.len() == shard_count.count() as usize || shard_count.is_unsharded() {
if self.shards.len() == shard_count.0 as usize || shard_count == ShardCount(0) {
// We have pageservers for all the shards: emit a configuration update
return Some(ComputeHookNotifyRequest {
tenant_id,
@@ -94,7 +94,7 @@ impl ComputeHookTenant {
tracing::info!(
"ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
self.shards.len(),
shard_count.count()
shard_count.0
);
}
@@ -155,7 +155,7 @@ impl ComputeHook {
for (endpoint_name, endpoint) in &cplane.endpoints {
if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running {
tracing::info!("Reconfiguring endpoint {}", endpoint_name,);
tracing::info!("🔁 Reconfiguring endpoint {}", endpoint_name,);
endpoint.reconfigure(compute_pageservers.clone()).await?;
}
}
@@ -177,7 +177,7 @@ impl ComputeHook {
req
};
tracing::info!(
tracing::debug!(
"Sending notify request to {} ({:?})",
url,
reconfigure_request
@@ -266,7 +266,7 @@ impl ComputeHook {
/// periods, but we don't retry forever. The **caller** is responsible for handling failures and
/// ensuring that they eventually call again to ensure that the compute is eventually notified of
/// the proper pageserver nodes for a tenant.
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))]
#[tracing::instrument(skip_all, fields(tenant_shard_id, node_id))]
pub(super) async fn notify(
&self,
tenant_shard_id: TenantShardId,
@@ -298,7 +298,7 @@ impl ComputeHook {
let Some(reconfigure_request) = reconfigure_request else {
// The tenant doesn't yet have pageservers for all its shards: we won't notify anything
// until it does.
tracing::info!("Tenant isn't yet ready to emit a notification");
tracing::debug!("Tenant isn't yet ready to emit a notification",);
return Ok(());
};

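The doc comment above states a contract rather than a mechanism: `notify` retries transient failures only for bounded periods, and the caller must eventually invoke it again. A simplified, self-contained sketch of that caller-side pattern (all names here are illustrative, not the real service API), where a failed notification is recorded as pending and retried on a later reconciliation pass:

```rust
// Hypothetical sketch: a shard remembers that a compute notification is still
// owed, and each reconciliation pass retries it until it succeeds.
struct ShardState {
    pending_compute_notification: bool,
}

// Stand-in for ComputeHook::notify(); the real call is async and fallible.
fn notify(hook_reachable: bool) -> Result<(), &'static str> {
    if hook_reachable {
        Ok(())
    } else {
        Err("compute hook unreachable")
    }
}

fn reconcile_pass(state: &mut ShardState, hook_reachable: bool) {
    if state.pending_compute_notification {
        match notify(hook_reachable) {
            Ok(()) => {
                println!("compute notified");
                state.pending_compute_notification = false;
            }
            // Leave the flag set: a later pass is responsible for retrying.
            Err(e) => println!("notify failed ({e}), will retry on a later pass"),
        }
    }
}

fn main() {
    let mut state = ShardState {
        pending_compute_notification: true,
    };
    reconcile_pass(&mut state, false); // hook down: notification stays pending
    reconcile_pass(&mut state, true); // a later pass succeeds and clears it
    assert!(!state.pending_compute_notification);
}
```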
View File

@@ -37,12 +37,6 @@ impl std::fmt::Display for Sequence {
}
}
impl std::fmt::Debug for Sequence {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl MonotonicCounter<Sequence> for Sequence {
fn cnt_advance(&mut self, v: Sequence) {
assert!(*self <= v);

View File

@@ -15,7 +15,6 @@ use diesel::Connection;
use metrics::launch_timestamp::LaunchTimestamp;
use std::sync::Arc;
use tokio::signal::unix::SignalKind;
use tokio_util::sync::CancellationToken;
use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::logging::{self, LogFormat};
@@ -238,23 +237,15 @@ async fn async_main() -> anyhow::Result<()> {
let auth = secrets
.public_key
.map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
let router = make_router(service.clone(), auth)
let router = make_router(service, auth)
.build()
.map_err(|err| anyhow!(err))?;
let router_service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?.serve(router_service);
// Start HTTP server
let server_shutdown = CancellationToken::new();
let server = hyper::Server::from_tcp(http_listener)?
.serve(router_service)
.with_graceful_shutdown({
let server_shutdown = server_shutdown.clone();
async move {
server_shutdown.cancelled().await;
}
});
tracing::info!("Serving on {0}", args.listen);
let server_task = tokio::task::spawn(server);
tokio::task::spawn(server);
// Wait until we receive a signal
let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?;
@@ -275,16 +266,5 @@ async fn async_main() -> anyhow::Result<()> {
}
}
// Stop HTTP server first, so that we don't have to service requests
// while shutting down Service
server_shutdown.cancel();
if let Err(e) = server_task.await {
tracing::error!("Error joining HTTP server task: {e}")
}
tracing::info!("Joined HTTP server task");
service.shutdown().await;
tracing::info!("Service shutdown complete");
std::process::exit(0);
}

View File

@@ -222,7 +222,7 @@ impl Persistence {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount::new(tsp.shard_count as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
tenants_map.insert(tenant_shard_id, tsp);
@@ -318,7 +318,7 @@ impl Persistence {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())
.map_err(|e| DatabaseError::Logical(format!("Malformed tenant id: {e}")))?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount::new(tsp.shard_count as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
result.insert(tenant_shard_id, Generation::new(tsp.generation as u32));
}
@@ -340,7 +340,7 @@ impl Persistence {
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.0 as i32))
.set((
generation.eq(generation + 1),
generation_pageserver.eq(node_id.0 as i64),
@@ -362,7 +362,7 @@ impl Persistence {
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.0 as i32))
.set((
generation_pageserver.eq(i64::MAX),
placement_policy.eq(serde_json::to_string(&PlacementPolicy::Detached).unwrap()),
@@ -381,6 +381,7 @@ impl Persistence {
//
// We create the child shards here, so that they will be available for increment_generation calls
// if some pageserver holding a child shard needs to restart before the overall tenant split is complete.
#[allow(dead_code)]
pub(crate) async fn begin_shard_split(
&self,
old_shard_count: ShardCount,
@@ -392,19 +393,21 @@ impl Persistence {
conn.transaction(|conn| -> DatabaseResult<()> {
// Mark parent shards as splitting
let expect_parent_records = std::cmp::max(1, old_shard_count.0);
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32))
.filter(shard_count.eq(old_shard_count.0 as i32))
.set((splitting.eq(1),))
.execute(conn)?;
if u8::try_from(updated)
.map_err(|_| DatabaseError::Logical(
format!("Overflow existing shard count {} while splitting", updated))
)? != old_shard_count.count() {
)? != expect_parent_records {
// Perhaps a deletion or another split raced with this attempt to split, mutating
// the parent shards that we intend to split. In this case the split request should fail.
return Err(DatabaseError::Logical(
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {expect_parent_records})")
));
}
@@ -416,7 +419,7 @@ impl Persistence {
let mut parent = crate::schema::tenant_shards::table
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
.filter(shard_count.eq(parent_shard_id.shard_count.0 as i32))
.load::<TenantShardPersistence>(conn)?;
let parent = if parent.len() != 1 {
return Err(DatabaseError::Logical(format!(
@@ -446,6 +449,7 @@ impl Persistence {
// When we finish shard splitting, we must atomically clean up the old shards
// and insert the new shards, and clear the splitting marker.
#[allow(dead_code)]
pub(crate) async fn complete_shard_split(
&self,
split_tenant_id: TenantId,
@@ -457,7 +461,7 @@ impl Persistence {
// Drop parent shards
diesel::delete(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32))
.filter(shard_count.eq(old_shard_count.0 as i32))
.execute(conn)?;
// Clear sharding flag

View File

@@ -13,7 +13,6 @@ use tokio_util::sync::CancellationToken;
use utils::generation::Generation;
use utils::id::{NodeId, TimelineId};
use utils::lsn::Lsn;
use utils::sync::gate::GateGuard;
use crate::compute_hook::{ComputeHook, NotifyError};
use crate::node::Node;
@@ -54,10 +53,6 @@ pub(super) struct Reconciler {
/// the tenant is changed.
pub(crate) cancel: CancellationToken,
/// Reconcilers are registered with a Gate so that during a graceful shutdown we
/// can wait for all the reconcilers to respond to their cancellation tokens.
pub(crate) _gate_guard: GateGuard,
/// Access to persistent storage for updating generation numbers
pub(crate) persistence: Arc<Persistence>,
}
@@ -268,7 +263,7 @@ impl Reconciler {
secondary_conf,
tenant_conf: config.clone(),
shard_number: shard.number.0,
shard_count: shard.count.literal(),
shard_count: shard.count.0,
shard_stripe_size: shard.stripe_size.0,
}
}
@@ -463,7 +458,7 @@ impl Reconciler {
generation: None,
secondary_conf: None,
shard_number: self.shard.number.0,
shard_count: self.shard.count.literal(),
shard_count: self.shard.count.0,
shard_stripe_size: self.shard.stripe_size.0,
tenant_conf: self.config.clone(),
},
@@ -511,7 +506,7 @@ pub(crate) fn attached_location_conf(
generation: generation.into(),
secondary_conf: None,
shard_number: shard.number.0,
shard_count: shard.count.literal(),
shard_count: shard.count.0,
shard_stripe_size: shard.stripe_size.0,
tenant_conf: config.clone(),
}
@@ -526,7 +521,7 @@ pub(crate) fn secondary_location_conf(
generation: None,
secondary_conf: Some(LocationConfigSecondary { warm: true }),
shard_number: shard.number.0,
shard_count: shard.count.literal(),
shard_count: shard.count.0,
shard_stripe_size: shard.stripe_size.0,
tenant_conf: config.clone(),
}

View File

@@ -77,11 +77,12 @@ impl Scheduler {
return Err(ScheduleError::ImpossibleConstraint);
}
for (node_id, count) in &tenant_counts {
tracing::info!("tenant_counts[{node_id}]={count}");
}
let node_id = tenant_counts.first().unwrap().0;
tracing::info!(
"scheduler selected node {node_id} (elegible nodes {:?}, exclude: {hard_exclude:?})",
tenant_counts.iter().map(|i| i.0 .0).collect::<Vec<_>>()
);
tracing::info!("scheduler selected node {node_id}");
*self.tenant_counts.get_mut(&node_id).unwrap() += 1;
Ok(node_id)
}

View File

@@ -30,7 +30,6 @@ use pageserver_api::{
};
use pageserver_client::mgmt_api;
use tokio_util::sync::CancellationToken;
use tracing::instrument;
use utils::{
backoff,
completion::Barrier,
@@ -38,7 +37,6 @@ use utils::{
http::error::ApiError,
id::{NodeId, TenantId, TimelineId},
seqwait::SeqWait,
sync::gate::Gate,
};
use crate::{
@@ -126,12 +124,6 @@ pub struct Service {
config: Config,
persistence: Arc<Persistence>,
// Process shutdown will fire this token
cancel: CancellationToken,
// Background tasks will hold this gate
gate: Gate,
/// This waits for initial reconciliation with pageservers to complete. Until this barrier
/// passes, it isn't safe to do any actions that mutate tenants.
pub(crate) startup_complete: Barrier,
@@ -152,9 +144,8 @@ impl Service {
&self.config
}
/// Called once on startup, this function attempts to contact all pageservers to build an up-to-date
/// view of the world, and determine which pageservers are responsive.
#[instrument(skip_all)]
/// TODO: don't allow other API calls until this is done, don't start doing any background housekeeping
/// until this is done.
async fn startup_reconcile(&self) {
// For all tenant shards, a vector of observed states on nodes (where None means
// indeterminate, same as in [`ObservedStateLocation`])
@@ -162,6 +153,9 @@ impl Service {
let mut nodes_online = HashSet::new();
// TODO: give Service a cancellation token for clean shutdown
let cancel = CancellationToken::new();
// TODO: issue these requests concurrently
{
let nodes = {
@@ -196,7 +190,7 @@ impl Service {
1,
5,
"Location config listing",
&self.cancel,
&cancel,
)
.await;
let Some(list_response) = list_response else {
@@ -298,7 +292,7 @@ impl Service {
generation: None,
secondary_conf: None,
shard_number: tenant_shard_id.shard_number.0,
shard_count: tenant_shard_id.shard_count.literal(),
shard_count: tenant_shard_id.shard_count.0,
shard_stripe_size: 0,
tenant_conf: models::TenantConfig::default(),
},
@@ -337,7 +331,7 @@ impl Service {
let stream = futures::stream::iter(compute_notifications.into_iter())
.map(|(tenant_shard_id, node_id)| {
let compute_hook = compute_hook.clone();
let cancel = self.cancel.clone();
let cancel = cancel.clone();
async move {
if let Err(e) = compute_hook.notify(tenant_shard_id, node_id, &cancel).await {
tracing::error!(
@@ -374,98 +368,8 @@ impl Service {
tracing::info!("Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)");
}
/// Long running background task that periodically wakes up and looks for shards that need
/// reconciliation. Reconciliation is fallible, so any reconciliation tasks that fail during
/// e.g. a tenant create/attach/migrate must eventually be retried: this task is responsible
/// for those retries.
#[instrument(skip_all)]
async fn background_reconcile(&self) {
self.startup_complete.clone().wait().await;
const BACKGROUND_RECONCILE_PERIOD: Duration = Duration::from_secs(20);
let mut interval = tokio::time::interval(BACKGROUND_RECONCILE_PERIOD);
while !self.cancel.is_cancelled() {
tokio::select! {
_ = interval.tick() => { self.reconcile_all(); }
_ = self.cancel.cancelled() => return
}
}
}
#[instrument(skip_all)]
async fn process_results(
&self,
mut result_rx: tokio::sync::mpsc::UnboundedReceiver<ReconcileResult>,
) {
loop {
// Wait for the next result, or for cancellation
let result = tokio::select! {
r = result_rx.recv() => {
match r {
Some(result) => {result},
None => {break;}
}
}
_ = self.cancel.cancelled() => {
break;
}
};
tracing::info!(
"Reconcile result for sequence {}, ok={}",
result.sequence,
result.result.is_ok()
);
let mut locked = self.inner.write().unwrap();
let Some(tenant) = locked.tenants.get_mut(&result.tenant_shard_id) else {
// A reconciliation result might race with removing a tenant: drop results for
// tenants that aren't in our map.
continue;
};
// Usually generation should only be updated via this path, so the max() isn't
// needed, but it is used to handle out-of-band updates via, e.g., the test hook.
tenant.generation = std::cmp::max(tenant.generation, result.generation);
// If the reconciler signals that it failed to notify compute, set this state on
// the shard so that a future [`TenantState::maybe_reconcile`] will try again.
tenant.pending_compute_notification = result.pending_compute_notification;
match result.result {
Ok(()) => {
for (node_id, loc) in &result.observed.locations {
if let Some(conf) = &loc.conf {
tracing::info!("Updating observed location {}: {:?}", node_id, conf);
} else {
tracing::info!("Setting observed location {} to None", node_id,)
}
}
tenant.observed = result.observed;
tenant.waiter.advance(result.sequence);
}
Err(e) => {
tracing::warn!(
"Reconcile error on tenant {}: {}",
tenant.tenant_shard_id,
e
);
// Ordering: populate last_error before advancing error_seq,
// so that waiters will see the correct error after waiting.
*(tenant.last_error.lock().unwrap()) = format!("{e}");
tenant.error_waiter.advance(result.sequence);
for (node_id, o) in result.observed.locations {
tenant.observed.locations.insert(node_id, o);
}
}
}
}
}
pub async fn spawn(config: Config, persistence: Arc<Persistence>) -> anyhow::Result<Arc<Self>> {
let (result_tx, result_rx) = tokio::sync::mpsc::unbounded_channel();
let (result_tx, mut result_rx) = tokio::sync::mpsc::unbounded_channel();
tracing::info!("Loading nodes from database...");
let nodes = persistence.list_nodes().await?;
@@ -485,14 +389,14 @@ impl Service {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount::new(tsp.shard_count as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
let shard_identity = if tsp.shard_count == 0 {
ShardIdentity::unsharded()
} else {
ShardIdentity::new(
ShardNumber(tsp.shard_number as u8),
ShardCount::new(tsp.shard_count as u8),
ShardCount(tsp.shard_count as u8),
ShardStripeSize(tsp.shard_stripe_size as u32),
)?
};
@@ -514,7 +418,6 @@ impl Service {
observed: ObservedState::new(),
config: serde_json::from_str(&tsp.config).unwrap(),
reconciler: None,
splitting: tsp.splitting,
waiter: Arc::new(SeqWait::new(Sequence::initial())),
error_waiter: Arc::new(SeqWait::new(Sequence::initial())),
last_error: Arc::default(),
@@ -536,35 +439,73 @@ impl Service {
config,
persistence,
startup_complete: startup_complete.clone(),
cancel: CancellationToken::new(),
gate: Gate::default(),
});
let result_task_this = this.clone();
tokio::task::spawn(async move {
// Block shutdown until we're done (we must respect self.cancel)
if let Ok(_gate) = result_task_this.gate.enter() {
result_task_this.process_results(result_rx).await
while let Some(result) = result_rx.recv().await {
tracing::info!(
"Reconcile result for sequence {}, ok={}",
result.sequence,
result.result.is_ok()
);
let mut locked = result_task_this.inner.write().unwrap();
let Some(tenant) = locked.tenants.get_mut(&result.tenant_shard_id) else {
// A reconciliation result might race with removing a tenant: drop results for
// tenants that aren't in our map.
continue;
};
// Usually generation should only be updated via this path, so the max() isn't
// needed, but it is used to handle out-of-band updates via. e.g. test hook.
tenant.generation = std::cmp::max(tenant.generation, result.generation);
// If the reconciler signals that it failed to notify compute, set this state on
// the shard so that a future [`TenantState::maybe_reconcile`] will try again.
tenant.pending_compute_notification = result.pending_compute_notification;
match result.result {
Ok(()) => {
for (node_id, loc) in &result.observed.locations {
if let Some(conf) = &loc.conf {
tracing::info!(
"Updating observed location {}: {:?}",
node_id,
conf
);
} else {
tracing::info!("Setting observed location {} to None", node_id,)
}
}
tenant.observed = result.observed;
tenant.waiter.advance(result.sequence);
}
Err(e) => {
tracing::warn!(
"Reconcile error on tenant {}: {}",
tenant.tenant_shard_id,
e
);
// Ordering: populate last_error before advancing error_seq,
// so that waiters will see the correct error after waiting.
*(tenant.last_error.lock().unwrap()) = format!("{e}");
tenant.error_waiter.advance(result.sequence);
for (node_id, o) in result.observed.locations {
tenant.observed.locations.insert(node_id, o);
}
}
}
}
});
tokio::task::spawn({
let this = this.clone();
// We will block the [`Service::startup_complete`] barrier until [`Self::startup_reconcile`]
// is done.
let startup_completion = startup_completion.clone();
async move {
// Block shutdown until we're done (we must respect self.cancel)
let Ok(_gate) = this.gate.enter() else {
return;
};
let startup_reconcile_this = this.clone();
tokio::task::spawn(async move {
// Block the [`Service::startup_complete`] barrier until we're done
let _completion = startup_completion;
this.startup_reconcile().await;
drop(startup_completion);
this.background_reconcile().await;
}
startup_reconcile_this.startup_reconcile().await
});
Ok(this)
@@ -585,7 +526,7 @@ impl Service {
let tsp = TenantShardPersistence {
tenant_id: attach_req.tenant_shard_id.tenant_id.to_string(),
shard_number: attach_req.tenant_shard_id.shard_number.0 as i32,
shard_count: attach_req.tenant_shard_id.shard_count.literal() as i32,
shard_count: attach_req.tenant_shard_id.shard_count.0 as i32,
shard_stripe_size: 0,
generation: 0,
generation_pageserver: i64::MAX,
@@ -679,28 +620,6 @@ impl Service {
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);
// Trick the reconciler into not doing anything for this tenant: this helps
// tests that manually configure a tenant on the pageserver, and then call this
// attach hook: they don't want background reconciliation to modify what they
// did to the pageserver.
#[cfg(feature = "testing")]
{
if let Some(node_id) = attach_req.node_id {
tenant_state.observed.locations = HashMap::from([(
node_id,
ObservedStateLocation {
conf: Some(attached_location_conf(
tenant_state.generation,
&tenant_state.shard,
&tenant_state.config,
)),
},
)]);
} else {
tenant_state.observed.locations.clear();
}
}
Ok(AttachHookResponse {
gen: attach_req
.node_id
@@ -807,9 +726,16 @@ impl Service {
&self,
create_req: TenantCreateRequest,
) -> Result<TenantCreateResponse, ApiError> {
// Shard count 0 is valid: it means create a single shard (ShardCount(0) means "unsharded")
let literal_shard_count = if create_req.shard_parameters.is_unsharded() {
1
} else {
create_req.shard_parameters.count.0
};
// This service expects to handle sharding itself: it is an error to try and directly create
// a particular shard here.
let tenant_id = if !create_req.new_tenant_id.is_unsharded() {
let tenant_id = if create_req.new_tenant_id.shard_count > ShardCount(1) {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Attempted to create a specific shard, this API is for creating the whole tenant"
)));
@@ -823,7 +749,7 @@ impl Service {
create_req.shard_parameters.count,
);
let create_ids = (0..create_req.shard_parameters.count.count())
let create_ids = (0..literal_shard_count)
.map(|i| TenantShardId {
tenant_id,
shard_number: ShardNumber(i),
@@ -843,7 +769,7 @@ impl Service {
.map(|tenant_shard_id| TenantShardPersistence {
tenant_id: tenant_shard_id.tenant_id.to_string(),
shard_number: tenant_shard_id.shard_number.0 as i32,
shard_count: tenant_shard_id.shard_count.literal() as i32,
shard_count: tenant_shard_id.shard_count.0 as i32,
shard_stripe_size: create_req.shard_parameters.stripe_size.0 as i32,
generation: create_req.generation.map(|g| g as i32).unwrap_or(0),
generation_pageserver: i64::MAX,
@@ -949,8 +875,6 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
)
})
.collect::<Vec<_>>();
@@ -990,7 +914,7 @@ impl Service {
tenant_id: TenantId,
req: TenantLocationConfigRequest,
) -> Result<TenantLocationConfigResponse, ApiError> {
if !req.tenant_id.is_unsharded() {
if req.tenant_id.shard_count.0 > 1 {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"This API is for importing single-sharded or unsharded tenants"
)));
@@ -1053,8 +977,6 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
);
if let Some(waiter) = maybe_waiter {
waiters.push(waiter);
@@ -1144,8 +1066,6 @@ impl Service {
}
pub(crate) async fn tenant_delete(&self, tenant_id: TenantId) -> Result<StatusCode, ApiError> {
self.ensure_attached_wait(tenant_id).await?;
// TODO: refactor into helper
let targets = {
let locked = self.inner.read().unwrap();
@@ -1167,6 +1087,8 @@ impl Service {
targets
};
// TODO: error out if the tenant is not attached anywhere.
// Phase 1: delete on the pageservers
let mut any_pending = false;
for (tenant_shard_id, node) in targets {
@@ -1502,6 +1424,9 @@ impl Service {
let mut policy = None;
let mut shard_ident = None;
// TODO: put a cancellation token on Service for clean shutdown
let cancel = CancellationToken::new();
// A parent shard which will be split
struct SplitTarget {
parent_id: TenantShardId,
@@ -1524,7 +1449,7 @@ impl Service {
for (tenant_shard_id, shard) in
locked.tenants.range(TenantShardId::tenant_range(tenant_id))
{
match shard.shard.count.count().cmp(&split_req.new_shard_count) {
match shard.shard.count.0.cmp(&split_req.new_shard_count) {
Ordering::Equal => {
// Already split this
children_found.push(*tenant_shard_id);
@@ -1534,7 +1459,7 @@ impl Service {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Requested count {} but already have shards at count {}",
split_req.new_shard_count,
shard.shard.count.count()
shard.shard.count.0
)));
}
Ordering::Less => {
@@ -1564,7 +1489,7 @@ impl Service {
shard_ident = Some(shard.shard);
}
if tenant_shard_id.shard_count.count() == split_req.new_shard_count {
if tenant_shard_id.shard_count == ShardCount(split_req.new_shard_count) {
tracing::info!(
"Tenant shard {} already has shard count {}",
tenant_shard_id,
@@ -1590,7 +1515,7 @@ impl Service {
targets.push(SplitTarget {
parent_id: *tenant_shard_id,
node: node.clone(),
child_ids: tenant_shard_id.split(ShardCount::new(split_req.new_shard_count)),
child_ids: tenant_shard_id.split(ShardCount(split_req.new_shard_count)),
});
}
@@ -1637,7 +1562,7 @@ impl Service {
this_child_tsps.push(TenantShardPersistence {
tenant_id: child.tenant_id.to_string(),
shard_number: child.shard_number.0 as i32,
shard_count: child.shard_count.literal() as i32,
shard_count: child.shard_count.0 as i32,
shard_stripe_size: shard_ident.stripe_size.0 as i32,
// Note: this generation is a placeholder, [`Persistence::begin_shard_split`] will
// populate the correct generation as part of its transaction, to protect us
@@ -1673,18 +1598,6 @@ impl Service {
}
}
// Now that I have persisted the splitting state, apply it in-memory. This is infallible, so
// callers may assume that if splitting is set in memory, then it was persisted, and if splitting
// is not set in memory, then it was not persisted.
{
let mut locked = self.inner.write().unwrap();
for target in &targets {
if let Some(parent_shard) = locked.tenants.get_mut(&target.parent_id) {
parent_shard.splitting = SplitState::Splitting;
}
}
}
// FIXME: we have now committed the shard split state to the database, so any subsequent
// failure needs to roll it back. We will later wrap this function in logic to roll back
// the split if it fails.
@@ -1744,7 +1657,7 @@ impl Service {
.complete_shard_split(tenant_id, old_shard_count)
.await?;
// Replace all the shards we just split with their children: this phase is infallible.
// Replace all the shards we just split with their children
let mut response = TenantShardSplitResponse {
new_shards: Vec::new(),
};
@@ -1792,10 +1705,6 @@ impl Service {
child_state.generation = generation;
child_state.config = config.clone();
// The child's TenantState::splitting is intentionally left at the default value of Idle,
// as at this point in the split process we have succeeded and this part is infallible:
// we will never need to do any special recovery from this state.
child_locations.push((child, pageserver));
locked.tenants.insert(child, child_state);
@@ -1807,7 +1716,7 @@ impl Service {
// Send compute notifications for all the new shards
let mut failed_notifications = Vec::new();
for (child_id, child_ps) in child_locations {
if let Err(e) = compute_hook.notify(child_id, child_ps, &self.cancel).await {
if let Err(e) = compute_hook.notify(child_id, child_ps, &cancel).await {
tracing::warn!("Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})",
child_id, child_ps);
failed_notifications.push(child_id);
@@ -1883,8 +1792,6 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
)
};
@@ -2086,8 +1993,6 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
);
}
}
@@ -2109,8 +2014,6 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
);
}
}
@@ -2150,8 +2053,6 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
) {
waiters.push(waiter);
}
@@ -2163,17 +2064,6 @@ impl Service {
let ensure_waiters = {
let locked = self.inner.write().unwrap();
// Check if the tenant is splitting: in this case, even if it is attached,
// we must act as if it is not: this blocks e.g. timeline creation/deletion
// operations during the split.
for (_shard_id, shard) in locked.tenants.range(TenantShardId::tenant_range(tenant_id)) {
if !matches!(shard.splitting, SplitState::Idle) {
return Err(ApiError::ResourceUnavailable(
"Tenant shards are currently splitting".into(),
));
}
}
self.ensure_attached_schedule(locked, tenant_id)
.map_err(ApiError::InternalServerError)?
};
@@ -2205,25 +2095,8 @@ impl Service {
&compute_hook,
&self.config,
&self.persistence,
&self.gate,
&self.cancel,
)
})
.count()
}
pub async fn shutdown(&self) {
// Note that this already stops processing any results from reconciles: so
// we do not expect that our [`TenantState`] objects will reach a neat
// final state.
self.cancel.cancel();
// The cancellation tokens in [`crate::reconciler::Reconciler`] are children
// of our cancellation token, so we do not need to explicitly cancel each of
// them.
// Background tasks and reconcilers hold gate guards: this waits for them all
// to complete.
self.gate.close().await;
}
}
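The removed `shutdown()` above documents a two-step order: fire the cancellation token first so background work stops taking on new tasks, then wait on the gate until everything holding it has finished. A rough sketch of the same cancel-then-drain pattern, assuming tokio and tokio-util as dependencies and using `tokio_util::task::TaskTracker` as a stand-in for the in-tree `Gate`:

```rust
// Sketch only: TaskTracker plays the role of the Gate (close + wait), and the
// CancellationToken is shared with every background task.
use tokio_util::sync::CancellationToken;
use tokio_util::task::TaskTracker;

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let tracker = TaskTracker::new();

    for i in 0..3 {
        let cancel = cancel.clone();
        tracker.spawn(async move {
            // Each task either finishes its (long) work or observes shutdown.
            tokio::select! {
                _ = cancel.cancelled() => println!("task {i} observed shutdown"),
                _ = tokio::time::sleep(std::time::Duration::from_secs(3600)) => {}
            }
        });
    }

    // Shutdown: cancel first, then wait for all tracked tasks to drain.
    cancel.cancel();
    tracker.close();
    tracker.wait().await;
    println!("all background tasks finished");
}
```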

View File

@@ -7,18 +7,16 @@ use pageserver_api::{
};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::{instrument, Instrument};
use utils::{
generation::Generation,
id::NodeId,
seqwait::{SeqWait, SeqWaitError},
sync::gate::Gate,
};
use crate::{
compute_hook::ComputeHook,
node::Node,
persistence::{split_state::SplitState, Persistence},
persistence::Persistence,
reconciler::{attached_location_conf, secondary_location_conf, ReconcileError, Reconciler},
scheduler::{ScheduleError, Scheduler},
service, PlacementPolicy, Sequence,
@@ -60,11 +58,6 @@ pub(crate) struct TenantState {
/// cancellation token has been fired)
pub(crate) reconciler: Option<ReconcilerHandle>,
/// If a tenant is being split, then all shards with that TenantId will have a
/// SplitState set, this acts as a guard against other operations such as background
/// reconciliation, and timeline creation.
pub(crate) splitting: SplitState,
/// Optionally wait for reconciliation to complete up to a particular
/// sequence number.
pub(crate) waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
@@ -245,7 +238,6 @@ impl TenantState {
observed: ObservedState::default(),
config: TenantConfig::default(),
reconciler: None,
splitting: SplitState::Idle,
sequence: Sequence(1),
waiter: Arc::new(SeqWait::new(Sequence(0))),
error_waiter: Arc::new(SeqWait::new(Sequence(0))),
@@ -423,8 +415,6 @@ impl TenantState {
false
}
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
pub(crate) fn maybe_reconcile(
&mut self,
result_tx: tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
@@ -432,8 +422,6 @@ impl TenantState {
compute_hook: &Arc<ComputeHook>,
service_config: &service::Config,
persistence: &Arc<Persistence>,
gate: &Gate,
cancel: &CancellationToken,
) -> Option<ReconcilerWaiter> {
// If there are any ambiguous observed states, and the nodes they refer to are available,
// we should reconcile to clean them up.
@@ -455,14 +443,6 @@ impl TenantState {
return None;
}
// If we are currently splitting, then never start a reconciler task: the splitting logic
// requires that shards are not interfered with while it runs. Do this check here rather than
// up top, so that we only log this message if we would otherwise have done a reconciliation.
if !matches!(self.splitting, SplitState::Idle) {
tracing::info!("Refusing to reconcile, splitting in progress");
return None;
}
// Reconcile already in flight for the current sequence?
if let Some(handle) = &self.reconciler {
if handle.sequence == self.sequence {
@@ -480,12 +460,7 @@ impl TenantState {
// doing our sequence's work.
let old_handle = self.reconciler.take();
let Ok(gate_guard) = gate.enter() else {
// Shutting down, don't start a reconciler
return None;
};
let reconciler_cancel = cancel.child_token();
let cancel = CancellationToken::new();
let mut reconciler = Reconciler {
tenant_shard_id: self.tenant_shard_id,
shard: self.shard,
@@ -496,66 +471,59 @@ impl TenantState {
pageservers: pageservers.clone(),
compute_hook: compute_hook.clone(),
service_config: service_config.clone(),
_gate_guard: gate_guard,
cancel: reconciler_cancel.clone(),
cancel: cancel.clone(),
persistence: persistence.clone(),
compute_notify_failure: false,
};
let reconcile_seq = self.sequence;
tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence);
tracing::info!("Spawning Reconciler for sequence {}", self.sequence);
let must_notify = self.pending_compute_notification;
let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
tenant_id=%reconciler.tenant_shard_id.tenant_id,
shard_id=%reconciler.tenant_shard_id.shard_slug());
let join_handle = tokio::task::spawn(
async move {
// Wait for any previous reconcile task to complete before we start
if let Some(old_handle) = old_handle {
old_handle.cancel.cancel();
if let Err(e) = old_handle.handle.await {
// We can't do much with this other than log it: the task is done, so
// we may proceed with our work.
tracing::error!("Unexpected join error waiting for reconcile task: {e}");
}
let join_handle = tokio::task::spawn(async move {
// Wait for any previous reconcile task to complete before we start
if let Some(old_handle) = old_handle {
old_handle.cancel.cancel();
if let Err(e) = old_handle.handle.await {
// We can't do much with this other than log it: the task is done, so
// we may proceed with our work.
tracing::error!("Unexpected join error waiting for reconcile task: {e}");
}
// Early check for cancellation before doing any work
// TODO: wrap all remote API operations in cancellation check
// as well.
if reconciler.cancel.is_cancelled() {
return;
}
// Attempt to make observed state match intent state
let result = reconciler.reconcile().await;
// If we know we had a pending compute notification from some previous action, send a notification irrespective
// of whether the above reconcile() did any work
if result.is_ok() && must_notify {
// If this fails we will send the need to retry in [`ReconcileResult::pending_compute_notification`]
reconciler.compute_notify().await.ok();
}
result_tx
.send(ReconcileResult {
sequence: reconcile_seq,
result,
tenant_shard_id: reconciler.tenant_shard_id,
generation: reconciler.generation,
observed: reconciler.observed,
pending_compute_notification: reconciler.compute_notify_failure,
})
.ok();
}
.instrument(reconciler_span),
);
// Early check for cancellation before doing any work
// TODO: wrap all remote API operations in cancellation check
// as well.
if reconciler.cancel.is_cancelled() {
return;
}
// Attempt to make observed state match intent state
let result = reconciler.reconcile().await;
// If we know we had a pending compute notification from some previous action, send a notification irrespective
// of whether the above reconcile() did any work
if result.is_ok() && must_notify {
// If this fails we will send the need to retry in [`ReconcileResult::pending_compute_notification`]
reconciler.compute_notify().await.ok();
}
result_tx
.send(ReconcileResult {
sequence: reconcile_seq,
result,
tenant_shard_id: reconciler.tenant_shard_id,
generation: reconciler.generation,
observed: reconciler.observed,
pending_compute_notification: reconciler.compute_notify_failure,
})
.ok();
});
self.reconciler = Some(ReconcilerHandle {
sequence: self.sequence,
handle: join_handle,
cancel: reconciler_cancel,
cancel,
});
Some(ReconcilerWaiter {

View File

@@ -450,7 +450,7 @@ async fn handle_tenant(
new_tenant_id: TenantShardId::unsharded(tenant_id),
generation: None,
shard_parameters: ShardParameters {
count: ShardCount::new(shard_count),
count: ShardCount(shard_count),
stripe_size: shard_stripe_size
.map(ShardStripeSize)
.unwrap_or(ShardParameters::DEFAULT_STRIPE_SIZE),

View File

@@ -115,6 +115,7 @@ pub fn set_build_info_metric(revision: &str, build_tag: &str) {
// performed by the process.
// We know the size of the block, so we can determine the I/O bytes out of it.
// The value might be not 100% exact, but should be fine for Prometheus metrics in this case.
#[allow(clippy::unnecessary_cast)]
fn update_rusage_metrics() {
let rusage_stats = get_rusage_stats();

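The comment above alludes to a simple conversion: `getrusage()` reports I/O as block counts, and multiplying by the block size gives approximate byte counts. A rough sketch under Linux assumptions (where `ru_inblock`/`ru_oublock` are counted in 512-byte units), using the `libc` crate:

```rust
// Sketch: turn getrusage() block counts into approximate I/O byte counts.
// Assumes Linux, where the counts are in 512-byte units.
const BYTES_PER_BLOCK: i64 = 512;

fn rusage_io_bytes() -> (i64, i64) {
    let mut usage: libc::rusage = unsafe { std::mem::zeroed() };
    let rc = unsafe { libc::getrusage(libc::RUSAGE_SELF, &mut usage) };
    assert_eq!(rc, 0, "getrusage failed");
    (
        usage.ru_inblock * BYTES_PER_BLOCK, // approx. bytes read in
        usage.ru_oublock * BYTES_PER_BLOCK, // approx. bytes written out
    )
}

fn main() {
    let (read_bytes, write_bytes) = rusage_io_bytes();
    println!("approx I/O so far: read={read_bytes}B written={write_bytes}B");
}
```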
View File

@@ -214,14 +214,14 @@ impl ShardParameters {
pub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);
pub fn is_unsharded(&self) -> bool {
self.count.is_unsharded()
self.count == ShardCount(0)
}
}
impl Default for ShardParameters {
fn default() -> Self {
Self {
count: ShardCount::new(0),
count: ShardCount(0),
stripe_size: Self::DEFAULT_STRIPE_SIZE,
}
}

View File

@@ -13,41 +13,10 @@ use utils::id::TenantId;
pub struct ShardNumber(pub u8);
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
pub struct ShardCount(u8);
pub struct ShardCount(pub u8);
impl ShardCount {
pub const MAX: Self = Self(u8::MAX);
/// The internal value of a ShardCount may be zero, which means "1 shard, but use
/// legacy format for TenantShardId that excludes the shard suffix", also known
/// as `TenantShardId::unsharded`.
///
/// This method returns the actual number of shards, i.e. if our internal value is
/// zero, we return 1 (unsharded tenants have 1 shard).
pub fn count(&self) -> u8 {
if self.0 > 0 {
self.0
} else {
1
}
}
/// The literal internal value: this is **not** the number of shards in the
/// tenant, as we have a special zero value for legacy unsharded tenants. Use
/// [`Self::count`] if you want to know the cardinality of shards.
pub fn literal(&self) -> u8 {
self.0
}
pub fn is_unsharded(&self) -> bool {
self.0 == 0
}
/// `v` may be zero, or the number of shards in the tenant. `v` is what
/// [`Self::literal`] would return.
pub fn new(val: u8) -> Self {
Self(val)
}
}
impl ShardNumber {
@@ -117,7 +86,7 @@ impl TenantShardId {
}
pub fn is_unsharded(&self) -> bool {
self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
}
/// Convenience for dropping the tenant_id and just getting the ShardIndex: this

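The helper methods removed above encode a convention that still applies after this change: a raw `ShardCount` of zero means "unsharded", i.e. one shard stored in the legacy `TenantShardId` format, which is why callers such as `begin_shard_split` compute `max(1, old_shard_count.0)`. A minimal sketch of that convention; the `effective()` helper here is hypothetical, for illustration only:

```rust
// Zero-means-unsharded convention: the raw value 0 still denotes one shard.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct ShardCount(pub u8);

impl ShardCount {
    /// Hypothetical helper: the effective number of shards in the tenant.
    fn effective(self) -> u8 {
        if self.0 == 0 {
            1
        } else {
            self.0
        }
    }
}

fn main() {
    assert_eq!(ShardCount(0).effective(), 1); // legacy unsharded tenant
    assert_eq!(ShardCount(4).effective(), 4); // tenant split into 4 shards
    assert!(ShardCount(0) != ShardCount(1)); // raw values remain distinct
}
```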
View File

@@ -3,7 +3,7 @@
#![allow(non_snake_case)]
// bindgen creates some unsafe code with no doc comments.
#![allow(clippy::missing_safety_doc)]
// noted at 1.63 that in many cases there's u32 -> u32 transmutes in bindgen code.
// noted at 1.63 that in many cases there's a u32 -> u32 transmutes in bindgen code.
#![allow(clippy::useless_transmute)]
// modules included with the postgres_ffi macro depend on the types of the specific version's
// types, and trigger a too eager lint.

View File

@@ -15,13 +15,11 @@ aws-sdk-s3.workspace = true
aws-credential-types.workspace = true
bytes.workspace = true
camino.workspace = true
humantime.workspace = true
hyper = { workspace = true, features = ["stream"] }
futures.workspace = true
serde.workspace = true
serde_json.workspace = true
tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
tokio-stream.workspace = true
tokio-util = { workspace = true, features = ["compat"] }
toml_edit.workspace = true
tracing.workspace = true

View File

@@ -22,15 +22,16 @@ use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerCl
use bytes::Bytes;
use futures::stream::Stream;
use futures_util::StreamExt;
use futures_util::TryStreamExt;
use http_types::{StatusCode, Url};
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use crate::s3_bucket::RequestKind;
use crate::TimeTravelError;
use crate::{
error::Cancelled, s3_bucket::RequestKind, AzureConfig, ConcurrencyLimiter, Download,
DownloadError, Listing, ListingMode, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
AzureConfig, ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, RemotePath,
RemoteStorage, StorageMetadata,
};
pub struct AzureBlobStorage {
@@ -38,12 +39,10 @@ pub struct AzureBlobStorage {
prefix_in_container: Option<String>,
max_keys_per_list_response: Option<NonZeroU32>,
concurrency_limiter: ConcurrencyLimiter,
// Per-request timeout. Accessible for tests.
pub timeout: Duration,
}
impl AzureBlobStorage {
pub fn new(azure_config: &AzureConfig, timeout: Duration) -> Result<Self> {
pub fn new(azure_config: &AzureConfig) -> Result<Self> {
debug!(
"Creating azure remote storage for azure container {}",
azure_config.container_name
@@ -80,7 +79,6 @@ impl AzureBlobStorage {
prefix_in_container: azure_config.prefix_in_container.to_owned(),
max_keys_per_list_response,
concurrency_limiter: ConcurrencyLimiter::new(azure_config.concurrency_limit.get()),
timeout,
})
}
@@ -123,11 +121,8 @@ impl AzureBlobStorage {
async fn download_for_builder(
&self,
builder: GetBlobBuilder,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let kind = RequestKind::Get;
let _permit = self.permit(kind, cancel).await?;
let mut response = builder.into_stream();
let mut etag = None;
let mut last_modified = None;
@@ -135,70 +130,39 @@ impl AzureBlobStorage {
// TODO give proper streaming response instead of buffering into RAM
// https://github.com/neondatabase/neon/issues/5563
let download = async {
let response = builder
// convert to concrete Pageable
.into_stream()
// convert to TryStream
.into_stream()
.map_err(to_download_error);
// apply per request timeout
let response = tokio_stream::StreamExt::timeout(response, self.timeout);
// flatten
let response = response.map(|res| match res {
Ok(res) => res,
Err(_elapsed) => Err(DownloadError::Timeout),
});
let mut response = std::pin::pin!(response);
let mut bufs = Vec::new();
while let Some(part) = response.next().await {
let part = part?;
let etag_str: &str = part.blob.properties.etag.as_ref();
if etag.is_none() {
etag = Some(etag.unwrap_or_else(|| etag_str.to_owned()));
}
if last_modified.is_none() {
last_modified = Some(part.blob.properties.last_modified.into());
}
if let Some(blob_meta) = part.blob.metadata {
metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
}
let data = part
.data
.collect()
.await
.map_err(|e| DownloadError::Other(e.into()))?;
bufs.push(data);
let mut bufs = Vec::new();
while let Some(part) = response.next().await {
let part = part.map_err(to_download_error)?;
let etag_str: &str = part.blob.properties.etag.as_ref();
if etag.is_none() {
etag = Some(etag.unwrap_or_else(|| etag_str.to_owned()));
}
Ok(Download {
download_stream: Box::pin(futures::stream::iter(bufs.into_iter().map(Ok))),
etag,
last_modified,
metadata: Some(StorageMetadata(metadata)),
})
};
tokio::select! {
bufs = download => bufs,
_ = cancel.cancelled() => Err(DownloadError::Cancelled),
if last_modified.is_none() {
last_modified = Some(part.blob.properties.last_modified.into());
}
if let Some(blob_meta) = part.blob.metadata {
metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
}
let data = part
.data
.collect()
.await
.map_err(|e| DownloadError::Other(e.into()))?;
bufs.push(data);
}
Ok(Download {
download_stream: Box::pin(futures::stream::iter(bufs.into_iter().map(Ok))),
etag,
last_modified,
metadata: Some(StorageMetadata(metadata)),
})
}
async fn permit(
&self,
kind: RequestKind,
cancel: &CancellationToken,
) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {
let acquire = self.concurrency_limiter.acquire(kind);
tokio::select! {
permit = acquire => Ok(permit.expect("never closed")),
_ = cancel.cancelled() => Err(Cancelled),
}
async fn permit(&self, kind: RequestKind) -> tokio::sync::SemaphorePermit<'_> {
self.concurrency_limiter
.acquire(kind)
.await
.expect("semaphore is never closed")
}
}
@@ -228,87 +192,66 @@ impl RemoteStorage for AzureBlobStorage {
prefix: Option<&RemotePath>,
mode: ListingMode,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> anyhow::Result<Listing, DownloadError> {
let _permit = self.permit(RequestKind::List, cancel).await?;
let op = async {
// get the passed prefix or if it is not set use prefix_in_bucket value
let list_prefix = prefix
.map(|p| self.relative_path_to_name(p))
.or_else(|| self.prefix_in_container.clone())
.map(|mut p| {
// required to end with a separator
// otherwise request will return only the entry of a prefix
if matches!(mode, ListingMode::WithDelimiter)
&& !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR)
{
p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
}
p
});
let mut builder = self.client.list_blobs();
if let ListingMode::WithDelimiter = mode {
builder = builder.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
}
if let Some(prefix) = list_prefix {
builder = builder.prefix(Cow::from(prefix.to_owned()));
}
if let Some(limit) = self.max_keys_per_list_response {
builder = builder.max_results(MaxResults::new(limit));
}
let response = builder.into_stream();
let response = response.into_stream().map_err(to_download_error);
let response = tokio_stream::StreamExt::timeout(response, self.timeout);
let response = response.map(|res| match res {
Ok(res) => res,
Err(_elapsed) => Err(DownloadError::Timeout),
// get the passed prefix or if it is not set use prefix_in_bucket value
let list_prefix = prefix
.map(|p| self.relative_path_to_name(p))
.or_else(|| self.prefix_in_container.clone())
.map(|mut p| {
// required to end with a separator
// otherwise request will return only the entry of a prefix
if matches!(mode, ListingMode::WithDelimiter)
&& !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR)
{
p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
}
p
});
let mut response = std::pin::pin!(response);
let mut builder = self.client.list_blobs();
let mut res = Listing::default();
if let ListingMode::WithDelimiter = mode {
builder = builder.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
}
let mut max_keys = max_keys.map(|mk| mk.get());
while let Some(entry) = response.next().await {
let entry = entry?;
let prefix_iter = entry
.blobs
.prefixes()
.map(|prefix| self.name_to_relative_path(&prefix.name));
res.prefixes.extend(prefix_iter);
if let Some(prefix) = list_prefix {
builder = builder.prefix(Cow::from(prefix.to_owned()));
}
let blob_iter = entry
.blobs
.blobs()
.map(|k| self.name_to_relative_path(&k.name));
if let Some(limit) = self.max_keys_per_list_response {
builder = builder.max_results(MaxResults::new(limit));
}
for key in blob_iter {
res.keys.push(key);
let mut response = builder.into_stream();
let mut res = Listing::default();
// NonZeroU32 doesn't support subtraction apparently
let mut max_keys = max_keys.map(|mk| mk.get());
while let Some(l) = response.next().await {
let entry = l.map_err(to_download_error)?;
let prefix_iter = entry
.blobs
.prefixes()
.map(|prefix| self.name_to_relative_path(&prefix.name));
res.prefixes.extend(prefix_iter);
if let Some(mut mk) = max_keys {
assert!(mk > 0);
mk -= 1;
if mk == 0 {
return Ok(res); // limit reached
}
max_keys = Some(mk);
let blob_iter = entry
.blobs
.blobs()
.map(|k| self.name_to_relative_path(&k.name));
for key in blob_iter {
res.keys.push(key);
if let Some(mut mk) = max_keys {
assert!(mk > 0);
mk -= 1;
if mk == 0 {
return Ok(res); // limit reached
}
max_keys = Some(mk);
}
}
Ok(res)
};
tokio::select! {
res = op => res,
_ = cancel.cancelled() => Err(DownloadError::Cancelled),
}
Ok(res)
}
async fn upload(
@@ -317,52 +260,35 @@ impl RemoteStorage for AzureBlobStorage {
data_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let _permit = self.permit(RequestKind::Put, cancel).await?;
let _permit = self.permit(RequestKind::Put).await;
let blob_client = self.client.blob_client(self.relative_path_to_name(to));
let op = async {
let blob_client = self.client.blob_client(self.relative_path_to_name(to));
let from: Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>> =
Box::pin(from);
let from: Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>> =
Box::pin(from);
let from = NonSeekableStream::new(from, data_size_bytes);
let from = NonSeekableStream::new(from, data_size_bytes);
let body = azure_core::Body::SeekableStream(Box::new(from));
let body = azure_core::Body::SeekableStream(Box::new(from));
let mut builder = blob_client.put_block_blob(body);
let mut builder = blob_client.put_block_blob(body);
if let Some(metadata) = metadata {
builder = builder.metadata(to_azure_metadata(metadata));
}
let fut = builder.into_future();
let fut = tokio::time::timeout(self.timeout, fut);
match fut.await {
Ok(Ok(_response)) => Ok(()),
Ok(Err(azure)) => Err(azure.into()),
Err(_timeout) => Err(TimeoutOrCancel::Cancel.into()),
}
};
tokio::select! {
res = op => res,
_ = cancel.cancelled() => Err(TimeoutOrCancel::Cancel.into()),
if let Some(metadata) = metadata {
builder = builder.metadata(to_azure_metadata(metadata));
}
let _response = builder.into_future().await?;
Ok(())
}
async fn download(
&self,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
let _permit = self.permit(RequestKind::Get).await;
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let builder = blob_client.get();
self.download_for_builder(builder, cancel).await
self.download_for_builder(builder).await
}
async fn download_byte_range(
@@ -370,8 +296,8 @@ impl RemoteStorage for AzureBlobStorage {
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let _permit = self.permit(RequestKind::Get).await;
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let mut builder = blob_client.get();
@@ -383,113 +309,82 @@ impl RemoteStorage for AzureBlobStorage {
};
builder = builder.range(range);
self.download_for_builder(builder, cancel).await
self.download_for_builder(builder).await
}
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
self.delete_objects(std::array::from_ref(path), cancel)
.await
}
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
let _permit = self.permit(RequestKind::Delete).await;
let blob_client = self.client.blob_client(self.relative_path_to_name(path));
async fn delete_objects<'a>(
&self,
paths: &'a [RemotePath],
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let _permit = self.permit(RequestKind::Delete, cancel).await?;
let builder = blob_client.delete();
let op = async {
// TODO batch requests are also not supported by the SDK
// https://github.com/Azure/azure-sdk-for-rust/issues/1068
// https://github.com/Azure/azure-sdk-for-rust/issues/1249
for path in paths {
let blob_client = self.client.blob_client(self.relative_path_to_name(path));
let request = blob_client.delete().into_future();
let res = tokio::time::timeout(self.timeout, request).await;
match res {
Ok(Ok(_response)) => continue,
Ok(Err(e)) => {
if let Some(http_err) = e.as_http_error() {
if http_err.status() == StatusCode::NotFound {
continue;
}
}
return Err(e.into());
match builder.into_future().await {
Ok(_response) => Ok(()),
Err(e) => {
if let Some(http_err) = e.as_http_error() {
if http_err.status() == StatusCode::NotFound {
return Ok(());
}
Err(_elapsed) => return Err(TimeoutOrCancel::Timeout.into()),
}
Err(anyhow::Error::new(e))
}
Ok(())
};
tokio::select! {
res = op => res,
_ = cancel.cancelled() => Err(TimeoutOrCancel::Cancel.into()),
}
}
async fn copy(
&self,
from: &RemotePath,
to: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let _permit = self.permit(RequestKind::Copy, cancel).await?;
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
// Permit is already obtained by inner delete function
let timeout = tokio::time::sleep(self.timeout);
// TODO batch requests are also not supported by the SDK
// https://github.com/Azure/azure-sdk-for-rust/issues/1068
// https://github.com/Azure/azure-sdk-for-rust/issues/1249
for path in paths {
self.delete(path).await?;
}
Ok(())
}
let mut copy_status = None;
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()> {
let _permit = self.permit(RequestKind::Copy).await;
let blob_client = self.client.blob_client(self.relative_path_to_name(to));
let op = async {
let blob_client = self.client.blob_client(self.relative_path_to_name(to));
let source_url = format!(
"{}/{}",
self.client.url()?,
self.relative_path_to_name(from)
);
let builder = blob_client.copy(Url::from_str(&source_url)?);
let source_url = format!(
"{}/{}",
self.client.url()?,
self.relative_path_to_name(from)
);
let result = builder.into_future().await?;
let builder = blob_client.copy(Url::from_str(&source_url)?);
let copy = builder.into_future();
let result = copy.await?;
copy_status = Some(result.copy_status);
loop {
match copy_status.as_ref().expect("we always set it to Some") {
CopyStatus::Aborted => {
anyhow::bail!("Received abort for copy from {from} to {to}.");
}
CopyStatus::Failed => {
anyhow::bail!("Received failure response for copy from {from} to {to}.");
}
CopyStatus::Success => return Ok(()),
CopyStatus::Pending => (),
let mut copy_status = result.copy_status;
let start_time = Instant::now();
const MAX_WAIT_TIME: Duration = Duration::from_secs(60);
loop {
match copy_status {
CopyStatus::Aborted => {
anyhow::bail!("Received abort for copy from {from} to {to}.");
}
// The copy is taking longer. Waiting a second and then re-trying.
// TODO estimate time based on copy_progress and adjust time based on that
tokio::time::sleep(Duration::from_millis(1000)).await;
let properties = blob_client.get_properties().into_future().await?;
let Some(status) = properties.blob.properties.copy_status else {
tracing::warn!("copy_status for copy is None!, from={from}, to={to}");
return Ok(());
};
copy_status = Some(status);
CopyStatus::Failed => {
anyhow::bail!("Received failure response for copy from {from} to {to}.");
}
CopyStatus::Success => return Ok(()),
CopyStatus::Pending => (),
}
};
tokio::select! {
res = op => res,
_ = cancel.cancelled() => Err(anyhow::Error::new(TimeoutOrCancel::Cancel)),
_ = timeout => {
let e = anyhow::Error::new(TimeoutOrCancel::Timeout);
let e = e.context(format!("Timeout, last status: {copy_status:?}"));
Err(e)
},
// The copy is taking longer. Waiting a second and then re-trying.
// TODO estimate time based on copy_progress and adjust time based on that
tokio::time::sleep(Duration::from_millis(1000)).await;
let properties = blob_client.get_properties().into_future().await?;
let Some(status) = properties.blob.properties.copy_status else {
tracing::warn!("copy_status for copy is None!, from={from}, to={to}");
return Ok(());
};
if start_time.elapsed() > MAX_WAIT_TIME {
anyhow::bail!("Copy from from {from} to {to} took longer than limit MAX_WAIT_TIME={}s. copy_pogress={:?}.",
MAX_WAIT_TIME.as_secs_f32(),
properties.blob.properties.copy_progress,
);
}
copy_status = status;
}
}
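Both backends in this diff wrap their cancellation-aware SDK calls in the same shape: build the request future, then race it against the cancellation token and a sleep-based deadline, mapping whichever loses onto the `TimeoutOrCancel` root cause. A minimal sketch of that pattern, not taken from the diff, with `do_request` as a hypothetical stand-in for the SDK call:
async fn run_with_deadline<F>(
    do_request: F,
    timeout: std::time::Duration,
    cancel: &tokio_util::sync::CancellationToken,
) -> anyhow::Result<()>
where
    F: std::future::Future<Output = anyhow::Result<()>>,
{
    tokio::select! {
        // the request itself; whatever it returns is passed through
        res = do_request => res,
        // deadline expired first: surface Timeout as the root cause
        _ = tokio::time::sleep(timeout) => Err(TimeoutOrCancel::Timeout.into()),
        // shutdown/detach: surface Cancel as the root cause
        _ = cancel.cancelled() => Err(TimeoutOrCancel::Cancel.into()),
    }
}
The Azure copy above adds one twist on top of this: because the copy completes asynchronously on the server, the operation future itself polls `get_properties` in a loop until the reported `copy_status` leaves `Pending`.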

View File

@@ -1,181 +0,0 @@
/// Reasons for downloads or listings to fail.
#[derive(Debug)]
pub enum DownloadError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown.
Cancelled,
/// A timeout happened while executing the request. Possible reasons:
/// - stuck tcp connection
///
/// Waiting for a concurrency limiter permit is not counted towards the timeout.
Timeout,
/// The file was found in the remote storage, but the download failed.
Other(anyhow::Error),
}
impl std::fmt::Display for DownloadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DownloadError::BadInput(e) => {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
}
}
}
impl std::error::Error for DownloadError {}
impl DownloadError {
/// Returns true if the error should not be retried with backoff
pub fn is_permanent(&self) -> bool {
use DownloadError::*;
match self {
BadInput(_) | NotFound | Cancelled => true,
Timeout | Other(_) => false,
}
}
}
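`is_permanent` is the hook retry loops are meant to consult before backing off: `NotFound`, `BadInput` and `Cancelled` will not get better on a second attempt. The real callers in this repository go through `utils::backoff`; the hand-rolled sketch below only illustrates the intended use, and `fetch` is a hypothetical closure:
async fn download_with_retries<F, Fut, T>(
    mut fetch: F,
    max_attempts: u32,
) -> Result<T, DownloadError>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, DownloadError>>,
{
    let mut attempt = 0;
    loop {
        match fetch().await {
            Ok(v) => return Ok(v),
            // permanent errors: retrying cannot help, give up immediately
            Err(e) if e.is_permanent() => return Err(e),
            Err(e) if attempt + 1 >= max_attempts => return Err(e),
            Err(_) => {
                attempt += 1;
                // naive linear backoff, for illustration only
                tokio::time::sleep(std::time::Duration::from_millis(100 * u64::from(attempt))).await;
            }
        }
    }
}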
#[derive(Debug)]
pub enum TimeTravelError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The used remote storage does not have time travel recovery implemented
Unimplemented,
/// The number of versions/deletion markers is above our limit.
TooManyVersions,
/// A cancellation token aborted the process, typically during
/// request closure or process shutdown.
Cancelled,
/// Other errors
Other(anyhow::Error),
}
impl std::fmt::Display for TimeTravelError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TimeTravelError::BadInput(e) => {
write!(
f,
"Failed to time travel recover a prefix due to user input: {e}"
)
}
TimeTravelError::Unimplemented => write!(
f,
"time travel recovery is not implemented for the current storage backend"
),
TimeTravelError::Cancelled => write!(f, "Cancelled, shutting down"),
TimeTravelError::TooManyVersions => {
write!(f, "Number of versions/delete markers above limit")
}
TimeTravelError::Other(e) => write!(f, "Failed to time travel recover a prefix: {e:?}"),
}
}
}
impl std::error::Error for TimeTravelError {}
/// Plain cancelled error.
///
/// By design this type does not implement `std::error::Error` so it cannot be put as the root
/// cause of `std::io::Error` or `anyhow::Error`. It should never need to be exposed out of this
/// crate.
///
/// It exists to implement permit acquisition for `{Download,TimeTravel}Error`- and `anyhow::Error`-returning
/// operations, ensuring that cancellations get converted to the proper error type with just `?`.
#[derive(Debug)]
pub(crate) struct Cancelled;
impl From<Cancelled> for anyhow::Error {
fn from(_: Cancelled) -> Self {
anyhow::Error::new(TimeoutOrCancel::Cancel)
}
}
impl From<Cancelled> for TimeTravelError {
fn from(_: Cancelled) -> Self {
TimeTravelError::Cancelled
}
}
impl From<Cancelled> for TimeoutOrCancel {
fn from(_: Cancelled) -> Self {
TimeoutOrCancel::Cancel
}
}
impl From<Cancelled> for DownloadError {
fn from(_: Cancelled) -> Self {
DownloadError::Cancelled
}
}
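The point of this family of `From<Cancelled>` impls is that a permit-acquisition site stays a one-liner: whatever error type the surrounding function returns, a plain `?` converts `Cancelled` into the right variant. A sketch assuming the `S3Bucket::permit` helper that appears later in this diff:
// In a Result<_, DownloadError> function the `?` turns Cancelled into
// DownloadError::Cancelled; in an anyhow::Result function it would instead
// become an anyhow::Error rooted in TimeoutOrCancel::Cancel.
async fn get_blob(
    bucket: &S3Bucket,
    cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
    let _permit = bucket.permit(RequestKind::Get, cancel).await?;
    // ... issue the GET while holding the permit ...
    Err(DownloadError::NotFound) // placeholder body for this sketch
}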
/// This type is used as the root cause for timeouts and cancellations in `anyhow::Error`-returning
/// RemoteStorage methods.
///
/// For use with `utils::backoff::retry` and `anyhow::Error`-returning operations there is the
/// `TimeoutOrCancel::caused_by_cancel` method to query for "proper form" errors.
#[derive(Debug)]
pub enum TimeoutOrCancel {
Timeout,
Cancel,
}
impl std::fmt::Display for TimeoutOrCancel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use TimeoutOrCancel::*;
match self {
Timeout => write!(f, "timeout"),
Cancel => write!(f, "cancel"),
}
}
}
impl std::error::Error for TimeoutOrCancel {}
impl TimeoutOrCancel {
pub fn caused(error: &anyhow::Error) -> Option<&Self> {
error.root_cause().downcast_ref()
}
/// Returns true if the error was caused by [`TimeoutOrCancel::Cancel`].
pub fn caused_by_cancel(error: &anyhow::Error) -> bool {
Self::caused(error).is_some_and(Self::is_cancel)
}
pub fn is_cancel(&self) -> bool {
matches!(self, TimeoutOrCancel::Cancel)
}
pub fn is_timeout(&self) -> bool {
matches!(self, TimeoutOrCancel::Timeout)
}
}
/// This conversion is used when [`crate::support::DownloadStream`] notices a cancellation or
/// timeout to wrap it in an `std::io::Error`.
impl From<TimeoutOrCancel> for std::io::Error {
fn from(value: TimeoutOrCancel) -> Self {
let e = DownloadError::from(value);
std::io::Error::other(e)
}
}
impl From<TimeoutOrCancel> for DownloadError {
fn from(value: TimeoutOrCancel) -> Self {
use TimeoutOrCancel::*;
match value {
Timeout => DownloadError::Timeout,
Cancel => DownloadError::Cancelled,
}
}
}
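Because the write-path methods return `anyhow::Error`, callers classify failures by downcasting the root cause rather than by matching on a concrete error type. A minimal sketch:
// Classify an anyhow::Error returned by e.g. RemoteStorage::delete.
fn classify(err: &anyhow::Error) -> &'static str {
    match TimeoutOrCancel::caused(err) {
        Some(TimeoutOrCancel::Cancel) => "cancelled (shutting down), do not retry",
        Some(TimeoutOrCancel::Timeout) => "timed out, a retry may help",
        None => "other failure",
    }
}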

View File

@@ -10,7 +10,6 @@
#![deny(clippy::undocumented_unsafe_blocks)]
mod azure_blob;
mod error;
mod local_fs;
mod s3_bucket;
mod simulate_failures;
@@ -22,7 +21,7 @@ use std::{
num::{NonZeroU32, NonZeroUsize},
pin::Pin,
sync::Arc,
time::{Duration, SystemTime},
time::SystemTime,
};
use anyhow::{bail, Context};
@@ -42,8 +41,6 @@ pub use self::{
};
use s3_bucket::RequestKind;
pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
/// Currently, sync happens with AWS S3, that has two limits on requests per second:
/// ~200 RPS for IAM services
/// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
@@ -161,10 +158,9 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn list_prefixes(
&self,
prefix: Option<&RemotePath>,
cancel: &CancellationToken,
) -> Result<Vec<RemotePath>, DownloadError> {
let result = self
.list(prefix, ListingMode::WithDelimiter, None, cancel)
.list(prefix, ListingMode::WithDelimiter, None)
.await?
.prefixes;
Ok(result)
@@ -186,10 +182,9 @@ pub trait RemoteStorage: Send + Sync + 'static {
&self,
prefix: Option<&RemotePath>,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Vec<RemotePath>, DownloadError> {
let result = self
.list(prefix, ListingMode::NoDelimiter, max_keys, cancel)
.list(prefix, ListingMode::NoDelimiter, max_keys)
.await?
.keys;
Ok(result)
@@ -200,13 +195,9 @@ pub trait RemoteStorage: Send + Sync + 'static {
prefix: Option<&RemotePath>,
_mode: ListingMode,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Listing, DownloadError>;
/// Streams the local file contents into the remote storage entry.
///
/// If the operation fails because of timeout or cancellation, the root cause of the error will be
/// set to `TimeoutOrCancel`.
async fn upload(
&self,
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
@@ -215,61 +206,27 @@ pub trait RemoteStorage: Send + Sync + 'static {
data_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()>;
/// Streams the remote storage entry contents.
///
/// The returned download stream will obey initial timeout and cancellation signal by erroring
/// on whichever happens first. Only one of the reasons will fail the stream, which is usually
/// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
///
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download(
&self,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents.
///
/// The returned download stream will obey initial timeout and cancellation signal by erroring
/// on whichever happens first. Only one of the reasons will fail the stream, which is usually
/// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
///
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
/// Delete a single path from remote storage.
///
/// If the operation fails because of timeout or cancellation, the root cause of the error will be
/// set to `TimeoutOrCancel`. In that situation it is unknown whether the deletion went through.
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()>;
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
/// Delete multiple paths from remote storage.
///
/// If the operation fails because of timeout or cancellation, the root cause of the error will be
/// set to `TimeoutOrCancel`. In that situation it is unknown which deletions, if any, went
/// through.
async fn delete_objects<'a>(
&self,
paths: &'a [RemotePath],
cancel: &CancellationToken,
) -> anyhow::Result<()>;
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()>;
/// Copy a remote object inside a bucket from one path to another.
async fn copy(
&self,
from: &RemotePath,
to: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()>;
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()>;
/// Resets the content of everything with the given prefix to the given state
async fn time_travel_recover(
@@ -281,13 +238,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
) -> Result<(), TimeTravelError>;
}
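As a caller-side illustration of the upload contract documented above, a sketch using the cancellation-aware signatures from this diff; the function name, the `tracing` call and the single-chunk stream are assumptions, not code from the repository:
async fn upload_one(
    storage: &GenericRemoteStorage,
    body: bytes::Bytes,
    to: &RemotePath,
    cancel: &CancellationToken,
) -> anyhow::Result<()> {
    let len = body.len();
    let stream = futures::stream::once(futures::future::ready(std::io::Result::Ok(body)));
    if let Err(e) = storage.upload(stream, len, to, None, cancel).await {
        if TimeoutOrCancel::caused_by_cancel(&e) {
            // shutdown in progress: propagate without retrying
            tracing::info!("upload cancelled");
        }
        return Err(e);
    }
    Ok(())
}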
/// DownloadStream is sensitive to the timeout and cancellation used with the original
/// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
/// with `tokio::io::copy_buf`.
// This has 'static because safekeepers do not use cancellation tokens (yet)
pub type DownloadStream =
Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>>;
pub type DownloadStream = Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Unpin + Send + Sync>>;
pub struct Download {
pub download_stream: DownloadStream,
/// The last time the file was modified (`last-modified` HTTP header)
@@ -306,6 +257,86 @@ impl Debug for Download {
}
}
#[derive(Debug)]
pub enum DownloadError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown.
Cancelled,
/// The file was found in the remote storage, but the download failed.
Other(anyhow::Error),
}
impl std::fmt::Display for DownloadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DownloadError::BadInput(e) => {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
}
}
}
impl std::error::Error for DownloadError {}
impl DownloadError {
/// Returns true if the error should not be retried with backoff
pub fn is_permanent(&self) -> bool {
use DownloadError::*;
match self {
BadInput(_) => true,
NotFound => true,
Cancelled => true,
Other(_) => false,
}
}
}
#[derive(Debug)]
pub enum TimeTravelError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The used remote storage does not have time travel recovery implemented
Unimplemented,
/// The number of versions/deletion markers is above our limit.
TooManyVersions,
/// A cancellation token aborted the process, typically during
/// request closure or process shutdown.
Cancelled,
/// Other errors
Other(anyhow::Error),
}
impl std::fmt::Display for TimeTravelError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TimeTravelError::BadInput(e) => {
write!(
f,
"Failed to time travel recover a prefix due to user input: {e}"
)
}
TimeTravelError::Unimplemented => write!(
f,
"time travel recovery is not implemented for the current storage backend"
),
TimeTravelError::Cancelled => write!(f, "Cancelled, shutting down"),
TimeTravelError::TooManyVersions => {
write!(f, "Number of versions/delete markers above limit")
}
TimeTravelError::Other(e) => write!(f, "Failed to time travel recover a prefix: {e:?}"),
}
}
}
impl std::error::Error for TimeTravelError {}
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
#[derive(Clone)]
@@ -323,13 +354,12 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
prefix: Option<&RemotePath>,
mode: ListingMode,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> anyhow::Result<Listing, DownloadError> {
match self {
Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,
Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,
Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,
Self::LocalFs(s) => s.list(prefix, mode, max_keys).await,
Self::AwsS3(s) => s.list(prefix, mode, max_keys).await,
Self::AzureBlob(s) => s.list(prefix, mode, max_keys).await,
Self::Unreliable(s) => s.list(prefix, mode, max_keys).await,
}
}
@@ -342,13 +372,12 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
&self,
folder: Option<&RemotePath>,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Vec<RemotePath>, DownloadError> {
match self {
Self::LocalFs(s) => s.list_files(folder, max_keys, cancel).await,
Self::AwsS3(s) => s.list_files(folder, max_keys, cancel).await,
Self::AzureBlob(s) => s.list_files(folder, max_keys, cancel).await,
Self::Unreliable(s) => s.list_files(folder, max_keys, cancel).await,
Self::LocalFs(s) => s.list_files(folder, max_keys).await,
Self::AwsS3(s) => s.list_files(folder, max_keys).await,
Self::AzureBlob(s) => s.list_files(folder, max_keys).await,
Self::Unreliable(s) => s.list_files(folder, max_keys).await,
}
}
@@ -358,43 +387,36 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
pub async fn list_prefixes(
&self,
prefix: Option<&RemotePath>,
cancel: &CancellationToken,
) -> Result<Vec<RemotePath>, DownloadError> {
match self {
Self::LocalFs(s) => s.list_prefixes(prefix, cancel).await,
Self::AwsS3(s) => s.list_prefixes(prefix, cancel).await,
Self::AzureBlob(s) => s.list_prefixes(prefix, cancel).await,
Self::Unreliable(s) => s.list_prefixes(prefix, cancel).await,
Self::LocalFs(s) => s.list_prefixes(prefix).await,
Self::AwsS3(s) => s.list_prefixes(prefix).await,
Self::AzureBlob(s) => s.list_prefixes(prefix).await,
Self::Unreliable(s) => s.list_prefixes(prefix).await,
}
}
/// See [`RemoteStorage::upload`]
pub async fn upload(
&self,
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
data_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
match self {
Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata).await,
Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata).await,
Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata).await,
Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata).await,
}
}
pub async fn download(
&self,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
pub async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => s.download(from, cancel).await,
Self::AwsS3(s) => s.download(from, cancel).await,
Self::AzureBlob(s) => s.download(from, cancel).await,
Self::Unreliable(s) => s.download(from, cancel).await,
Self::LocalFs(s) => s.download(from).await,
Self::AwsS3(s) => s.download(from).await,
Self::AzureBlob(s) => s.download(from).await,
Self::Unreliable(s) => s.download(from).await,
}
}
@@ -403,72 +425,54 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
s.download_byte_range(from, start_inclusive, end_exclusive)
.await
}
Self::AwsS3(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
s.download_byte_range(from, start_inclusive, end_exclusive)
.await
}
Self::AzureBlob(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
s.download_byte_range(from, start_inclusive, end_exclusive)
.await
}
Self::Unreliable(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
s.download_byte_range(from, start_inclusive, end_exclusive)
.await
}
}
}
/// See [`RemoteStorage::delete`]
pub async fn delete(
&self,
path: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
pub async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
match self {
Self::LocalFs(s) => s.delete(path, cancel).await,
Self::AwsS3(s) => s.delete(path, cancel).await,
Self::AzureBlob(s) => s.delete(path, cancel).await,
Self::Unreliable(s) => s.delete(path, cancel).await,
Self::LocalFs(s) => s.delete(path).await,
Self::AwsS3(s) => s.delete(path).await,
Self::AzureBlob(s) => s.delete(path).await,
Self::Unreliable(s) => s.delete(path).await,
}
}
/// See [`RemoteStorage::delete_objects`]
pub async fn delete_objects(
&self,
paths: &[RemotePath],
cancel: &CancellationToken,
) -> anyhow::Result<()> {
pub async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
match self {
Self::LocalFs(s) => s.delete_objects(paths, cancel).await,
Self::AwsS3(s) => s.delete_objects(paths, cancel).await,
Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,
Self::Unreliable(s) => s.delete_objects(paths, cancel).await,
Self::LocalFs(s) => s.delete_objects(paths).await,
Self::AwsS3(s) => s.delete_objects(paths).await,
Self::AzureBlob(s) => s.delete_objects(paths).await,
Self::Unreliable(s) => s.delete_objects(paths).await,
}
}
/// See [`RemoteStorage::copy`]
pub async fn copy_object(
&self,
from: &RemotePath,
to: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
pub async fn copy_object(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()> {
match self {
Self::LocalFs(s) => s.copy(from, to, cancel).await,
Self::AwsS3(s) => s.copy(from, to, cancel).await,
Self::AzureBlob(s) => s.copy(from, to, cancel).await,
Self::Unreliable(s) => s.copy(from, to, cancel).await,
Self::LocalFs(s) => s.copy(from, to).await,
Self::AwsS3(s) => s.copy(from, to).await,
Self::AzureBlob(s) => s.copy(from, to).await,
Self::Unreliable(s) => s.copy(from, to).await,
}
}
/// See [`RemoteStorage::time_travel_recover`].
pub async fn time_travel_recover(
&self,
prefix: Option<&RemotePath>,
@@ -499,11 +503,10 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
impl GenericRemoteStorage {
pub fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
let timeout = storage_config.timeout;
Ok(match &storage_config.storage {
RemoteStorageKind::LocalFs(path) => {
info!("Using fs root '{path}' as a remote storage");
Self::LocalFs(LocalFs::new(path.clone(), timeout)?)
RemoteStorageKind::LocalFs(root) => {
info!("Using fs root '{root}' as a remote storage");
Self::LocalFs(LocalFs::new(root.clone())?)
}
RemoteStorageKind::AwsS3(s3_config) => {
// The profile and access key id are only printed here for debugging purposes,
@@ -513,12 +516,12 @@ impl GenericRemoteStorage {
std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout)?))
Self::AwsS3(Arc::new(S3Bucket::new(s3_config)?))
}
RemoteStorageKind::AzureContainer(azure_config) => {
info!("Using azure container '{}' in region '{}' as a remote storage, prefix in container: '{:?}'",
azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
Self::AzureBlob(Arc::new(AzureBlobStorage::new(azure_config, timeout)?))
Self::AzureBlob(Arc::new(AzureBlobStorage::new(azure_config)?))
}
})
}
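Wiring it together, a hedged sketch of how a service builds its storage client from a parsed TOML document (the function name is an assumption):
fn build_storage(toml: &toml_edit::Document) -> anyhow::Result<GenericRemoteStorage> {
    let config = RemoteStorageConfig::from_toml(toml.as_item())?
        .ok_or_else(|| anyhow::anyhow!("remote_storage section is missing"))?;
    // On the timeout-aware side of this diff, config.timeout is what
    // from_config hands down to the LocalFs/S3/Azure constructors.
    GenericRemoteStorage::from_config(&config)
}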
@@ -527,15 +530,18 @@ impl GenericRemoteStorage {
Self::Unreliable(Arc::new(UnreliableWrapper::new(s, fail_first)))
}
/// See [`RemoteStorage::upload`], which this method calls with `None` as metadata.
/// Takes storage object contents and its size and uploads to remote storage,
/// mapping `from_path` to the corresponding remote object id in the storage.
///
/// The storage object does not have to be present on the `from_path`,
/// this path is used for the remote object id conversion only.
pub async fn upload_storage_object(
&self,
from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
from_size_bytes: usize,
to: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
self.upload(from, from_size_bytes, to, None, cancel)
self.upload(from, from_size_bytes, to, None)
.await
.with_context(|| {
format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
@@ -548,11 +554,10 @@ impl GenericRemoteStorage {
&self,
byte_range: Option<(u64, Option<u64>)>,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, cancel).await,
Some((start, end)) => self.download_byte_range(from, start, end).await,
None => self.download(from).await,
}
}
}
@@ -567,9 +572,6 @@ pub struct StorageMetadata(HashMap<String, String>);
pub struct RemoteStorageConfig {
/// The storage connection configuration.
pub storage: RemoteStorageKind,
/// A common timeout enforced for all requests after concurrency limiter permit has been
/// acquired.
pub timeout: Duration,
}
/// A kind of a remote storage to connect to, with its connection configuration.
@@ -654,8 +656,6 @@ impl Debug for AzureConfig {
}
impl RemoteStorageConfig {
pub const DEFAULT_TIMEOUT: Duration = std::time::Duration::from_secs(120);
pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<Option<RemoteStorageConfig>> {
let local_path = toml.get("local_path");
let bucket_name = toml.get("bucket_name");
@@ -685,27 +685,6 @@ impl RemoteStorageConfig {
.map(|endpoint| parse_toml_string("endpoint", endpoint))
.transpose()?;
let timeout = toml
.get("timeout")
.map(|timeout| {
timeout
.as_str()
.ok_or_else(|| anyhow::Error::msg("timeout was not a string"))
})
.transpose()
.and_then(|timeout| {
timeout
.map(humantime::parse_duration)
.transpose()
.map_err(anyhow::Error::new)
})
.context("parse timeout")?
.unwrap_or(Self::DEFAULT_TIMEOUT);
if timeout < Duration::from_secs(1) {
bail!("timeout was specified as {timeout:?} which is too low");
}
let storage = match (
local_path,
bucket_name,
@@ -767,7 +746,7 @@ impl RemoteStorageConfig {
}
};
Ok(Some(RemoteStorageConfig { storage, timeout }))
Ok(Some(RemoteStorageConfig { storage }))
}
}
@@ -863,24 +842,4 @@ mod tests {
let err = RemotePath::new(Utf8Path::new("/")).expect_err("Should fail on absolute paths");
assert_eq!(err.to_string(), "Path \"/\" is not relative");
}
#[test]
fn parse_localfs_config_with_timeout() {
let input = "local_path = '.'
timeout = '5s'";
let toml = input.parse::<toml_edit::Document>().unwrap();
let config = RemoteStorageConfig::from_toml(toml.as_item())
.unwrap()
.expect("it exists");
assert_eq!(
config,
RemoteStorageConfig {
storage: RemoteStorageKind::LocalFs(Utf8PathBuf::from(".")),
timeout: Duration::from_secs(5)
}
);
}
}
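A hedged companion to the test above, sketching the two validation behaviours of the timeout parsing shown earlier: sub-second values are rejected, and a missing key falls back to the 120 s default.
#[test]
fn timeout_validation_sketch() {
    // Below the 1s minimum enforced in from_toml: parsing should fail.
    let toml = "local_path = '.'\ntimeout = '500ms'"
        .parse::<toml_edit::Document>()
        .unwrap();
    assert!(RemoteStorageConfig::from_toml(toml.as_item()).is_err());
    // No timeout key: DEFAULT_TIMEOUT (120s) is used.
    let toml = "local_path = '.'".parse::<toml_edit::Document>().unwrap();
    let config = RemoteStorageConfig::from_toml(toml.as_item())
        .unwrap()
        .expect("config exists");
    assert_eq!(config.timeout, RemoteStorageConfig::DEFAULT_TIMEOUT);
}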

View File

@@ -5,12 +5,7 @@
//! volume is mounted to the local FS.
use std::{
borrow::Cow,
future::Future,
io::ErrorKind,
num::NonZeroU32,
pin::Pin,
time::{Duration, SystemTime},
borrow::Cow, future::Future, io::ErrorKind, num::NonZeroU32, pin::Pin, time::SystemTime,
};
use anyhow::{bail, ensure, Context};
@@ -25,9 +20,7 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use tracing::*;
use utils::{crashsafe::path_with_suffix_extension, fs_ext::is_directory_empty};
use crate::{
Download, DownloadError, Listing, ListingMode, RemotePath, TimeTravelError, TimeoutOrCancel,
};
use crate::{Download, DownloadError, Listing, ListingMode, RemotePath, TimeTravelError};
use super::{RemoteStorage, StorageMetadata};
@@ -36,13 +29,12 @@ const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
#[derive(Debug, Clone)]
pub struct LocalFs {
storage_root: Utf8PathBuf,
timeout: Duration,
}
impl LocalFs {
/// Attempts to create local FS storage, along with its root directory.
/// Storage root will be created (if does not exist) and transformed into an absolute path (if passed as relative).
pub fn new(mut storage_root: Utf8PathBuf, timeout: Duration) -> anyhow::Result<Self> {
pub fn new(mut storage_root: Utf8PathBuf) -> anyhow::Result<Self> {
if !storage_root.exists() {
std::fs::create_dir_all(&storage_root).with_context(|| {
format!("Failed to create all directories in the given root path {storage_root:?}")
@@ -54,10 +46,7 @@ impl LocalFs {
})?;
}
Ok(Self {
storage_root,
timeout,
})
Ok(Self { storage_root })
}
// mirrors S3Bucket::s3_object_to_relative_path
@@ -168,14 +157,80 @@ impl LocalFs {
Ok(files)
}
}
async fn upload0(
impl RemoteStorage for LocalFs {
async fn list(
&self,
prefix: Option<&RemotePath>,
mode: ListingMode,
max_keys: Option<NonZeroU32>,
) -> Result<Listing, DownloadError> {
let mut result = Listing::default();
if let ListingMode::NoDelimiter = mode {
let keys = self
.list_recursive(prefix)
.await
.map_err(DownloadError::Other)?;
result.keys = keys
.into_iter()
.filter(|k| {
let path = k.with_base(&self.storage_root);
!path.is_dir()
})
.collect();
if let Some(max_keys) = max_keys {
result.keys.truncate(max_keys.get() as usize);
}
return Ok(result);
}
let path = match prefix {
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
None => Cow::Borrowed(&self.storage_root),
};
let prefixes_to_filter = get_all_files(path.as_ref(), false)
.await
.map_err(DownloadError::Other)?;
// filter out empty directories to mirror s3 behavior.
for prefix in prefixes_to_filter {
if prefix.is_dir()
&& is_directory_empty(&prefix)
.await
.map_err(DownloadError::Other)?
{
continue;
}
let stripped = prefix
.strip_prefix(&self.storage_root)
.context("Failed to strip prefix")
.and_then(RemotePath::new)
.expect(
"We list files for storage root, hence should be able to remote the prefix",
);
if prefix.is_dir() {
result.prefixes.push(stripped);
} else {
result.keys.push(stripped);
}
}
Ok(result)
}
async fn upload(
&self,
data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,
data_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let target_file_path = to.with_base(&self.storage_root);
create_target_directory(&target_file_path).await?;
@@ -210,26 +265,9 @@ impl LocalFs {
let mut buffer_to_read = data.take(from_size_bytes);
// alternatively we could just write the bytes to a file, but local_fs is a testing utility
let copy = io::copy_buf(&mut buffer_to_read, &mut destination);
let bytes_read = tokio::select! {
biased;
_ = cancel.cancelled() => {
let file = destination.into_inner();
// wait for the inflight operation(s) to complete so that there could be a next
// attempt right away and our writes are not directed to their file.
file.into_std().await;
// TODO: leave the temp or not? leaving is probably less racy. enabled truncate at
// least.
fs::remove_file(temp_file_path).await.context("remove temp_file_path after cancellation or timeout")?;
return Err(TimeoutOrCancel::Cancel.into());
}
read = copy => read,
};
let bytes_read =
bytes_read.with_context(|| {
let bytes_read = io::copy_buf(&mut buffer_to_read, &mut destination)
.await
.with_context(|| {
format!(
"Failed to upload file (write temp) to the local storage at '{temp_file_path}'",
)
@@ -261,9 +299,6 @@ impl LocalFs {
})?;
if let Some(storage_metadata) = metadata {
// FIXME: we must not be using metadata much, since this would forget the old metadata
// for new writes? or perhaps metadata is sticky; could consider removing if it's never
// used.
let storage_metadata_path = storage_metadata_path(&target_file_path);
fs::write(
&storage_metadata_path,
@@ -280,131 +315,8 @@ impl LocalFs {
Ok(())
}
}
impl RemoteStorage for LocalFs {
async fn list(
&self,
prefix: Option<&RemotePath>,
mode: ListingMode,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Listing, DownloadError> {
let op = async {
let mut result = Listing::default();
if let ListingMode::NoDelimiter = mode {
let keys = self
.list_recursive(prefix)
.await
.map_err(DownloadError::Other)?;
result.keys = keys
.into_iter()
.filter(|k| {
let path = k.with_base(&self.storage_root);
!path.is_dir()
})
.collect();
if let Some(max_keys) = max_keys {
result.keys.truncate(max_keys.get() as usize);
}
return Ok(result);
}
let path = match prefix {
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
None => Cow::Borrowed(&self.storage_root),
};
let prefixes_to_filter = get_all_files(path.as_ref(), false)
.await
.map_err(DownloadError::Other)?;
// filter out empty directories to mirror s3 behavior.
for prefix in prefixes_to_filter {
if prefix.is_dir()
&& is_directory_empty(&prefix)
.await
.map_err(DownloadError::Other)?
{
continue;
}
let stripped = prefix
.strip_prefix(&self.storage_root)
.context("Failed to strip prefix")
.and_then(RemotePath::new)
.expect(
"We list files for storage root, hence should be able to remote the prefix",
);
if prefix.is_dir() {
result.prefixes.push(stripped);
} else {
result.keys.push(stripped);
}
}
Ok(result)
};
let timeout = async {
tokio::time::sleep(self.timeout).await;
Err(DownloadError::Timeout)
};
let cancelled = async {
cancel.cancelled().await;
Err(DownloadError::Cancelled)
};
tokio::select! {
res = op => res,
res = timeout => res,
res = cancelled => res,
}
}
async fn upload(
&self,
data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,
data_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let cancel = cancel.child_token();
let op = self.upload0(data, data_size_bytes, to, metadata, &cancel);
let mut op = std::pin::pin!(op);
// race the upload0 to the timeout; if it goes over, do a graceful shutdown
let (res, timeout) = tokio::select! {
res = &mut op => (res, false),
_ = tokio::time::sleep(self.timeout) => {
cancel.cancel();
(op.await, true)
}
};
match res {
Err(e) if timeout && TimeoutOrCancel::caused_by_cancel(&e) => {
// we caused this cancel (or they happened simultaneously) -- swap it out to
// Timeout
Err(TimeoutOrCancel::Timeout.into())
}
res => res,
}
}
async fn download(
&self,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
let source = ReaderStream::new(
@@ -422,10 +334,6 @@ impl RemoteStorage for LocalFs {
.read_storage_metadata(&target_path)
.await
.map_err(DownloadError::Other)?;
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
Ok(Download {
metadata,
last_modified: None,
@@ -442,7 +350,6 @@ impl RemoteStorage for LocalFs {
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
if end_exclusive <= start_inclusive {
@@ -484,9 +391,6 @@ impl RemoteStorage for LocalFs {
let source = source.take(end_exclusive.unwrap_or(len) - start_inclusive);
let source = ReaderStream::new(source);
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
Ok(Download {
metadata,
last_modified: None,
@@ -498,7 +402,7 @@ impl RemoteStorage for LocalFs {
}
}
async fn delete(&self, path: &RemotePath, _cancel: &CancellationToken) -> anyhow::Result<()> {
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
let file_path = path.with_base(&self.storage_root);
match fs::remove_file(&file_path).await {
Ok(()) => Ok(()),
@@ -510,23 +414,14 @@ impl RemoteStorage for LocalFs {
}
}
async fn delete_objects<'a>(
&self,
paths: &'a [RemotePath],
cancel: &CancellationToken,
) -> anyhow::Result<()> {
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
for path in paths {
self.delete(path, cancel).await?
self.delete(path).await?
}
Ok(())
}
async fn copy(
&self,
from: &RemotePath,
to: &RemotePath,
_cancel: &CancellationToken,
) -> anyhow::Result<()> {
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()> {
let from_path = from.with_base(&self.storage_root);
let to_path = to.with_base(&self.storage_root);
create_target_directory(&to_path).await?;
@@ -540,6 +435,7 @@ impl RemoteStorage for LocalFs {
Ok(())
}
#[allow(clippy::diverging_sub_expression)]
async fn time_travel_recover(
&self,
_prefix: Option<&RemotePath>,
@@ -633,9 +529,8 @@ mod fs_tests {
remote_storage_path: &RemotePath,
expected_metadata: Option<&StorageMetadata>,
) -> anyhow::Result<String> {
let cancel = CancellationToken::new();
let download = storage
.download(remote_storage_path, &cancel)
.download(remote_storage_path)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
@@ -650,16 +545,16 @@ mod fs_tests {
#[tokio::test]
async fn upload_file() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let target_path_1 = upload_dummy_file(&storage, "upload_1", None, &cancel).await?;
let target_path_1 = upload_dummy_file(&storage, "upload_1", None).await?;
assert_eq!(
storage.list_all().await?,
vec![target_path_1.clone()],
"Should list a single file after first upload"
);
let target_path_2 = upload_dummy_file(&storage, "upload_2", None, &cancel).await?;
let target_path_2 = upload_dummy_file(&storage, "upload_2", None).await?;
assert_eq!(
list_files_sorted(&storage).await?,
vec![target_path_1.clone(), target_path_2.clone()],
@@ -671,7 +566,7 @@ mod fs_tests {
#[tokio::test]
async fn upload_file_negatives() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let id = RemotePath::new(Utf8Path::new("dummy"))?;
let content = Bytes::from_static(b"12345");
@@ -680,34 +575,34 @@ mod fs_tests {
// Check that you get an error if the size parameter doesn't match the actual
// size of the stream.
storage
.upload(content(), 0, &id, None, &cancel)
.upload(content(), 0, &id, None)
.await
.expect_err("upload with zero size succeeded");
storage
.upload(content(), 4, &id, None, &cancel)
.upload(content(), 4, &id, None)
.await
.expect_err("upload with too short size succeeded");
storage
.upload(content(), 6, &id, None, &cancel)
.upload(content(), 6, &id, None)
.await
.expect_err("upload with too large size succeeded");
// Correct size is 5, this should succeed.
storage.upload(content(), 5, &id, None, &cancel).await?;
storage.upload(content(), 5, &id, None).await?;
Ok(())
}
fn create_storage() -> anyhow::Result<(LocalFs, CancellationToken)> {
fn create_storage() -> anyhow::Result<LocalFs> {
let storage_root = tempdir()?.path().to_path_buf();
LocalFs::new(storage_root, Duration::from_secs(120)).map(|s| (s, CancellationToken::new()))
LocalFs::new(storage_root)
}
#[tokio::test]
async fn download_file() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let contents = read_and_check_metadata(&storage, &upload_target, None).await?;
assert_eq!(
@@ -717,7 +612,7 @@ mod fs_tests {
);
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -726,9 +621,9 @@ mod fs_tests {
#[tokio::test]
async fn download_file_range_positive() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let full_range_download_contents =
read_and_check_metadata(&storage, &upload_target, None).await?;
@@ -742,12 +637,7 @@ mod fs_tests {
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let first_part_download = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&cancel,
)
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
.await?;
assert!(
first_part_download.metadata.is_none(),
@@ -765,7 +655,6 @@ mod fs_tests {
&upload_target,
first_part_local.len() as u64,
Some((first_part_local.len() + second_part_local.len()) as u64),
&cancel,
)
.await?;
assert!(
@@ -780,7 +669,7 @@ mod fs_tests {
);
let suffix_bytes = storage
.download_byte_range(&upload_target, 13, None, &cancel)
.download_byte_range(&upload_target, 13, None)
.await?
.download_stream;
let suffix_bytes = aggregate(suffix_bytes).await?;
@@ -788,7 +677,7 @@ mod fs_tests {
assert_eq!(upload_name, suffix);
let all_bytes = storage
.download_byte_range(&upload_target, 0, None, &cancel)
.download_byte_range(&upload_target, 0, None)
.await?
.download_stream;
let all_bytes = aggregate(all_bytes).await?;
@@ -800,9 +689,9 @@ mod fs_tests {
#[tokio::test]
async fn download_file_range_negative() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let start = 1_000_000_000;
let end = start + 1;
@@ -811,7 +700,6 @@ mod fs_tests {
&upload_target,
start,
Some(end), // exclusive end
&cancel,
)
.await
{
@@ -828,7 +716,7 @@ mod fs_tests {
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end), &cancel)
.download_byte_range(&upload_target, start, Some(end))
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
@@ -845,15 +733,15 @@ mod fs_tests {
#[tokio::test]
async fn delete_file() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
storage.delete(&upload_target, &cancel).await?;
storage.delete(&upload_target).await?;
assert!(storage.list_all().await?.is_empty());
storage
.delete(&upload_target, &cancel)
.delete(&upload_target)
.await
.expect("Should allow deleting non-existing storage files");
@@ -862,14 +750,14 @@ mod fs_tests {
#[tokio::test]
async fn file_with_metadata() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let storage = create_storage()?;
let upload_name = "upload_1";
let metadata = StorageMetadata(HashMap::from([
("one".to_string(), "1".to_string()),
("two".to_string(), "2".to_string()),
]));
let upload_target =
upload_dummy_file(&storage, upload_name, Some(metadata.clone()), &cancel).await?;
upload_dummy_file(&storage, upload_name, Some(metadata.clone())).await?;
let full_range_download_contents =
read_and_check_metadata(&storage, &upload_target, Some(&metadata)).await?;
@@ -883,12 +771,7 @@ mod fs_tests {
let (first_part_local, _) = uploaded_bytes.split_at(3);
let partial_download_with_metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&cancel,
)
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
.await?;
let first_part_remote = aggregate(partial_download_with_metadata.download_stream).await?;
assert_eq!(
@@ -909,20 +792,16 @@ mod fs_tests {
#[tokio::test]
async fn list() -> anyhow::Result<()> {
// No delimiter: should recursively list everything
let (storage, cancel) = create_storage()?;
let child = upload_dummy_file(&storage, "grandparent/parent/child", None, &cancel).await?;
let uncle = upload_dummy_file(&storage, "grandparent/uncle", None, &cancel).await?;
let storage = create_storage()?;
let child = upload_dummy_file(&storage, "grandparent/parent/child", None).await?;
let uncle = upload_dummy_file(&storage, "grandparent/uncle", None).await?;
let listing = storage
.list(None, ListingMode::NoDelimiter, None, &cancel)
.await?;
let listing = storage.list(None, ListingMode::NoDelimiter, None).await?;
assert!(listing.prefixes.is_empty());
assert_eq!(listing.keys, [uncle.clone(), child.clone()].to_vec());
// Delimiter: should only go one deep
let listing = storage
.list(None, ListingMode::WithDelimiter, None, &cancel)
.await?;
let listing = storage.list(None, ListingMode::WithDelimiter, None).await?;
assert_eq!(
listing.prefixes,
@@ -936,7 +815,6 @@ mod fs_tests {
Some(&RemotePath::from_string("timelines/some_timeline/grandparent").unwrap()),
ListingMode::WithDelimiter,
None,
&cancel,
)
.await?;
assert_eq!(
@@ -949,75 +827,10 @@ mod fs_tests {
Ok(())
}
#[tokio::test]
async fn overwrite_shorter_file() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let path = RemotePath::new("does/not/matter/file".into())?;
let body = Bytes::from_static(b"long file contents is long");
{
let len = body.len();
let body =
futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);
let shorter = Bytes::from_static(b"shorter body");
{
let len = shorter.len();
let body =
futures::stream::once(futures::future::ready(std::io::Result::Ok(shorter.clone())));
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(shorter, read);
Ok(())
}
#[tokio::test]
async fn cancelled_upload_can_later_be_retried() -> anyhow::Result<()> {
let (storage, cancel) = create_storage()?;
let path = RemotePath::new("does/not/matter/file".into())?;
let body = Bytes::from_static(b"long file contents is long");
{
let len = body.len();
let body =
futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));
let cancel = cancel.child_token();
cancel.cancel();
let e = storage
.upload(body, len, &path, None, &cancel)
.await
.unwrap_err();
assert!(TimeoutOrCancel::caused_by_cancel(&e));
}
{
let len = body.len();
let body =
futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));
storage.upload(body, len, &path, None, &cancel).await?;
}
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
assert_eq!(body, read);
Ok(())
}
async fn upload_dummy_file(
storage: &LocalFs,
name: &str,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<RemotePath> {
let from_path = storage
.storage_root
@@ -1039,9 +852,7 @@ mod fs_tests {
let file = tokio_util::io::ReaderStream::new(file);
storage
.upload(file, size, &relative_path, metadata, cancel)
.await?;
storage.upload(file, size, &relative_path, metadata).await?;
Ok(relative_path)
}

View File

@@ -11,7 +11,7 @@ use std::{
pin::Pin,
sync::Arc,
task::{Context, Poll},
time::{Duration, SystemTime},
time::SystemTime,
};
use anyhow::{anyhow, Context as _};
@@ -46,9 +46,9 @@ use utils::backoff;
use super::StorageMetadata;
use crate::{
error::Cancelled, support::PermitCarrying, ConcurrencyLimiter, Download, DownloadError,
Listing, ListingMode, RemotePath, RemoteStorage, S3Config, TimeTravelError, TimeoutOrCancel,
MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
support::PermitCarrying, ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode,
RemotePath, RemoteStorage, S3Config, TimeTravelError, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
pub(super) mod metrics;
@@ -63,8 +63,6 @@ pub struct S3Bucket {
prefix_in_bucket: Option<String>,
max_keys_per_list_response: Option<i32>,
concurrency_limiter: ConcurrencyLimiter,
// Per-request timeout. Accessible for tests.
pub timeout: Duration,
}
struct GetObjectRequest {
@@ -74,7 +72,7 @@ struct GetObjectRequest {
}
impl S3Bucket {
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
pub fn new(aws_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
tracing::debug!(
"Creating s3 remote storage for S3 bucket {}",
aws_config.bucket_name
@@ -154,7 +152,6 @@ impl S3Bucket {
max_keys_per_list_response: aws_config.max_keys_per_list_response,
prefix_in_bucket,
concurrency_limiter: ConcurrencyLimiter::new(aws_config.concurrency_limit.get()),
timeout,
})
}
@@ -188,55 +185,40 @@ impl S3Bucket {
}
}
async fn permit(
&self,
kind: RequestKind,
cancel: &CancellationToken,
) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {
async fn permit(&self, kind: RequestKind) -> tokio::sync::SemaphorePermit<'_> {
let started_at = start_counting_cancelled_wait(kind);
let acquire = self.concurrency_limiter.acquire(kind);
let permit = tokio::select! {
permit = acquire => permit.expect("semaphore is never closed"),
_ = cancel.cancelled() => return Err(Cancelled),
};
let permit = self
.concurrency_limiter
.acquire(kind)
.await
.expect("semaphore is never closed");
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.wait_seconds
.observe_elapsed(kind, started_at);
Ok(permit)
permit
}
async fn owned_permit(
&self,
kind: RequestKind,
cancel: &CancellationToken,
) -> Result<tokio::sync::OwnedSemaphorePermit, Cancelled> {
async fn owned_permit(&self, kind: RequestKind) -> tokio::sync::OwnedSemaphorePermit {
let started_at = start_counting_cancelled_wait(kind);
let acquire = self.concurrency_limiter.acquire_owned(kind);
let permit = tokio::select! {
permit = acquire => permit.expect("semaphore is never closed"),
_ = cancel.cancelled() => return Err(Cancelled),
};
let permit = self
.concurrency_limiter
.acquire_owned(kind)
.await
.expect("semaphore is never closed");
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.wait_seconds
.observe_elapsed(kind, started_at);
Ok(permit)
permit
}
async fn download_object(
&self,
request: GetObjectRequest,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
let kind = RequestKind::Get;
let permit = self.owned_permit(kind, cancel).await?;
let permit = self.owned_permit(kind).await;
let started_at = start_measuring_requests(kind);
@@ -246,13 +228,8 @@ impl S3Bucket {
.bucket(request.bucket)
.key(request.key)
.set_range(request.range)
.send();
let get_object = tokio::select! {
res = get_object => res,
_ = tokio::time::sleep(self.timeout) => return Err(DownloadError::Timeout),
_ = cancel.cancelled() => return Err(DownloadError::Cancelled),
};
.send()
.await;
let started_at = ScopeGuard::into_inner(started_at);
@@ -282,10 +259,6 @@ impl S3Bucket {
}
};
// even if there is no timeout left, continue anyway. the caller can decide to ignore
// errors caused by timeouts and cancellation.
let remaining = self.timeout.saturating_sub(started_at.elapsed());
let metadata = object_output.metadata().cloned().map(StorageMetadata);
let etag = object_output.e_tag;
let last_modified = object_output.last_modified.and_then(|t| t.try_into().ok());
@@ -295,9 +268,6 @@ impl S3Bucket {
let body = PermitCarrying::new(permit, body);
let body = TimedDownload::new(started_at, body);
let cancel_or_timeout = crate::support::cancel_or_timeout(remaining, cancel.clone());
let body = crate::support::DownloadStream::new(cancel_or_timeout, body);
Ok(Download {
metadata,
etag,
@@ -308,44 +278,33 @@ impl S3Bucket {
async fn delete_oids(
&self,
_permit: &tokio::sync::SemaphorePermit<'_>,
kind: RequestKind,
delete_objects: &[ObjectIdentifier],
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let kind = RequestKind::Delete;
let mut cancel = std::pin::pin!(cancel.cancelled());
for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE) {
let started_at = start_measuring_requests(kind);
let req = self
let resp = self
.client
.delete_objects()
.bucket(self.bucket_name.clone())
.delete(
Delete::builder()
.set_objects(Some(chunk.to_vec()))
.build()
.context("build request")?,
.build()?,
)
.send();
let resp = tokio::select! {
resp = req => resp,
_ = tokio::time::sleep(self.timeout) => return Err(TimeoutOrCancel::Timeout.into()),
_ = &mut cancel => return Err(TimeoutOrCancel::Cancel.into()),
};
.send()
.await;
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.req_seconds
.observe_elapsed(kind, &resp, started_at);
let resp = resp.context("request deletion")?;
let resp = resp?;
metrics::BUCKET_METRICS
.deleted_objects_total
.inc_by(chunk.len() as u64);
if let Some(errors) = resp.errors {
// Log a bounded number of the errors within the response:
// these requests can carry 1000 keys so logging each one
@@ -361,10 +320,9 @@ impl S3Bucket {
);
}
return Err(anyhow::anyhow!(
"Failed to delete {}/{} objects",
errors.len(),
chunk.len(),
return Err(anyhow::format_err!(
"Failed to delete {} objects",
errors.len()
));
}
}
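Because the loop above already slices the request into MAX_KEYS_PER_DELETE-sized batches, higher layers never need to chunk against the S3 per-request key limit themselves. A small hypothetical call site (the function is not from the repository):
async fn delete_many(
    storage: &GenericRemoteStorage,
    paths: &[RemotePath],
    cancel: &CancellationToken,
) -> anyhow::Result<()> {
    // No manual chunking needed: delete_objects / delete_oids batch the keys
    // into MAX_KEYS_PER_DELETE-sized requests internally.
    storage.delete_objects(paths, cancel).await
}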
@@ -452,7 +410,6 @@ impl RemoteStorage for S3Bucket {
prefix: Option<&RemotePath>,
mode: ListingMode,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Listing, DownloadError> {
let kind = RequestKind::List;
// s3 sdk wants i32
@@ -474,11 +431,10 @@ impl RemoteStorage for S3Bucket {
p
});
let _permit = self.permit(kind, cancel).await?;
let mut continuation_token = None;
loop {
let _guard = self.permit(kind).await;
let started_at = start_measuring_requests(kind);
// min of two Options, returning Some if one is value and another is
@@ -500,15 +456,9 @@ impl RemoteStorage for S3Bucket {
request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
}
let request = request.send();
let response = tokio::select! {
res = request => res,
_ = tokio::time::sleep(self.timeout) => return Err(DownloadError::Timeout),
_ = cancel.cancelled() => return Err(DownloadError::Cancelled),
};
let response = response
let response = request
.send()
.await
.context("Failed to list S3 prefixes")
.map_err(DownloadError::Other);
@@ -561,17 +511,16 @@ impl RemoteStorage for S3Bucket {
from_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let kind = RequestKind::Put;
let _permit = self.permit(kind, cancel).await?;
let _guard = self.permit(kind).await;
let started_at = start_measuring_requests(kind);
let body = Body::wrap_stream(from);
let bytes_stream = ByteStream::new(SdkBody::from_body_0_4(body));
let upload = self
let res = self
.client
.put_object()
.bucket(self.bucket_name.clone())
@@ -579,63 +528,8 @@ impl RemoteStorage for S3Bucket {
.set_metadata(metadata.map(|m| m.0))
.content_length(from_size_bytes.try_into()?)
.body(bytes_stream)
.send();
let upload = tokio::time::timeout(self.timeout, upload);
let res = tokio::select! {
res = upload => res,
_ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),
};
if let Ok(inner) = &res {
// do not incl. timeouts as errors in metrics but cancellations
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.req_seconds
.observe_elapsed(kind, inner, started_at);
}
match res {
Ok(Ok(_put)) => Ok(()),
Ok(Err(sdk)) => Err(sdk.into()),
Err(_timeout) => Err(TimeoutOrCancel::Timeout.into()),
}
}
async fn copy(
&self,
from: &RemotePath,
to: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let kind = RequestKind::Copy;
let _permit = self.permit(kind, cancel).await?;
let timeout = tokio::time::sleep(self.timeout);
let started_at = start_measuring_requests(kind);
// we need to specify bucket_name as a prefix
let copy_source = format!(
"{}/{}",
self.bucket_name,
self.relative_path_to_s3_object(from)
);
let op = self
.client
.copy_object()
.bucket(self.bucket_name.clone())
.key(self.relative_path_to_s3_object(to))
.copy_source(copy_source)
.send();
let res = tokio::select! {
res = op => res,
_ = timeout => return Err(TimeoutOrCancel::Timeout.into()),
_ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),
};
.send()
.await;
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
@@ -647,21 +541,46 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
async fn download(
&self,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()> {
let kind = RequestKind::Copy;
let _guard = self.permit(kind).await;
let started_at = start_measuring_requests(kind);
// we need to specify bucket_name as a prefix
let copy_source = format!(
"{}/{}",
self.bucket_name,
self.relative_path_to_s3_object(from)
);
let res = self
.client
.copy_object()
.bucket(self.bucket_name.clone())
.key(self.relative_path_to_s3_object(to))
.copy_source(copy_source)
.send()
.await;
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.req_seconds
.observe_elapsed(kind, &res, started_at);
res?;
Ok(())
}
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from`
// if prefix is none then download file `from`
self.download_object(
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range: None,
},
cancel,
)
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range: None,
})
.await
}
@@ -670,7 +589,6 @@ impl RemoteStorage for S3Bucket {
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be exclusive
@@ -680,39 +598,31 @@ impl RemoteStorage for S3Bucket {
None => format!("bytes={start_inclusive}-"),
});
self.download_object(
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range,
},
cancel,
)
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range,
})
.await
}
async fn delete_objects<'a>(
&self,
paths: &'a [RemotePath],
cancel: &CancellationToken,
) -> anyhow::Result<()> {
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
let kind = RequestKind::Delete;
let permit = self.permit(kind, cancel).await?;
let _guard = self.permit(kind).await;
let mut delete_objects = Vec::with_capacity(paths.len());
for path in paths {
let obj_id = ObjectIdentifier::builder()
.set_key(Some(self.relative_path_to_s3_object(path)))
.build()
.context("convert path to oid")?;
.build()?;
delete_objects.push(obj_id);
}
self.delete_oids(&permit, &delete_objects, cancel).await
self.delete_oids(kind, &delete_objects).await
}
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
let paths = std::array::from_ref(path);
self.delete_objects(paths, cancel).await
self.delete_objects(paths).await
}
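Every method in this impl, on both sides of the change, starts by taking a permit from `self.permit(...)`, which is what bounds the number of concurrent S3 requests. A rough sketch of that idea with a single `tokio::sync::Semaphore` (the crate's real `permit` also takes a `RequestKind`, so treat this as illustrative only):
use tokio::sync::{Semaphore, SemaphorePermit};
struct ConcurrencyLimiter {
    sem: Semaphore,
}
impl ConcurrencyLimiter {
    fn new(concurrency_limit: usize) -> Self {
        Self {
            sem: Semaphore::new(concurrency_limit),
        }
    }
    // Holding the returned permit for the duration of a request caps how many
    // requests can be in flight at once; dropping it releases the slot.
    async fn permit(&self) -> SemaphorePermit<'_> {
        self.sem
            .acquire()
            .await
            .expect("semaphore is never closed in this sketch")
    }
}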
async fn time_travel_recover(
@@ -723,7 +633,7 @@ impl RemoteStorage for S3Bucket {
cancel: &CancellationToken,
) -> Result<(), TimeTravelError> {
let kind = RequestKind::TimeTravel;
let permit = self.permit(kind, cancel).await?;
let _guard = self.permit(kind).await;
let timestamp = DateTime::from(timestamp);
let done_if_after = DateTime::from(done_if_after);
@@ -737,7 +647,7 @@ impl RemoteStorage for S3Bucket {
let warn_threshold = 3;
let max_retries = 10;
let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);
let is_permanent = |_e: &_| false;
let mut key_marker = None;
let mut version_id_marker = None;
@@ -746,19 +656,15 @@ impl RemoteStorage for S3Bucket {
loop {
let response = backoff::retry(
|| async {
let op = self
.client
self.client
.list_object_versions()
.bucket(self.bucket_name.clone())
.set_prefix(prefix.clone())
.set_key_marker(key_marker.clone())
.set_version_id_marker(version_id_marker.clone())
.send();
tokio::select! {
res = op => res.map_err(|e| TimeTravelError::Other(e.into())),
_ = cancel.cancelled() => Err(TimeTravelError::Cancelled),
}
.send()
.await
.map_err(|e| TimeTravelError::Other(e.into()))
},
is_permanent,
warn_threshold,
@@ -880,18 +786,14 @@ impl RemoteStorage for S3Bucket {
backoff::retry(
|| async {
let op = self
.client
self.client
.copy_object()
.bucket(self.bucket_name.clone())
.key(key)
.copy_source(&source_id)
.send();
tokio::select! {
res = op => res.map_err(|e| TimeTravelError::Other(e.into())),
_ = cancel.cancelled() => Err(TimeTravelError::Cancelled),
}
.send()
.await
.map_err(|e| TimeTravelError::Other(e.into()))
},
is_permanent,
warn_threshold,
@@ -922,18 +824,10 @@ impl RemoteStorage for S3Bucket {
let oid = ObjectIdentifier::builder()
.key(key.to_owned())
.build()
.map_err(|e| TimeTravelError::Other(e.into()))?;
self.delete_oids(&permit, &[oid], cancel)
.map_err(|e| TimeTravelError::Other(anyhow::Error::new(e)))?;
self.delete_oids(kind, &[oid])
.await
.map_err(|e| {
// delete_oid0 will use TimeoutOrCancel
if TimeoutOrCancel::caused_by_cancel(&e) {
TimeTravelError::Cancelled
} else {
TimeTravelError::Other(e)
}
})?;
.map_err(TimeTravelError::Other)?;
}
}
}
@@ -1069,8 +963,7 @@ mod tests {
concurrency_limit: NonZeroUsize::new(100).unwrap(),
max_keys_per_list_response: Some(5),
};
let storage =
S3Bucket::new(&config, std::time::Duration::ZERO).expect("remote storage init");
let storage = S3Bucket::new(&config).expect("remote storage init");
for (test_path_idx, test_path) in all_paths.iter().enumerate() {
let result = storage.relative_path_to_s3_object(test_path);
let expected = expected_outputs[prefix_idx][test_path_idx];

View File

@@ -90,16 +90,11 @@ impl UnreliableWrapper {
}
}
async fn delete_inner(
&self,
path: &RemotePath,
attempt: bool,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
async fn delete_inner(&self, path: &RemotePath, attempt: bool) -> anyhow::Result<()> {
if attempt {
self.attempt(RemoteOp::Delete(path.clone()))?;
}
self.inner.delete(path, cancel).await
self.inner.delete(path).await
}
}
@@ -110,22 +105,20 @@ impl RemoteStorage for UnreliableWrapper {
async fn list_prefixes(
&self,
prefix: Option<&RemotePath>,
cancel: &CancellationToken,
) -> Result<Vec<RemotePath>, DownloadError> {
self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))
.map_err(DownloadError::Other)?;
self.inner.list_prefixes(prefix, cancel).await
self.inner.list_prefixes(prefix).await
}
async fn list_files(
&self,
folder: Option<&RemotePath>,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Vec<RemotePath>, DownloadError> {
self.attempt(RemoteOp::ListPrefixes(folder.cloned()))
.map_err(DownloadError::Other)?;
self.inner.list_files(folder, max_keys, cancel).await
self.inner.list_files(folder, max_keys).await
}
async fn list(
@@ -133,11 +126,10 @@ impl RemoteStorage for UnreliableWrapper {
prefix: Option<&RemotePath>,
mode: ListingMode,
max_keys: Option<NonZeroU32>,
cancel: &CancellationToken,
) -> Result<Listing, DownloadError> {
self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))
.map_err(DownloadError::Other)?;
self.inner.list(prefix, mode, max_keys, cancel).await
self.inner.list(prefix, mode, max_keys).await
}
async fn upload(
@@ -148,22 +140,15 @@ impl RemoteStorage for UnreliableWrapper {
data_size_bytes: usize,
to: &RemotePath,
metadata: Option<StorageMetadata>,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
self.attempt(RemoteOp::Upload(to.clone()))?;
self.inner
.upload(data, data_size_bytes, to, metadata, cancel)
.await
self.inner.upload(data, data_size_bytes, to, metadata).await
}
async fn download(
&self,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner.download(from, cancel).await
self.inner.download(from).await
}
async fn download_byte_range(
@@ -171,7 +156,6 @@ impl RemoteStorage for UnreliableWrapper {
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// Note: We treat any download_byte_range as an "attempt" of the same
// operation. We don't pay attention to the ranges. That's good enough
@@ -179,24 +163,20 @@ impl RemoteStorage for UnreliableWrapper {
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner
.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.download_byte_range(from, start_inclusive, end_exclusive)
.await
}
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
self.delete_inner(path, true, cancel).await
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
self.delete_inner(path, true).await
}
async fn delete_objects<'a>(
&self,
paths: &'a [RemotePath],
cancel: &CancellationToken,
) -> anyhow::Result<()> {
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;
let mut error_counter = 0;
for path in paths {
// Don't record the attempt because it was already recorded above
if (self.delete_inner(path, false, cancel).await).is_err() {
if (self.delete_inner(path, false).await).is_err() {
error_counter += 1;
}
}
@@ -209,16 +189,11 @@ impl RemoteStorage for UnreliableWrapper {
Ok(())
}
async fn copy(
&self,
from: &RemotePath,
to: &RemotePath,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()> {
// copy is equivalent to download + upload
self.attempt(RemoteOp::Download(from.clone()))?;
self.attempt(RemoteOp::Upload(to.clone()))?;
self.inner.copy_object(from, to, cancel).await
self.inner.copy_object(from, to).await
}
async fn time_travel_recover(

View File

@@ -1,15 +1,9 @@
use std::{
future::Future,
pin::Pin,
task::{Context, Poll},
time::Duration,
};
use bytes::Bytes;
use futures_util::Stream;
use tokio_util::sync::CancellationToken;
use crate::TimeoutOrCancel;
pin_project_lite::pin_project! {
/// An `AsyncRead` adapter which carries a permit for the lifetime of the value.
@@ -37,133 +31,3 @@ impl<S: Stream> Stream for PermitCarrying<S> {
self.inner.size_hint()
}
}
pin_project_lite::pin_project! {
pub(crate) struct DownloadStream<F, S> {
hit: bool,
#[pin]
cancellation: F,
#[pin]
inner: S,
}
}
impl<F, S> DownloadStream<F, S> {
pub(crate) fn new(cancellation: F, inner: S) -> Self {
Self {
cancellation,
hit: false,
inner,
}
}
}
/// See documentation on [`crate::DownloadStream`] on rationale why `std::io::Error` is used.
impl<E, F, S> Stream for DownloadStream<F, S>
where
std::io::Error: From<E>,
F: Future<Output = E>,
S: Stream<Item = std::io::Result<Bytes>>,
{
type Item = <S as Stream>::Item;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let this = self.project();
if !*this.hit {
if let Poll::Ready(e) = this.cancellation.poll(cx) {
*this.hit = true;
let e = Err(std::io::Error::from(e));
return Poll::Ready(Some(e));
}
}
this.inner.poll_next(cx)
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
/// Fires only on the first cancel or timeout, not on both.
pub(crate) async fn cancel_or_timeout(
timeout: Duration,
cancel: CancellationToken,
) -> TimeoutOrCancel {
tokio::select! {
_ = tokio::time::sleep(timeout) => TimeoutOrCancel::Timeout,
_ = cancel.cancelled() => TimeoutOrCancel::Cancel,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::DownloadError;
use futures::stream::StreamExt;
#[tokio::test(start_paused = true)]
async fn cancelled_download_stream() {
let inner = futures::stream::pending();
let timeout = Duration::from_secs(120);
let cancel = CancellationToken::new();
let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);
let mut stream = std::pin::pin!(stream);
let mut first = stream.next();
tokio::select! {
_ = &mut first => unreachable!("we haven't cancelled yet, nor has the timeout passed"),
_ = tokio::time::sleep(Duration::from_secs(1)) => {},
}
cancel.cancel();
let e = first.await.expect("there must be some").unwrap_err();
assert!(matches!(e.kind(), std::io::ErrorKind::Other), "{e:?}");
let inner = e.get_ref().expect("inner should be set");
assert!(
inner
.downcast_ref::<DownloadError>()
.is_some_and(|e| matches!(e, DownloadError::Cancelled)),
"{inner:?}"
);
tokio::select! {
_ = stream.next() => unreachable!("no timeout ever happens as we were already cancelled"),
_ = tokio::time::sleep(Duration::from_secs(121)) => {},
}
}
#[tokio::test(start_paused = true)]
async fn timeouted_download_stream() {
let inner = futures::stream::pending();
let timeout = Duration::from_secs(120);
let cancel = CancellationToken::new();
let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);
let mut stream = std::pin::pin!(stream);
// because the stream uses a 120s timeout and time is paused, we advance to 120s right away.
let first = stream.next();
let e = first.await.expect("there must be some").unwrap_err();
assert!(matches!(e.kind(), std::io::ErrorKind::Other), "{e:?}");
let inner = e.get_ref().expect("inner should be set");
assert!(
inner
.downcast_ref::<DownloadError>()
.is_some_and(|e| matches!(e, DownloadError::Timeout)),
"{inner:?}"
);
cancel.cancel();
tokio::select! {
_ = stream.next() => unreachable!("no cancellation ever happens because we already timed out"),
_ = tokio::time::sleep(Duration::from_secs(121)) => {},
}
}
}

View File

@@ -10,7 +10,6 @@ use futures::stream::Stream;
use once_cell::sync::OnceCell;
use remote_storage::{Download, GenericRemoteStorage, RemotePath};
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info};
static LOGGING_DONE: OnceCell<()> = OnceCell::new();
@@ -59,12 +58,8 @@ pub(crate) async fn upload_simple_remote_data(
) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
info!("Creating {upload_tasks_count} remote files");
let mut upload_tasks = JoinSet::new();
let cancel = CancellationToken::new();
for i in 1..upload_tasks_count + 1 {
let task_client = Arc::clone(client);
let cancel = cancel.clone();
upload_tasks.spawn(async move {
let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
let blob_path = RemotePath::new(
@@ -74,9 +69,7 @@ pub(crate) async fn upload_simple_remote_data(
debug!("Creating remote item {i} at path {blob_path:?}");
let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
task_client
.upload(data, len, &blob_path, None, &cancel)
.await?;
task_client.upload(data, len, &blob_path, None).await?;
Ok::<_, anyhow::Error>(blob_path)
});
@@ -114,15 +107,13 @@ pub(crate) async fn cleanup(
"Removing {} objects from the remote storage during cleanup",
objects_to_delete.len()
);
let cancel = CancellationToken::new();
let mut delete_tasks = JoinSet::new();
for object_to_delete in objects_to_delete {
let task_client = Arc::clone(client);
let cancel = cancel.clone();
delete_tasks.spawn(async move {
debug!("Deleting remote item at path {object_to_delete:?}");
task_client
.delete(&object_to_delete, &cancel)
.delete(&object_to_delete)
.await
.with_context(|| format!("{object_to_delete:?} removal"))
});
@@ -150,12 +141,8 @@ pub(crate) async fn upload_remote_data(
) -> ControlFlow<Uploads, Uploads> {
info!("Creating {upload_tasks_count} remote files");
let mut upload_tasks = JoinSet::new();
let cancel = CancellationToken::new();
for i in 1..upload_tasks_count + 1 {
let task_client = Arc::clone(client);
let cancel = cancel.clone();
upload_tasks.spawn(async move {
let prefix = format!("{base_prefix_str}/sub_prefix_{i}/");
let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))
@@ -165,9 +152,7 @@ pub(crate) async fn upload_remote_data(
let (data, data_len) =
upload_stream(format!("remote blob data {i}").into_bytes().into());
task_client
.upload(data, data_len, &blob_path, None, &cancel)
.await?;
task_client.upload(data, data_len, &blob_path, None).await?;
Ok::<_, anyhow::Error>((blob_prefix, blob_path))
});

View File

@@ -4,7 +4,6 @@ use remote_storage::RemotePath;
use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use crate::common::{download_to_vec, upload_stream, wrap_stream};
@@ -46,15 +45,13 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
}
};
let cancel = CancellationToken::new();
let test_client = Arc::clone(&ctx.enabled.client);
let expected_remote_prefixes = ctx.remote_prefixes.clone();
let base_prefix = RemotePath::new(Utf8Path::new(ctx.enabled.base_prefix))
.context("common_prefix construction")?;
let root_remote_prefixes = test_client
.list_prefixes(None, &cancel)
.list_prefixes(None)
.await
.context("client list root prefixes failure")?
.into_iter()
@@ -65,7 +62,7 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
);
let nested_remote_prefixes = test_client
.list_prefixes(Some(&base_prefix), &cancel)
.list_prefixes(Some(&base_prefix))
.await
.context("client list nested prefixes failure")?
.into_iter()
@@ -102,12 +99,11 @@ async fn list_files_works(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> a
anyhow::bail!("S3 init failed: {e:?}")
}
};
let cancel = CancellationToken::new();
let test_client = Arc::clone(&ctx.enabled.client);
let base_prefix =
RemotePath::new(Utf8Path::new("folder1")).context("common_prefix construction")?;
let root_files = test_client
.list_files(None, None, &cancel)
.list_files(None, None)
.await
.context("client list root files failure")?
.into_iter()
@@ -121,13 +117,13 @@ async fn list_files_works(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> a
// Test that max_keys limit works. In total there are about 21 files (see
// upload_simple_remote_data call in test_real_s3.rs).
let limited_root_files = test_client
.list_files(None, Some(NonZeroU32::new(2).unwrap()), &cancel)
.list_files(None, Some(NonZeroU32::new(2).unwrap()))
.await
.context("client list root files failure")?;
assert_eq!(limited_root_files.len(), 2);
let nested_remote_files = test_client
.list_files(Some(&base_prefix), None, &cancel)
.list_files(Some(&base_prefix), None)
.await
.context("client list nested files failure")?
.into_iter()
@@ -154,17 +150,12 @@ async fn delete_non_exising_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Resu
MaybeEnabledStorage::Disabled => return Ok(()),
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(
format!("{}/for_sure_there_is_nothing_there_really", ctx.base_prefix).as_str(),
))
.with_context(|| "RemotePath conversion")?;
ctx.client
.delete(&path, &cancel)
.await
.expect("should succeed");
ctx.client.delete(&path).await.expect("should succeed");
Ok(())
}
@@ -177,8 +168,6 @@ async fn delete_objects_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<(
MaybeEnabledStorage::Disabled => return Ok(()),
};
let cancel = CancellationToken::new();
let path1 = RemotePath::new(Utf8Path::new(format!("{}/path1", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
@@ -189,21 +178,21 @@ async fn delete_objects_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<(
.with_context(|| "RemotePath conversion")?;
let (data, len) = upload_stream("remote blob data1".as_bytes().into());
ctx.client.upload(data, len, &path1, None, &cancel).await?;
ctx.client.upload(data, len, &path1, None).await?;
let (data, len) = upload_stream("remote blob data2".as_bytes().into());
ctx.client.upload(data, len, &path2, None, &cancel).await?;
ctx.client.upload(data, len, &path2, None).await?;
let (data, len) = upload_stream("remote blob data3".as_bytes().into());
ctx.client.upload(data, len, &path3, None, &cancel).await?;
ctx.client.upload(data, len, &path3, None).await?;
ctx.client.delete_objects(&[path1, path2], &cancel).await?;
ctx.client.delete_objects(&[path1, path2]).await?;
let prefixes = ctx.client.list_prefixes(None, &cancel).await?;
let prefixes = ctx.client.list_prefixes(None).await?;
assert_eq!(prefixes.len(), 1);
ctx.client.delete_objects(&[path3], &cancel).await?;
ctx.client.delete_objects(&[path3]).await?;
Ok(())
}
@@ -215,8 +204,6 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
@@ -224,56 +211,47 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
let (data, len) = wrap_stream(orig.clone());
ctx.client.upload(data, len, &path, None, &cancel).await?;
ctx.client.upload(data, len, &path, None).await?;
// Normal download request
let dl = ctx.client.download(&path, &cancel).await?;
let dl = ctx.client.download(&path).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// Full range (end specified)
let dl = ctx
.client
.download_byte_range(&path, 0, Some(len as u64), &cancel)
.download_byte_range(&path, 0, Some(len as u64))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// partial range (end specified)
let dl = ctx
.client
.download_byte_range(&path, 4, Some(10), &cancel)
.await?;
let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]);
// partial range (end beyond real end)
let dl = ctx
.client
.download_byte_range(&path, 8, Some(len as u64 * 100), &cancel)
.download_byte_range(&path, 8, Some(len as u64 * 100))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]);
// Partial range (end unspecified)
let dl = ctx
.client
.download_byte_range(&path, 4, None, &cancel)
.await?;
let dl = ctx.client.download_byte_range(&path, 4, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]);
// Full range (end unspecified)
let dl = ctx
.client
.download_byte_range(&path, 0, None, &cancel)
.await?;
let dl = ctx.client.download_byte_range(&path, 0, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
debug!("Cleanup: deleting file at path {path:?}");
ctx.client
.delete(&path, &cancel)
.delete(&path)
.await
.with_context(|| format!("{path:?} removal"))?;
@@ -287,8 +265,6 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(
format!("{}/file_to_copy", ctx.base_prefix).as_str(),
))
@@ -302,18 +278,18 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let (data, len) = wrap_stream(orig.clone());
ctx.client.upload(data, len, &path, None, &cancel).await?;
ctx.client.upload(data, len, &path, None).await?;
// Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?;
ctx.client.copy_object(&path, &path_dest).await?;
let dl = ctx.client.download(&path_dest, &cancel).await?;
let dl = ctx.client.download(&path_dest).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
debug!("Cleanup: deleting file at path {path:?}");
ctx.client
.delete_objects(&[path.clone(), path_dest.clone()], &cancel)
.delete_objects(&[path.clone(), path_dest.clone()])
.await
.with_context(|| format!("{path:?} removal"))?;

View File

@@ -1,9 +1,9 @@
use std::collections::HashSet;
use std::env;
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::sync::Arc;
use std::time::UNIX_EPOCH;
use std::{collections::HashSet, time::Duration};
use anyhow::Context;
use remote_storage::{
@@ -39,17 +39,6 @@ impl EnabledAzure {
base_prefix: BASE_PREFIX,
}
}
#[allow(unused)] // this will be needed when moving the timeout integration tests back
fn configure_request_timeout(&mut self, timeout: Duration) {
match Arc::get_mut(&mut self.client).expect("outer Arc::get_mut") {
GenericRemoteStorage::AzureBlob(azure) => {
let azure = Arc::get_mut(azure).expect("inner Arc::get_mut");
azure.timeout = timeout;
}
_ => unreachable!(),
}
}
}
enum MaybeEnabledStorage {
@@ -224,7 +213,6 @@ fn create_azure_client(
concurrency_limit: NonZeroUsize::new(100).unwrap(),
max_keys_per_list_response,
}),
timeout: Duration::from_secs(120),
};
Ok(Arc::new(
GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,

View File

@@ -1,6 +1,5 @@
use std::env;
use std::fmt::{Debug, Display};
use std::future::Future;
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::sync::Arc;
@@ -10,10 +9,9 @@ use std::{collections::HashSet, time::SystemTime};
use crate::common::{download_to_vec, upload_stream};
use anyhow::Context;
use camino::Utf8Path;
use futures_util::StreamExt;
use futures_util::Future;
use remote_storage::{
DownloadError, GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind,
S3Config,
GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
@@ -29,6 +27,7 @@ use common::{cleanup, ensure_logging_ready, upload_remote_data, upload_simple_re
use utils::backoff;
const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
const BASE_PREFIX: &str = "test";
#[test_context(MaybeEnabledStorage)]
@@ -70,11 +69,8 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
ret
}
async fn list_files(
client: &Arc<GenericRemoteStorage>,
cancel: &CancellationToken,
) -> anyhow::Result<HashSet<RemotePath>> {
Ok(retry(|| client.list_files(None, None, cancel))
async fn list_files(client: &Arc<GenericRemoteStorage>) -> anyhow::Result<HashSet<RemotePath>> {
Ok(retry(|| client.list_files(None, None))
.await
.context("list root files failure")?
.into_iter()
@@ -94,11 +90,11 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
retry(|| {
let (data, len) = upload_stream("remote blob data1".as_bytes().into());
ctx.client.upload(data, len, &path1, None, &cancel)
ctx.client.upload(data, len, &path1, None)
})
.await?;
let t0_files = list_files(&ctx.client, &cancel).await?;
let t0_files = list_files(&ctx.client).await?;
let t0 = time_point().await;
println!("at t0: {t0_files:?}");
@@ -106,17 +102,17 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
retry(|| {
let (data, len) = upload_stream(old_data.as_bytes().into());
ctx.client.upload(data, len, &path2, None, &cancel)
ctx.client.upload(data, len, &path2, None)
})
.await?;
let t1_files = list_files(&ctx.client, &cancel).await?;
let t1_files = list_files(&ctx.client).await?;
let t1 = time_point().await;
println!("at t1: {t1_files:?}");
// A little check to ensure that our clock is not too far off from the S3 clock
{
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
let dl = retry(|| ctx.client.download(&path2)).await?;
let last_modified = dl.last_modified.unwrap();
let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt;
@@ -129,7 +125,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
retry(|| {
let (data, len) = upload_stream("remote blob data3".as_bytes().into());
ctx.client.upload(data, len, &path3, None, &cancel)
ctx.client.upload(data, len, &path3, None)
})
.await?;
@@ -137,12 +133,12 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
retry(|| {
let (data, len) = upload_stream(new_data.as_bytes().into());
ctx.client.upload(data, len, &path2, None, &cancel)
ctx.client.upload(data, len, &path2, None)
})
.await?;
retry(|| ctx.client.delete(&path1, &cancel)).await?;
let t2_files = list_files(&ctx.client, &cancel).await?;
retry(|| ctx.client.delete(&path1)).await?;
let t2_files = list_files(&ctx.client).await?;
let t2 = time_point().await;
println!("at t2: {t2_files:?}");
@@ -151,10 +147,10 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
ctx.client
.time_travel_recover(None, t2, t_final, &cancel)
.await?;
let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
let t2_files_recovered = list_files(&ctx.client).await?;
println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2).await?).await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content
@@ -162,10 +158,10 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
ctx.client
.time_travel_recover(None, t1, t_final, &cancel)
.await?;
let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
let t1_files_recovered = list_files(&ctx.client).await?;
println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2).await?).await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1
@@ -173,14 +169,14 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
ctx.client
.time_travel_recover(None, t0, t_final, &cancel)
.await?;
let t0_files_recovered = list_files(&ctx.client, &cancel).await?;
let t0_files_recovered = list_files(&ctx.client).await?;
println!("after recovery to t0: {t0_files_recovered:?}");
assert_eq!(t0_files, t0_files_recovered);
// cleanup
let paths = &[path1, path2, path3];
retry(|| ctx.client.delete_objects(paths, &cancel)).await?;
retry(|| ctx.client.delete_objects(paths)).await?;
Ok(())
}
@@ -201,16 +197,6 @@ impl EnabledS3 {
base_prefix: BASE_PREFIX,
}
}
fn configure_request_timeout(&mut self, timeout: Duration) {
match Arc::get_mut(&mut self.client).expect("outer Arc::get_mut") {
GenericRemoteStorage::AwsS3(s3) => {
let s3 = Arc::get_mut(s3).expect("inner Arc::get_mut");
s3.timeout = timeout;
}
_ => unreachable!(),
}
}
}
enum MaybeEnabledStorage {
@@ -384,169 +370,8 @@ fn create_s3_client(
concurrency_limit: NonZeroUsize::new(100).unwrap(),
max_keys_per_list_response,
}),
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
};
Ok(Arc::new(
GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
))
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return;
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(
format!("{}/file_to_copy", ctx.base_prefix).as_str(),
))
.unwrap();
let len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
let timeout = std::time::Duration::from_secs(5);
ctx.configure_request_timeout(timeout);
let started_at = std::time::Instant::now();
let mut stream = ctx
.client
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
if started_at.elapsed().mul_f32(0.9) >= timeout {
tracing::warn!(
elapsed_ms = started_at.elapsed().as_millis(),
"timeout might be too low, consumed most of it during headers"
);
}
let first = stream
.next()
.await
.expect("should have the first blob")
.expect("should have succeeded");
tracing::info!(len = first.len(), "downloaded first chunk");
assert!(
first.len() < len,
"uploaded file is too small, we downloaded all on first chunk"
);
tokio::time::sleep(timeout).await;
{
let started_at = std::time::Instant::now();
let next = stream
.next()
.await
.expect("stream should not have ended yet");
tracing::info!(
next.is_err = next.is_err(),
elapsed_ms = started_at.elapsed().as_millis(),
"received item after timeout"
);
let e = next.expect_err("expected an error, but got a chunk?");
let inner = e.get_ref().expect("std::io::Error::inner should be set");
assert!(
inner
.downcast_ref::<DownloadError>()
.is_some_and(|e| matches!(e, DownloadError::Timeout)),
"{inner:?}"
);
}
ctx.configure_request_timeout(RemoteStorageConfig::DEFAULT_TIMEOUT);
ctx.client.delete_objects(&[path], &cancel).await.unwrap()
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return;
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(
format!("{}/file_to_copy", ctx.base_prefix).as_str(),
))
.unwrap();
let len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
{
let mut stream = ctx
.client
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
let first = stream
.next()
.await
.expect("should have the first blob")
.expect("should have succeeded");
tracing::info!(len = first.len(), "downloaded first chunk");
assert!(
first.len() < len,
"uploaded file is too small, we downloaded all on first chunk"
);
cancel.cancel();
let next = stream.next().await.expect("stream should have more");
let e = next.expect_err("expected an error, but got a chunk?");
let inner = e.get_ref().expect("std::io::Error::inner should be set");
assert!(
inner
.downcast_ref::<DownloadError>()
.is_some_and(|e| matches!(e, DownloadError::Cancelled)),
"{inner:?}"
);
}
let cancel = CancellationToken::new();
ctx.client.delete_objects(&[path], &cancel).await.unwrap();
}
/// Upload a long enough file so that we cannot download it in a single chunk
///
/// For s3 the first chunk seems to be less than 10kB, so this has a bit of a safety margin
async fn upload_large_enough_file(
client: &GenericRemoteStorage,
path: &RemotePath,
cancel: &CancellationToken,
) -> usize {
let header = bytes::Bytes::from_static("remote blob data content".as_bytes());
let body = bytes::Bytes::from(vec![0u8; 1024]);
let contents = std::iter::once(header).chain(std::iter::repeat(body).take(128));
let len = contents.clone().fold(0, |acc, next| acc + next.len());
let contents = futures::stream::iter(contents.map(std::io::Result::Ok));
client
.upload(contents, len, path, None, cancel)
.await
.expect("upload succeeds");
len
}

View File

@@ -1,3 +1,5 @@
#![allow(unused)]
use criterion::{criterion_group, criterion_main, Criterion};
use utils::id;

View File

@@ -29,9 +29,6 @@ pub enum Scope {
// Should only be used e.g. for status check.
// Currently also used for connection from any pageserver to any safekeeper.
SafekeeperData,
// The scope used by pageservers in upcalls to storage controller and cloud control plane
#[serde(rename = "generations_api")]
GenerationsApi,
}
/// JWT payload. See docs/authentication.md for the format

View File

@@ -6,28 +6,14 @@
//! There are two sets of inputs; `short` and `medium`. They were collected on postgres v14 by
//! logging what happens when a sequential scan is requested on a small table, then picking out two
//! suitable from logs.
//!
//!
//! Reference data (git blame to see commit) on an i3en.3xlarge
//! ```text
//! short/short/1 time: [39.175 µs 39.348 µs 39.536 µs]
//! short/short/2 time: [51.227 µs 51.487 µs 51.755 µs]
//! short/short/4 time: [76.048 µs 76.362 µs 76.674 µs]
//! short/short/8 time: [128.94 µs 129.82 µs 130.74 µs]
//! short/short/16 time: [227.84 µs 229.00 µs 230.28 µs]
//! short/short/32 time: [455.97 µs 457.81 µs 459.90 µs]
//! short/short/64 time: [902.46 µs 904.84 µs 907.32 µs]
//! short/short/128 time: [1.7416 ms 1.7487 ms 1.7561 ms]
//! ```
use std::sync::Arc;
use std::sync::{Arc, Barrier};
use bytes::{Buf, Bytes};
use pageserver::{
config::PageServerConf, repository::Key, walrecord::NeonWalRecord, walredo::PostgresRedoManager,
};
use pageserver_api::shard::TenantShardId;
use tokio::task::JoinSet;
use utils::{id::TenantId, lsn::Lsn};
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
@@ -53,11 +39,11 @@ fn redo_scenarios(c: &mut Criterion) {
.build()
.unwrap();
tracing::info!("executing first");
rt.block_on(short().execute(&manager)).unwrap();
short().execute(rt.handle(), &manager).unwrap();
tracing::info!("first executed");
}
let thread_counts = [1, 2, 4, 8, 16, 32, 64, 128];
let thread_counts = [1, 2, 4, 8, 16];
let mut group = c.benchmark_group("short");
group.sampling_mode(criterion::SamplingMode::Flat);
@@ -88,69 +74,114 @@ fn redo_scenarios(c: &mut Criterion) {
drop(group);
}
/// Sets up a multi-threaded tokio runtime with default worker thread count,
/// then, spawn `requesters` tasks that repeatedly:
/// - get input from `input_factory()`
/// - call `manager.request_redo()` with their input
///
/// This stress-tests the scalability of a single walredo manager at high tokio-level concurrency.
///
/// Using tokio's default worker thread count means the results will differ on machines
/// with different core counts. We don't care about that; performance will always
/// differ on different hardware. To compare the performance of different software versions,
/// use the same hardware.
/// Sets up `threads` requester threads that call `request_redo` with the given input.
fn add_multithreaded_walredo_requesters(
b: &mut criterion::Bencher,
nrequesters: usize,
threads: u32,
manager: &Arc<PostgresRedoManager>,
input_factory: fn() -> Request,
) {
assert_ne!(nrequesters, 0);
assert_ne!(threads, 0);
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.unwrap();
if threads == 1 {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
let handle = rt.handle();
b.iter_batched_ref(
|| Some(input_factory()),
|input| execute_all(input.take(), handle, manager),
criterion::BatchSize::PerIteration,
);
} else {
let (work_tx, work_rx) = std::sync::mpsc::sync_channel(threads as usize);
let barrier = Arc::new(tokio::sync::Barrier::new(nrequesters + 1));
let work_rx = std::sync::Arc::new(std::sync::Mutex::new(work_rx));
let mut requesters = JoinSet::new();
for _ in 0..nrequesters {
let _entered = rt.enter();
let manager = manager.clone();
let barrier = barrier.clone();
requesters.spawn(async move {
loop {
let input = input_factory();
barrier.wait().await;
let page = input.execute(&manager).await.unwrap();
assert_eq!(page.remaining(), 8192);
barrier.wait().await;
}
});
let barrier = Arc::new(Barrier::new(threads as usize + 1));
let jhs = (0..threads)
.map(|_| {
std::thread::spawn({
let manager = manager.clone();
let barrier = barrier.clone();
let work_rx = work_rx.clone();
move || {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
let handle = rt.handle();
loop {
// wait on the work queue to see whether we should go another round; exit once the sender is dropped
if work_rx.lock().unwrap().recv().is_err() {
break;
}
let input = Some(input_factory());
barrier.wait();
execute_all(input, handle, &manager).unwrap();
barrier.wait();
}
}
})
})
.collect::<Vec<_>>();
let _jhs = JoinOnDrop(jhs);
b.iter_batched(
|| {
for _ in 0..threads {
work_tx.send(()).unwrap()
}
},
|()| {
// start the work
barrier.wait();
// wait for work to complete
barrier.wait();
},
criterion::BatchSize::PerIteration,
);
drop(work_tx);
}
}
let do_one_iteration = || {
rt.block_on(async {
barrier.wait().await;
// wait for work to complete
barrier.wait().await;
})
};
struct JoinOnDrop(Vec<std::thread::JoinHandle<()>>);
b.iter_batched(
|| {
// warmup
do_one_iteration();
},
|()| {
// work loop
do_one_iteration();
},
criterion::BatchSize::PerIteration,
);
impl Drop for JoinOnDrop {
// it's not really needless because we want to join all threads first, then check for panics
#[allow(clippy::needless_collect)]
fn drop(&mut self) {
// first join all
let results = self.0.drain(..).map(|jh| jh.join()).collect::<Vec<_>>();
// then check the results; panicking here is not great, but it does get the message across
// to the user, and sets an exit value.
results.into_iter().try_for_each(|res| res).unwrap();
}
}
rt.block_on(requesters.shutdown());
fn execute_all<I>(
input: I,
handle: &tokio::runtime::Handle,
manager: &PostgresRedoManager,
) -> anyhow::Result<()>
where
I: IntoIterator<Item = Request>,
{
// just fire all requests as fast as possible
input.into_iter().try_for_each(|req| {
let page = req.execute(handle, manager)?;
assert_eq!(page.remaining(), 8192);
anyhow::Ok(())
})
}
criterion_group!(benches, redo_scenarios);
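In the multi-threaded branch above, the criterion setup closure queues one unit of work per worker over the channel, and the measured closure is just the two `barrier.wait()` calls that bracket one round of work on every thread. A stripped-down sketch of that rendezvous pattern, with criterion and walredo removed and illustrative names:
use std::sync::{mpsc, Arc, Barrier, Mutex};
use std::thread;
fn measured_rounds(workers: usize, rounds: usize) {
    let barrier = Arc::new(Barrier::new(workers + 1));
    let (work_tx, work_rx) = mpsc::sync_channel::<()>(workers);
    let work_rx = Arc::new(Mutex::new(work_rx));
    let handles: Vec<_> = (0..workers)
        .map(|_| {
            let barrier = Arc::clone(&barrier);
            let work_rx = Arc::clone(&work_rx);
            thread::spawn(move || loop {
                // take one unit of work; exit once the sender is dropped
                if work_rx.lock().unwrap().recv().is_err() {
                    break;
                }
                barrier.wait(); // start of the measured region
                // ... perform one unit of work here ...
                barrier.wait(); // end of the measured region
            })
        })
        .collect();
    for _ in 0..rounds {
        // setup: queue one unit of work per worker (outside the measured region)
        for _ in 0..workers {
            work_tx.send(()).unwrap();
        }
        // measured region: release the workers, then wait for them to finish
        barrier.wait();
        barrier.wait();
    }
    drop(work_tx);
    for handle in handles {
        handle.join().unwrap();
    }
}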
@@ -462,7 +493,11 @@ struct Request {
}
impl Request {
async fn execute(self, manager: &PostgresRedoManager) -> anyhow::Result<Bytes> {
fn execute(
self,
rt: &tokio::runtime::Handle,
manager: &PostgresRedoManager,
) -> anyhow::Result<Bytes> {
let Request {
key,
lsn,
@@ -471,8 +506,6 @@ impl Request {
pg_version,
} = self;
manager
.request_redo(key, lsn, base_img, records, pg_version)
.await
rt.block_on(manager.request_redo(key, lsn, base_img, records, pg_version))
}
}

View File

@@ -14,12 +14,8 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
}
(Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
(Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
(Scope::SafekeeperData | Scope::GenerationsApi, _) => Err(AuthError(
format!(
"JWT scope '{:?}' is ineligible for Pageserver auth",
claims.scope
)
.into(),
(Scope::SafekeeperData, _) => Err(AuthError(
"SafekeeperData scope makes no sense for Pageserver".into(),
)),
}
}

View File

@@ -1359,7 +1359,6 @@ broker_endpoint = '{broker_endpoint}'
parsed_remote_storage_config,
RemoteStorageConfig {
storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
},
"Remote storage config should correctly parse the local FS config and fill other storage defaults"
);
@@ -1427,7 +1426,6 @@ broker_endpoint = '{broker_endpoint}'
concurrency_limit: s3_concurrency_limit,
max_keys_per_list_response: None,
}),
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
},
"Remote storage config should correctly parse the S3 config"
);

View File

@@ -834,6 +834,7 @@ mod test {
}
impl ControlPlaneGenerationsApi for MockControlPlane {
#[allow(clippy::diverging_sub_expression)] // False positive via async_trait
async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError> {
unimplemented!()
}
@@ -867,7 +868,6 @@ mod test {
let remote_fs_dir = harness.conf.workdir.join("remote_fs").canonicalize_utf8()?;
let storage_config = RemoteStorageConfig {
storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
};
let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();
@@ -1171,7 +1171,6 @@ pub(crate) mod mock {
pub struct ConsumerState {
rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,
executor_rx: tokio::sync::mpsc::Receiver<DeleterMessage>,
cancel: CancellationToken,
}
impl ConsumerState {
@@ -1185,7 +1184,7 @@ pub(crate) mod mock {
match msg {
DeleterMessage::Delete(objects) => {
for path in objects {
match remote_storage.delete(&path, &self.cancel).await {
match remote_storage.delete(&path).await {
Ok(_) => {
debug!("Deleted {path}");
}
@@ -1218,7 +1217,7 @@ pub(crate) mod mock {
for path in objects {
info!("Executing deletion {path}");
match remote_storage.delete(&path, &self.cancel).await {
match remote_storage.delete(&path).await {
Ok(_) => {
debug!("Deleted {path}");
}
@@ -1268,11 +1267,7 @@ pub(crate) mod mock {
executor_tx,
executed,
remote_storage,
consumer: std::sync::Mutex::new(ConsumerState {
rx,
executor_rx,
cancel: CancellationToken::new(),
}),
consumer: std::sync::Mutex::new(ConsumerState { rx, executor_rx }),
lsn_table: Arc::new(std::sync::RwLock::new(VisibleLsnUpdates::new())),
}
}

View File

@@ -8,7 +8,6 @@
use remote_storage::GenericRemoteStorage;
use remote_storage::RemotePath;
use remote_storage::TimeoutOrCancel;
use remote_storage::MAX_KEYS_PER_DELETE;
use std::time::Duration;
use tokio_util::sync::CancellationToken;
@@ -72,11 +71,9 @@ impl Deleter {
Err(anyhow::anyhow!("failpoint: deletion-queue-before-execute"))
});
self.remote_storage
.delete_objects(&self.accumulator, &self.cancel)
.await
self.remote_storage.delete_objects(&self.accumulator).await
},
TimeoutOrCancel::caused_by_cancel,
|_| false,
3,
10,
"executing deletion batch",

View File

@@ -351,6 +351,7 @@ pub enum IterationOutcome<U> {
Finished(IterationOutcomeFinished<U>),
}
#[allow(dead_code)]
#[derive(Debug, Serialize)]
pub struct IterationOutcomeFinished<U> {
/// The actual usage observed before we started the iteration.
@@ -365,6 +366,7 @@ pub struct IterationOutcomeFinished<U> {
}
#[derive(Debug, Serialize)]
#[allow(dead_code)]
struct AssumedUsage<U> {
/// The expected value for `after`, after phase 2.
projected_after: U,
@@ -372,12 +374,14 @@ struct AssumedUsage<U> {
failed: LayerCount,
}
#[allow(dead_code)]
#[derive(Debug, Serialize)]
struct PlannedUsage<U> {
respecting_tenant_min_resident_size: U,
fallback_to_global_lru: Option<U>,
}
#[allow(dead_code)]
#[derive(Debug, Default, Serialize)]
struct LayerCount {
file_sizes: u64,
@@ -561,6 +565,7 @@ pub(crate) struct EvictionSecondaryLayer {
#[derive(Clone)]
pub(crate) enum EvictionLayer {
Attached(Layer),
#[allow(dead_code)]
Secondary(EvictionSecondaryLayer),
}
@@ -1100,6 +1105,7 @@ mod filesystem_level_usage {
use super::DiskUsageEvictionTaskConfig;
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct Usage<'a> {
config: &'a DiskUsageEvictionTaskConfig,

View File

@@ -1136,7 +1136,7 @@ async fn tenant_shard_split_handler(
let new_shards = state
.tenant_manager
.shard_split(tenant_shard_id, ShardCount::new(req.new_shard_count), &ctx)
.shard_split(tenant_shard_id, ShardCount(req.new_shard_count), &ctx)
.await
.map_err(ApiError::InternalServerError)?;
@@ -1951,7 +1951,6 @@ async fn put_io_engine_handler(
mut r: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
check_permission(&r, None)?;
let kind: crate::virtual_file::IoEngineKind = json_request(&mut r).await?;
crate::virtual_file::io_engine::set(kind);
json_response(StatusCode::OK, ())
@@ -2215,7 +2214,7 @@ pub fn make_router(
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
|r| api_handler(r, timeline_collect_keyspace),
|r| testing_api_handler("read out the keyspace", r, timeline_collect_keyspace),
)
.put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
.any(handler_404))

View File

@@ -26,7 +26,7 @@ use pageserver_api::models::{
PagestreamNblocksResponse,
};
use pageserver_api::shard::ShardIndex;
use pageserver_api::shard::ShardNumber;
use pageserver_api::shard::{ShardCount, ShardNumber};
use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, QueryError};
use pq_proto::framed::ConnectionError;
use pq_proto::FeStartupPacket;
@@ -998,7 +998,7 @@ impl PageServerHandler {
) -> Result<&Arc<Timeline>, Key> {
let key = if let Some((first_idx, first_timeline)) = self.shard_timelines.iter().next() {
// Fastest path: single sharded case
if first_idx.shard_count.count() == 1 {
if first_idx.shard_count < ShardCount(2) {
return Ok(&first_timeline.timeline);
}

View File

@@ -156,7 +156,6 @@ impl Timeline {
pending_updates: HashMap::new(),
pending_deletions: Vec::new(),
pending_nblocks: 0,
pending_aux_files: None,
pending_directory_entries: Vec::new(),
lsn,
}
@@ -871,14 +870,6 @@ pub struct DatadirModification<'a> {
pending_updates: HashMap<Key, Vec<(Lsn, Value)>>,
pending_deletions: Vec<(Range<Key>, Lsn)>,
pending_nblocks: i64,
// If we already wrote any aux file changes in this modification, stash the latest dir. If set,
// [`Self::put_file`] may assume that it is safe to emit a delta rather than checking
// if AUX_FILES_KEY is already set.
pending_aux_files: Option<AuxFilesDirectory>,
/// For special "directory" keys that store key-value maps, track the size of the map
/// if it was updated in this modification.
pending_directory_entries: Vec<(DirectoryKind, usize)>,
}
@@ -1393,76 +1384,31 @@ impl<'a> DatadirModification<'a> {
content: &[u8],
ctx: &RequestContext,
) -> anyhow::Result<()> {
let file_path = path.to_string();
let content = if content.is_empty() {
None
} else {
Some(Bytes::copy_from_slice(content))
};
let dir = if let Some(mut dir) = self.pending_aux_files.take() {
// We already updated aux files in `self`: emit a delta and update our latest value
self.put(
AUX_FILES_KEY,
Value::WalRecord(NeonWalRecord::AuxFile {
file_path: file_path.clone(),
content: content.clone(),
}),
);
dir.upsert(file_path, content);
dir
} else {
// Check if the AUX_FILES_KEY is initialized
match self.get(AUX_FILES_KEY, ctx).await {
Ok(dir_bytes) => {
let mut dir = AuxFilesDirectory::des(&dir_bytes)?;
// Key is already set, we may append a delta
self.put(
AUX_FILES_KEY,
Value::WalRecord(NeonWalRecord::AuxFile {
file_path: file_path.clone(),
content: content.clone(),
}),
);
dir.upsert(file_path, content);
dir
}
Err(
e @ (PageReconstructError::AncestorStopping(_)
| PageReconstructError::Cancelled
| PageReconstructError::AncestorLsnTimeout(_)),
) => {
// Important that we do not interpret a shutdown error as "not found" and thereby
// reset the map.
return Err(e.into());
}
// FIXME: PageReconstructError doesn't have an explicit variant for key-not-found, so
// we are assuming that all _other_ possible errors represents a missing key. If some
// other error occurs, we may incorrectly reset the map of aux files.
Err(PageReconstructError::Other(_) | PageReconstructError::WalRedo(_)) => {
// Key is missing, we must insert an image as the basis for subsequent deltas.
let mut dir = AuxFilesDirectory {
files: HashMap::new(),
};
dir.upsert(file_path, content);
self.put(
AUX_FILES_KEY,
Value::Image(Bytes::from(
AuxFilesDirectory::ser(&dir).context("serialize")?,
)),
);
dir
let mut dir = match self.get(AUX_FILES_KEY, ctx).await {
Ok(buf) => AuxFilesDirectory::des(&buf)?,
Err(e) => {
// This is expected: historical databases do not have the key.
debug!("Failed to get info about AUX files: {}", e);
AuxFilesDirectory {
files: HashMap::new(),
}
}
};
let path = path.to_string();
if content.is_empty() {
dir.files.remove(&path);
} else {
dir.files.insert(path, Bytes::copy_from_slice(content));
}
self.pending_directory_entries
.push((DirectoryKind::AuxFiles, dir.files.len()));
self.pending_aux_files = Some(dir);
self.put(
AUX_FILES_KEY,
Value::Image(Bytes::from(
AuxFilesDirectory::ser(&dir).context("serialize")?,
)),
);
Ok(())
}
@@ -1672,18 +1618,8 @@ struct RelDirectory {
}
#[derive(Debug, Serialize, Deserialize, Default)]
pub(crate) struct AuxFilesDirectory {
pub(crate) files: HashMap<String, Bytes>,
}
impl AuxFilesDirectory {
pub(crate) fn upsert(&mut self, key: String, value: Option<Bytes>) {
if let Some(value) = value {
self.files.insert(key, value);
} else {
self.files.remove(&key);
}
}
struct AuxFilesDirectory {
files: HashMap<String, Bytes>,
}
#[derive(Debug, Serialize, Deserialize)]
@@ -1719,60 +1655,8 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {
use hex_literal::hex;
use utils::id::TimelineId;
use super::*;
use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION};
/// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline
#[tokio::test]
async fn aux_files_round_trip() -> anyhow::Result<()> {
let name = "aux_files_round_trip";
let harness = TenantHarness::create(name)?;
pub const TIMELINE_ID: TimelineId =
TimelineId::from_array(hex!("11223344556677881122334455667788"));
let (tenant, ctx) = harness.load().await;
let tline = tenant
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
.await?;
let tline = tline.raw_timeline().unwrap();
// First modification: insert two keys
let mut modification = tline.begin_modification(Lsn(0x1000));
modification.put_file("foo/bar1", b"content1", &ctx).await?;
modification.set_lsn(Lsn(0x1008))?;
modification.put_file("foo/bar2", b"content2", &ctx).await?;
modification.commit(&ctx).await?;
let expect_1008 = HashMap::from([
("foo/bar1".to_string(), Bytes::from_static(b"content1")),
("foo/bar2".to_string(), Bytes::from_static(b"content2")),
]);
let readback = tline.list_aux_files(Lsn(0x1008), &ctx).await?;
assert_eq!(readback, expect_1008);
// Second modification: update one key, remove the other
let mut modification = tline.begin_modification(Lsn(0x2000));
modification.put_file("foo/bar1", b"content3", &ctx).await?;
modification.set_lsn(Lsn(0x2008))?;
modification.put_file("foo/bar2", b"", &ctx).await?;
modification.commit(&ctx).await?;
let expect_2008 =
HashMap::from([("foo/bar1".to_string(), Bytes::from_static(b"content3"))]);
let readback = tline.list_aux_files(Lsn(0x2008), &ctx).await?;
assert_eq!(readback, expect_2008);
// Reading back in time works
let readback = tline.list_aux_files(Lsn(0x1008), &ctx).await?;
assert_eq!(readback, expect_1008);
Ok(())
}
//use super::repo_harness::*;
//use super::*;
/*
fn assert_current_logical_size<R: Repository>(timeline: &DatadirTimeline<R>, lsn: Lsn) {

View File

@@ -30,6 +30,10 @@
//! only a single tenant or timeline.
//!
// Clippy 1.60 incorrectly complains about the tokio::task_local!() macro.
// Silence it. See https://github.com/rust-lang/rust-clippy/issues/9224.
#![allow(clippy::declare_interior_mutable_const)]
use std::collections::HashMap;
use std::fmt;
use std::future::Future;
@@ -308,6 +312,7 @@ struct MutableTaskState {
}
struct PageServerTask {
#[allow(dead_code)] // unused currently
task_id: PageserverTaskId,
kind: TaskKind,

View File

@@ -25,7 +25,6 @@ use pageserver_api::shard::ShardIdentity;
use pageserver_api::shard::TenantShardId;
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeoutOrCancel;
use std::fmt;
use storage_broker::BrokerClientChannel;
use tokio::io::BufReader;
@@ -49,6 +48,7 @@ use self::config::AttachmentMode;
use self::config::LocationConf;
use self::config::TenantConf;
use self::delete::DeleteTenantFlow;
use self::metadata::LoadMetadataError;
use self::metadata::TimelineMetadata;
use self::mgr::GetActiveTenantError;
use self::mgr::GetTenantError;
@@ -76,6 +76,7 @@ use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::config::LocationMode;
use crate::tenant::config::TenantConfOpt;
use crate::tenant::metadata::load_metadata;
pub use crate::tenant::remote_timeline_client::index::IndexPart;
use crate::tenant::remote_timeline_client::remote_initdb_archive_path;
use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
@@ -92,6 +93,7 @@ use std::fmt::Debug;
use std::fmt::Display;
use std::fs;
use std::fs::File;
use std::io;
use std::ops::Bound::Included;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
@@ -485,6 +487,11 @@ impl From<std::io::Error> for InitdbError {
}
}
struct TenantDirectoryScan {
sorted_timelines_to_load: Vec<(TimelineId, TimelineMetadata)>,
timelines_to_resume_deletion: Vec<(TimelineId, Option<TimelineMetadata>)>,
}
enum CreateTimelineCause {
Load,
Delete,
@@ -920,7 +927,9 @@ impl Tenant {
timelines: HashMap::new(),
},
(None, SpawnMode::Normal) => {
anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624");
// Deprecated dev mode: load from local disk state instead of remote storage
// https://github.com/neondatabase/neon/issues/5624
return self.load_local(ctx).await;
}
};
@@ -1188,6 +1197,149 @@ impl Tenant {
))
}
fn scan_and_sort_timelines_dir(self: Arc<Tenant>) -> anyhow::Result<TenantDirectoryScan> {
let mut timelines_to_load: HashMap<TimelineId, TimelineMetadata> = HashMap::new();
// Note: timelines_to_resume_deletion needs to be separate because it may not be sortable
// from the point of view of `tree_sort_timelines`, i.e. some parents can be missing because deletion
// completed in non-topological order (for example because a parent had a smaller number of layer files in it).
let mut timelines_to_resume_deletion: Vec<(TimelineId, Option<TimelineMetadata>)> = vec![];
let timelines_dir = self.conf.timelines_path(&self.tenant_shard_id);
for entry in timelines_dir
.read_dir_utf8()
.context("list timelines directory for tenant")?
{
let entry = entry.context("read timeline dir entry")?;
let timeline_dir = entry.path();
if crate::is_temporary(timeline_dir) {
info!("Found temporary timeline directory, removing: {timeline_dir}");
if let Err(e) = std::fs::remove_dir_all(timeline_dir) {
error!("Failed to remove temporary directory '{timeline_dir}': {e:?}");
}
} else if is_uninit_mark(timeline_dir) {
if !timeline_dir.exists() {
warn!("Timeline dir entry become invalid: {timeline_dir}");
continue;
}
let timeline_uninit_mark_file = &timeline_dir;
info!(
"Found an uninit mark file {timeline_uninit_mark_file}, removing the timeline and its uninit mark",
);
let timeline_id =
TimelineId::try_from(timeline_uninit_mark_file.file_stem())
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline uninit mark name {timeline_uninit_mark_file}",
)
})?;
let timeline_dir = self.conf.timeline_path(&self.tenant_shard_id, &timeline_id);
if let Err(e) =
remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)
{
error!("Failed to clean up uninit marked timeline: {e:?}");
}
} else if crate::is_delete_mark(timeline_dir) {
// If metadata exists, load as usual, continue deletion
let timeline_id = TimelineId::try_from(timeline_dir.file_stem())
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline uninit mark name {timeline_dir}",
)
})?;
info!("Found deletion mark for timeline {}", timeline_id);
match load_metadata(self.conf, &self.tenant_shard_id, &timeline_id) {
Ok(metadata) => {
timelines_to_resume_deletion.push((timeline_id, Some(metadata)))
}
Err(e) => match &e {
LoadMetadataError::Read(r) => {
if r.kind() != io::ErrorKind::NotFound {
return Err(anyhow::anyhow!(e)).with_context(|| {
format!("Failed to load metadata for timeline_id {timeline_id}")
});
}
// If metadata doesn't exist it means that we've crashed without
// completing cleanup_remaining_timeline_fs_traces in DeleteTimelineFlow.
// So save timeline_id for a later call to `DeleteTimelineFlow::cleanup_remaining_timeline_fs_traces`.
// We can't do it here because the method is async, so we'd need block_on,
// and here we're in spawn_blocking. cleanup_remaining_timeline_fs_traces uses fs operations,
// so that basically results in a cycle:
// spawn_blocking
// - block_on
// - spawn_blocking
// which can lead to running out of threads in the blocking pool.
timelines_to_resume_deletion.push((timeline_id, None));
}
_ => {
return Err(anyhow::anyhow!(e)).with_context(|| {
format!("Failed to load metadata for timeline_id {timeline_id}")
})
}
},
}
} else {
if !timeline_dir.exists() {
warn!("Timeline dir entry become invalid: {timeline_dir}");
continue;
}
let timeline_id = TimelineId::try_from(timeline_dir.file_name())
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline dir name {timeline_dir}",
)
})?;
let timeline_uninit_mark_file = self
.conf
.timeline_uninit_mark_file_path(self.tenant_shard_id, timeline_id);
if timeline_uninit_mark_file.exists() {
info!(
%timeline_id,
"Found an uninit mark file, removing the timeline and its uninit mark",
);
if let Err(e) =
remove_timeline_and_uninit_mark(timeline_dir, &timeline_uninit_mark_file)
{
error!("Failed to clean up uninit marked timeline: {e:?}");
}
continue;
}
let timeline_delete_mark_file = self
.conf
.timeline_delete_mark_file_path(self.tenant_shard_id, timeline_id);
if timeline_delete_mark_file.exists() {
// Cleanup should be done in `is_delete_mark` branch above
continue;
}
let file_name = entry.file_name();
if let Ok(timeline_id) = file_name.parse::<TimelineId>() {
let metadata = load_metadata(self.conf, &self.tenant_shard_id, &timeline_id)
.context("failed to load metadata")?;
timelines_to_load.insert(timeline_id, metadata);
} else {
// A file or directory that doesn't look like a timeline ID
warn!("unexpected file or directory in timelines directory: {file_name}");
}
}
}
// Sort the array of timeline IDs into tree-order, so that parent comes before
// all its children.
tree_sort_timelines(timelines_to_load, |m| m.ancestor_timeline()).map(|sorted_timelines| {
TenantDirectoryScan {
sorted_timelines_to_load: sorted_timelines,
timelines_to_resume_deletion,
}
})
}
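For context, `tree_sort_timelines` has to order timelines so that every parent precedes its children; otherwise the ancestor lookup during load would fail. A minimal sketch of such an ordering, using plain string ids and an id-to-optional-ancestor map (illustrative only, not the helper used above):

```rust
// Minimal sketch: repeatedly emit entries whose ancestor has already been emitted,
// so parents always precede children. Plain String ids keep it self-contained.
use std::collections::HashMap;

fn tree_sort(mut pending: HashMap<String, Option<String>>) -> Result<Vec<String>, String> {
    let mut sorted: Vec<String> = Vec::with_capacity(pending.len());
    while !pending.is_empty() {
        let ready: Vec<String> = pending
            .iter()
            .filter(|(_, ancestor)| match ancestor {
                None => true,                  // root timeline, no parent needed
                Some(a) => sorted.contains(a), // parent already emitted
            })
            .map(|(id, _)| id.clone())
            .collect();
        if ready.is_empty() {
            return Err("cycle or missing ancestor in timeline set".to_string());
        }
        for id in ready {
            pending.remove(&id);
            sorted.push(id);
        }
    }
    Ok(sorted)
}
```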
async fn load_timeline_metadata(
self: &Arc<Tenant>,
timeline_ids: HashSet<TimelineId>,
@@ -1251,6 +1403,141 @@ impl Tenant {
Ok(timeline_preloads)
}
///
/// Background task to load in-memory data structures for this tenant, from
/// files on disk. Used at pageserver startup.
///
/// No background tasks are started as part of this routine.
async fn load_local(self: &Arc<Tenant>, ctx: &RequestContext) -> anyhow::Result<()> {
span::debug_assert_current_span_has_tenant_id();
debug!("loading tenant task");
// Load in-memory state to reflect the local files on disk
//
// Scan the directory, peek into the metadata file of each timeline, and
// collect a list of timelines and their ancestors.
let span = info_span!("blocking");
let cloned = Arc::clone(self);
let scan = tokio::task::spawn_blocking(move || {
let _g = span.entered();
cloned.scan_and_sort_timelines_dir()
})
.await
.context("load spawn_blocking")
.and_then(|res| res)?;
// FIXME original collect_timeline_files contained one more check:
// 1. "Timeline has no ancestor and no layer files"
// Process loadable timelines first
for (timeline_id, local_metadata) in scan.sorted_timelines_to_load {
if let Err(e) = self
.load_local_timeline(timeline_id, local_metadata, ctx, false)
.await
{
match e {
LoadLocalTimelineError::Load(source) => {
return Err(anyhow::anyhow!(source)).with_context(|| {
format!("Failed to load local timeline: {timeline_id}")
})
}
LoadLocalTimelineError::ResumeDeletion(source) => {
// Make sure resumed deletion won't fail loading for the entire tenant.
error!("Failed to resume timeline deletion: {source:#}")
}
}
}
}
// Resume deletion ones with deleted_mark
for (timeline_id, maybe_local_metadata) in scan.timelines_to_resume_deletion {
match maybe_local_metadata {
None => {
// See comment in `scan_and_sort_timelines_dir`.
if let Err(e) =
DeleteTimelineFlow::cleanup_remaining_timeline_fs_traces(self, timeline_id)
.await
{
warn!(
"cannot clean up deleted timeline dir timeline_id: {} error: {:#}",
timeline_id, e
);
}
}
Some(local_metadata) => {
if let Err(e) = self
.load_local_timeline(timeline_id, local_metadata, ctx, true)
.await
{
match e {
LoadLocalTimelineError::Load(source) => {
// We tried to load deleted timeline, this is a bug.
return Err(anyhow::anyhow!(source).context(
format!("This is a bug. We tried to load deleted timeline which is wrong and loading failed. Timeline: {timeline_id}")
));
}
LoadLocalTimelineError::ResumeDeletion(source) => {
// Make sure resumed deletion won't fail loading for the entire tenant.
error!("Failed to resume timeline deletion: {source:#}")
}
}
}
}
}
}
trace!("Done");
Ok(())
}
/// Subroutine of `load_tenant`, to load an individual timeline
///
/// NB: The parent is assumed to be already loaded!
#[instrument(skip(self, local_metadata, ctx))]
async fn load_local_timeline(
self: &Arc<Self>,
timeline_id: TimelineId,
local_metadata: TimelineMetadata,
ctx: &RequestContext,
found_delete_mark: bool,
) -> Result<(), LoadLocalTimelineError> {
span::debug_assert_current_span_has_tenant_id();
let resources = self.build_timeline_resources(timeline_id);
if found_delete_mark {
// There is no remote client, we found local metadata.
// Continue cleaning up local disk.
DeleteTimelineFlow::resume_deletion(
Arc::clone(self),
timeline_id,
&local_metadata,
None,
self.deletion_queue_client.clone(),
)
.await
.context("resume deletion")
.map_err(LoadLocalTimelineError::ResumeDeletion)?;
return Ok(());
}
let ancestor = if let Some(ancestor_timeline_id) = local_metadata.ancestor_timeline() {
let ancestor_timeline = self.get_timeline(ancestor_timeline_id, false)
.with_context(|| anyhow::anyhow!("cannot find ancestor timeline {ancestor_timeline_id} for timeline {timeline_id}"))
.map_err(LoadLocalTimelineError::Load)?;
Some(ancestor_timeline)
} else {
None
};
self.timeline_init_and_sync(timeline_id, resources, None, local_metadata, ancestor, ctx)
.await
.map_err(LoadLocalTimelineError::Load)
}
pub(crate) fn tenant_shard_id(&self) -> TenantShardId {
self.tenant_shard_id
}
@@ -2082,7 +2369,7 @@ impl Tenant {
generation: self.generation.into(),
secondary_conf: None,
shard_number: self.shard_identity.number.0,
shard_count: self.shard_identity.count.literal(),
shard_count: self.shard_identity.count.0,
shard_stripe_size: self.shard_identity.stripe_size.0,
tenant_conf: tenant_config,
}
@@ -2988,7 +3275,7 @@ impl Tenant {
/// For unit tests, make this visible so that other modules can directly create timelines
#[cfg(test)]
#[tracing::instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), %timeline_id))]
#[tracing::instrument(fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), %timeline_id))]
pub(crate) async fn bootstrap_timeline_test(
&self,
timeline_id: TimelineId,
@@ -3052,7 +3339,7 @@ impl Tenant {
&self.cancel,
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.ok_or_else(|| anyhow::anyhow!("Cancelled"))
.and_then(|x| x)
}
@@ -3102,10 +3389,8 @@ impl Tenant {
);
let dest_path =
&remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &timeline_id);
// if this fails, it will get retried by repeated control plane requests
storage
.copy_object(source_path, dest_path, &self.cancel)
.copy_object(source_path, dest_path)
.await
.context("copy initdb tar")?;
}
@@ -3499,6 +3784,29 @@ impl Tenant {
}
}
fn remove_timeline_and_uninit_mark(
timeline_dir: &Utf8Path,
uninit_mark: &Utf8Path,
) -> anyhow::Result<()> {
fs::remove_dir_all(timeline_dir)
.or_else(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
// we can leave the uninit mark without a timeline dir,
// just remove the mark then
Ok(())
} else {
Err(e)
}
})
.with_context(|| {
format!("Failed to remove unit marked timeline directory {timeline_dir}")
})?;
fs::remove_file(uninit_mark)
.with_context(|| format!("Failed to remove timeline uninit mark file {uninit_mark}"))?;
Ok(())
}
/// Create the cluster temporarily in 'initdbpath' directory inside the repository
/// to get bootstrap data for timeline initialization.
async fn run_initdb(
@@ -3606,7 +3914,6 @@ pub(crate) mod harness {
use utils::lsn::Lsn;
use crate::deletion_queue::mock::MockDeletionQueue;
use crate::walredo::apply_neon;
use crate::{
config::PageServerConf, repository::Key, tenant::Tenant, walrecord::NeonWalRecord,
};
@@ -3622,7 +3929,8 @@ pub(crate) mod harness {
TimelineId::from_array(hex!("AA223344556677881122334455667788"));
/// Convenience function to create a page image with given string as the only content
pub fn test_img(s: &str) -> Bytes {
#[allow(non_snake_case)]
pub fn TEST_IMG(s: &str) -> Bytes {
let mut buf = BytesMut::new();
buf.extend_from_slice(s.as_bytes());
buf.resize(64, 0);
@@ -3658,6 +3966,13 @@ pub(crate) mod harness {
}
}
#[cfg(test)]
#[derive(Debug)]
enum LoadMode {
Local,
Remote,
}
pub struct TenantHarness {
pub conf: &'static PageServerConf,
pub tenant_conf: TenantConf,
@@ -3715,7 +4030,6 @@ pub(crate) mod harness {
std::fs::create_dir_all(&remote_fs_dir).unwrap();
let config = RemoteStorageConfig {
storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
};
let remote_storage = GenericRemoteStorage::from_config(&config).unwrap();
let deletion_queue = MockDeletionQueue::new(Some(remote_storage.clone()));
@@ -3739,17 +4053,42 @@ pub(crate) mod harness {
pub(crate) async fn load(&self) -> (Arc<Tenant>, RequestContext) {
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
(
self.do_try_load(&ctx)
self.try_load(&ctx)
.await
.expect("failed to load test tenant"),
ctx,
)
}
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
pub(crate) async fn do_try_load(
/// For tests that specifically want to exercise the local load path, which does
/// not use remote storage.
pub(crate) async fn try_load_local(
&self,
ctx: &RequestContext,
) -> anyhow::Result<Arc<Tenant>> {
self.do_try_load(ctx, LoadMode::Local).await
}
/// The 'load' in this function is either a local load or a normal attachment, depending on whether remote storage is empty.
pub(crate) async fn try_load(&self, ctx: &RequestContext) -> anyhow::Result<Arc<Tenant>> {
// If we have nothing in remote storage, we must use load_local instead of attach: attach
// will error out if there are no timelines.
//
// See https://github.com/neondatabase/neon/issues/5456 for how we will eliminate
// this weird state of a Tenant which exists but doesn't have any timelines.
let mode = match self.remote_empty() {
true => LoadMode::Local,
false => LoadMode::Remote,
};
self.do_try_load(ctx, mode).await
}
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), ?mode))]
async fn do_try_load(
&self,
ctx: &RequestContext,
mode: LoadMode,
) -> anyhow::Result<Arc<Tenant>> {
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
@@ -3770,10 +4109,17 @@ pub(crate) mod harness {
self.deletion_queue.new_client(),
));
let preload = tenant
.preload(&self.remote_storage, CancellationToken::new())
.await?;
tenant.attach(Some(preload), SpawnMode::Normal, ctx).await?;
match mode {
LoadMode::Local => {
tenant.load_local(ctx).await?;
}
LoadMode::Remote => {
let preload = tenant
.preload(&self.remote_storage, CancellationToken::new())
.await?;
tenant.attach(Some(preload), SpawnMode::Normal, ctx).await?;
}
}
tenant.state.send_replace(TenantState::Active);
for timeline in tenant.timelines.lock().unwrap().values() {
@@ -3782,6 +4128,31 @@ pub(crate) mod harness {
Ok(tenant)
}
fn remote_empty(&self) -> bool {
let tenant_path = self.conf.tenant_path(&self.tenant_shard_id);
let remote_tenant_dir = self
.remote_fs_dir
.join(tenant_path.strip_prefix(&self.conf.workdir).unwrap());
if std::fs::metadata(&remote_tenant_dir).is_err() {
return true;
}
match std::fs::read_dir(remote_tenant_dir)
.unwrap()
.flatten()
.next()
{
Some(entry) => {
tracing::debug!(
"remote_empty: not empty, found file {}",
entry.file_name().to_string_lossy(),
);
false
}
None => true,
}
}
pub fn timeline_path(&self, timeline_id: &TimelineId) -> Utf8PathBuf {
self.conf.timeline_path(&self.tenant_shard_id, timeline_id)
}
@@ -3802,33 +4173,20 @@ pub(crate) mod harness {
records: Vec<(Lsn, NeonWalRecord)>,
_pg_version: u32,
) -> anyhow::Result<Bytes> {
let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1));
if records_neon {
// For Neon wal records, we can decode without spawning postgres, so do so.
let base_img = base_img.expect("Neon WAL redo requires base image").1;
let mut page = BytesMut::new();
page.extend_from_slice(&base_img);
for (_record_lsn, record) in records {
apply_neon::apply_in_neon(&record, key, &mut page)?;
}
Ok(page.freeze())
} else {
// We never spawn a postgres walredo process in unit tests: just log what we might have done.
let s = format!(
"redo for {} to get to {}, with {} and {} records",
key,
lsn,
if base_img.is_some() {
"base image"
} else {
"no base image"
},
records.len()
);
println!("{s}");
let s = format!(
"redo for {} to get to {}, with {} and {} records",
key,
lsn,
if base_img.is_some() {
"base image"
} else {
"no base image"
},
records.len()
);
println!("{s}");
Ok(test_img(&s))
}
Ok(TEST_IMG(&s))
}
}
}
@@ -3840,6 +4198,7 @@ mod tests {
use crate::repository::{Key, Value};
use crate::tenant::harness::*;
use crate::DEFAULT_PG_VERSION;
use crate::METADATA_FILE_NAME;
use bytes::BytesMut;
use hex_literal::hex;
use once_cell::sync::Lazy;
@@ -3861,7 +4220,7 @@ mod tests {
.put(
*TEST_KEY,
Lsn(0x10),
&Value::Image(test_img("foo at 0x10")),
&Value::Image(TEST_IMG("foo at 0x10")),
&ctx,
)
.await?;
@@ -3873,7 +4232,7 @@ mod tests {
.put(
*TEST_KEY,
Lsn(0x20),
&Value::Image(test_img("foo at 0x20")),
&Value::Image(TEST_IMG("foo at 0x20")),
&ctx,
)
.await?;
@@ -3882,15 +4241,15 @@ mod tests {
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x10), &ctx).await?,
test_img("foo at 0x10")
TEST_IMG("foo at 0x10")
);
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x1f), &ctx).await?,
test_img("foo at 0x10")
TEST_IMG("foo at 0x10")
);
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x20), &ctx).await?,
test_img("foo at 0x20")
TEST_IMG("foo at 0x20")
);
Ok(())
@@ -3999,6 +4358,7 @@ mod tests {
ctx: &RequestContext,
) -> anyhow::Result<()> {
let mut lsn = start_lsn;
#[allow(non_snake_case)]
{
let writer = tline.writer().await;
// Create a relation on the timeline
@@ -4006,7 +4366,7 @@ mod tests {
.put(
*TEST_KEY,
lsn,
&Value::Image(test_img(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
ctx,
)
.await?;
@@ -4016,7 +4376,7 @@ mod tests {
.put(
*TEST_KEY,
lsn,
&Value::Image(test_img(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
ctx,
)
.await?;
@@ -4030,7 +4390,7 @@ mod tests {
.put(
*TEST_KEY,
lsn,
&Value::Image(test_img(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
ctx,
)
.await?;
@@ -4040,7 +4400,7 @@ mod tests {
.put(
*TEST_KEY,
lsn,
&Value::Image(test_img(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
ctx,
)
.await?;
@@ -4195,7 +4555,7 @@ mod tests {
// Broken, as long as you don't need to access data from the parent.
assert_eq!(
newtline.get(*TEST_KEY, Lsn(0x70), &ctx).await?,
test_img(&format!("foo at {}", Lsn(0x70)))
TEST_IMG(&format!("foo at {}", Lsn(0x70)))
);
// This needs to traverse to the parent, and fails.
@@ -4272,7 +4632,7 @@ mod tests {
// Check that the data is still accessible on the branch.
assert_eq!(
newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await?,
test_img(&format!("foo at {}", Lsn(0x40)))
TEST_IMG(&format!("foo at {}", Lsn(0x40)))
);
Ok(())
@@ -4381,6 +4741,60 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn corrupt_local_metadata() -> anyhow::Result<()> {
const TEST_NAME: &str = "corrupt_metadata";
let harness = TenantHarness::create(TEST_NAME)?;
let (tenant, ctx) = harness.load().await;
let tline = tenant
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
.await?;
drop(tline);
// so that all uploads finish & we can call harness.try_load_local() below again
tenant
.shutdown(Default::default(), true)
.instrument(harness.span())
.await
.ok()
.unwrap();
drop(tenant);
// Corrupt local metadata
let metadata_path = harness.timeline_path(&TIMELINE_ID).join(METADATA_FILE_NAME);
assert!(metadata_path.is_file());
let mut metadata_bytes = std::fs::read(&metadata_path)?;
assert_eq!(metadata_bytes.len(), 512);
metadata_bytes[8] ^= 1;
std::fs::write(metadata_path, metadata_bytes)?;
let err = harness.try_load_local(&ctx).await.expect_err("should fail");
// format the whole error chain with all .context, not only the last one
let message = format!("{err:#}");
let expected = "failed to load metadata";
assert!(
message.contains(expected),
"message '{message}' expected to contain {expected}"
);
let mut found_error_message = false;
let mut err_source = err.source();
while let Some(source) = err_source {
if source.to_string().contains("metadata checksum mismatch") {
found_error_message = true;
break;
}
err_source = source.source();
}
assert!(
found_error_message,
"didn't find the corrupted metadata error in {}",
message
);
Ok(())
}
#[tokio::test]
async fn test_images() -> anyhow::Result<()> {
let (tenant, ctx) = TenantHarness::create("test_images")?.load().await;
@@ -4393,7 +4807,7 @@ mod tests {
.put(
*TEST_KEY,
Lsn(0x10),
&Value::Image(test_img("foo at 0x10")),
&Value::Image(TEST_IMG("foo at 0x10")),
&ctx,
)
.await?;
@@ -4410,7 +4824,7 @@ mod tests {
.put(
*TEST_KEY,
Lsn(0x20),
&Value::Image(test_img("foo at 0x20")),
&Value::Image(TEST_IMG("foo at 0x20")),
&ctx,
)
.await?;
@@ -4427,7 +4841,7 @@ mod tests {
.put(
*TEST_KEY,
Lsn(0x30),
&Value::Image(test_img("foo at 0x30")),
&Value::Image(TEST_IMG("foo at 0x30")),
&ctx,
)
.await?;
@@ -4444,7 +4858,7 @@ mod tests {
.put(
*TEST_KEY,
Lsn(0x40),
&Value::Image(test_img("foo at 0x40")),
&Value::Image(TEST_IMG("foo at 0x40")),
&ctx,
)
.await?;
@@ -4458,23 +4872,23 @@ mod tests {
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x10), &ctx).await?,
test_img("foo at 0x10")
TEST_IMG("foo at 0x10")
);
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x1f), &ctx).await?,
test_img("foo at 0x10")
TEST_IMG("foo at 0x10")
);
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x20), &ctx).await?,
test_img("foo at 0x20")
TEST_IMG("foo at 0x20")
);
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x30), &ctx).await?,
test_img("foo at 0x30")
TEST_IMG("foo at 0x30")
);
assert_eq!(
tline.get(*TEST_KEY, Lsn(0x40), &ctx).await?,
test_img("foo at 0x40")
TEST_IMG("foo at 0x40")
);
Ok(())
@@ -4506,7 +4920,7 @@ mod tests {
.put(
test_key,
lsn,
&Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&ctx,
)
.await?;
@@ -4568,7 +4982,7 @@ mod tests {
.put(
test_key,
lsn,
&Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&ctx,
)
.await?;
@@ -4589,7 +5003,7 @@ mod tests {
.put(
test_key,
lsn,
&Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&ctx,
)
.await?;
@@ -4603,7 +5017,7 @@ mod tests {
test_key.field6 = blknum as u32;
assert_eq!(
tline.get(test_key, lsn, &ctx).await?,
test_img(&format!("{} at {}", blknum, last_lsn))
TEST_IMG(&format!("{} at {}", blknum, last_lsn))
);
}
@@ -4657,7 +5071,7 @@ mod tests {
.put(
test_key,
lsn,
&Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&ctx,
)
.await?;
@@ -4686,7 +5100,7 @@ mod tests {
.put(
test_key,
lsn,
&Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&ctx,
)
.await?;
@@ -4701,7 +5115,7 @@ mod tests {
test_key.field6 = blknum as u32;
assert_eq!(
tline.get(test_key, lsn, &ctx).await?,
test_img(&format!("{} at {}", blknum, last_lsn))
TEST_IMG(&format!("{} at {}", blknum, last_lsn))
);
}
@@ -4763,7 +5177,7 @@ mod tests {
.put(
test_key,
lsn,
&Value::Image(test_img(&format!("{} {} at {}", idx, blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
&ctx,
)
.await?;
@@ -4785,7 +5199,7 @@ mod tests {
test_key.field6 = blknum as u32;
assert_eq!(
tline.get(test_key, *lsn, &ctx).await?,
test_img(&format!("{idx} {blknum} at {lsn}"))
TEST_IMG(&format!("{idx} {blknum} at {lsn}"))
);
}
}

View File

@@ -251,7 +251,7 @@ impl LocationConf {
} else {
ShardIdentity::new(
ShardNumber(conf.shard_number),
ShardCount::new(conf.shard_count),
ShardCount(conf.shard_count),
ShardStripeSize(conf.shard_stripe_size),
)?
};

View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use pageserver_api::{models::TenantState, shard::TenantShardId};
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use remote_storage::{GenericRemoteStorage, RemotePath};
use tokio::sync::OwnedMutexGuard;
use tokio_util::sync::CancellationToken;
use tracing::{error, instrument, Instrument};
@@ -84,17 +84,17 @@ async fn create_remote_delete_mark(
let data = bytes::Bytes::from_static(data);
let stream = futures::stream::once(futures::future::ready(Ok(data)));
remote_storage
.upload(stream, 0, &remote_mark_path, None, cancel)
.upload(stream, 0, &remote_mark_path, None)
.await
},
TimeoutOrCancel::caused_by_cancel,
|_e| false,
FAILED_UPLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"mark_upload",
cancel,
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.ok_or_else(|| anyhow::anyhow!("Cancelled"))
.and_then(|x| x)
.context("mark_upload")?;
@@ -184,15 +184,15 @@ async fn remove_tenant_remote_delete_mark(
if let Some(remote_storage) = remote_storage {
let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
backoff::retry(
|| async { remote_storage.delete(&path, cancel).await },
TimeoutOrCancel::caused_by_cancel,
|| async { remote_storage.delete(&path).await },
|_e| false,
FAILED_UPLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"remove_tenant_remote_delete_mark",
cancel,
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.ok_or_else(|| anyhow::anyhow!("Cancelled"))
.and_then(|x| x)
.context("remove_tenant_remote_delete_mark")?;
}

View File

@@ -36,6 +36,7 @@ use crate::{
pub const VALUE_SZ: usize = 5;
pub const MAX_VALUE: u64 = 0x007f_ffff_ffff;
#[allow(dead_code)]
pub const PAGE_SZ: usize = 8192;
#[derive(Clone, Copy, Debug)]

View File

@@ -6,7 +6,6 @@ use crate::context::RequestContext;
use crate::page_cache::{self, PAGE_SZ};
use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReader};
use crate::virtual_file::{self, VirtualFile};
use bytes::BytesMut;
use camino::Utf8PathBuf;
use pageserver_api::shard::TenantShardId;
use std::cmp::min;
@@ -27,10 +26,7 @@ pub struct EphemeralFile {
/// An ephemeral file is append-only.
/// We keep the last page, which can still be modified, in [`Self::mutable_tail`].
/// The other pages, which can no longer be modified, are accessed through the page cache.
///
/// None <=> IO is ongoing.
/// Size is fixed to PAGE_SZ at creation time and must not be changed.
mutable_tail: Option<BytesMut>,
mutable_tail: [u8; PAGE_SZ],
}
impl EphemeralFile {
@@ -64,7 +60,7 @@ impl EphemeralFile {
_timeline_id: timeline_id,
file,
len: 0,
mutable_tail: Some(BytesMut::zeroed(PAGE_SZ)),
mutable_tail: [0u8; PAGE_SZ],
})
}
@@ -107,13 +103,7 @@ impl EphemeralFile {
};
} else {
debug_assert_eq!(blknum as u64, self.len / PAGE_SZ as u64);
Ok(BlockLease::EphemeralFileMutableTail(
self.mutable_tail
.as_deref()
.expect("we're not doing IO, it must be Some()")
.try_into()
.expect("we ensure that it's always PAGE_SZ"),
))
Ok(BlockLease::EphemeralFileMutableTail(&self.mutable_tail))
}
}
@@ -145,27 +135,21 @@ impl EphemeralFile {
) -> Result<(), io::Error> {
let mut src_remaining = src;
while !src_remaining.is_empty() {
let dst_remaining = &mut self
.ephemeral_file
.mutable_tail
.as_deref_mut()
.expect("IO is not yet ongoing")[self.off..];
let dst_remaining = &mut self.ephemeral_file.mutable_tail[self.off..];
let n = min(dst_remaining.len(), src_remaining.len());
dst_remaining[..n].copy_from_slice(&src_remaining[..n]);
self.off += n;
src_remaining = &src_remaining[n..];
if self.off == PAGE_SZ {
let mutable_tail = std::mem::take(&mut self.ephemeral_file.mutable_tail)
.expect("IO is not yet ongoing");
let (mutable_tail, res) = self
match self
.ephemeral_file
.file
.write_all_at(mutable_tail, self.blknum as u64 * PAGE_SZ as u64)
.await;
// TODO: If we panic before we can put the mutable_tail back, subsequent calls will fail.
// I.e., the IO isn't retryable if we panic.
self.ephemeral_file.mutable_tail = Some(mutable_tail);
match res {
.write_all_at(
&self.ephemeral_file.mutable_tail,
self.blknum as u64 * PAGE_SZ as u64,
)
.await
{
Ok(_) => {
// Pre-warm the page cache with what we just wrote.
// This isn't necessary for coherency/correctness, but it's how we've always done it.
@@ -185,12 +169,7 @@ impl EphemeralFile {
Ok(page_cache::ReadBufResult::NotFound(mut write_guard)) => {
let buf: &mut [u8] = write_guard.deref_mut();
debug_assert_eq!(buf.len(), PAGE_SZ);
buf.copy_from_slice(
self.ephemeral_file
.mutable_tail
.as_deref()
.expect("IO is not ongoing"),
);
buf.copy_from_slice(&self.ephemeral_file.mutable_tail);
let _ = write_guard.mark_valid();
// pre-warm successful
}
@@ -202,11 +181,7 @@ impl EphemeralFile {
// Zero the buffer for re-use.
// Zeroing is critical for correctness because the write_blob code below
// and similarly read_blk expect zeroed pages.
self.ephemeral_file
.mutable_tail
.as_deref_mut()
.expect("IO is not ongoing")
.fill(0);
self.ephemeral_file.mutable_tail.fill(0);
// This block is done, move to next one.
self.blknum += 1;
self.off = 0;
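The reverted `mutable_tail` above is a fixed `PAGE_SZ` staging buffer: writes fill it, and once full it is written out at the current block number and zeroed for reuse. A self-contained sketch of that append pattern, assuming a caller-supplied `write_page` callback in place of the pageserver's `VirtualFile` API:

```rust
// Illustrative only: `write_page` stands in for the real VirtualFile write path,
// and the page cache pre-warming is omitted.
const PAGE_SZ: usize = 8192;

struct TailBuffer {
    tail: [u8; PAGE_SZ], // fixed-size mutable tail, always exactly one page
    off: usize,          // how much of the tail is filled
    next_page: u64,      // block number the tail will be flushed to
}

impl TailBuffer {
    fn push(&mut self, mut src: &[u8], mut write_page: impl FnMut(u64, &[u8; PAGE_SZ])) {
        while !src.is_empty() {
            let n = (PAGE_SZ - self.off).min(src.len());
            self.tail[self.off..self.off + n].copy_from_slice(&src[..n]);
            self.off += n;
            src = &src[n..];
            if self.off == PAGE_SZ {
                write_page(self.next_page, &self.tail); // flush the now-immutable page
                self.tail.fill(0); // readers of a partially filled page expect zeroes
                self.next_page += 1;
                self.off = 0;
            }
        }
    }
}
```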

View File

@@ -294,6 +294,17 @@ pub enum LoadMetadataError {
Decode(#[from] anyhow::Error),
}
pub fn load_metadata(
conf: &'static PageServerConf,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
) -> Result<TimelineMetadata, LoadMetadataError> {
let metadata_path = conf.metadata_path(tenant_shard_id, timeline_id);
let metadata_bytes = std::fs::read(metadata_path)?;
Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -2,7 +2,6 @@
//! page server.
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use futures::stream::StreamExt;
use itertools::Itertools;
use pageserver_api::key::Key;
use pageserver_api::models::ShardParameters;
@@ -794,7 +793,7 @@ pub(crate) async fn set_new_tenant_config(
info!("configuring tenant {tenant_id}");
let tenant = get_tenant(tenant_shard_id, true)?;
if !tenant.tenant_shard_id().shard_count.is_unsharded() {
if tenant.tenant_shard_id().shard_count > ShardCount(0) {
// Note that we use ShardParameters::default below.
return Err(SetNewTenantConfigError::Other(anyhow::anyhow!(
"This API may only be used on single-sharded tenants, use the /location_config API for sharded tenants"
@@ -1376,7 +1375,7 @@ impl TenantManager {
result
}
#[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), new_shard_count=%new_shard_count.literal()))]
#[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), new_shard_count=%new_shard_count.0))]
pub(crate) async fn shard_split(
&self,
tenant_shard_id: TenantShardId,
@@ -1386,10 +1385,11 @@ impl TenantManager {
let tenant = get_tenant(tenant_shard_id, true)?;
// Plan: identify what the new child shards will be
if new_shard_count.count() <= tenant_shard_id.shard_count.count() {
let effective_old_shard_count = std::cmp::max(tenant_shard_id.shard_count.0, 1);
if new_shard_count <= ShardCount(effective_old_shard_count) {
anyhow::bail!("Requested shard count is not an increase");
}
let expansion_factor = new_shard_count.count() / tenant_shard_id.shard_count.count();
let expansion_factor = new_shard_count.0 / effective_old_shard_count;
if !expansion_factor.is_power_of_two() {
anyhow::bail!("Requested split is not a power of two");
}
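To make the arithmetic above concrete: splitting 2 shards into 8 gives an expansion factor of 4 (a power of two, accepted), 2 into 6 gives 3 (rejected), and an unsharded tenant reports a count of 0 that is treated as 1. A small sketch with plain integers instead of `ShardCount`:

```rust
// Plain-integer illustration of the check above; the ShardCount newtype is omitted.
fn split_is_valid(old_count: u8, new_count: u8) -> bool {
    let effective_old = old_count.max(1); // an unsharded tenant reports a count of 0
    new_count > effective_old && (new_count / effective_old).is_power_of_two()
}

fn main() {
    assert!(split_is_valid(2, 8));  // expansion factor 4: accepted
    assert!(!split_is_valid(2, 6)); // expansion factor 3: rejected
    assert!(split_is_valid(0, 4));  // unsharded parent split into 4 children
}
```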
@@ -1439,10 +1439,8 @@ impl TenantManager {
}
};
// Optimization: hardlink layers from the parent into the children, so that they don't have to
// re-download & duplicate the data referenced in their initial IndexPart
self.shard_split_hardlink(parent, child_shards.clone())
.await?;
// TODO: hardlink layers from the parent into the child shard directories so that they don't immediately re-download
// TODO: erase the dentries from the parent
// Take a snapshot of where the parent's WAL ingest had got to: we will wait for
// child shards to reach this point.
@@ -1481,11 +1479,10 @@ impl TenantManager {
// Phase 4: wait for child shards' WAL ingest to catch up to target LSN
for child_shard_id in &child_shards {
let child_shard_id = *child_shard_id;
let child_shard = {
let locked = TENANTS.read().unwrap();
let peek_slot =
tenant_map_peek_slot(&locked, &child_shard_id, TenantSlotPeekMode::Read)?;
tenant_map_peek_slot(&locked, child_shard_id, TenantSlotPeekMode::Read)?;
peek_slot.and_then(|s| s.get_attached()).cloned()
};
if let Some(t) = child_shard {
@@ -1520,7 +1517,7 @@ impl TenantManager {
}
}
// Phase 5: Shut down the parent shard, and erase it from disk
// Phase 5: Shut down the parent shard.
let (_guard, progress) = completion::channel();
match parent.shutdown(progress, false).await {
Ok(()) => {}
@@ -1528,24 +1525,6 @@ impl TenantManager {
other.wait().await;
}
}
let local_tenant_directory = self.conf.tenant_path(&tenant_shard_id);
let tmp_path = safe_rename_tenant_dir(&local_tenant_directory)
.await
.with_context(|| format!("local tenant directory {local_tenant_directory:?} rename"))?;
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::MgmtRequest,
None,
None,
"tenant_files_delete",
false,
async move {
fs::remove_dir_all(tmp_path.as_path())
.await
.with_context(|| format!("tenant directory {:?} deletion", tmp_path))
},
);
parent_slot_guard.drop_old_value()?;
// Phase 6: Release the InProgress on the parent shard
@@ -1553,130 +1532,6 @@ impl TenantManager {
Ok(child_shards)
}
/// Part of [`Self::shard_split`]: hard link parent shard layers into child shards, as an optimization
/// to avoid the children downloading them again.
///
/// For each resident layer in the parent shard, we will hard link it into all of the child shards.
async fn shard_split_hardlink(
&self,
parent_shard: &Tenant,
child_shards: Vec<TenantShardId>,
) -> anyhow::Result<()> {
debug_assert_current_span_has_tenant_id();
let parent_path = self.conf.tenant_path(parent_shard.get_tenant_shard_id());
let (parent_timelines, parent_layers) = {
let mut parent_layers = Vec::new();
let timelines = parent_shard.timelines.lock().unwrap().clone();
let parent_timelines = timelines.keys().cloned().collect::<Vec<_>>();
for timeline in timelines.values() {
let timeline_layers = timeline
.layers
.read()
.await
.resident_layers()
.collect::<Vec<_>>()
.await;
for layer in timeline_layers {
let relative_path = layer
.local_path()
.strip_prefix(&parent_path)
.context("Removing prefix from parent layer path")?;
parent_layers.push(relative_path.to_owned());
}
}
debug_assert!(
!parent_layers.is_empty(),
"shutdown cannot empty the layermap"
);
(parent_timelines, parent_layers)
};
let mut child_prefixes = Vec::new();
let mut create_dirs = Vec::new();
for child in child_shards {
let child_prefix = self.conf.tenant_path(&child);
create_dirs.push(child_prefix.clone());
create_dirs.extend(
parent_timelines
.iter()
.map(|t| self.conf.timeline_path(&child, t)),
);
child_prefixes.push(child_prefix);
}
// Since we will do a large number of small filesystem metadata operations, batch them into
// spawn_blocking calls rather than doing each one as a tokio::fs round-trip.
let jh = tokio::task::spawn_blocking(move || -> anyhow::Result<usize> {
for dir in &create_dirs {
if let Err(e) = std::fs::create_dir_all(dir) {
// Ignore AlreadyExists errors, drop out on all other errors
match e.kind() {
std::io::ErrorKind::AlreadyExists => {}
_ => {
return Err(anyhow::anyhow!(e).context(format!("Creating {dir}")));
}
}
}
}
for child_prefix in child_prefixes {
for relative_layer in &parent_layers {
let parent_path = parent_path.join(relative_layer);
let child_path = child_prefix.join(relative_layer);
if let Err(e) = std::fs::hard_link(&parent_path, &child_path) {
match e.kind() {
std::io::ErrorKind::AlreadyExists => {}
std::io::ErrorKind::NotFound => {
tracing::info!(
"Layer {} not found during hard-linking, evicted during split?",
relative_layer
);
}
_ => {
return Err(anyhow::anyhow!(e).context(format!(
"Hard linking {relative_layer} into {child_prefix}"
)))
}
}
}
}
}
// Durability is not required for correctness, but if we crashed during split and
// then restarted with empty timeline dirs, it would be very inefficient to
// re-populate from remote storage.
for dir in create_dirs {
if let Err(e) = crashsafe::fsync(&dir) {
// Something removed a newly created timeline dir out from underneath us? Extremely
// unexpected, but not worth panic'ing over as this whole function is just an
// optimization.
tracing::warn!("Failed to fsync directory {dir}: {e}")
}
}
Ok(parent_layers.len())
});
match jh.await {
Ok(Ok(layer_count)) => {
tracing::info!(count = layer_count, "Hard linked layers into child shards");
}
Ok(Err(e)) => {
// This is an optimization, so we tolerate failure.
tracing::warn!("Error hard-linking layers, proceeding anyway: {e}")
}
Err(e) => {
// This is something totally unexpected like a panic, so bail out.
anyhow::bail!("Error joining hard linking task: {e}");
}
}
Ok(())
}
}
#[derive(Debug, thiserror::Error)]

View File

@@ -196,12 +196,14 @@ pub(crate) use upload::upload_initdb_dir;
use utils::backoff::{
self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
};
use utils::timeout::{timeout_cancellable, TimeoutCancellableError};
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
use std::ops::DerefMut;
use tracing::{debug, error, info, instrument, warn};
use tracing::{info_span, Instrument};
@@ -261,6 +263,11 @@ pub(crate) const INITDB_PRESERVED_PATH: &str = "initdb-preserved.tar.zst";
/// Default buffer size when interfacing with [`tokio::fs::File`].
pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
/// This timeout is intended to deal with hangs in lower layers, e.g. stuck TCP flows. It is not
/// intended to be snappy enough for prompt shutdown, as we have a CancellationToken for that.
pub(crate) const UPLOAD_TIMEOUT: Duration = Duration::from_secs(120);
pub(crate) const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(120);
pub enum MaybeDeletedIndexPart {
IndexPart(IndexPart),
Deleted(IndexPart),
@@ -324,6 +331,40 @@ pub struct RemoteTimelineClient {
cancel: CancellationToken,
}
/// Wrapper for timeout_cancellable that flattens result and converts TimeoutCancellableError to anyhow.
///
/// This is a convenience for the various upload functions. In future
/// the anyhow::Error result should be replaced with a more structured type that
/// enables callers to avoid handling shutdown as an error.
async fn upload_cancellable<F>(cancel: &CancellationToken, future: F) -> anyhow::Result<()>
where
F: std::future::Future<Output = anyhow::Result<()>>,
{
match timeout_cancellable(UPLOAD_TIMEOUT, cancel, future).await {
Ok(Ok(())) => Ok(()),
Ok(Err(e)) => Err(e),
Err(TimeoutCancellableError::Timeout) => Err(anyhow::anyhow!("Timeout")),
Err(TimeoutCancellableError::Cancelled) => Err(anyhow::anyhow!("Shutting down")),
}
}
/// Wrapper for timeout_cancellable that flattens the result and converts TimeoutCancellableError to DownloadError.
async fn download_cancellable<F, R>(
cancel: &CancellationToken,
future: F,
) -> Result<R, DownloadError>
where
F: std::future::Future<Output = Result<R, DownloadError>>,
{
match timeout_cancellable(DOWNLOAD_TIMEOUT, cancel, future).await {
Ok(Ok(r)) => Ok(r),
Ok(Err(e)) => Err(e),
Err(TimeoutCancellableError::Timeout) => {
Err(DownloadError::Other(anyhow::anyhow!("Timed out")))
}
Err(TimeoutCancellableError::Cancelled) => Err(DownloadError::Cancelled),
}
}
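Both wrappers above rely on `utils::timeout::timeout_cancellable`; its behaviour can be approximated with `tokio::select!` racing the wrapped future against a sleep and the cancellation token. The sketch below is an assumption about its shape, not the crate's actual implementation:

```rust
// Assumed shape only; the real helper lives in `utils::timeout` and may differ.
use std::time::Duration;
use tokio_util::sync::CancellationToken;

enum TimeoutCancellableError {
    Timeout,
    Cancelled,
}

async fn timeout_cancellable<F: std::future::Future>(
    duration: Duration,
    cancel: &CancellationToken,
    future: F,
) -> Result<F::Output, TimeoutCancellableError> {
    tokio::select! {
        output = future => Ok(output),
        _ = tokio::time::sleep(duration) => Err(TimeoutCancellableError::Timeout),
        _ = cancel.cancelled() => Err(TimeoutCancellableError::Cancelled),
    }
}
```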
impl RemoteTimelineClient {
///
/// Create a remote storage client for given timeline
@@ -1009,7 +1050,7 @@ impl RemoteTimelineClient {
&self.cancel,
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.ok_or_else(|| anyhow::anyhow!("Cancelled"))
.and_then(|x| x)?;
// all good, disarm the guard and mark as success
@@ -1041,14 +1082,14 @@ impl RemoteTimelineClient {
upload::preserve_initdb_archive(&self.storage_impl, tenant_id, timeline_id, cancel)
.await
},
TimeoutOrCancel::caused_by_cancel,
|_e| false,
FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"preserve_initdb_tar_zst",
&cancel.clone(),
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.ok_or_else(|| anyhow::anyhow!("Cancellled"))
.and_then(|x| x)
.context("backing up initdb archive")?;
Ok(())
@@ -1110,7 +1151,7 @@ impl RemoteTimelineClient {
let remaining = download_retry(
|| async {
self.storage_impl
.list_files(Some(&timeline_storage_path), None, &cancel)
.list_files(Some(&timeline_storage_path), None)
.await
},
"list remaining files",
@@ -1404,10 +1445,6 @@ impl RemoteTimelineClient {
Ok(()) => {
break;
}
Err(e) if TimeoutOrCancel::caused_by_cancel(&e) => {
// loop around to do the proper stopping
continue;
}
Err(e) => {
let retries = task.retries.fetch_add(1, Ordering::SeqCst);

View File

@@ -11,14 +11,16 @@ use camino::{Utf8Path, Utf8PathBuf};
use pageserver_api::shard::TenantShardId;
use tokio::fs::{self, File, OpenOptions};
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use tokio_util::io::StreamReader;
use tokio_util::sync::CancellationToken;
use tracing::warn;
use utils::timeout::timeout_cancellable;
use utils::{backoff, crashsafe};
use crate::config::PageServerConf;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
use crate::tenant::remote_timeline_client::{
download_cancellable, remote_layer_path, remote_timelines_path, DOWNLOAD_TIMEOUT,
};
use crate::tenant::storage_layer::LayerFileName;
use crate::tenant::Generation;
use crate::virtual_file::on_fatal_io_error;
@@ -81,13 +83,15 @@ pub async fn download_layer_file<'a>(
.with_context(|| format!("create a destination file for layer '{temp_file_path}'"))
.map_err(DownloadError::Other)?;
let download = storage
.download(&remote_path, cancel)
// Cancellation safety: it is safe to cancel this future, because it isn't writing to a local
// file: the write to local file doesn't start until after the request header is returned
// and we start draining the body stream below
let download = download_cancellable(cancel, storage.download(&remote_path))
.await
.with_context(|| {
format!(
"open a download stream for layer with remote storage path '{remote_path:?}'"
)
"open a download stream for layer with remote storage path '{remote_path:?}'"
)
})
.map_err(DownloadError::Other)?;
@@ -96,26 +100,43 @@ pub async fn download_layer_file<'a>(
let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
let bytes_amount = tokio::io::copy_buf(&mut reader, &mut destination_file)
.await
.with_context(|| format!(
// Cancellation safety: it is safe to cancel this future because it is writing into a temporary file,
// and we will unlink the temporary file if there is an error. This unlink is important because we
// are in a retry loop, and we wouldn't want to leave behind a rogue write I/O to a file that
// we will imminently try to write to again.
let bytes_amount: u64 = match timeout_cancellable(
DOWNLOAD_TIMEOUT,
cancel,
tokio::io::copy_buf(&mut reader, &mut destination_file),
)
.await
.with_context(|| {
format!(
"download layer at remote path '{remote_path:?}' into file {temp_file_path:?}"
))
.map_err(DownloadError::Other);
match bytes_amount {
Ok(bytes_amount) => {
let destination_file = destination_file.into_inner();
Ok((destination_file, bytes_amount))
}
)
})
.map_err(DownloadError::Other)?
{
Ok(b) => Ok(b),
Err(e) => {
// Remove incomplete files: on restart Timeline would do this anyway, but we must
// do it here for the retry case.
if let Err(e) = tokio::fs::remove_file(&temp_file_path).await {
on_fatal_io_error(&e, &format!("Removing temporary file {temp_file_path}"));
}
Err(e)
}
}
.with_context(|| {
format!(
"download layer at remote path '{remote_path:?}' into file {temp_file_path:?}"
)
})
.map_err(DownloadError::Other)?;
let destination_file = destination_file.into_inner();
Ok((destination_file, bytes_amount))
},
&format!("download {remote_path:?}"),
cancel,
@@ -197,11 +218,9 @@ pub async fn list_remote_timelines(
let listing = download_retry_forever(
|| {
storage.list(
Some(&remote_path),
ListingMode::WithDelimiter,
None,
download_cancellable(
&cancel,
storage.list(Some(&remote_path), ListingMode::WithDelimiter, None),
)
},
&format!("list timelines for {tenant_shard_id}"),
@@ -240,23 +259,26 @@ async fn do_download_index_part(
index_generation: Generation,
cancel: &CancellationToken,
) -> Result<IndexPart, DownloadError> {
use futures::stream::StreamExt;
let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
let index_part_bytes = download_retry_forever(
|| async {
let download = storage.download(&remote_path, cancel).await?;
// Cancellation: it is safe to cancel this future because we're just downloading into
// a memory buffer, not touching local disk.
let index_part_download =
download_cancellable(cancel, storage.download(&remote_path)).await?;
let mut bytes = Vec::new();
let stream = download.download_stream;
let mut stream = StreamReader::new(stream);
tokio::io::copy_buf(&mut stream, &mut bytes)
.await
.with_context(|| format!("download index part at {remote_path:?}"))
.map_err(DownloadError::Other)?;
Ok(bytes)
let mut index_part_bytes = Vec::new();
let mut stream = std::pin::pin!(index_part_download.download_stream);
while let Some(chunk) = stream.next().await {
let chunk = chunk
.with_context(|| format!("download index part at {remote_path:?}"))
.map_err(DownloadError::Other)?;
index_part_bytes.extend_from_slice(&chunk[..]);
}
Ok(index_part_bytes)
},
&format!("download {remote_path:?}"),
cancel,
@@ -351,7 +373,7 @@ pub(super) async fn download_index_part(
let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none());
let indices = download_retry(
|| async { storage.list_files(Some(&index_prefix), None, cancel).await },
|| async { storage.list_files(Some(&index_prefix), None).await },
"list index_part files",
cancel,
)
@@ -424,10 +446,11 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;
let download = match storage.download(&remote_path, cancel).await {
let download = match download_cancellable(cancel, storage.download(&remote_path)).await
{
Ok(dl) => dl,
Err(DownloadError::NotFound) => {
storage.download(&remote_preserved_path, cancel).await?
download_cancellable(cancel, storage.download(&remote_preserved_path)).await?
}
Err(other) => Err(other)?,
};
@@ -437,7 +460,6 @@ pub(crate) async fn download_initdb_tar_zst(
// TODO: this consumption of the response body should be subject to timeout + cancellation, but
// not without thinking carefully about how to recover safely from cancelling a write to
// local storage (e.g. by writing into a temp file as we do in download_layer)
// FIXME: flip the weird error wrapping
tokio::io::copy_buf(&mut download, &mut writer)
.await
.with_context(|| format!("download initdb.tar.zst at {remote_path:?}"))

View File

@@ -16,7 +16,7 @@ use crate::{
config::PageServerConf,
tenant::remote_timeline_client::{
index::IndexPart, remote_index_path, remote_initdb_archive_path,
remote_initdb_preserved_archive_path, remote_path,
remote_initdb_preserved_archive_path, remote_path, upload_cancellable,
},
};
use remote_storage::{GenericRemoteStorage, TimeTravelError};
@@ -49,15 +49,16 @@ pub(crate) async fn upload_index_part<'a>(
let index_part_bytes = bytes::Bytes::from(index_part_bytes);
let remote_path = remote_index_path(tenant_shard_id, timeline_id, generation);
storage
.upload_storage_object(
upload_cancellable(
cancel,
storage.upload_storage_object(
futures::stream::once(futures::future::ready(Ok(index_part_bytes))),
index_part_size,
&remote_path,
cancel,
)
.await
.with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
),
)
.await
.with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
}
/// Attempts to upload given layer files.
@@ -114,10 +115,11 @@ pub(super) async fn upload_timeline_layer<'a>(
let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);
storage
.upload(reader, fs_size, &storage_path, None, cancel)
upload_cancellable(cancel, storage.upload(reader, fs_size, &storage_path, None))
.await
.with_context(|| format!("upload layer from local path '{source_path}'"))
.with_context(|| format!("upload layer from local path '{source_path}'"))?;
Ok(())
}
/// Uploads the given `initdb` data to the remote storage.
@@ -137,10 +139,12 @@ pub(crate) async fn upload_initdb_dir(
let file = tokio_util::io::ReaderStream::with_capacity(initdb_tar_zst, super::BUFFER_SIZE);
let remote_path = remote_initdb_archive_path(tenant_id, timeline_id);
storage
.upload_storage_object(file, size as usize, &remote_path, cancel)
.await
.with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
upload_cancellable(
cancel,
storage.upload_storage_object(file, size as usize, &remote_path),
)
.await
.with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
}
pub(crate) async fn preserve_initdb_archive(
@@ -151,8 +155,7 @@ pub(crate) async fn preserve_initdb_archive(
) -> anyhow::Result<()> {
let source_path = remote_initdb_archive_path(tenant_id, timeline_id);
let dest_path = remote_initdb_preserved_archive_path(tenant_id, timeline_id);
storage
.copy_object(&source_path, &dest_path, cancel)
upload_cancellable(cancel, storage.copy_object(&source_path, &dest_path))
.await
.with_context(|| format!("backing up initdb archive for '{tenant_id} / {timeline_id}'"))
}

View File

@@ -150,7 +150,7 @@ impl SecondaryTenant {
generation: None,
secondary_conf: Some(conf),
shard_number: self.tenant_shard_id.shard_number.0,
shard_count: self.tenant_shard_id.shard_count.literal(),
shard_count: self.tenant_shard_id.shard_count.0,
shard_stripe_size: self.shard_identity.stripe_size.0,
tenant_conf: tenant_conf.into(),
}

View File

@@ -523,13 +523,12 @@ impl<'a> TenantDownloader<'a> {
tracing::debug!("Downloading heatmap for secondary tenant",);
let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel;
let heatmap_bytes = backoff::retry(
|| async {
let download = self
.remote_storage
.download(&heatmap_path, cancel)
.download(&heatmap_path)
.await
.map_err(UpdateError::from)?;
let mut heatmap_bytes = Vec::new();
@@ -541,7 +540,7 @@ impl<'a> TenantDownloader<'a> {
FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"download heatmap",
cancel,
&self.secondary_state.cancel,
)
.await
.ok_or_else(|| UpdateError::Cancelled)

View File

@@ -21,17 +21,18 @@ use futures::Future;
use md5;
use pageserver_api::shard::TenantShardId;
use rand::Rng;
use remote_storage::{GenericRemoteStorage, TimeoutOrCancel};
use remote_storage::GenericRemoteStorage;
use super::{
heatmap::HeatMapTenant,
scheduler::{self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs},
CommandRequest, UploadCommand,
CommandRequest,
};
use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, Instrument};
use utils::{backoff, completion::Barrier, yielding_loop::yielding_loop};
use super::{heatmap::HeatMapTenant, UploadCommand};
pub(super) async fn heatmap_uploader_task(
tenant_manager: Arc<TenantManager>,
remote_storage: GenericRemoteStorage,
@@ -416,10 +417,10 @@ async fn upload_tenant_heatmap(
|| async {
let bytes = futures::stream::once(futures::future::ready(Ok(bytes.clone())));
remote_storage
.upload_storage_object(bytes, size, &path, cancel)
.upload_storage_object(bytes, size, &path)
.await
},
TimeoutOrCancel::caused_by_cancel,
|_| false,
3,
u32::MAX,
"Uploading heatmap",

View File

@@ -4849,7 +4849,7 @@ mod tests {
TenantHarness::create("two_layer_eviction_attempts_at_the_same_time").unwrap();
let ctx = any_context();
let tenant = harness.do_try_load(&ctx).await.unwrap();
let tenant = harness.try_load(&ctx).await.unwrap();
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await

View File

@@ -196,13 +196,13 @@ impl Timeline {
ControlFlow::Continue(()) => (),
}
#[allow(dead_code)]
#[derive(Debug, Default)]
struct EvictionStats {
candidates: usize,
evicted: usize,
errors: usize,
not_evictable: usize,
#[allow(dead_code)]
skipped_for_shutdown: usize,
}

View File

@@ -582,37 +582,24 @@ impl VirtualFile {
}
// Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#219-235
pub async fn write_all_at<B: BoundedBuf>(
&self,
buf: B,
mut offset: u64,
) -> (B::Buf, Result<(), Error>) {
let buf_len = buf.bytes_init();
if buf_len == 0 {
return (Slice::into_inner(buf.slice_full()), Ok(()));
}
let mut buf = buf.slice(0..buf_len);
pub async fn write_all_at(&self, mut buf: &[u8], mut offset: u64) -> Result<(), Error> {
while !buf.is_empty() {
// TODO: push `buf` further down
match self.write_at(&buf, offset).await {
match self.write_at(buf, offset).await {
Ok(0) => {
return (
Slice::into_inner(buf),
Err(Error::new(
std::io::ErrorKind::WriteZero,
"failed to write whole buffer",
)),
);
return Err(Error::new(
std::io::ErrorKind::WriteZero,
"failed to write whole buffer",
));
}
Ok(n) => {
buf = buf.slice(n..);
buf = &buf[n..];
offset += n as u64;
}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
Err(e) => return (Slice::into_inner(buf), Err(e)),
Err(e) => return Err(e),
}
}
(Slice::into_inner(buf), Ok(()))
Ok(())
}
/// Writes `buf.slice(0..buf.bytes_init())`.
@@ -1077,19 +1064,10 @@ mod tests {
MaybeVirtualFile::File(file) => file.read_exact_at(&mut buf, offset).map(|()| buf),
}
}
async fn write_all_at<B: BoundedBuf>(&self, buf: B, offset: u64) -> Result<(), Error> {
async fn write_all_at(&self, buf: &[u8], offset: u64) -> Result<(), Error> {
match self {
MaybeVirtualFile::VirtualFile(file) => {
let (_buf, res) = file.write_all_at(buf, offset).await;
res
}
MaybeVirtualFile::File(file) => {
let buf_len = buf.bytes_init();
if buf_len == 0 {
return Ok(());
}
file.write_all_at(&buf.slice(0..buf_len), offset)
}
MaybeVirtualFile::VirtualFile(file) => file.write_all_at(buf, offset).await,
MaybeVirtualFile::File(file) => file.write_all_at(buf, offset),
}
}
async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
@@ -1236,8 +1214,8 @@ mod tests {
.to_owned(),
)
.await?;
file_b.write_all_at(b"BAR".to_vec(), 3).await?;
file_b.write_all_at(b"FOO".to_vec(), 0).await?;
file_b.write_all_at(b"BAR", 3).await?;
file_b.write_all_at(b"FOO", 0).await?;
assert_eq!(file_b.read_string_at(2, 3).await?, "OBA");

View File

@@ -1695,22 +1695,22 @@ mod tests {
let mut m = tline.begin_modification(Lsn(0x20));
walingest.put_rel_creation(&mut m, TESTREL_A, &ctx).await?;
walingest
.put_rel_page_image(&mut m, TESTREL_A, 0, test_img("foo blk 0 at 2"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
.await?;
m.commit(&ctx).await?;
let mut m = tline.begin_modification(Lsn(0x30));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 0, test_img("foo blk 0 at 3"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 3"), &ctx)
.await?;
m.commit(&ctx).await?;
let mut m = tline.begin_modification(Lsn(0x40));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 1, test_img("foo blk 1 at 4"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1 at 4"), &ctx)
.await?;
m.commit(&ctx).await?;
let mut m = tline.begin_modification(Lsn(0x50));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 2, test_img("foo blk 2 at 5"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"), &ctx)
.await?;
m.commit(&ctx).await?;
@@ -1751,46 +1751,46 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x20)), false, &ctx)
.await?,
test_img("foo blk 0 at 2")
TEST_IMG("foo blk 0 at 2")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x30)), false, &ctx)
.await?,
test_img("foo blk 0 at 3")
TEST_IMG("foo blk 0 at 3")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x40)), false, &ctx)
.await?,
test_img("foo blk 0 at 3")
TEST_IMG("foo blk 0 at 3")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x40)), false, &ctx)
.await?,
test_img("foo blk 1 at 4")
TEST_IMG("foo blk 1 at 4")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x50)), false, &ctx)
.await?,
test_img("foo blk 0 at 3")
TEST_IMG("foo blk 0 at 3")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x50)), false, &ctx)
.await?,
test_img("foo blk 1 at 4")
TEST_IMG("foo blk 1 at 4")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), false, &ctx)
.await?,
test_img("foo blk 2 at 5")
TEST_IMG("foo blk 2 at 5")
);
// Truncate last block
@@ -1812,13 +1812,13 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x60)), false, &ctx)
.await?,
test_img("foo blk 0 at 3")
TEST_IMG("foo blk 0 at 3")
);
assert_eq!(
tline
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x60)), false, &ctx)
.await?,
test_img("foo blk 1 at 4")
TEST_IMG("foo blk 1 at 4")
);
// should still see the truncated block with older LSN
@@ -1832,7 +1832,7 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), false, &ctx)
.await?,
test_img("foo blk 2 at 5")
TEST_IMG("foo blk 2 at 5")
);
// Truncate to zero length
@@ -1851,7 +1851,7 @@ mod tests {
// Extend from 0 to 2 blocks, leaving a gap
let mut m = tline.begin_modification(Lsn(0x70));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 1, test_img("foo blk 1"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1"), &ctx)
.await?;
m.commit(&ctx).await?;
assert_eq!(
@@ -1870,13 +1870,13 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x70)), false, &ctx)
.await?,
test_img("foo blk 1")
TEST_IMG("foo blk 1")
);
// Extend a lot more, leaving a big gap that spans across segments
let mut m = tline.begin_modification(Lsn(0x80));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 1500, test_img("foo blk 1500"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 1500, TEST_IMG("foo blk 1500"), &ctx)
.await?;
m.commit(&ctx).await?;
assert_eq!(
@@ -1897,7 +1897,7 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, 1500, Version::Lsn(Lsn(0x80)), false, &ctx)
.await?,
test_img("foo blk 1500")
TEST_IMG("foo blk 1500")
);
Ok(())
@@ -1915,7 +1915,7 @@ mod tests {
let mut m = tline.begin_modification(Lsn(0x20));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 0, test_img("foo blk 0 at 2"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
.await?;
m.commit(&ctx).await?;
@@ -1952,7 +1952,7 @@ mod tests {
// Re-create it
let mut m = tline.begin_modification(Lsn(0x40));
walingest
.put_rel_page_image(&mut m, TESTREL_A, 0, test_img("foo blk 0 at 4"), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 4"), &ctx)
.await?;
m.commit(&ctx).await?;
@@ -1990,7 +1990,7 @@ mod tests {
for blkno in 0..relsize {
let data = format!("foo blk {} at {}", blkno, Lsn(0x20));
walingest
.put_rel_page_image(&mut m, TESTREL_A, blkno, test_img(&data), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
.await?;
}
m.commit(&ctx).await?;
@@ -2028,7 +2028,7 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(lsn), false, &ctx)
.await?,
test_img(&data)
TEST_IMG(&data)
);
}
@@ -2055,7 +2055,7 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x60)), false, &ctx)
.await?,
test_img(&data)
TEST_IMG(&data)
);
}
@@ -2073,7 +2073,7 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x50)), false, &ctx)
.await?,
test_img(&data)
TEST_IMG(&data)
);
}
@@ -2084,7 +2084,7 @@ mod tests {
for blkno in 0..relsize {
let data = format!("foo blk {} at {}", blkno, lsn);
walingest
.put_rel_page_image(&mut m, TESTREL_A, blkno, test_img(&data), &ctx)
.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
.await?;
}
m.commit(&ctx).await?;
@@ -2109,7 +2109,7 @@ mod tests {
tline
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x80)), false, &ctx)
.await?,
test_img(&data)
TEST_IMG(&data)
);
}
@@ -2130,7 +2130,7 @@ mod tests {
for blknum in 0..RELSEG_SIZE + 1 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let img = test_img(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
walingest
.put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img, &ctx)
.await?;

View File

@@ -44,11 +44,6 @@ pub enum NeonWalRecord {
moff: MultiXactOffset,
members: Vec<MultiXactMember>,
},
/// Update the map of AUX files, either writing or dropping an entry
AuxFile {
file_path: String,
content: Option<Bytes>,
},
}
impl NeonWalRecord {

View File

@@ -22,7 +22,7 @@
mod process;
/// Code to apply [`NeonWalRecord`]s.
pub(crate) mod apply_neon;
mod apply_neon;
use crate::config::PageServerConf;
use crate::metrics::{

View File

@@ -1,8 +1,7 @@
use crate::pgdatadir_mapping::AuxFilesDirectory;
use crate::walrecord::NeonWalRecord;
use anyhow::Context;
use byteorder::{ByteOrder, LittleEndian};
use bytes::{BufMut, BytesMut};
use bytes::BytesMut;
use pageserver_api::key::{key_to_rel_block, key_to_slru_block, Key};
use pageserver_api::reltag::SlruKind;
use postgres_ffi::pg_constants;
@@ -13,7 +12,6 @@ use postgres_ffi::v14::nonrelfile_utils::{
};
use postgres_ffi::BLCKSZ;
use tracing::*;
use utils::bin_ser::BeSer;
/// Can this request be served by neon redo functions
/// or we need to pass it to wal-redo postgres process?
@@ -232,72 +230,6 @@ pub(crate) fn apply_in_neon(
LittleEndian::write_u32(&mut page[memberoff..memberoff + 4], member.xid);
}
}
NeonWalRecord::AuxFile { file_path, content } => {
let mut dir = AuxFilesDirectory::des(page)?;
dir.upsert(file_path.clone(), content.clone());
page.clear();
let mut writer = page.writer();
dir.ser_into(&mut writer)?;
}
}
Ok(())
}
#[cfg(test)]
mod test {
use bytes::Bytes;
use pageserver_api::key::AUX_FILES_KEY;
use super::*;
use std::collections::HashMap;
use crate::{pgdatadir_mapping::AuxFilesDirectory, walrecord::NeonWalRecord};
/// Test [`apply_in_neon`]'s handling of NeonWalRecord::AuxFile
#[test]
fn apply_aux_file_deltas() -> anyhow::Result<()> {
let base_dir = AuxFilesDirectory {
files: HashMap::from([
("two".to_string(), Bytes::from_static(b"content0")),
("three".to_string(), Bytes::from_static(b"contentX")),
]),
};
let base_image = AuxFilesDirectory::ser(&base_dir)?;
let deltas = vec![
// Insert
NeonWalRecord::AuxFile {
file_path: "one".to_string(),
content: Some(Bytes::from_static(b"content1")),
},
// Update
NeonWalRecord::AuxFile {
file_path: "two".to_string(),
content: Some(Bytes::from_static(b"content99")),
},
// Delete
NeonWalRecord::AuxFile {
file_path: "three".to_string(),
content: None,
},
];
let file_path = AUX_FILES_KEY;
let mut page = BytesMut::from_iter(base_image);
for record in deltas {
apply_in_neon(&record, file_path, &mut page)?;
}
let reconstructed = AuxFilesDirectory::des(&page)?;
let expect = HashMap::from([
("one".to_string(), Bytes::from_static(b"content1")),
("two".to_string(), Bytes::from_static(b"content99")),
]);
assert_eq!(reconstructed.files, expect);
Ok(())
}
}

View File

@@ -7,24 +7,6 @@ AS 'MODULE_PATHNAME', 'test_consume_xids'
LANGUAGE C STRICT
PARALLEL UNSAFE;
CREATE FUNCTION test_consume_cpu(seconds int)
RETURNS VOID
AS 'MODULE_PATHNAME', 'test_consume_cpu'
LANGUAGE C STRICT
PARALLEL UNSAFE;
CREATE FUNCTION test_consume_memory(megabytes int)
RETURNS VOID
AS 'MODULE_PATHNAME', 'test_consume_memory'
LANGUAGE C STRICT
PARALLEL UNSAFE;
CREATE FUNCTION test_release_memory(megabytes int DEFAULT NULL)
RETURNS VOID
AS 'MODULE_PATHNAME', 'test_release_memory'
LANGUAGE C
PARALLEL UNSAFE;
CREATE FUNCTION clear_buffer_cache()
RETURNS VOID
AS 'MODULE_PATHNAME', 'clear_buffer_cache'

View File

@@ -3,4 +3,3 @@ comment = 'helpers for neon testing and debugging'
default_version = '1.0'
module_pathname = '$libdir/neon_test_utils'
relocatable = true
trusted = true

View File

@@ -21,12 +21,10 @@
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
#include "utils/rel.h"
#include "utils/varlena.h"
#include "utils/wait_event.h"
#include "../neon/pagestore_client.h"
PG_MODULE_MAGIC;
@@ -34,9 +32,6 @@ PG_MODULE_MAGIC;
extern void _PG_init(void);
PG_FUNCTION_INFO_V1(test_consume_xids);
PG_FUNCTION_INFO_V1(test_consume_cpu);
PG_FUNCTION_INFO_V1(test_consume_memory);
PG_FUNCTION_INFO_V1(test_release_memory);
PG_FUNCTION_INFO_V1(clear_buffer_cache);
PG_FUNCTION_INFO_V1(get_raw_page_at_lsn);
PG_FUNCTION_INFO_V1(get_raw_page_at_lsn_ex);
@@ -102,119 +97,6 @@ test_consume_xids(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
/*
* test_consume_cpu(seconds int). Keeps one CPU busy for the given number of seconds.
*/
Datum
test_consume_cpu(PG_FUNCTION_ARGS)
{
int32 seconds = PG_GETARG_INT32(0);
TimestampTz start;
uint64 total_iterations = 0;
start = GetCurrentTimestamp();
for (;;)
{
TimestampTz elapsed;
elapsed = GetCurrentTimestamp() - start;
if (elapsed > (TimestampTz) seconds * USECS_PER_SEC)
break;
/* keep spinning */
for (int i = 0; i < 1000000; i++)
total_iterations++;
elog(DEBUG2, "test_consume_cpu(): %lu iterations in total", total_iterations);
CHECK_FOR_INTERRUPTS();
}
PG_RETURN_VOID();
}
static MemoryContext consume_cxt = NULL;
static slist_head consumed_memory_chunks;
static int64 num_memory_chunks;
/*
* test_consume_memory(megabytes int).
*
* Consume given amount of memory. The allocation is made in TopMemoryContext,
* so it outlives the function, until you call test_release_memory to
* explicitly release it, or close the session.
*/
Datum
test_consume_memory(PG_FUNCTION_ARGS)
{
int32 megabytes = PG_GETARG_INT32(0);
/*
* Consume the memory in a new memory context, so that it's convenient to
* release and to display it separately in a possible memory context dump.
*/
if (consume_cxt == NULL)
consume_cxt = AllocSetContextCreate(TopMemoryContext,
"test_consume_memory",
ALLOCSET_DEFAULT_SIZES);
for (int32 i = 0; i < megabytes; i++)
{
char *p;
p = MemoryContextAllocZero(consume_cxt, 1024 * 1024);
/* touch the memory, so that it's really allocated by the kernel */
for (int j = 0; j < 1024 * 1024; j += 1024)
p[j] = j % 0xFF;
slist_push_head(&consumed_memory_chunks, (slist_node *) p);
num_memory_chunks++;
}
PG_RETURN_VOID();
}
/*
* test_release_memory(megabytes int). NULL releases all
*/
Datum
test_release_memory(PG_FUNCTION_ARGS)
{
TimestampTz start;
if (PG_ARGISNULL(0))
{
if (consume_cxt)
{
MemoryContextDelete(consume_cxt);
consume_cxt = NULL;
num_memory_chunks = 0;
}
}
else
{
int32 chunks_to_release = PG_GETARG_INT32(0);
if (chunks_to_release > num_memory_chunks)
{
elog(WARNING, "only %lu MB is consumed, releasing it all", num_memory_chunks);
chunks_to_release = num_memory_chunks;
}
for (int32 i = 0; i < chunks_to_release; i++)
{
slist_node *chunk = slist_pop_head_node(&consumed_memory_chunks);
pfree(chunk);
num_memory_chunks--;
}
}
PG_RETURN_VOID();
}
/*
* Flush the buffer cache, evicting all pages that are not currently pinned.
*/

View File

@@ -13,7 +13,7 @@ use parquet::{
},
record::RecordWriter,
};
use remote_storage::{GenericRemoteStorage, RemotePath, RemoteStorageConfig, TimeoutOrCancel};
use remote_storage::{GenericRemoteStorage, RemotePath, RemoteStorageConfig};
use tokio::{sync::mpsc, time};
use tokio_util::sync::CancellationToken;
use tracing::{debug, info, Span};
@@ -314,23 +314,20 @@ async fn upload_parquet(
let path = RemotePath::from_string(&format!(
"{year:04}/{month:02}/{day:02}/{hour:02}/requests_{id}.parquet"
))?;
let cancel = CancellationToken::new();
backoff::retry(
|| async {
let stream = futures::stream::once(futures::future::ready(Ok(data.clone())));
storage
.upload(stream, data.len(), &path, None, &cancel)
.await
storage.upload(stream, data.len(), &path, None).await
},
TimeoutOrCancel::caused_by_cancel,
|_e| false,
FAILED_UPLOAD_WARN_THRESHOLD,
FAILED_UPLOAD_MAX_RETRIES,
"request_data_upload",
// we don't want cancellation to interrupt here, so we make a dummy cancel token
&cancel,
&CancellationToken::new(),
)
.await
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
.ok_or_else(|| anyhow::anyhow!("Cancelled"))
.and_then(|x| x)
.context("request_data_upload")?;
@@ -416,8 +413,7 @@ mod tests {
)
.unwrap(),
max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
}),
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
})
})
);
assert_eq!(parquet_upload.parquet_upload_row_group_size, 100);
@@ -470,7 +466,6 @@ mod tests {
) -> Vec<(u64, usize, i64)> {
let remote_storage_config = RemoteStorageConfig {
storage: RemoteStorageKind::LocalFs(tmpdir.to_path_buf()),
timeout: std::time::Duration::from_secs(120),
};
let storage = GenericRemoteStorage::from_config(&remote_storage_config).unwrap();

View File

@@ -122,24 +122,25 @@ where
error!(error = ?err, "could not connect to compute node");
let node_info = if !node_info.cached() {
// If we just received this from cplane and didn't get it from cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one.
if !err.should_retry(num_retries) {
return Err(err.into());
}
node_info
} else {
// if we failed to connect, it's likely that the compute node was suspended, wake a new compute node
info!("compute node's state has likely changed; requesting a wake-up");
ctx.latency_timer.cache_miss();
let old_node_info = invalidate_cache(node_info);
let mut node_info = wake_compute(&mut num_retries, ctx, user_info).await?;
node_info.reuse_settings(old_node_info);
let node_info =
if err.get_error_kind() == crate::error::ErrorKind::Postgres || !node_info.cached() {
// If the error is Postgres, that means that we managed to connect to the compute node, but there was an error.
// Do not need to retrieve a new node_info, just return the old one.
if !err.should_retry(num_retries) {
return Err(err.into());
}
node_info
} else {
// if we failed to connect, it's likely that the compute node was suspended, wake a new compute node
info!("compute node's state has likely changed; requesting a wake-up");
ctx.latency_timer.cache_miss();
let old_node_info = invalidate_cache(node_info);
let mut node_info = wake_compute(&mut num_retries, ctx, user_info).await?;
node_info.reuse_settings(old_node_info);
mechanism.update_connect_config(&mut node_info.config);
node_info
};
mechanism.update_connect_config(&mut node_info.config);
node_info
};
// now that we have a new node, try connect to it repeatedly.
// this can error for a few reasons, for instance:

View File

@@ -375,6 +375,8 @@ enum ConnectAction {
Connect,
Retry,
Fail,
RetryPg,
FailPg,
}
#[derive(Clone)]
@@ -464,6 +466,14 @@ impl ConnectMechanism for TestConnectMechanism {
retryable: false,
kind: ErrorKind::Compute,
}),
ConnectAction::FailPg => Err(TestConnectError {
retryable: false,
kind: ErrorKind::Postgres,
}),
ConnectAction::RetryPg => Err(TestConnectError {
retryable: true,
kind: ErrorKind::Postgres,
}),
x => panic!("expecting action {:?}, connect is called instead", x),
}
}
@@ -562,6 +572,32 @@ async fn connect_to_compute_retry() {
mechanism.verify();
}
#[tokio::test]
async fn connect_to_compute_retry_pg() {
let _ = env_logger::try_init();
use ConnectAction::*;
let mut ctx = RequestMonitoring::test();
let mechanism = TestConnectMechanism::new(vec![Wake, RetryPg, Connect]);
let user_info = helper_create_connect_info(&mechanism);
connect_to_compute(&mut ctx, &mechanism, &user_info, false)
.await
.unwrap();
mechanism.verify();
}
#[tokio::test]
async fn connect_to_compute_fail_pg() {
let _ = env_logger::try_init();
use ConnectAction::*;
let mut ctx = RequestMonitoring::test();
let mechanism = TestConnectMechanism::new(vec![Wake, FailPg]);
let user_info = helper_create_connect_info(&mechanism);
connect_to_compute(&mut ctx, &mechanism, &user_info, false)
.await
.unwrap_err();
mechanism.verify();
}
/// Test that we don't retry if the error is not retryable.
#[tokio::test]
async fn connect_to_compute_non_retry_1() {

View File

@@ -1,7 +1,11 @@
#![allow(unused)]
use std::str::FromStr;
use std::time::Duration;
use chrono::{DateTime, Utc};
use hex::FromHex;
use pageserver::tenant::Tenant;
use reqwest::{header, Client, StatusCode, Url};
use serde::Deserialize;
use tokio::sync::Semaphore;
@@ -286,7 +290,7 @@ impl CloudAdminApiClient {
tokio::time::sleep(Duration::from_millis(500)).await;
continue;
}
_status => {
status => {
return Err(Error::new(
"List active projects".to_string(),
ErrorKind::ResponseStatus(response.status()),

View File

@@ -12,12 +12,8 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
}
Ok(())
}
(Scope::PageServerApi | Scope::GenerationsApi, _) => Err(AuthError(
format!(
"JWT scope '{:?}' is ineligible for Safekeeper auth",
claims.scope
)
.into(),
(Scope::PageServerApi, _) => Err(AuthError(
"PageServerApi scope makes no sense for Safekeeper".into(),
)),
(Scope::SafekeeperData, _) => Ok(()),
}

View File

@@ -511,11 +511,7 @@ async fn backup_object(
let file = tokio_util::io::ReaderStream::with_capacity(file, BUFFER_SIZE);
let cancel = CancellationToken::new();
storage
.upload_storage_object(file, size, target_file, &cancel)
.await
storage.upload_storage_object(file, size, target_file).await
}
pub async fn read_object(
@@ -530,10 +526,8 @@ pub async fn read_object(
info!("segment download about to start from remote path {file_path:?} at offset {offset}");
let cancel = CancellationToken::new();
let download = storage
.download_storage_object(Some((offset, None)), file_path, &cancel)
.download_storage_object(Some((offset, None)), file_path)
.await
.with_context(|| {
format!("Failed to open WAL segment download stream for remote path {file_path:?}")
@@ -565,8 +559,7 @@ pub async fn delete_timeline(ttid: &TenantTimelineId) -> Result<()> {
// Note: listing segments might take a long time if there are many of them.
// We don't currently have http request timeout cancellation, but if/once we do,
// listing should get a streaming interface to make progress.
let cancel = CancellationToken::new(); // not really used
let token = CancellationToken::new(); // not really used
backoff::retry(
|| async {
// Do list-delete in batch_size batches to make progress even if there a lot of files.
@@ -574,7 +567,7 @@ pub async fn delete_timeline(ttid: &TenantTimelineId) -> Result<()> {
// I'm not sure deleting while iterating is expected in s3.
loop {
let files = storage
.list_files(Some(&remote_path), Some(batch_size), &cancel)
.list_files(Some(&remote_path), Some(batch_size))
.await?;
if files.is_empty() {
return Ok(()); // done
@@ -587,15 +580,14 @@ pub async fn delete_timeline(ttid: &TenantTimelineId) -> Result<()> {
files.first().unwrap().object_name().unwrap_or(""),
files.last().unwrap().object_name().unwrap_or("")
);
storage.delete_objects(&files, &cancel).await?;
storage.delete_objects(&files).await?;
}
},
// consider TimeoutOrCancel::caused_by_cancel when using cancellation
|_| false,
3,
10,
"executing WAL segments deletion batch",
&cancel,
&token,
)
.await
.ok_or_else(|| anyhow::anyhow!("canceled"))
@@ -625,12 +617,7 @@ pub async fn copy_s3_segments(
let remote_path = RemotePath::new(&relative_dst_path)?;
let cancel = CancellationToken::new();
let files = storage
.list_files(Some(&remote_path), None, &cancel)
.await?;
let files = storage.list_files(Some(&remote_path), None).await?;
let uploaded_segments = &files
.iter()
.filter_map(|file| file.object_name().map(ToOwned::to_owned))
@@ -658,7 +645,7 @@ pub async fn copy_s3_segments(
let from = RemotePath::new(&relative_src_path.join(&segment_name))?;
let to = RemotePath::new(&relative_dst_path.join(&segment_name))?;
storage.copy_object(&from, &to, &cancel).await?;
storage.copy_object(&from, &to).await?;
}
info!(

View File

@@ -3967,24 +3967,32 @@ def list_files_to_compare(pgdata_dir: Path) -> List[str]:
# pg is the existing and running compute node, that we want to compare with a basebackup
def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint: Endpoint):
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
# Get the timeline ID. We need it for the 'basebackup' command
timeline_id = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0])
# many tests already checkpoint, but do it just in case
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CHECKPOINT")
# wait for all pageserver shards to catch up
pre_shutdown = wait_for_last_flush_lsn(env, endpoint, endpoint.tenant_id, timeline_id)
# wait for pageserver to catch up
wait_for_last_flush_lsn(env, endpoint, endpoint.tenant_id, timeline_id)
# stop postgres to ensure that files won't change
endpoint.stop()
# Read the shutdown checkpoint's LSN
pg_controldata_path = os.path.join(pg_bin.pg_bin_path, "pg_controldata")
cmd = f"{pg_controldata_path} -D {endpoint.pgdata_dir}"
result = subprocess.run(cmd, capture_output=True, text=True, shell=True)
checkpoint_lsn = re.findall(
"Latest checkpoint location:\\s+([0-9A-F]+/[0-9A-F]+)", result.stdout
)[0]
log.debug(
f"last checkpoint at {checkpoint_lsn} after shutdown (before shutdown was {pre_shutdown})"
)
# Take a basebackup from pageserver
restored_dir_path = env.repo_dir / f"{endpoint.endpoint_id}_restored_datadir"
restored_dir_path.mkdir(exist_ok=True)
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
psql_path = os.path.join(pg_bin.pg_bin_path, "psql")
pageserver_id = env.attachment_service.locate(endpoint.tenant_id)[0]["node_id"]
@@ -3992,7 +4000,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint
{psql_path} \
--no-psqlrc \
postgres://localhost:{env.get_pageserver(pageserver_id).service_port.pg} \
-c 'basebackup {endpoint.tenant_id} {timeline_id}' \
-c 'basebackup {endpoint.tenant_id} {timeline_id} {checkpoint_lsn}' \
| tar -x -C {restored_dir_path}
"""

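For reference, here is a minimal standalone sketch of the pg_controldata parsing step added above; it assumes `pg_controldata` is on PATH and `datadir` points at a cleanly shut-down cluster, and the function name `latest_checkpoint_lsn` is hypothetical:

import re
import subprocess

def latest_checkpoint_lsn(datadir: str) -> str:
    # pg_controldata prints a line like "Latest checkpoint location:   0/16B2A38"
    out = subprocess.run(
        ["pg_controldata", "-D", datadir],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    match = re.search(r"Latest checkpoint location:\s+([0-9A-F]+/[0-9A-F]+)", out)
    if match is None:
        raise RuntimeError("no checkpoint location found in pg_controldata output")
    return match.group(1)

The extracted LSN is what the test above appends to the 'basebackup' command, so the datadir restored from the pageserver is taken at exactly the shutdown checkpoint.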
View File

@@ -2,58 +2,57 @@ import os
from typing import Optional
import pytest
from _pytest.fixtures import FixtureRequest
from _pytest.python import Metafunc
from fixtures.pg_version import PgVersion
"""
Dynamically parametrize tests by different parameters
Dynamically parametrize tests by Postgres version, build type (debug/release/remote), and possibly by other parameters
"""
@pytest.fixture(scope="function", autouse=True)
def pg_version() -> Optional[PgVersion]:
def pg_version(request: FixtureRequest) -> Optional[PgVersion]:
# Do not parametrize performance tests yet, we need to prepare grafana charts first
if "test_runner/performance" in str(request.node.path):
v = os.environ.get("DEFAULT_PG_VERSION")
return PgVersion(v)
return None
@pytest.fixture(scope="function", autouse=True)
def build_type() -> Optional[str]:
def build_type(request: FixtureRequest) -> Optional[str]:
# Do not parametrize performance tests yet, we need to prepare grafana charts first
if "test_runner/performance" in str(request.node.path):
return os.environ.get("BUILD_TYPE", "").lower()
return None
@pytest.fixture(scope="function", autouse=True)
def platform() -> Optional[str]:
return None
@pytest.fixture(scope="function", autouse=True)
def pageserver_virtual_file_io_engine() -> Optional[str]:
def pageserver_virtual_file_io_engine(request: FixtureRequest) -> Optional[str]:
return None
def pytest_generate_tests(metafunc: Metafunc):
if (bt := os.getenv("BUILD_TYPE")) is None:
build_types = ["debug", "release"]
else:
build_types = [bt.lower()]
metafunc.parametrize("build_type", build_types)
if (v := os.getenv("DEFAULT_PG_VERSION")) is None:
if (v := os.environ.get("DEFAULT_PG_VERSION")) is None:
pg_versions = [version for version in PgVersion if version != PgVersion.NOT_SET]
else:
pg_versions = [PgVersion(v)]
metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions))
if (bt := os.environ.get("BUILD_TYPE")) is None:
build_types = ["debug", "release"]
else:
build_types = [bt.lower()]
# Do not parametrize performance tests yet by Postgres version or build type, we need to prepare grafana charts first
if "test_runner/performance" not in metafunc.definition._nodeid:
metafunc.parametrize("build_type", build_types)
metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions))
# A hacky way to parametrize tests only for `pageserver_virtual_file_io_engine=tokio-epoll-uring`
# And do not change test name for default `pageserver_virtual_file_io_engine=std-fs` to keep tests statistics
if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in ("", "std-fs"):
if (io_engine := os.environ.get("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in ("", "std-fs"):
metafunc.parametrize("pageserver_virtual_file_io_engine", [io_engine])
# For performance tests, parametrize also by platform
if (
"test_runner/performance" in metafunc.definition._nodeid
and (platform := os.getenv("PLATFORM")) is not None
):
metafunc.parametrize("platform", [platform.lower()])
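As a reference, a minimal sketch of the parametrization mechanism this conftest relies on: an autouse fixture whose value is supplied by pytest_generate_tests from an environment variable. The fallback version list below is illustrative, not the suite's actual configuration:

import os

import pytest
from _pytest.python import Metafunc

@pytest.fixture(scope="function", autouse=True)
def pg_version():
    # Shadowed by the direct parametrization below; only used if nothing parametrizes it.
    return None

def pytest_generate_tests(metafunc: Metafunc):
    if (v := os.environ.get("DEFAULT_PG_VERSION")) is None:
        versions = ["14", "15", "16"]  # illustrative fallback list
    else:
        versions = [v]
    metafunc.parametrize("pg_version", versions, ids=[f"pg{v}" for v in versions])

Because the fixture is autouse, every test picks up the parametrized value, which is how the suite fans out across Postgres versions without each test declaring the fixture explicitly.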

View File

@@ -1,5 +1,4 @@
import json
import os
from pathlib import Path
from typing import Any, Dict, Tuple
@@ -34,10 +33,6 @@ from performance.pageserver.util import ensure_pageserver_ready_for_benchmarking
@pytest.mark.timeout(
10000
) # TODO: this value is just "a really high number"; have this per instance type
@pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason="The test is flaky on CI: https://github.com/neondatabase/neon/issues/6724",
)
def test_pageserver_max_throughput_getpage_at_latest_lsn(
neon_env_builder: NeonEnvBuilder,
zenbenchmark: NeonBenchmarker,

View File

@@ -225,7 +225,9 @@ def test_auth_failures(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
check_pageserver(True, password=pageserver_token)
env.pageserver.allowed_errors.append(".*JWT scope '.+' is ineligible for Pageserver auth.*")
env.pageserver.allowed_errors.append(
".*SafekeeperData scope makes no sense for Pageserver.*"
)
check_pageserver(False, password=safekeeper_token)
def check_safekeeper(expect_success: bool, **conn_kwargs):

View File

@@ -20,7 +20,6 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
NeonPageserver,
PgBin,
S3Scrubber,
last_flush_lsn_upload,
@@ -63,7 +62,7 @@ def generate_uploads_and_deletions(
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
data: Optional[str] = None,
pageserver: NeonPageserver,
pageserver_id: Optional[int] = None,
):
"""
Using the environment's default tenant + timeline, generate a load pattern
@@ -78,16 +77,14 @@ def generate_uploads_and_deletions(
timeline_id = env.initial_timeline
assert timeline_id is not None
ps_http = pageserver.http_client()
ps_http = env.pageserver.http_client()
with env.endpoints.create_start(
"main", tenant_id=tenant_id, pageserver_id=pageserver.id
"main", tenant_id=tenant_id, pageserver_id=pageserver_id
) as endpoint:
if init:
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
last_flush_lsn_upload(
env, endpoint, tenant_id, timeline_id, pageserver_id=pageserver.id
)
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
def churn(data):
endpoint.safe_psql_many(
@@ -108,9 +105,7 @@ def generate_uploads_and_deletions(
# We are waiting for uploads as well as local flush, in order to avoid leaving the system
# in a state where there are "future layers" in remote storage that will generate deletions
# after a restart.
last_flush_lsn_upload(
env, endpoint, tenant_id, timeline_id, pageserver_id=pageserver.id
)
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
ps_http.timeline_checkpoint(tenant_id, timeline_id)
# Compaction should generate some GC-elegible layers
@@ -210,7 +205,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.neon_cli.create_tenant(
tenant_id=env.initial_tenant, conf=TENANT_CONF, timeline_id=env.initial_timeline
)
generate_uploads_and_deletions(env, pageserver=env.pageserver)
generate_uploads_and_deletions(env, pageserver_id=env.pageserver.id)
def parse_generation_suffix(key):
m = re.match(".+-([0-9a-zA-Z]{8})$", key)
@@ -238,7 +233,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
# Starting without the override that disabled control_plane_api
env.pageserver.start()
generate_uploads_and_deletions(env, pageserver=env.pageserver, init=False)
generate_uploads_and_deletions(env, pageserver_id=env.pageserver.id, init=False)
legacy_objects: list[str] = []
suffixed_objects = []
@@ -282,16 +277,13 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(
RemoteStorageKind.MOCK_S3,
)
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
main_pageserver = env.get_pageserver(attached_to_id)
other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]
some_other_pageserver = 1234
ps_http = main_pageserver.http_client()
ps_http = env.pageserver.http_client()
generate_uploads_and_deletions(env, pageserver=main_pageserver)
generate_uploads_and_deletions(env)
# Flush: pending deletions should all complete
assert_deletion_queue(ps_http, lambda n: n > 0)
@@ -304,14 +296,14 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
assert timeline["remote_consistent_lsn"] == timeline["remote_consistent_lsn_visible"]
assert get_deletion_queue_dropped_lsn_updates(ps_http) == 0
main_pageserver.allowed_errors.extend(
env.pageserver.allowed_errors.extend(
[".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
)
# Now advance the generation in the control plane: subsequent validations
# from the running pageserver will fail. No more deletions should happen.
env.attachment_service.attach_hook_issue(env.initial_tenant, other_pageserver.id)
generate_uploads_and_deletions(env, init=False, pageserver=main_pageserver)
env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver)
generate_uploads_and_deletions(env, init=False, pageserver_id=env.pageserver.id)
assert_deletion_queue(ps_http, lambda n: n > 0)
queue_depth_before = get_deletion_queue_depth(ps_http)
@@ -363,14 +355,9 @@ def test_deletion_queue_recovery(
neon_env_builder.enable_pageserver_remote_storage(
RemoteStorageKind.MOCK_S3,
)
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
main_pageserver = env.get_pageserver(attached_to_id)
other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]
ps_http = main_pageserver.http_client()
ps_http = env.pageserver.http_client()
failpoints = [
# Prevent deletion lists from being executed, to build up some backlog of deletions
@@ -387,7 +374,7 @@ def test_deletion_queue_recovery(
ps_http.configure_failpoints(failpoints)
generate_uploads_and_deletions(env, pageserver=main_pageserver)
generate_uploads_and_deletions(env)
# There should be entries in the deletion queue
assert_deletion_queue(ps_http, lambda n: n > 0)
@@ -414,7 +401,7 @@ def test_deletion_queue_recovery(
# also wait to see the header hit the disk: this seems paranoid but the race
# can really happen on a heavily overloaded test machine.
def assert_header_written():
assert (main_pageserver.workdir / "deletion" / "header-01").exists()
assert (env.pageserver.workdir / "deletion" / "header-01").exists()
wait_until(20, 1, assert_header_written)
@@ -424,13 +411,13 @@ def test_deletion_queue_recovery(
before_restart_depth = get_deletion_queue_validated(ps_http)
log.info(f"Restarting pageserver with {before_restart_depth} deletions enqueued")
main_pageserver.stop(immediate=True)
env.pageserver.stop(immediate=True)
if keep_attachment == KeepAttachment.LOSE:
some_other_pageserver = other_pageserver.id
some_other_pageserver = 101010
env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver)
main_pageserver.start()
env.pageserver.start()
def assert_deletions_submitted(n: int):
assert ps_http.get_metric_value("pageserver_deletion_queue_submitted_total") == n
@@ -453,7 +440,7 @@ def test_deletion_queue_recovery(
# validated before restart.
assert get_deletion_queue_executed(ps_http) == before_restart_depth
else:
main_pageserver.allowed_errors.extend([".*Dropping stale deletions.*"])
env.pageserver.allowed_errors.extend([".*Dropping stale deletions.*"])
# If we lost the attachment, we should have dropped our pre-restart deletions.
assert get_deletion_queue_dropped(ps_http) == before_restart_depth
@@ -462,8 +449,8 @@ def test_deletion_queue_recovery(
assert get_deletion_queue_dropped_lsn_updates(ps_http) == 0
# Restart again
main_pageserver.stop(immediate=True)
main_pageserver.start()
env.pageserver.stop(immediate=True)
env.pageserver.start()
# No deletion lists should be recovered: this demonstrates that deletion lists
# were cleaned up after being executed or dropped in the previous process lifetime.
@@ -482,7 +469,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
ps_http = env.pageserver.http_client()
generate_uploads_and_deletions(env, pageserver=env.pageserver)
generate_uploads_and_deletions(env, pageserver_id=env.pageserver.id)
env.pageserver.allowed_errors.extend(
[
@@ -499,7 +486,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
# Remember how many validations had happened before the control plane went offline
validated = get_deletion_queue_validated(ps_http)
generate_uploads_and_deletions(env, init=False, pageserver=env.pageserver)
generate_uploads_and_deletions(env, init=False, pageserver_id=env.pageserver.id)
# The running pageserver should stop progressing deletions
time.sleep(10)
@@ -515,7 +502,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
)
# The pageserver should provide service to clients
generate_uploads_and_deletions(env, init=False, pageserver=env.pageserver)
generate_uploads_and_deletions(env, init=False, pageserver_id=env.pageserver.id)
# The pageserver should neither validate nor execute any deletions, it should have
# loaded the DeletionLists from before though
@@ -536,7 +523,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env.pageserver.stop() # Non-immediate: implicitly checking that shutdown doesn't hang waiting for CP
env.pageserver.start()
generate_uploads_and_deletions(env, init=False, pageserver=env.pageserver)
generate_uploads_and_deletions(env, init=False, pageserver_id=env.pageserver.id)
ps_http.deletion_queue_flush(execute=True)
assert get_deletion_queue_depth(ps_http) == 0
assert get_deletion_queue_validated(ps_http) > 0
@@ -574,7 +561,7 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
generate_uploads_and_deletions(env, pageserver=env.pageserver)
generate_uploads_and_deletions(env)
read_all(env, tenant_id, timeline_id)
evict_all_layers(env, tenant_id, timeline_id)

View File

@@ -194,18 +194,6 @@ def test_sharding_split_smoke(
assert len(pre_split_pageserver_ids) == 4
def shards_on_disk(shard_ids):
for pageserver in env.pageservers:
for shard_id in shard_ids:
if pageserver.tenant_dir(shard_id).exists():
return True
return False
old_shard_ids = [TenantShardId(tenant_id, i, shard_count) for i in range(0, shard_count)]
# Before split, old shards exist
assert shards_on_disk(old_shard_ids)
env.attachment_service.tenant_shard_split(tenant_id, shard_count=split_shard_count)
post_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
@@ -214,9 +202,6 @@ def test_sharding_split_smoke(
assert len(set(post_split_pageserver_ids)) == shard_count
assert set(post_split_pageserver_ids) == set(pre_split_pageserver_ids)
# The old parent shards should no longer exist on disk
assert not shards_on_disk(old_shard_ids)
workload.validate()
workload.churn_rows(256)

View File

@@ -18,7 +18,6 @@ from fixtures.metrics import (
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import timeline_delete_wait_completed, wait_until_tenant_active
@@ -415,50 +414,3 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder):
# The tenant should end up active
wait_until_tenant_active(env.pageserver.http_client(), tenant_id, iterations=10, period=1)
def test_pageserver_metrics_many_relations(neon_env_builder: NeonEnvBuilder):
"""Test for the directory_entries_count metric"""
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
env = neon_env_builder.init_start()
ps_http = env.pageserver.http_client()
endpoint_tenant = env.endpoints.create_start("main", tenant_id=env.initial_tenant)
# Not sure why, but this many tables create more relations than our limit
TABLE_COUNT = 1600
COUNT_AT_LEAST_EXPECTED = 5500
with endpoint_tenant.connect() as conn:
with conn.cursor() as cur:
# Wrapping begin; commit; around this and the loop below keeps the reproduction
# but it also doesn't have a performance benefit
cur.execute("CREATE TABLE template_tbl(key int primary key, value text);")
for i in range(TABLE_COUNT):
cur.execute(f"CREATE TABLE tbl_{i}(like template_tbl INCLUDING ALL);")
wait_for_last_flush_lsn(env, endpoint_tenant, env.initial_tenant, env.initial_timeline)
endpoint_tenant.stop()
m = ps_http.get_metrics()
directory_entries_count_metric = m.query_all(
"pageserver_directory_entries_count", {"tenant_id": str(env.initial_tenant)}
)
def only_int(samples: List[Sample]) -> int:
assert len(samples) == 1
return int(samples[0].value)
directory_entries_count = only_int(directory_entries_count_metric)
log.info(f"pageserver_directory_entries_count metric value: {directory_entries_count}")
assert directory_entries_count > COUNT_AT_LEAST_EXPECTED
timeline_detail = ps_http.timeline_detail(env.initial_tenant, env.initial_timeline)
counts = timeline_detail["directory_entries_counts"]
assert counts
log.info(f"directory counts: {counts}")
assert counts[2] > COUNT_AT_LEAST_EXPECTED

View File

@@ -1,28 +0,0 @@
-- Test the test utils in pgxn/neon_test_utils. We don't test that
-- these actually consume resources like they should - that would be
-- tricky - but at least we check that they don't crash.
CREATE EXTENSION neon_test_utils;
select test_consume_cpu(1);
test_consume_cpu
------------------
(1 row)
select test_consume_memory(20); -- Allocate 20 MB
test_consume_memory
---------------------
(1 row)
select test_release_memory(5); -- Release 5 MB
test_release_memory
---------------------
(1 row)
select test_release_memory(); -- Release the remaining 15 MB
test_release_memory
---------------------
(1 row)

View File

@@ -7,5 +7,4 @@
test: neon-cid
test: neon-rel-truncate
test: neon-clog
test: neon-test-utils
test: neon-vacuum-full

View File

@@ -1,11 +0,0 @@
-- Test the test utils in pgxn/neon_test_utils. We don't test that
-- these actually consume resources like they should - that would be
-- tricky - but at least we check that they don't crash.
CREATE EXTENSION neon_test_utils;
select test_consume_cpu(1);
select test_consume_memory(20); -- Allocate 20 MB
select test_release_memory(5); -- Release 5 MB
select test_release_memory(); -- Release the remaining 15 MB

View File

@@ -1,5 +1,5 @@
{
"postgres-v16": "f7b63d8cf9ae040f6907c3c13ef25fcf15a36161",
"postgres-v15": "9eef016e18bf61753e3cbaa755f705db6a4f7b1d",
"postgres-v14": "b4bae26a0f09c69e979e6cb55780398e3102e022"
"postgres-v16": "9c37a4988463a97d9cacb321acf3828b09823269",
"postgres-v15": "ca2def999368d9df098a637234ad5a9003189463",
"postgres-v14": "9dd9956c55ffbbd9abe77d10382453757fedfcf5"
}