Use flate2::write::GzEncoder instead of async_compression::tokio::write::GzipEncoder

WIP
update comment
2026-03-06 09:50:38 +00:00 · 2023-06-14 12:00:48 +03:00 · 2023-06-13 17:40:10 -04:00 · 2023-06-12 20:49:56 -04:00 · 2023-06-12 20:39:57 -04:00 · 2023-06-12 18:52:34 -04:00
83 changed files with 1154 additions and 2172 deletions
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -180,8 +180,7 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

-    # Increase timeout to 8h, default timeout is 6h
-    timeout-minutes: 480
+    timeout-minutes: 360 # 6h

    steps:
    - uses: actions/checkout@v3
@@ -322,6 +321,8 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

+    timeout-minutes: 360 # 6h
+
    steps:
    - uses: actions/checkout@v3

@@ -413,6 +414,8 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

+    timeout-minutes: 360 # 6h
+
    steps:
    - uses: actions/checkout@v3

@@ -498,6 +501,8 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

+    timeout-minutes: 360 # 6h
+
    steps:
    - uses: actions/checkout@v3

--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -623,6 +623,51 @@ jobs:
      - name: Cleanup ECR folder
        run: rm -rf ~/.ecr

+
+  neon-image-depot:
+    # For testing this will run side-by-side for a few merges.
+    # This action is not really optimized yet, but gets the job done
+    runs-on: [ self-hosted, gen3, large ]
+    needs: [ tag ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
+    permissions:
+      contents: read
+      id-token: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+
+      - name: Setup go
+        uses: actions/setup-go@v3
+        with:
+          go-version: '1.19'
+
+      - name: Set up Depot CLI
+        uses: depot/setup-action@v1
+
+      - name: Install Crane & ECR helper
+        run: go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
+
+      - name: Configure ECR login
+        run: |
+          mkdir /github/home/.docker/
+          echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
+
+      - name: Build and push
+        uses: depot/build-push-action@v1
+        with:
+          # if no depot.json file is at the root of your repo, you must specify the project id
+          project: nrdv0s4kcs
+          push: true
+          tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
+          build-args: |
+            GIT_VERSION=${{ github.sha }}
+            REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+
  compute-tools-image:
    runs-on: [ self-hosted, gen3, large ]
    needs: [ tag ]
@@ -659,7 +704,6 @@ jobs:
                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
                           --context .
                           --build-arg GIT_VERSION=${{ github.sha }}
-                           --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
                           --dockerfile Dockerfile.compute-tools
                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
@@ -717,7 +761,6 @@ jobs:
                           --context .
                           --build-arg GIT_VERSION=${{ github.sha }}
                           --build-arg PG_VERSION=${{ matrix.version }}
-                           --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
                           --dockerfile Dockerfile.compute-node
                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
@@ -738,7 +781,7 @@ jobs:
      run:
        shell: sh -eu {0}
    env:
-      VM_BUILDER_VERSION: v0.11.0
+      VM_BUILDER_VERSION: v0.8.0

    steps:
      - name: Checkout
@@ -916,20 +959,6 @@ jobs:
            exit 1
          fi

-      - name: Create git tag
-        if: github.ref_name == 'release'
-        uses: actions/github-script@v6
-        with:
-          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
-          retries: 5
-          script: |
-            github.rest.git.createRef({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
-              sha: context.sha,
-            })
-
  promote-compatibility-data:
    runs-on: [ self-hosted, gen3, small ]
    container:
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,7 +3,6 @@ name: Create Release Branch
 on:
  schedule:
    - cron: '0 10 * * 2'
-  workflow_dispatch:

 jobs:
  create_release_branch:
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -604,7 +604,7 @@ dependencies = [
 "cc",
 "cfg-if",
 "libc",
- "miniz_oxide",
+ "miniz_oxide 0.6.2",
 "object",
 "rustc-demangle",
 ]
@@ -917,6 +917,7 @@ dependencies = [
 "chrono",
 "clap 4.3.0",
 "compute_api",
+ "flate2",
 "futures",
 "hyper",
 "notify",
@@ -1399,6 +1400,16 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"

+[[package]]
+name = "flate2"
+version = "1.0.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide 0.7.1",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -2189,6 +2200,15 @@ dependencies = [
 "adler",
 ]

+[[package]]
+name = "miniz_oxide"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
+dependencies = [
+ "adler",
+]
+
 [[package]]
 name = "mio"
 version = "0.8.6"
@@ -2349,9 +2369,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

 [[package]]
 name = "openssl"
-version = "0.10.55"
+version = "0.10.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
+checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
 dependencies = [
 "bitflags",
 "cfg-if",
@@ -2381,9 +2401,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"

 [[package]]
 name = "openssl-sys"
-version = "0.9.90"
+version = "0.9.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6"
+checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
 dependencies = [
 "cc",
 "libc",
@@ -2558,6 +2578,7 @@ dependencies = [
 "enum-map",
 "enumset",
 "fail",
+ "flate2",
 "futures",
 "git-version",
 "hex",
@@ -2770,7 +2791,7 @@ dependencies = [
 [[package]]
 name = "postgres"
 version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -2783,7 +2804,7 @@ dependencies = [
 [[package]]
 name = "postgres-native-tls"
 version = "0.5.0"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
 dependencies = [
 "native-tls",
 "tokio",
@@ -2794,7 +2815,7 @@ dependencies = [
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
 dependencies = [
 "base64 0.20.0",
 "byteorder",
@@ -2812,7 +2833,7 @@ dependencies = [
 [[package]]
 name = "postgres-types"
 version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -4272,7 +4293,7 @@ dependencies = [
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
 dependencies = [
 "async-trait",
 "byteorder",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ license = "Apache-2.0"
 ## All dependency versions, used in the project
 [workspace.dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
+flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
 atty = "0.2.14"
@@ -140,11 +141,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
-postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
 tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }

 ## Other git libraries
@@ -180,7 +181,7 @@ tonic-build = "0.9"

 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }

 # Changes the MAX_THREADS limit from 4096 to 32768.
 # This is a temporary workaround for using tracing from many threads in safekeepers code,
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -2,7 +2,6 @@ ARG PG_VERSION
 ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned
-ARG BUILD_TAG

 #########################################################################################
 #
@@ -481,40 +480,6 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_1.tar.
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control

-#########################################################################################
-#
-# Layer "pg-uuidv7-pg-build"
-# compile pg_uuidv7 extension
-#
-#########################################################################################
-FROM build-deps AS pg-uuidv7-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
-    echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
-    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
-
-#########################################################################################
-#
-# Layer "pg-roaringbitmap-pg-build"
-# compile pg_roaringbitmap extension
-#
-#########################################################################################
-FROM build-deps AS pg-roaringbitmap-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
-    echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
-    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
-
 #########################################################################################
 #
 # Layer "rust extensions"
@@ -648,8 +613,6 @@ COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -671,9 +634,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
 #
 #########################################################################################
 FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
-ARG BUILD_TAG
-ENV BUILD_TAG=$BUILD_TAG
-
 USER nonroot
 # Copy entire project to get Cargo.* files with proper dependencies for the whole project
 COPY --chown=nonroot . .
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -3,7 +3,6 @@
 ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned
-ARG BUILD_TAG

 FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
 WORKDIR /home/nonroot
@@ -17,8 +16,6 @@ ENV CACHEPOT_S3_KEY_PREFIX=cachepot
 ARG CACHEPOT_BUCKET=neon-github-dev
 #ARG AWS_ACCESS_KEY_ID
 #ARG AWS_SECRET_ACCESS_KEY
-ARG BUILD_TAG
-ENV BUILD_TAG=$BUILD_TAG

 COPY . .

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -6,6 +6,7 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+flate2.workspace = true
 chrono.workspace = true
 clap.workspace = true
 futures.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -54,15 +54,9 @@ use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;

-const BUILD_TAG_DEFAULT: &str = "local";
-
 fn main() -> Result<()> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

-    let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT);
-
-    info!("build_tag: {build_tag}");
-
    let matches = cli().get_matches();

    let http_port = *matches
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -15,6 +15,7 @@ use utils::lsn::Lsn;

 use compute_api::responses::{ComputeMetrics, ComputeStatus};
 use compute_api::spec::{ComputeMode, ComputeSpec};
+use utils::measured_stream::MeasuredReader;

 use crate::config;
 use crate::pg_helpers::*;
@@ -133,84 +134,6 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
    }
 }

-/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
-/// that we give to customers
-fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
-    let roles = spec
-        .cluster
-        .roles
-        .iter()
-        .map(|r| format!("'{}'", escape_literal(&r.name)))
-        .collect::<Vec<_>>();
-
-    let dbs = spec
-        .cluster
-        .databases
-        .iter()
-        .map(|db| format!("'{}'", escape_literal(&db.name)))
-        .collect::<Vec<_>>();
-
-    let roles_decl = if roles.is_empty() {
-        String::from("roles text[] := NULL;")
-    } else {
-        format!(
-            r#"
-               roles text[] := ARRAY(SELECT rolname
-                                     FROM pg_catalog.pg_roles
-                                     WHERE rolname IN ({}));"#,
-            roles.join(", ")
-        )
-    };
-
-    let database_decl = if dbs.is_empty() {
-        String::from("dbs text[] := NULL;")
-    } else {
-        format!(
-            r#"
-               dbs text[] := ARRAY(SELECT datname
-                                   FROM pg_catalog.pg_database
-                                   WHERE datname IN ({}));"#,
-            dbs.join(", ")
-        )
-    };
-
-    // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases
-    // (see https://www.postgresql.org/docs/current/ddl-priv.html)
-    let query = format!(
-        r#"
-            DO $$
-                DECLARE
-                    r text;
-                    {}
-                    {}
-                BEGIN
-                    IF NOT EXISTS (
-                        SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
-                    THEN
-                        CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN IN ROLE pg_read_all_data, pg_write_all_data;
-                        IF array_length(roles, 1) IS NOT NULL THEN
-                            EXECUTE format('GRANT neon_superuser TO %s',
-                                           array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));
-                            FOREACH r IN ARRAY roles LOOP
-                                EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', quote_ident(r));
-                            END LOOP;
-                        END IF;
-                        IF array_length(dbs, 1) IS NOT NULL THEN
-                            EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser',
-                                           array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(dbs) as x), ', '));
-                        END IF;
-                    END IF;
-                END
-            $$;"#,
-        roles_decl, database_decl,
-    );
-    info!("Neon superuser created:\n{}", &query);
-    client
-        .simple_query(&query)
-        .map_err(|e| anyhow::anyhow!(e).context(query))?;
-    Ok(())
-}
-
 impl ComputeNode {
    pub fn set_status(&self, status: ComputeStatus) {
        let mut state = self.state.lock().unwrap();
@@ -257,16 +180,21 @@ impl ComputeNode {
            _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn),
        };
        let copyreader = client.copy_out(basebackup_cmd.as_str())?;
+        let mut measured_reader = MeasuredReader::new(copyreader);
+        let mut decoder = flate2::read::GzDecoder::new(&mut measured_reader);

        // Read the archive directly from the `CopyOutReader`
        //
        // Set `ignore_zeros` so that unpack() reads all the Copy data and
        // doesn't stop at the end-of-archive marker. Otherwise, if the server
        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(copyreader);
+        let mut ar = tar::Archive::new(&mut decoder);
        ar.set_ignore_zeros(true);
        ar.unpack(&self.pgdata)?;

+        // Report metrics
+        self.state.lock().unwrap().metrics.basebackup_bytes =
+            measured_reader.get_byte_count() as u64;
        self.state.lock().unwrap().metrics.basebackup_ms = Utc::now()
            .signed_duration_since(start_time)
            .to_std()
@@ -338,13 +266,9 @@ impl ComputeNode {
        let lsn = match spec.mode {
            ComputeMode::Primary => {
                info!("starting safekeepers syncing");
-                let lsn = if let Some(synced_lsn) = spec.skip_sync_safekeepers {
-                    info!("no need to sync");
-                    synced_lsn
-                } else {
-                    self.sync_safekeepers(pspec.storage_auth_token.clone())
-                        .with_context(|| "failed to sync safekeepers")?
-                };
+                let lsn = self
+                    .sync_safekeepers(pspec.storage_auth_token.clone())
+                    .with_context(|| "failed to sync safekeepers")?;
                info!("safekeepers synced at LSN {}", lsn);
                lsn
            }
@@ -429,8 +353,6 @@ impl ComputeNode {
                    .map_err(|_| anyhow::anyhow!("invalid connstr"))?;

                let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
-                // Disable forwarding so that users don't get a cloud_admin role
-                client.simple_query("SET neon.forward_ddl = false")?;
                client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
                client.simple_query("GRANT zenith_admin TO cloud_admin")?;
                drop(client);
@@ -441,21 +363,24 @@ impl ComputeNode {
            Ok(client) => client,
        };

+        // Proceed with post-startup configuration. Note that the order of operations is important.
        // Disable DDL forwarding because control plane already knows about these roles/databases.
        client.simple_query("SET neon.forward_ddl = false")?;
-
-        // Proceed with post-startup configuration. Note, that order of operations is important.
        let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
-        create_neon_superuser(spec, &mut client)?;
        handle_roles(spec, &mut client)?;
        handle_databases(spec, &mut client)?;
        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
-        handle_grants(spec, self.connstr.as_str())?;
+        handle_grants(spec, self.connstr.as_str(), &mut client)?;
        handle_extensions(spec, &mut client)?;

        // 'Close' connection
        drop(client);

+        info!(
+            "finished configuration of compute for project {}",
+            spec.cluster.cluster_id.as_deref().unwrap_or("None")
+        );
+
        Ok(())
    }

@@ -488,7 +413,7 @@ impl ComputeNode {
            handle_roles(&spec, &mut client)?;
            handle_databases(&spec, &mut client)?;
            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
-            handle_grants(&spec, self.connstr.as_str())?;
+            handle_grants(&spec, self.connstr.as_str(), &mut client)?;
            handle_extensions(&spec, &mut client)?;
        }

@@ -508,22 +433,22 @@ impl ComputeNode {
    #[instrument(skip(self))]
    pub fn start_compute(&self) -> Result<std::process::Child> {
        let compute_state = self.state.lock().unwrap().clone();
-        let pspec = compute_state.pspec.as_ref().expect("spec must be set");
+        let spec = compute_state.pspec.as_ref().expect("spec must be set");
        info!(
            "starting compute for project {}, operation {}, tenant {}, timeline {}",
-            pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None"),
-            pspec.spec.operation_uuid.as_deref().unwrap_or("None"),
-            pspec.tenant_id,
-            pspec.timeline_id,
+            spec.spec.cluster.cluster_id.as_deref().unwrap_or("None"),
+            spec.spec.operation_uuid.as_deref().unwrap_or("None"),
+            spec.tenant_id,
+            spec.timeline_id,
        );

        self.prepare_pgdata(&compute_state)?;

        let start_time = Utc::now();

-        let pg = self.start_postgres(pspec.storage_auth_token.clone())?;
+        let pg = self.start_postgres(spec.storage_auth_token.clone())?;

-        if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
+        if spec.spec.mode == ComputeMode::Primary && !spec.spec.skip_pg_catalog_updates {
            self.apply_config(&compute_state)?;
        }

@@ -543,11 +468,6 @@ impl ComputeNode {
        }
        self.set_status(ComputeStatus::Running);

-        info!(
-            "finished configuration of compute for project {}",
-            pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None")
-        );
-
        Ok(pg)
    }

--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -17,7 +17,7 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

 /// Escape a string for including it in a SQL literal
-pub fn escape_literal(s: &str) -> String {
+fn escape_literal(s: &str) -> String {
    s.replace('\'', "''").replace('\\', "\\\\")
 }

--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -269,13 +269,17 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                xact.execute(query.as_str(), &[])?;
            }
            RoleAction::Create => {
-                let mut query: String = format!(
-                    "CREATE ROLE {} CREATEROLE CREATEDB IN ROLE neon_superuser",
-                    name.pg_quote()
-                );
+                let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
                info!("role create query: '{}'", &query);
                query.push_str(&role.to_pg_options());
                xact.execute(query.as_str(), &[])?;
+
+                let grant_query = format!(
+                    "GRANT pg_read_all_data, pg_write_all_data TO {}",
+                    name.pg_quote()
+                );
+                xact.execute(grant_query.as_str(), &[])?;
+                info!("role grant query: '{}'", &grant_query);
            }
        }

@@ -472,11 +476,6 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                query.push_str(&db.to_pg_options());
                let _guard = info_span!("executing", query).entered();
                client.execute(query.as_str(), &[])?;
-                let grant_query: String = format!(
-                    "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
-                    name.pg_quote()
-                );
-                client.execute(grant_query.as_str(), &[])?;
            }
        };

@@ -496,9 +495,35 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
 /// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
 /// to allow users creating trusted extensions and re-creating `public` schema, for example.
 #[instrument(skip_all)]
-pub fn handle_grants(spec: &ComputeSpec, connstr: &str) -> Result<()> {
+pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
    info!("cluster spec grants:");

+    // We now have a separate `web_access` role to connect to the database
+    // via the web interface and proxy link auth. And also we grant a
+    // read / write all data privilege to every role. So also grant
+    // create to everyone.
+    // XXX: later we should stop messing with Postgres ACL in such horrible
+    // ways.
+    let roles = spec
+        .cluster
+        .roles
+        .iter()
+        .map(|r| r.name.pg_quote())
+        .collect::<Vec<_>>();
+
+    for db in &spec.cluster.databases {
+        let dbname = &db.name;
+
+        let query: String = format!(
+            "GRANT CREATE ON DATABASE {} TO {}",
+            dbname.pg_quote(),
+            roles.join(", ")
+        );
+        info!("grant query {}", &query);
+
+        client.execute(query.as_str(), &[])?;
+    }
+
    // Do some per-database access adjustments. We'd better do this at db creation time,
    // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
    // atomically.
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -67,8 +67,6 @@ pub struct EndpointConf {
    pg_port: u16,
    http_port: u16,
    pg_version: u32,
-    skip_pg_catalog_updates: bool,
-    skip_sync_safekeepers: Option<utils::lsn::Lsn>,
 }

 //
@@ -137,8 +135,6 @@ impl ComputeControlPlane {
            mode,
            tenant_id,
            pg_version,
-            skip_pg_catalog_updates: false,
-            skip_sync_safekeepers: None,
        });

        ep.create_endpoint_dir()?;
@@ -152,8 +148,6 @@ impl ComputeControlPlane {
                http_port,
                pg_port,
                pg_version,
-                skip_pg_catalog_updates: false,
-                skip_sync_safekeepers: None,
            })?,
        )?;
        std::fs::write(
@@ -189,10 +183,6 @@ pub struct Endpoint {
    // the endpoint runs in.
    pub env: LocalEnv,
    pageserver: Arc<PageServerNode>,
-
-    // Optimizations
-    skip_pg_catalog_updates: bool,
-    skip_sync_safekeepers: Option<utils::lsn::Lsn>,
 }

 impl Endpoint {
@@ -226,8 +216,6 @@ impl Endpoint {
            mode: conf.mode,
            tenant_id: conf.tenant_id,
            pg_version: conf.pg_version,
-            skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
-            skip_sync_safekeepers: conf.skip_sync_safekeepers,
        })
    }

@@ -462,8 +450,7 @@ impl Endpoint {

        // Create spec file
        let spec = ComputeSpec {
-            skip_sync_safekeepers: self.skip_sync_safekeepers,
-            skip_pg_catalog_updates: self.skip_pg_catalog_updates,
+            skip_pg_catalog_updates: false,
            format_version: 1.0,
            operation_uuid: None,
            cluster: Cluster {
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -71,6 +71,7 @@ pub struct ComputeMetrics {
    pub wait_for_spec_ms: u64,
    pub sync_safekeepers_ms: u64,
    pub basebackup_ms: u64,
+    pub basebackup_bytes: u64,
    pub config_ms: u64,
    pub total_startup_ms: u64,
 }
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -33,15 +33,6 @@ pub struct ComputeSpec {
    #[serde(default)] // Default false
    pub skip_pg_catalog_updates: bool,

-    /// An optinal hint that can be passed to speed up startup time if we know
-    /// that safekeepers have already been synced at the given LSN.
-    ///
-    /// NOTE: If there's any possibility that the safekeepers could have advanced
-    ///       (e.g. if we started compute, and it crashed) we should stay on the
-    ///       safe side and provide None.
-    #[serde(default)]
-    pub skip_sync_safekeepers: Option<Lsn>,
-
    // Information needed to connect to the storage layer.
    //
    // `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
@@ -157,14 +148,4 @@ mod tests {
        let file = File::open("tests/cluster_spec.json").unwrap();
        let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
    }
-
-    #[test]
-    fn parse_unknown_fields() {
-        // Forward compatibility test
-        let file = File::open("tests/cluster_spec.json").unwrap();
-        let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();
-        let ob = json.as_object_mut().unwrap();
-        ob.insert("unknown_field_123123123".into(), "hello".into());
-        let _spec: ComputeSpec = serde_json::from_value(json).unwrap();
-    }
 }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -23,7 +23,6 @@ use prometheus::{Registry, Result};
 pub mod launch_timestamp;
 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};
-pub mod metric_vec_duration;

 pub type UIntGauge = GenericGauge<AtomicU64>;
 pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
--- a/libs/metrics/src/metric_vec_duration.rs
+++ b/libs/metrics/src/metric_vec_duration.rs
@@ -1,23 +0,0 @@
-//! Helpers for observing duration on HistogramVec / CounterVec / GaugeVec / MetricVec<T>.
-
-use std::{future::Future, time::Instant};
-
-pub trait DurationResultObserver {
-    fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration);
-}
-
-pub async fn observe_async_block_duration_by_result<
-    T,
-    E,
-    F: Future<Output = Result<T, E>>,
-    O: DurationResultObserver,
->(
-    observer: &O,
-    block: F,
-) -> Result<T, E> {
-    let start = Instant::now();
-    let result = block.await;
-    let duration = start.elapsed();
-    observer.observe_result(&result, duration);
-    result
-}
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -70,14 +70,6 @@ impl RemotePath {
    pub fn join(&self, segment: &Path) -> Self {
        Self(self.0.join(segment))
    }
-
-    pub fn get_path(&self) -> &PathBuf {
-        &self.0
-    }
-
-    pub fn extension(&self) -> Option<&str> {
-        self.0.extension()?.to_str()
-    }
 }

 /// Storage (potentially remote) API to manage its state.
@@ -94,19 +86,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
        prefix: Option<&RemotePath>,
    ) -> Result<Vec<RemotePath>, DownloadError>;

-    /// Lists all files in directory "recursively"
-    /// (not really recursively, because AWS has a flat namespace)
-    /// Note: This is subtely different than list_prefixes,
-    /// because it is for listing files instead of listing
-    /// names sharing common prefixes.
-    /// For example,
-    /// list_files("foo/bar") = ["foo/bar/cat123.txt",
-    /// "foo/bar/cat567.txt", "foo/bar/dog123.txt", "foo/bar/dog456.txt"]
-    /// whereas,
-    /// list_prefixes("foo/bar/") = ["cat", "dog"]
-    /// See `test_real_s3.rs` for more details.
-    async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
-
    /// Streams the local file contents into remote into the remote storage entry.
    async fn upload(
        &self,
@@ -195,14 +174,6 @@ impl GenericRemoteStorage {
        }
    }

-    pub async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
-        match self {
-            Self::LocalFs(s) => s.list_files(folder).await,
-            Self::AwsS3(s) => s.list_files(folder).await,
-            Self::Unreliable(s) => s.list_files(folder).await,
-        }
-    }
-
    pub async fn upload(
        &self,
        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -48,14 +48,6 @@ impl LocalFs {
        Ok(Self { storage_root })
    }

-    // mirrors S3Bucket::s3_object_to_relative_path
-    fn local_file_to_relative_path(&self, key: PathBuf) -> RemotePath {
-        let relative_path = key
-            .strip_prefix(&self.storage_root)
-            .expect("relative path must contain storage_root as prefix");
-        RemotePath(relative_path.into())
-    }
-
    async fn read_storage_metadata(
        &self,
        file_path: &Path,
@@ -140,34 +132,6 @@ impl RemoteStorage for LocalFs {
        Ok(prefixes)
    }

-    // recursively lists all files in a directory,
-    // mirroring the `list_files` for `s3_bucket`
-    async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
-        let full_path = match folder {
-            Some(folder) => folder.with_base(&self.storage_root),
-            None => self.storage_root.clone(),
-        };
-        let mut files = vec![];
-        let mut directory_queue = vec![full_path.clone()];
-
-        while !directory_queue.is_empty() {
-            let cur_folder = directory_queue
-                .pop()
-                .expect("queue cannot be empty: we just checked");
-            let mut entries = fs::read_dir(cur_folder.clone()).await?;
-            while let Some(entry) = entries.next_entry().await? {
-                let file_name: PathBuf = entry.file_name().into();
-                let full_file_name = cur_folder.clone().join(&file_name);
-                let file_remote_path = self.local_file_to_relative_path(full_file_name.clone());
-                files.push(file_remote_path.clone());
-                if full_file_name.is_dir() {
-                    directory_queue.push(full_file_name);
-                }
-            }
-        }
-        Ok(files)
-    }
-
    async fn upload(
        &self,
        data: impl io::AsyncRead + Unpin + Send + Sync + 'static,
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -34,8 +34,6 @@ use crate::{
    Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
 };

-const MAX_DELETE_OBJECTS_REQUEST_SIZE: usize = 1000;
-
 pub(super) mod metrics {
    use metrics::{register_int_counter_vec, IntCounterVec};
    use once_cell::sync::Lazy;
@@ -347,51 +345,6 @@ impl RemoteStorage for S3Bucket {
        Ok(document_keys)
    }

-    /// See the doc for `RemoteStorage::list_files`
-    async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
-        let folder_name = folder
-            .map(|p| self.relative_path_to_s3_object(p))
-            .or_else(|| self.prefix_in_bucket.clone());
-
-        // AWS may need to break the response into several parts
-        let mut continuation_token = None;
-        let mut all_files = vec![];
-        loop {
-            let _guard = self
-                .concurrency_limiter
-                .acquire()
-                .await
-                .context("Concurrency limiter semaphore got closed during S3 list_files")?;
-            metrics::inc_list_objects();
-
-            let response = self
-                .client
-                .list_objects_v2()
-                .bucket(self.bucket_name.clone())
-                .set_prefix(folder_name.clone())
-                .set_continuation_token(continuation_token)
-                .set_max_keys(self.max_keys_per_list_response)
-                .send()
-                .await
-                .map_err(|e| {
-                    metrics::inc_list_objects_fail();
-                    e
-                })
-                .context("Failed to list files in S3 bucket")?;
-
-            for object in response.contents().unwrap_or_default() {
-                let object_path = object.key().expect("response does not contain a key");
-                let remote_path = self.s3_object_to_relative_path(object_path);
-                all_files.push(remote_path);
-            }
-            match response.next_continuation_token {
-                Some(new_token) => continuation_token = Some(new_token),
-                None => break,
-            }
-        }
-        Ok(all_files)
-    }
-
    async fn upload(
        &self,
        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
@@ -471,33 +424,17 @@ impl RemoteStorage for S3Bucket {
            delete_objects.push(obj_id);
        }

-        for chunk in delete_objects.chunks(MAX_DELETE_OBJECTS_REQUEST_SIZE) {
-            metrics::inc_delete_objects(chunk.len() as u64);
-
-            let resp = self
-                .client
-                .delete_objects()
-                .bucket(self.bucket_name.clone())
-                .delete(Delete::builder().set_objects(Some(chunk.to_vec())).build())
-                .send()
-                .await;
-
-            match resp {
-                Ok(resp) => {
-                    if let Some(errors) = resp.errors {
-                        metrics::inc_delete_objects_fail(errors.len() as u64);
-                        return Err(anyhow::format_err!(
-                            "Failed to delete {} objects",
-                            errors.len()
-                        ));
-                    }
-                }
-                Err(e) => {
-                    metrics::inc_delete_objects_fail(chunk.len() as u64);
-                    return Err(e.into());
-                }
-            }
-        }
+        metrics::inc_delete_objects(paths.len() as u64);
+        self.client
+            .delete_objects()
+            .bucket(self.bucket_name.clone())
+            .delete(Delete::builder().set_objects(Some(delete_objects)).build())
+            .send()
+            .await
+            .map_err(|e| {
+                metrics::inc_delete_objects_fail(paths.len() as u64);
+                e
+            })?;
        Ok(())
    }

--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -24,7 +24,6 @@ enum RemoteOp {
    Upload(RemotePath),
    Download(RemotePath),
    Delete(RemotePath),
-    DeleteObjects(Vec<RemotePath>),
 }

 impl UnreliableWrapper {
@@ -83,11 +82,6 @@ impl RemoteStorage for UnreliableWrapper {
        self.inner.list_prefixes(prefix).await
    }

-    async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
-        self.attempt(RemoteOp::ListPrefixes(folder.cloned()))?;
-        self.inner.list_files(folder).await
-    }
-
    async fn upload(
        &self,
        data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
@@ -127,18 +121,8 @@ impl RemoteStorage for UnreliableWrapper {
    }

    async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
-        self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;
-        let mut error_counter = 0;
        for path in paths {
-            if (self.delete(path).await).is_err() {
-                error_counter += 1;
-            }
-        }
-        if error_counter > 0 {
-            return Err(anyhow::anyhow!(
-                "failed to delete {} objects",
-                error_counter
-            ));
+            self.delete(path).await?
        }
        Ok(())
    }
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -88,58 +88,6 @@ async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3WithTestBlobs) -> any
    Ok(())
 }

-/// Tests that S3 client can list all files in a folder, even if the response comes paginated and requirees multiple S3 queries.
-/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. Test will skip real code and pass if env vars not set.
-/// See `s3_pagination_should_work` for more information.
-///
-/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_s3_data`]
-/// Then performs the following queries:
-///    1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
-///    2. `list_files("folder1")`.  This  should return all files `random_prefix/folder1/blob_{i}.txt`
-#[test_context(MaybeEnabledS3WithSimpleTestBlobs)]
-#[tokio::test]
-async fn s3_list_files_works(ctx: &mut MaybeEnabledS3WithSimpleTestBlobs) -> anyhow::Result<()> {
-    let ctx = match ctx {
-        MaybeEnabledS3WithSimpleTestBlobs::Enabled(ctx) => ctx,
-        MaybeEnabledS3WithSimpleTestBlobs::Disabled => return Ok(()),
-        MaybeEnabledS3WithSimpleTestBlobs::UploadsFailed(e, _) => {
-            anyhow::bail!("S3 init failed: {e:?}")
-        }
-    };
-    let test_client = Arc::clone(&ctx.enabled.client);
-    let base_prefix =
-        RemotePath::new(Path::new("folder1")).context("common_prefix construction")?;
-    let root_files = test_client
-        .list_files(None)
-        .await
-        .context("client list root files failure")?
-        .into_iter()
-        .collect::<HashSet<_>>();
-    assert_eq!(
-        root_files,
-        ctx.remote_blobs.clone(),
-        "remote storage list_files on root mismatches with the uploads."
-    );
-    let nested_remote_files = test_client
-        .list_files(Some(&base_prefix))
-        .await
-        .context("client list nested files failure")?
-        .into_iter()
-        .collect::<HashSet<_>>();
-    let trim_remote_blobs: HashSet<_> = ctx
-        .remote_blobs
-        .iter()
-        .map(|x| x.get_path().to_str().expect("must be valid name"))
-        .filter(|x| x.starts_with("folder1"))
-        .map(|x| RemotePath::new(Path::new(x)).expect("must be valid name"))
-        .collect();
-    assert_eq!(
-        nested_remote_files, trim_remote_blobs,
-        "remote storage list_files on subdirrectory mismatches with the uploads."
-    );
-    Ok(())
-}
-
 #[test_context(MaybeEnabledS3)]
 #[tokio::test]
 async fn s3_delete_non_exising_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
@@ -173,15 +121,10 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>
    let path2 = RemotePath::new(&PathBuf::from(format!("{}/path2", ctx.base_prefix,)))
        .with_context(|| "RemotePath conversion")?;

-    let path3 = RemotePath::new(&PathBuf::from(format!("{}/path3", ctx.base_prefix,)))
-        .with_context(|| "RemotePath conversion")?;
-
    let data1 = "remote blob data1".as_bytes();
    let data1_len = data1.len();
    let data2 = "remote blob data2".as_bytes();
    let data2_len = data2.len();
-    let data3 = "remote blob data3".as_bytes();
-    let data3_len = data3.len();
    ctx.client
        .upload(std::io::Cursor::new(data1), data1_len, &path1, None)
        .await?;
@@ -190,18 +133,8 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>
        .upload(std::io::Cursor::new(data2), data2_len, &path2, None)
        .await?;

-    ctx.client
-        .upload(std::io::Cursor::new(data3), data3_len, &path3, None)
-        .await?;
-
    ctx.client.delete_objects(&[path1, path2]).await?;

-    let prefixes = ctx.client.list_prefixes(None).await?;
-
-    assert_eq!(prefixes.len(), 1);
-
-    ctx.client.delete_objects(&[path3]).await?;
-
    Ok(())
 }

@@ -315,66 +248,6 @@ impl AsyncTestContext for MaybeEnabledS3WithTestBlobs {
    }
 }

-// NOTE: the setups for the list_prefixes test and the list_files test are very similar
-// However, they are not idential. The list_prefixes function is concerned with listing prefixes,
-// whereas the list_files function is concerned with listing files.
-// See `RemoteStorage::list_files` documentation for more details
-enum MaybeEnabledS3WithSimpleTestBlobs {
-    Enabled(S3WithSimpleTestBlobs),
-    Disabled,
-    UploadsFailed(anyhow::Error, S3WithSimpleTestBlobs),
-}
-struct S3WithSimpleTestBlobs {
-    enabled: EnabledS3,
-    remote_blobs: HashSet<RemotePath>,
-}
-
-#[async_trait::async_trait]
-impl AsyncTestContext for MaybeEnabledS3WithSimpleTestBlobs {
-    async fn setup() -> Self {
-        ensure_logging_ready();
-        if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
-            info!(
-                "`{}` env variable is not set, skipping the test",
-                ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
-            );
-            return Self::Disabled;
-        }
-
-        let max_keys_in_list_response = 10;
-        let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());
-
-        let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;
-
-        match upload_simple_s3_data(&enabled.client, upload_tasks_count).await {
-            ControlFlow::Continue(uploads) => {
-                info!("Remote objects created successfully");
-
-                Self::Enabled(S3WithSimpleTestBlobs {
-                    enabled,
-                    remote_blobs: uploads,
-                })
-            }
-            ControlFlow::Break(uploads) => Self::UploadsFailed(
-                anyhow::anyhow!("One or multiple blobs failed to upload to S3"),
-                S3WithSimpleTestBlobs {
-                    enabled,
-                    remote_blobs: uploads,
-                },
-            ),
-        }
-    }
-
-    async fn teardown(self) {
-        match self {
-            Self::Disabled => {}
-            Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {
-                cleanup(&ctx.enabled.client, ctx.remote_blobs).await;
-            }
-        }
-    }
-}
-
 fn create_s3_client(
    max_keys_per_list_response: Option<i32>,
 ) -> anyhow::Result<Arc<GenericRemoteStorage>> {
@@ -385,7 +258,7 @@ fn create_s3_client(
    let random_prefix_part = std::time::SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .context("random s3 test prefix part calculation")?
-        .as_nanos();
+        .as_millis();
    let remote_storage_config = RemoteStorageConfig {
        max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
        max_sync_errors: NonZeroU32::new(5).unwrap(),
@@ -491,52 +364,3 @@ async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<
        }
    }
 }
-
-// Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
-async fn upload_simple_s3_data(
-    client: &Arc<GenericRemoteStorage>,
-    upload_tasks_count: usize,
-) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
-    info!("Creating {upload_tasks_count} S3 files");
-    let mut upload_tasks = JoinSet::new();
-    for i in 1..upload_tasks_count + 1 {
-        let task_client = Arc::clone(client);
-        upload_tasks.spawn(async move {
-            let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
-            let blob_path = RemotePath::new(&blob_path)
-                .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
-            debug!("Creating remote item {i} at path {blob_path:?}");
-
-            let data = format!("remote blob data {i}").into_bytes();
-            let data_len = data.len();
-            task_client
-                .upload(std::io::Cursor::new(data), data_len, &blob_path, None)
-                .await?;
-
-            Ok::<_, anyhow::Error>(blob_path)
-        });
-    }
-
-    let mut upload_tasks_failed = false;
-    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
-    while let Some(task_run_result) = upload_tasks.join_next().await {
-        match task_run_result
-            .context("task join failed")
-            .and_then(|task_result| task_result.context("upload task failed"))
-        {
-            Ok(upload_path) => {
-                uploaded_blobs.insert(upload_path);
-            }
-            Err(e) => {
-                error!("Upload task failed: {e:?}");
-                upload_tasks_failed = true;
-            }
-        }
-    }
-
-    if upload_tasks_failed {
-        ControlFlow::Break(uploaded_blobs)
-    } else {
-        ControlFlow::Continue(uploaded_blobs)
-    }
-}
--- a/libs/utils/src/http/error.rs
+++ b/libs/utils/src/http/error.rs
@@ -1,6 +1,5 @@
 use hyper::{header, Body, Response, StatusCode};
 use serde::{Deserialize, Serialize};
-use std::error::Error as StdError;
 use thiserror::Error;
 use tracing::error;

@@ -16,7 +15,7 @@ pub enum ApiError {
    Unauthorized(String),

    #[error("NotFound: {0}")]
-    NotFound(Box<dyn StdError + Send + Sync + 'static>),
+    NotFound(anyhow::Error),

    #[error("Conflict: {0}")]
    Conflict(String),
--- a/libs/utils/src/measured_stream.rs
+++ b/libs/utils/src/measured_stream.rs
@@ -1,4 +1,5 @@
 use pin_project_lite::pin_project;
+use std::io::Read;
 use std::pin::Pin;
 use std::{io, task};
 use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
@@ -75,3 +76,34 @@ impl<S: AsyncWrite + Unpin, R, W: FnMut(usize)> AsyncWrite for MeasuredStream<S,
        self.project().stream.poll_shutdown(context)
    }
 }
+
+/// Wrapper around a synchronous reader that counts the bytes read through it.
+///
+/// Similar to `MeasuredStream`, but read-only and synchronous.
+pub struct MeasuredReader<R: Read> {
+    inner: R, // the wrapped reader that every read is delegated to
+    byte_count: usize, // running total of bytes returned by successful reads
+}
+
+impl<R: Read> MeasuredReader<R> {
+    pub fn new(reader: R) -> Self { // wraps `reader`; the count starts at zero
+        Self {
+            inner: reader,
+            byte_count: 0,
+        }
+    }
+
+    pub fn get_byte_count(&self) -> usize { // total bytes read so far
+        self.byte_count
+    }
+}
+
+impl<R: Read> Read for MeasuredReader<R> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let result = self.inner.read(buf);
+        if let Ok(n_bytes) = result { // failed reads leave the count unchanged
+            self.byte_count += n_bytes
+        }
+        result
+    }
+}
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -12,6 +12,7 @@ testing = ["fail/failpoints"]

 [dependencies]
 anyhow.workspace = true
+flate2.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
 byteorder.workspace = true
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -495,50 +495,50 @@ fn start_pageserver(
                Ok(())
            },
        );
-    }

-    if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
-        let background_jobs_barrier = background_jobs_barrier;
-        let metrics_ctx = RequestContext::todo_child(
-            TaskKind::MetricsCollection,
-            // This task itself shouldn't download anything.
-            // The actual size calculation does need downloads, and
-            // creates a child context with the right DownloadBehavior.
-            DownloadBehavior::Error,
-        );
-        task_mgr::spawn(
-            crate::BACKGROUND_RUNTIME.handle(),
-            TaskKind::MetricsCollection,
-            None,
-            None,
-            "consumption metrics collection",
-            true,
-            async move {
-                // first wait until background jobs are cleared to launch.
-                //
-                // this is because we only process active tenants and timelines, and the
-                // Timeline::get_current_logical_size will spawn the logical size calculation,
-                // which will not be rate-limited.
-                let cancel = task_mgr::shutdown_token();
+        if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
+            let background_jobs_barrier = background_jobs_barrier;
+            let metrics_ctx = RequestContext::todo_child(
+                TaskKind::MetricsCollection,
+                // This task itself shouldn't download anything.
+                // The actual size calculation does need downloads, and
+                // creates a child context with the right DownloadBehavior.
+                DownloadBehavior::Error,
+            );
+            task_mgr::spawn(
+                MGMT_REQUEST_RUNTIME.handle(),
+                TaskKind::MetricsCollection,
+                None,
+                None,
+                "consumption metrics collection",
+                true,
+                async move {
+                    // first wait until background jobs are cleared to launch.
+                    //
+                    // this is because we only process active tenants and timelines, and the
+                    // Timeline::get_current_logical_size will spawn the logical size calculation,
+                    // which will not be rate-limited.
+                    let cancel = task_mgr::shutdown_token();

-                tokio::select! {
-                    _ = cancel.cancelled() => { return Ok(()); },
-                    _ = background_jobs_barrier.wait() => {}
-                };
+                    tokio::select! {
+                        _ = cancel.cancelled() => { return Ok(()); },
+                        _ = background_jobs_barrier.wait() => {}
+                    };

-                pageserver::consumption_metrics::collect_metrics(
-                    metric_collection_endpoint,
-                    conf.metric_collection_interval,
-                    conf.cached_metric_collection_interval,
-                    conf.synthetic_size_calculation_interval,
-                    conf.id,
-                    metrics_ctx,
-                )
-                .instrument(info_span!("metrics_collection"))
-                .await?;
-                Ok(())
-            },
-        );
+                    pageserver::consumption_metrics::collect_metrics(
+                        metric_collection_endpoint,
+                        conf.metric_collection_interval,
+                        conf.cached_metric_collection_interval,
+                        conf.synthetic_size_calculation_interval,
+                        conf.id,
+                        metrics_ctx,
+                    )
+                    .instrument(info_span!("metrics_collection"))
+                    .await?;
+                    Ok(())
+                },
+            );
+        }
    }

    // Spawn a task to listen for libpq connections. It will spawn further tasks
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -96,12 +96,12 @@ pub mod defaults {

 #background_task_maximum_delay = '{DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY}'

-[tenant_config]
+# [tenant_config]
 #checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
 #checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
 #compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
 #compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
-#compaction_threshold = {DEFAULT_COMPACTION_THRESHOLD}
+#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'

 #gc_period = '{DEFAULT_GC_PERIOD}'
 #gc_horizon = {DEFAULT_GC_HORIZON}
@@ -111,8 +111,7 @@ pub mod defaults {
 #min_resident_size_override = .. # in bytes
 #evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
 #gc_feedback = false
-
-[remote_storage]
+# [remote_storage]

 "###
    );
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -516,7 +516,7 @@ async fn collect_eviction_candidates(
            if !tl.is_active() {
                continue;
            }
-            let info = tl.get_local_layers_for_disk_usage_eviction().await;
+            let info = tl.get_local_layers_for_disk_usage_eviction();
            debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
            tenant_candidates.extend(
                info.resident_layers
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -186,8 +186,10 @@ paths:
              schema:
                $ref: "#/components/schemas/Error"
    delete:
-      description: "Attempts to delete specified timeline. 500 and 409 errors should be retried"
+      description: "Attempts to delete specified timeline. 500 errors should be retried"
      responses:
+        "200":
+          description: Ok
        "400":
          description: Error when no tenant id found in path or no timeline id
          content:
@@ -212,12 +214,6 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/NotFoundError"
-        "409":
-          description: Deletion is already in progress, continue polling
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ConflictError"
        "412":
          description: Tenant is missing, or timeline has children
          content:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -142,7 +142,7 @@ impl From<TenantMapInsertError> for ApiError {
 impl From<TenantStateError> for ApiError {
    fn from(tse: TenantStateError) -> ApiError {
        match tse {
-            TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
+            TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
            _ => ApiError::InternalServerError(anyhow::Error::new(tse)),
        }
    }
@@ -151,7 +151,7 @@ impl From<TenantStateError> for ApiError {
 impl From<GetTenantError> for ApiError {
    fn from(tse: GetTenantError) -> ApiError {
        match tse {
-            GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
+            GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
            e @ GetTenantError::NotActive(_) => {
                // Why is this not `ApiError::NotFound`?
                // Because we must be careful to never return 404 for a tenant if it does
@@ -169,7 +169,7 @@ impl From<SetNewTenantConfigError> for ApiError {
    fn from(e: SetNewTenantConfigError) -> ApiError {
        match e {
            SetNewTenantConfigError::GetTenant(tid) => {
-                ApiError::NotFound(anyhow!("tenant {}", tid).into())
+                ApiError::NotFound(anyhow!("tenant {}", tid))
            }
            e @ SetNewTenantConfigError::Persist(_) => {
                ApiError::InternalServerError(anyhow::Error::new(e))
@@ -182,12 +182,11 @@ impl From<crate::tenant::DeleteTimelineError> for ApiError {
    fn from(value: crate::tenant::DeleteTimelineError) -> Self {
        use crate::tenant::DeleteTimelineError::*;
        match value {
-            NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()),
+            NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found")),
            HasChildren(children) => ApiError::PreconditionFailed(
                format!("Cannot delete timeline which has child timelines: {children:?}")
                    .into_boxed_str(),
            ),
-            a @ AlreadyInProgress => ApiError::Conflict(a.to_string()),
            Other(e) => ApiError::InternalServerError(e),
        }
    }
@@ -216,7 +215,7 @@ async fn build_timeline_info(
 ) -> anyhow::Result<TimelineInfo> {
    crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();

-    let mut info = build_timeline_info_common(timeline, ctx).await?;
+    let mut info = build_timeline_info_common(timeline, ctx)?;
    if include_non_incremental_logical_size {
        // XXX we should be using spawn_ondemand_logical_size_calculation here.
        // Otherwise, if someone deletes the timeline / detaches the tenant while
@@ -234,7 +233,7 @@ async fn build_timeline_info(
    Ok(info)
 }

-async fn build_timeline_info_common(
+fn build_timeline_info_common(
    timeline: &Arc<Timeline>,
    ctx: &RequestContext,
 ) -> anyhow::Result<TimelineInfo> {
@@ -265,7 +264,7 @@ async fn build_timeline_info_common(
            None
        }
    };
-    let current_physical_size = Some(timeline.layer_size_sum().await);
+    let current_physical_size = Some(timeline.layer_size_sum());
    let state = timeline.current_state();
    let remote_consistent_lsn = timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));

@@ -331,7 +330,6 @@ async fn timeline_create_handler(
            Ok(Some(new_timeline)) => {
                // Created. Construct a TimelineInfo for it.
                let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
-                    .await
                    .map_err(ApiError::InternalServerError)?;
                json_response(StatusCode::CREATED, timeline_info)
            }
@@ -398,7 +396,7 @@ async fn timeline_detail_handler(

        let timeline = tenant
            .get_timeline(timeline_id, false)
-            .map_err(|e| ApiError::NotFound(e.into()))?;
+            .map_err(ApiError::NotFound)?;

        let timeline_info = build_timeline_info(
            &timeline,
@@ -593,7 +591,7 @@ async fn tenant_status(
        // Calculate total physical size of all timelines
        let mut current_physical_size = 0;
        for timeline in tenant.list_timelines().iter() {
-            current_physical_size += timeline.layer_size_sum().await;
+            current_physical_size += timeline.layer_size_sum();
        }

        let state = tenant.current_state();
@@ -703,7 +701,7 @@ async fn layer_map_info_handler(
    check_permission(&request, Some(tenant_id))?;

    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-    let layer_map_info = timeline.layer_map_info(reset).await;
+    let layer_map_info = timeline.layer_map_info(reset);

    json_response(StatusCode::OK, layer_map_info)
 }
@@ -1062,7 +1060,7 @@ async fn timeline_download_remote_layers_handler_get(
    let info = timeline
        .get_download_all_remote_layers_task_info()
        .context("task never started since last pageserver process start")
-        .map_err(|e| ApiError::NotFound(e.into()))?;
+        .map_err(ApiError::NotFound)?;
    json_response(StatusCode::OK, info)
 }

@@ -1073,7 +1071,7 @@ async fn active_timeline_of_active_tenant(
    let tenant = mgr::get_tenant(tenant_id, true).await?;
    tenant
        .get_timeline(timeline_id, true)
-        .map_err(|e| ApiError::NotFound(e.into()))
+        .map_err(ApiError::NotFound)
 }

 async fn always_panic_handler(
@@ -1129,6 +1127,8 @@ async fn disk_usage_eviction_run(
        freed_bytes: 0,
    };

+    use crate::task_mgr::MGMT_REQUEST_RUNTIME;
+
    let (tx, rx) = tokio::sync::oneshot::channel();

    let state = get_state(&r);
@@ -1146,7 +1146,7 @@ async fn disk_usage_eviction_run(
    let _g = cancel.drop_guard();

    crate::task_mgr::spawn(
-        crate::task_mgr::BACKGROUND_RUNTIME.handle(),
+        MGMT_REQUEST_RUNTIME.handle(),
        TaskKind::DiskUsageEviction,
        None,
        None,
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -75,12 +75,12 @@ pub async fn import_timeline_from_postgres_datadir(
            {
                pg_control = Some(control_file);
            }
-            modification.flush().await?;
+            modification.flush()?;
        }
    }

    // We're done importing all the data files.
-    modification.commit().await?;
+    modification.commit()?;

    // We expect the Postgres server to be shut down cleanly.
    let pg_control = pg_control.context("pg_control file not found")?;
@@ -148,17 +148,17 @@ async fn import_rel(
    // because there is no guarantee about the order in which we are processing segments.
    // ignore "relation already exists" error
    //
-    // FIXME: Keep track of which relations we've already created?
+    // FIXME: use proper error type for this, instead of parsing the error message.
+    // Or better yet, keep track of which relations we've already created
    // https://github.com/neondatabase/neon/issues/3309
    if let Err(e) = modification
        .put_rel_creation(rel, nblocks as u32, ctx)
        .await
    {
-        match e {
-            RelationError::AlreadyExists => {
-                debug!("Relation {} already exist. We must be extending it.", rel)
-            }
-            _ => return Err(e.into()),
+        if e.to_string().contains("already exists") {
+            debug!("relation {} already exists. we must be extending it", rel);
+        } else {
+            return Err(e);
        }
    }

@@ -359,7 +359,7 @@ pub async fn import_basebackup_from_tar(
                    // We found the pg_control file.
                    pg_control = Some(res);
                }
-                modification.flush().await?;
+                modification.flush()?;
            }
            tokio_tar::EntryType::Directory => {
                debug!("directory {:?}", file_path);
@@ -377,7 +377,7 @@ pub async fn import_basebackup_from_tar(
    // sanity check: ensure that pg_control is loaded
    let _pg_control = pg_control.context("pg_control file not found")?;

-    modification.commit().await?;
+    modification.commit()?;
    Ok(())
 }

@@ -594,7 +594,7 @@ async fn import_file(
        // zenith.signal is not necessarily the last file, that we handle
        // but it is ok to call `finish_write()`, because final `modification.commit()`
        // will update lsn once more to the final one.
-        let writer = modification.tline.writer().await;
+        let writer = modification.tline.writer();
        writer.finish_write(prev_lsn);

        debug!("imported zenith signal {}", prev_lsn);
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1,4 +1,4 @@
-use metrics::metric_vec_duration::DurationResultObserver;
+use metrics::core::{AtomicU64, GenericCounter};
 use metrics::{
    register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
    register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
@@ -95,19 +95,21 @@ static READ_NUM_FS_LAYERS: Lazy<HistogramVec> = Lazy::new(|| {
 });

 // Metrics collected on operations on the storage repository.
-pub static RECONSTRUCT_TIME: Lazy<Histogram> = Lazy::new(|| {
-    register_histogram!(
+static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
        "pageserver_getpage_reconstruct_seconds",
-        "Time spent in reconstruct_value (reconstruct a page from deltas)",
+        "Time spent in reconstruct_value",
+        &["tenant_id", "timeline_id"],
        CRITICAL_OP_BUCKETS.into(),
    )
    .expect("failed to define a metric")
 });

-pub static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
+static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
        "pageserver_materialized_cache_hits_direct_total",
        "Number of cache hits from materialized page cache without redo",
+        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
 });
@@ -122,10 +124,11 @@ static GET_RECONSTRUCT_DATA_TIME: Lazy<HistogramVec> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

-pub static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
+static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
        "pageserver_materialized_cache_hits_total",
        "Number of cache hits from materialized page cache",
+        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
 });
@@ -425,27 +428,6 @@ pub static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

-pub struct BasebackupQueryTime(HistogramVec);
-pub static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
-    BasebackupQueryTime({
-        register_histogram_vec!(
-            "pageserver_basebackup_query_seconds",
-            "Histogram of basebackup queries durations, by result type",
-            &["result"],
-            CRITICAL_OP_BUCKETS.into(),
-        )
-        .expect("failed to define a metric")
-    })
-});
-
-impl DurationResultObserver for BasebackupQueryTime {
-    fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
-        let label_value = if res.is_ok() { "ok" } else { "error" };
-        let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
-        metric.observe(duration.as_secs_f64());
-    }
-}
-
 pub static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_live_connections",
@@ -770,7 +752,10 @@ impl StorageTimeMetrics {
 pub struct TimelineMetrics {
    tenant_id: String,
    timeline_id: String,
+    pub reconstruct_time_histo: Histogram,
    pub get_reconstruct_data_time_histo: Histogram,
+    pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
+    pub materialized_page_cache_hit_upon_request_counter: GenericCounter<AtomicU64>,
    pub flush_time_histo: StorageTimeMetrics,
    pub compact_time_histo: StorageTimeMetrics,
    pub create_images_time_histo: StorageTimeMetrics,
@@ -798,9 +783,15 @@ impl TimelineMetrics {
    ) -> Self {
        let tenant_id = tenant_id.to_string();
        let timeline_id = timeline_id.to_string();
+        let reconstruct_time_histo = RECONSTRUCT_TIME
+            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .unwrap();
        let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
+        let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
+            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .unwrap();
        let flush_time_histo =
            StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
        let compact_time_histo =
@@ -842,13 +833,19 @@ impl TimelineMetrics {
        let read_num_fs_layers = READ_NUM_FS_LAYERS
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
+        let materialized_page_cache_hit_upon_request_counter = MATERIALIZED_PAGE_CACHE_HIT_DIRECT
+            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .unwrap();
        let evictions_with_low_residence_duration =
            evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);

        TimelineMetrics {
            tenant_id,
            timeline_id,
+            reconstruct_time_histo,
            get_reconstruct_data_time_histo,
+            materialized_page_cache_hit_counter,
+            materialized_page_cache_hit_upon_request_counter,
            flush_time_histo,
            compact_time_histo,
            create_images_time_histo,
@@ -875,7 +872,10 @@ impl Drop for TimelineMetrics {
    fn drop(&mut self) {
        let tenant_id = &self.tenant_id;
        let timeline_id = &self.timeline_id;
+        let _ = RECONSTRUCT_TIME.remove_label_values(&[tenant_id, timeline_id]);
        let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]);
+        let _ = MATERIALIZED_PAGE_CACHE_HIT.remove_label_values(&[tenant_id, timeline_id]);
+        let _ = MATERIALIZED_PAGE_CACHE_HIT_DIRECT.remove_label_values(&[tenant_id, timeline_id]);
        let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
        let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
        let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
@@ -1319,8 +1319,4 @@ pub fn preinitialize_metrics() {

    // Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
    BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
-
-    // Python tests need these.
-    MATERIALIZED_PAGE_CACHE_HIT_DIRECT.get();
-    MATERIALIZED_PAGE_CACHE_HIT.get();
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -24,14 +24,14 @@ use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, Qu
 use pq_proto::framed::ConnectionError;
 use pq_proto::FeStartupPacket;
 use pq_proto::{BeMessage, FeMessage, RowDescriptor};
-use std::io;
+use std::io::{self, Write};
 use std::net::TcpListener;
 use std::pin::pin;
 use std::str;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
-use tokio::io::{AsyncRead, AsyncWrite};
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
 use tokio_util::io::StreamReader;
 use tracing::*;
 use utils::id::ConnectionId;
@@ -390,9 +390,7 @@ impl PageServerHandler {
        };

        // Check that the timeline exists
-        let timeline = tenant
-            .get_timeline(timeline_id, true)
-            .map_err(|e| anyhow::anyhow!(e))?;
+        let timeline = tenant.get_timeline(timeline_id, true)?;

        // switch client to COPYBOTH
        pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
@@ -775,8 +773,9 @@ impl PageServerHandler {
        // Send a tarball of the latest layer on the timeline
        {
            let mut writer = pgb.copyout_writer();
+            let mut raw_tar = Vec::new();
            basebackup::send_basebackup_tarball(
-                &mut writer,
+                &mut raw_tar,
                &timeline,
                lsn,
                prev_lsn,
@@ -784,6 +783,11 @@ impl PageServerHandler {
                &ctx,
            )
            .await?;
+            let mut encoder =
+                flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
+            encoder.write_all(&raw_tar)?;
+            let compressed_tar = encoder.finish()?;
+            writer.write_all(&compressed_tar).await?; // plain write() may be partial
        }

        pgb.write_message_noflush(&BeMessage::CopyDone)?;
@@ -913,24 +917,10 @@ where
                None
            };

-            metrics::metric_vec_duration::observe_async_block_duration_by_result(
-                &*crate::metrics::BASEBACKUP_QUERY_TIME,
-                async move {
-                    self.handle_basebackup_request(
-                        pgb,
-                        tenant_id,
-                        timeline_id,
-                        lsn,
-                        None,
-                        false,
-                        ctx,
-                    )
-                    .await?;
-                    pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
-                    anyhow::Ok(())
-                },
-            )
-            .await?;
+            // Serve the basebackup request, then acknowledge it with CommandComplete
+            self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false, ctx)
+                .await?;
+            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        }
        // return pair of prev_lsn and last_lsn
        else if query_string.starts_with("get_last_record_rlsn ") {
@@ -1246,6 +1236,6 @@ async fn get_active_tenant_timeline(
        .map_err(GetActiveTimelineError::Tenant)?;
    let timeline = tenant
        .get_timeline(timeline_id, true)
-        .map_err(|e| GetActiveTimelineError::Timeline(anyhow::anyhow!(e)))?;
+        .map_err(GetActiveTimelineError::Timeline)?;
    Ok(timeline)
 }
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -43,16 +43,6 @@ pub enum CalculateLogicalSizeError {
    Other(#[from] anyhow::Error),
 }

-#[derive(Debug, thiserror::Error)]
-pub enum RelationError {
-    #[error("Relation Already Exists")]
-    AlreadyExists,
-    #[error("invalid relnode")]
-    InvalidRelnode,
-    #[error(transparent)]
-    Other(#[from] anyhow::Error),
-}
-
 ///
 /// This impl provides all the functionality to store PostgreSQL relations, SLRUs,
 /// and other special kinds of files, in a versioned key-value store. The
@@ -111,9 +101,9 @@ impl Timeline {
        ctx: &RequestContext,
    ) -> Result<Bytes, PageReconstructError> {
        if tag.relnode == 0 {
-            return Err(PageReconstructError::Other(
-                RelationError::InvalidRelnode.into(),
-            ));
+            return Err(PageReconstructError::Other(anyhow::anyhow!(
+                "invalid relnode"
+            )));
        }

        let nblocks = self.get_rel_size(tag, lsn, latest, ctx).await?;
@@ -158,9 +148,9 @@ impl Timeline {
        ctx: &RequestContext,
    ) -> Result<BlockNumber, PageReconstructError> {
        if tag.relnode == 0 {
-            return Err(PageReconstructError::Other(
-                RelationError::InvalidRelnode.into(),
-            ));
+            return Err(PageReconstructError::Other(anyhow::anyhow!(
+                "invalid relnode"
+            )));
        }

        if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
@@ -203,9 +193,9 @@ impl Timeline {
        ctx: &RequestContext,
    ) -> Result<bool, PageReconstructError> {
        if tag.relnode == 0 {
-            return Err(PageReconstructError::Other(
-                RelationError::InvalidRelnode.into(),
-            ));
+            return Err(PageReconstructError::Other(anyhow::anyhow!(
+                "invalid relnode"
+            )));
        }

        // first try to lookup relation in cache
@@ -734,7 +724,7 @@ impl<'a> DatadirModification<'a> {
        blknum: BlockNumber,
        rec: NeonWalRecord,
    ) -> anyhow::Result<()> {
-        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
+        anyhow::ensure!(rel.relnode != 0, "invalid relnode");
        self.put(rel_block_to_key(rel, blknum), Value::WalRecord(rec));
        Ok(())
    }
@@ -761,7 +751,7 @@ impl<'a> DatadirModification<'a> {
        blknum: BlockNumber,
        img: Bytes,
    ) -> anyhow::Result<()> {
-        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
+        anyhow::ensure!(rel.relnode != 0, "invalid relnode");
        self.put(rel_block_to_key(rel, blknum), Value::Image(img));
        Ok(())
    }
@@ -885,38 +875,32 @@ impl<'a> DatadirModification<'a> {
        rel: RelTag,
        nblocks: BlockNumber,
        ctx: &RequestContext,
-    ) -> Result<(), RelationError> {
-        if rel.relnode == 0 {
-            return Err(RelationError::AlreadyExists);
-        }
+    ) -> anyhow::Result<()> {
+        anyhow::ensure!(rel.relnode != 0, "invalid relnode");
        // It's possible that this is the first rel for this db in this
        // tablespace.  Create the reldir entry for it if so.
-        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await.context("read db")?)
-            .context("deserialize db")?;
+        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
        let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
            // Didn't exist. Update dbdir
            dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
-            let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
+            let buf = DbDirectory::ser(&dbdir)?;
            self.put(DBDIR_KEY, Value::Image(buf.into()));

            // and create the RelDirectory
            RelDirectory::default()
        } else {
            // reldir already exists, fetch it
-            RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
-                .context("deserialize db")?
+            RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?
        };

        // Add the new relation to the rel directory entry, and write it back
        if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
-            return Err(RelationError::AlreadyExists);
+            anyhow::bail!("rel {rel} already exists");
        }
        self.put(
            rel_dir_key,
-            Value::Image(Bytes::from(
-                RelDirectory::ser(&rel_dir).context("serialize")?,
-            )),
+            Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),
        );

        // Put size
@@ -941,7 +925,7 @@ impl<'a> DatadirModification<'a> {
        nblocks: BlockNumber,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
-        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
+        anyhow::ensure!(rel.relnode != 0, "invalid relnode");
        let last_lsn = self.tline.get_last_record_lsn();
        if self.tline.get_rel_exists(rel, last_lsn, true, ctx).await? {
            let size_key = rel_size_to_key(rel);
@@ -972,7 +956,7 @@ impl<'a> DatadirModification<'a> {
        nblocks: BlockNumber,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
-        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
+        anyhow::ensure!(rel.relnode != 0, "invalid relnode");

        // Put size
        let size_key = rel_size_to_key(rel);
@@ -993,7 +977,7 @@ impl<'a> DatadirModification<'a> {

    /// Drop a relation.
    pub async fn put_rel_drop(&mut self, rel: RelTag, ctx: &RequestContext) -> anyhow::Result<()> {
-        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
+        anyhow::ensure!(rel.relnode != 0, "invalid relnode");

        // Remove it from the directory entry
        let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
@@ -1138,7 +1122,7 @@ impl<'a> DatadirModification<'a> {
    /// retains all the metadata, but data pages are flushed. That's again OK
    /// for bulk import, where you are just loading data pages and won't try to
    /// modify the same pages twice.
-    pub async fn flush(&mut self) -> anyhow::Result<()> {
+    pub fn flush(&mut self) -> anyhow::Result<()> {
        // Unless we have accumulated a decent amount of changes, it's not worth it
        // to scan through the pending_updates list.
        let pending_nblocks = self.pending_nblocks;
@@ -1146,20 +1130,19 @@ impl<'a> DatadirModification<'a> {
            return Ok(());
        }

-        let writer = self.tline.writer().await;
+        let writer = self.tline.writer();

        // Flush relation and  SLRU data blocks, keep metadata.
-        let mut retained_pending_updates = HashMap::new();
-        for (key, value) in self.pending_updates.drain() {
-            if is_rel_block_key(key) || is_slru_block_key(key) {
-                // This bails out on first error without modifying pending_updates.
-                // That's Ok, cf this function's doc comment.
-                writer.put(key, self.lsn, &value).await?;
+        let mut result: anyhow::Result<()> = Ok(());
+        self.pending_updates.retain(|&key, value| {
+            if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
+                result = writer.put(key, self.lsn, value);
+                false
            } else {
-                retained_pending_updates.insert(key, value);
+                true
            }
-        }
-        self.pending_updates.extend(retained_pending_updates);
+        });
+        result?;

        if pending_nblocks != 0 {
            writer.update_current_logical_size(pending_nblocks * i64::from(BLCKSZ));
@@ -1174,17 +1157,17 @@ impl<'a> DatadirModification<'a> {
    /// underlying timeline.
    /// All the modifications in this atomic update are stamped by the specified LSN.
    ///
-    pub async fn commit(&mut self) -> anyhow::Result<()> {
-        let writer = self.tline.writer().await;
+    pub fn commit(&mut self) -> anyhow::Result<()> {
+        let writer = self.tline.writer();
        let lsn = self.lsn;
        let pending_nblocks = self.pending_nblocks;
        self.pending_nblocks = 0;

        for (key, value) in self.pending_updates.drain() {
-            writer.put(key, lsn, &value).await?;
+            writer.put(key, lsn, &value)?;
        }
        for key_range in self.pending_deletions.drain(..) {
-            writer.delete(key_range, lsn).await?;
+            writer.delete(key_range, lsn)?;
        }

        writer.finish_write(lsn);
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -421,32 +421,12 @@ remote:
    }
 }

-#[derive(Debug, thiserror::Error, PartialEq, Eq)]
-pub enum GetTimelineError {
-    #[error("Timeline {tenant_id}/{timeline_id} is not active, state: {state:?}")]
-    NotActive {
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        state: TimelineState,
-    },
-    #[error("Timeline {tenant_id}/{timeline_id} was not found")]
-    NotFound {
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-    },
-}
-
 #[derive(Debug, thiserror::Error)]
 pub enum DeleteTimelineError {
    #[error("NotFound")]
    NotFound,
-
    #[error("HasChildren")]
    HasChildren(Vec<TimelineId>),
-
-    #[error("Timeline deletion is already in progress")]
-    AlreadyInProgress,
-
    #[error(transparent)]
    Other(#[from] anyhow::Error),
 }
@@ -493,14 +473,6 @@ pub(crate) enum ShutdownError {
    AlreadyStopping,
 }

-struct DeletionGuard(OwnedMutexGuard<bool>);
-
-impl DeletionGuard {
-    fn is_deleted(&self) -> bool {
-        *self.0
-    }
-}
-
 impl Tenant {
    /// Yet another helper for timeline initialization.
    /// Contains the common part of `load_local_timeline` and `load_remote_timeline`.
@@ -547,7 +519,6 @@ impl Tenant {
        );
        timeline
            .load_layer_map(new_disk_consistent_lsn)
-            .await
            .with_context(|| {
                format!("Failed to load layermap for timeline {tenant_id}/{timeline_id}")
            })?;
@@ -589,7 +560,7 @@ impl Tenant {
                || timeline
                    .layers
                    .read()
-                    .await
+                    .unwrap()
                    .iter_historic_layers()
                    .next()
                    .is_some(),
@@ -966,117 +937,6 @@ impl Tenant {
        tenant
    }

-    pub fn scan_and_sort_timelines_dir(
-        self: Arc<Tenant>,
-    ) -> anyhow::Result<Vec<(TimelineId, TimelineMetadata)>> {
-        let timelines_dir = self.conf.timelines_path(&self.tenant_id);
-        let mut timelines_to_load: HashMap<TimelineId, TimelineMetadata> = HashMap::new();
-
-        for entry in
-            std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")?
-        {
-            let entry = entry.context("read timeline dir entry")?;
-            let timeline_dir = entry.path();
-
-            if crate::is_temporary(&timeline_dir) {
-                info!(
-                    "Found temporary timeline directory, removing: {}",
-                    timeline_dir.display()
-                );
-                if let Err(e) = std::fs::remove_dir_all(&timeline_dir) {
-                    error!(
-                        "Failed to remove temporary directory '{}': {:?}",
-                        timeline_dir.display(),
-                        e
-                    );
-                }
-            } else if is_uninit_mark(&timeline_dir) {
-                if !timeline_dir.exists() {
-                    warn!(
-                        "Timeline dir entry become invalid: {}",
-                        timeline_dir.display()
-                    );
-                    continue;
-                }
-                let timeline_uninit_mark_file = &timeline_dir;
-                info!(
-                    "Found an uninit mark file {}, removing the timeline and its uninit mark",
-                    timeline_uninit_mark_file.display()
-                );
-                let timeline_id = timeline_uninit_mark_file
-                    .file_stem()
-                    .and_then(OsStr::to_str)
-                    .unwrap_or_default()
-                    .parse::<TimelineId>()
-                    .with_context(|| {
-                        format!(
-                            "Could not parse timeline id out of the timeline uninit mark name {}",
-                            timeline_uninit_mark_file.display()
-                        )
-                    })?;
-                let timeline_dir = self.conf.timeline_path(&timeline_id, &self.tenant_id);
-                if let Err(e) =
-                    remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)
-                {
-                    error!("Failed to clean up uninit marked timeline: {e:?}");
-                }
-            } else {
-                if !timeline_dir.exists() {
-                    warn!(
-                        "Timeline dir entry become invalid: {}",
-                        timeline_dir.display()
-                    );
-                    continue;
-                }
-                let timeline_id = timeline_dir
-                    .file_name()
-                    .and_then(OsStr::to_str)
-                    .unwrap_or_default()
-                    .parse::<TimelineId>()
-                    .with_context(|| {
-                        format!(
-                            "Could not parse timeline id out of the timeline dir name {}",
-                            timeline_dir.display()
-                        )
-                    })?;
-                let timeline_uninit_mark_file = self
-                    .conf
-                    .timeline_uninit_mark_file_path(self.tenant_id, timeline_id);
-                if timeline_uninit_mark_file.exists() {
-                    info!(
-                        %timeline_id,
-                        "Found an uninit mark file, removing the timeline and its uninit mark",
-                    );
-                    if let Err(e) =
-                        remove_timeline_and_uninit_mark(&timeline_dir, &timeline_uninit_mark_file)
-                    {
-                        error!("Failed to clean up uninit marked timeline: {e:?}");
-                    }
-                    continue;
-                }
-
-                let file_name = entry.file_name();
-                if let Ok(timeline_id) =
-                    file_name.to_str().unwrap_or_default().parse::<TimelineId>()
-                {
-                    let metadata = load_metadata(self.conf, timeline_id, self.tenant_id)
-                        .context("failed to load metadata")?;
-                    timelines_to_load.insert(timeline_id, metadata);
-                } else {
-                    // A file or directory that doesn't look like a timeline ID
-                    warn!(
-                        "unexpected file or directory in timelines directory: {}",
-                        file_name.to_string_lossy()
-                    );
-                }
-            }
-        }
-
-        // Sort the array of timeline IDs into tree-order, so that parent comes before
-        // all its children.
-        tree_sort_timelines(timelines_to_load)
-    }
-
    ///
    /// Background task to load in-memory data structures for this tenant, from
    /// files on disk. Used at pageserver startup.
@@ -1093,16 +953,110 @@ impl Tenant {

        utils::failpoint_sleep_millis_async!("before-loading-tenant");

+        // TODO: split this into two functions: the directory scan and the actual load
+
        // Load in-memory state to reflect the local files on disk
        //
        // Scan the directory, peek into the metadata file of each timeline, and
        // collect a list of timelines and their ancestors.
+        let tenant_id = self.tenant_id;
+        let conf = self.conf;
        let span = info_span!("blocking");
-        let cloned = Arc::clone(self);

        let sorted_timelines: Vec<(_, _)> = tokio::task::spawn_blocking(move || {
            let _g = span.entered();
-            cloned.scan_and_sort_timelines_dir()
+            let mut timelines_to_load: HashMap<TimelineId, TimelineMetadata> = HashMap::new();
+            let timelines_dir = conf.timelines_path(&tenant_id);
+
+            for entry in
+                std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")?
+            {
+                let entry = entry.context("read timeline dir entry")?;
+                let timeline_dir = entry.path();
+
+                if crate::is_temporary(&timeline_dir) {
+                    info!(
+                        "Found temporary timeline directory, removing: {}",
+                        timeline_dir.display()
+                    );
+                    if let Err(e) = std::fs::remove_dir_all(&timeline_dir) {
+                        error!(
+                            "Failed to remove temporary directory '{}': {:?}",
+                            timeline_dir.display(),
+                            e
+                        );
+                    }
+                } else if is_uninit_mark(&timeline_dir) {
+                    let timeline_uninit_mark_file = &timeline_dir;
+                    info!(
+                        "Found an uninit mark file {}, removing the timeline and its uninit mark",
+                        timeline_uninit_mark_file.display()
+                    );
+                    let timeline_id = timeline_uninit_mark_file
+                        .file_stem()
+                        .and_then(OsStr::to_str)
+                        .unwrap_or_default()
+                        .parse::<TimelineId>()
+                        .with_context(|| {
+                            format!(
+                            "Could not parse timeline id out of the timeline uninit mark name {}",
+                            timeline_uninit_mark_file.display()
+                        )
+                        })?;
+                    let timeline_dir = conf.timeline_path(&timeline_id, &tenant_id);
+                    if let Err(e) =
+                        remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)
+                    {
+                        error!("Failed to clean up uninit marked timeline: {e:?}");
+                    }
+                } else {
+                    let timeline_id = timeline_dir
+                        .file_name()
+                        .and_then(OsStr::to_str)
+                        .unwrap_or_default()
+                        .parse::<TimelineId>()
+                        .with_context(|| {
+                            format!(
+                                "Could not parse timeline id out of the timeline dir name {}",
+                                timeline_dir.display()
+                            )
+                        })?;
+                    let timeline_uninit_mark_file =
+                        conf.timeline_uninit_mark_file_path(tenant_id, timeline_id);
+                    if timeline_uninit_mark_file.exists() {
+                        info!(
+                            %timeline_id,
+                            "Found an uninit mark file, removing the timeline and its uninit mark",
+                        );
+                        if let Err(e) = remove_timeline_and_uninit_mark(
+                            &timeline_dir,
+                            &timeline_uninit_mark_file,
+                        ) {
+                            error!("Failed to clean up uninit marked timeline: {e:?}");
+                        }
+                        continue;
+                    }
+
+                    let file_name = entry.file_name();
+                    if let Ok(timeline_id) =
+                        file_name.to_str().unwrap_or_default().parse::<TimelineId>()
+                    {
+                        let metadata = load_metadata(conf, timeline_id, tenant_id)
+                            .context("failed to load metadata")?;
+                        timelines_to_load.insert(timeline_id, metadata);
+                    } else {
+                        // A file or directory that doesn't look like a timeline ID
+                        warn!(
+                            "unexpected file or directory in timelines directory: {}",
+                            file_name.to_string_lossy()
+                        );
+                    }
+                }
+            }
+
+            // Sort the array of timeline IDs into tree-order, so that parent comes before
+            // all its children.
+            tree_sort_timelines(timelines_to_load)
        })
        .await
        .context("load spawn_blocking")
@@ -1183,11 +1137,7 @@ impl Tenant {
                                )
                                .context("create_timeline_struct")?;

-                            let guard = DeletionGuard(
-                                Arc::clone(&timeline.delete_lock)
-                                    .try_lock_owned()
-                                    .expect("cannot happen because we're the only owner"),
-                            );
+                            let guard = Arc::clone(&timeline.delete_lock).lock_owned().await;

                            // Note: here we even skip populating layer map. Timeline is essentially uninitialized.
                            // RemoteTimelineClient is the only functioning part.
@@ -1250,21 +1200,19 @@ impl Tenant {
        &self,
        timeline_id: TimelineId,
        active_only: bool,
-    ) -> Result<Arc<Timeline>, GetTimelineError> {
+    ) -> anyhow::Result<Arc<Timeline>> {
        let timelines_accessor = self.timelines.lock().unwrap();
-        let timeline = timelines_accessor
-            .get(&timeline_id)
-            .ok_or(GetTimelineError::NotFound {
-                tenant_id: self.tenant_id,
-                timeline_id,
-            })?;
+        let timeline = timelines_accessor.get(&timeline_id).with_context(|| {
+            format!("Timeline {}/{} was not found", self.tenant_id, timeline_id)
+        })?;

        if active_only && !timeline.is_active() {
-            Err(GetTimelineError::NotActive {
-                tenant_id: self.tenant_id,
+            anyhow::bail!(
+                "Timeline {}/{} is not active, state: {:?}",
+                self.tenant_id,
                timeline_id,
-                state: timeline.current_state(),
-            })
+                timeline.current_state()
+            )
        } else {
            Ok(Arc::clone(timeline))
        }
@@ -1356,7 +1304,6 @@ impl Tenant {
            .context("init_empty_test_timeline")?;
        modification
            .commit()
-            .await
            .context("commit init_empty_test_timeline modification")?;

        // Flush to disk so that uninit_tl's check for valid disk_consistent_lsn passes.
@@ -1513,13 +1460,7 @@ impl Tenant {
            let timelines = self.timelines.lock().unwrap();
            let timelines_to_compact = timelines
                .iter()
-                .filter_map(|(timeline_id, timeline)| {
-                    if timeline.is_active() {
-                        Some((*timeline_id, timeline.clone()))
-                    } else {
-                        None
-                    }
-                })
+                .map(|(timeline_id, timeline)| (*timeline_id, timeline.clone()))
                .collect::<Vec<_>>();
            drop(timelines);
            timelines_to_compact
@@ -1600,7 +1541,6 @@ impl Tenant {
        &self,
        timeline_id: TimelineId,
        timeline: Arc<Timeline>,
-        guard: DeletionGuard,
    ) -> anyhow::Result<()> {
        {
            // Grab the layer_removal_cs lock, and actually perform the deletion.
@@ -1673,25 +1613,6 @@ impl Tenant {
            Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))?
        });

-        if let Some(remote_client) = &timeline.remote_client {
-            remote_client.delete_all().await.context("delete_all")?
-        };
-
-        // Have a failpoint that can use the `pause` failpoint action.
-        // We don't want to block the executor thread, hence, spawn_blocking + await.
-        if cfg!(feature = "testing") {
-            tokio::task::spawn_blocking({
-                let current = tracing::Span::current();
-                move || {
-                    let _entered = current.entered();
-                    tracing::info!("at failpoint in_progress_delete");
-                    fail::fail_point!("in_progress_delete");
-                }
-            })
-            .await
-            .expect("spawn_blocking");
-        }
-
        {
            // Remove the timeline from the map.
            let mut timelines = self.timelines.lock().unwrap();
@@ -1712,7 +1633,12 @@ impl Tenant {
            drop(timelines);
        }

-        drop(guard);
+        let remote_client = match &timeline.remote_client {
+            Some(remote_client) => remote_client,
+            None => return Ok(()),
+        };
+
+        remote_client.delete_all().await?;

        Ok(())
    }
@@ -1760,15 +1686,23 @@ impl Tenant {
            timeline = Arc::clone(timeline_entry.get());

            // Prevent two tasks from trying to delete the timeline at the same time.
-            delete_lock_guard = DeletionGuard(
+            //
+            // XXX: We should perhaps return an HTTP "202 Accepted" to signal that the caller
+            // needs to poll until the operation has finished. But for now, we return an
+            // error, because the control plane knows to retry errors.
+
+            delete_lock_guard =
                Arc::clone(&timeline.delete_lock)
                    .try_lock_owned()
-                    .map_err(|_| DeleteTimelineError::AlreadyInProgress)?,
-            );
+                    .map_err(|_| {
+                        DeleteTimelineError::Other(anyhow::anyhow!(
+                            "timeline deletion is already in progress"
+                        ))
+                    })?;

            // If another task finished the deletion just before we acquired the lock,
            // return success.
-            if delete_lock_guard.is_deleted() {
+            if *delete_lock_guard {
                return Ok(());
            }

@@ -1842,7 +1776,7 @@ impl Tenant {
        self: Arc<Self>,
        timeline_id: TimelineId,
        timeline: Arc<Timeline>,
-        guard: DeletionGuard,
+        _guard: OwnedMutexGuard<bool>,
    ) {
        let tenant_id = self.tenant_id;
        let timeline_clone = Arc::clone(&timeline);
@@ -1855,7 +1789,7 @@ impl Tenant {
            "timeline_delete",
            false,
            async move {
-                if let Err(err) = self.delete_timeline(timeline_id, timeline, guard).await {
+                if let Err(err) = self.delete_timeline(timeline_id, timeline).await {
                    error!("Error: {err:#}");
                    timeline_clone.set_broken(err.to_string())
                };
@@ -3411,8 +3345,9 @@ where
 #[cfg(test)]
 pub mod harness {
    use bytes::{Bytes, BytesMut};
+    use once_cell::sync::Lazy;
    use once_cell::sync::OnceCell;
-    use std::sync::Arc;
+    use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
    use std::{fs, path::PathBuf};
    use utils::logging;
    use utils::lsn::Lsn;
@@ -3445,6 +3380,8 @@ pub mod harness {
        buf.freeze()
    }

+    static LOCK: Lazy<RwLock<()>> = Lazy::new(|| RwLock::new(()));
+
    impl From<TenantConf> for TenantConfOpt {
        fn from(tenant_conf: TenantConf) -> Self {
            Self {
@@ -3471,16 +3408,33 @@ pub mod harness {
        }
    }

-    pub struct TenantHarness {
+    pub struct TenantHarness<'a> {
        pub conf: &'static PageServerConf,
        pub tenant_conf: TenantConf,
        pub tenant_id: TenantId,
+
+        pub lock_guard: (
+            Option<RwLockReadGuard<'a, ()>>,
+            Option<RwLockWriteGuard<'a, ()>>,
+        ),
    }

    static LOG_HANDLE: OnceCell<()> = OnceCell::new();

-    impl TenantHarness {
+    impl<'a> TenantHarness<'a> {
        pub fn create(test_name: &'static str) -> anyhow::Result<Self> {
+            Self::create_internal(test_name, false)
+        }
+        pub fn create_exclusive(test_name: &'static str) -> anyhow::Result<Self> {
+            Self::create_internal(test_name, true)
+        }
+        fn create_internal(test_name: &'static str, exclusive: bool) -> anyhow::Result<Self> {
+            let lock_guard = if exclusive {
+                (None, Some(LOCK.write().unwrap()))
+            } else {
+                (Some(LOCK.read().unwrap()), None)
+            };
+
            LOG_HANDLE.get_or_init(|| {
                logging::init(
                    logging::LogFormat::Test,
@@ -3516,6 +3470,7 @@ pub mod harness {
                conf,
                tenant_conf,
                tenant_id,
+                lock_guard,
            })
        }

@@ -3540,12 +3495,26 @@ pub mod harness {
                self.tenant_id,
                None,
            ));
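+            // Collect metadata for every timeline directory found on local disk. NOTE(review): `timelines_to_load` is not read in the code visible below (`load` is called without it) - confirm this scan is still needed.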
+            let mut timelines_to_load = HashMap::new();
+            for timeline_dir_entry in fs::read_dir(self.conf.timelines_path(&self.tenant_id))
+                .expect("should be able to read timelines dir")
+            {
+                let timeline_dir_entry = timeline_dir_entry?;
+                let timeline_id: TimelineId = timeline_dir_entry
+                    .path()
+                    .file_name()
+                    .unwrap()
+                    .to_string_lossy()
+                    .parse()?;
+
+                let timeline_metadata = load_metadata(self.conf, timeline_id, self.tenant_id)?;
+                timelines_to_load.insert(timeline_id, timeline_metadata);
+            }
            tenant
                .load(None, ctx)
                .instrument(info_span!("try_load", tenant_id=%self.tenant_id))
                .await?;
-
-            // TODO reuse Tenant::activate (needs broker)
            tenant.state.send_replace(TenantState::Active);
            for timeline in tenant.timelines.lock().unwrap().values() {
                timeline.set_state(TimelineState::Active);
@@ -3611,17 +3580,13 @@ mod tests {
            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)
            .await?;

-        let writer = tline.writer().await;
-        writer
-            .put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))
-            .await?;
+        let writer = tline.writer();
+        writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
        writer.finish_write(Lsn(0x10));
        drop(writer);

-        let writer = tline.writer().await;
-        writer
-            .put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))
-            .await?;
+        let writer = tline.writer();
+        writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
        writer.finish_write(Lsn(0x20));
        drop(writer);

@@ -3682,7 +3647,7 @@ mod tests {
        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await?;
-        let writer = tline.writer().await;
+        let writer = tline.writer();

        #[allow(non_snake_case)]
        let TEST_KEY_A: Key = Key::from_hex("112222222233333333444444445500000001").unwrap();
@@ -3690,21 +3655,13 @@ mod tests {
        let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap();

        // Insert a value on the timeline
-        writer
-            .put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))
-            .await?;
-        writer
-            .put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))
-            .await?;
+        writer.put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))?;
+        writer.put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))?;
        writer.finish_write(Lsn(0x20));

-        writer
-            .put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))
-            .await?;
+        writer.put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))?;
        writer.finish_write(Lsn(0x30));
-        writer
-            .put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))
-            .await?;
+        writer.put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))?;
        writer.finish_write(Lsn(0x40));

        //assert_current_logical_size(&tline, Lsn(0x40));
@@ -3716,10 +3673,8 @@ mod tests {
        let newtline = tenant
            .get_timeline(NEW_TIMELINE_ID, true)
            .expect("Should have a local timeline");
-        let new_writer = newtline.writer().await;
-        new_writer
-            .put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))
-            .await?;
+        let new_writer = newtline.writer();
+        new_writer.put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))?;
        new_writer.finish_write(Lsn(0x40));

        // Check page contents on both branches
@@ -3745,46 +3700,38 @@ mod tests {
        let mut lsn = start_lsn;
        #[allow(non_snake_case)]
        {
-            let writer = tline.writer().await;
+            let writer = tline.writer();
            // Create a relation on the timeline
-            writer
-                .put(
-                    *TEST_KEY,
-                    lsn,
-                    &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
-                )
-                .await?;
+            writer.put(
+                *TEST_KEY,
+                lsn,
+                &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
+            )?;
            writer.finish_write(lsn);
            lsn += 0x10;
-            writer
-                .put(
-                    *TEST_KEY,
-                    lsn,
-                    &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
-                )
-                .await?;
+            writer.put(
+                *TEST_KEY,
+                lsn,
+                &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
+            )?;
            writer.finish_write(lsn);
            lsn += 0x10;
        }
        tline.freeze_and_flush().await?;
        {
-            let writer = tline.writer().await;
-            writer
-                .put(
-                    *TEST_KEY,
-                    lsn,
-                    &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
-                )
-                .await?;
+            let writer = tline.writer();
+            writer.put(
+                *TEST_KEY,
+                lsn,
+                &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
+            )?;
            writer.finish_write(lsn);
            lsn += 0x10;
-            writer
-                .put(
-                    *TEST_KEY,
-                    lsn,
-                    &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
-                )
-                .await?;
+            writer.put(
+                *TEST_KEY,
+                lsn,
+                &Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
+            )?;
            writer.finish_write(lsn);
        }
        tline.freeze_and_flush().await
@@ -4071,13 +4018,9 @@ mod tests {
        std::fs::write(metadata_path, metadata_bytes)?;

        let err = harness.try_load(&ctx).await.err().expect("should fail");
-        // get all the stack with all .context, not tonly the last one
-        let message = format!("{err:#}");
-        let expected = "Failed to parse metadata bytes from path";
-        assert!(
-            message.contains(expected),
-            "message '{message}' expected to contain {expected}"
-        );
+        assert!(err
+            .to_string()
+            .starts_with("Failed to parse metadata bytes from path"));

        let mut found_error_message = false;
        let mut err_source = err.source();
@@ -4103,40 +4046,32 @@ mod tests {
            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)
            .await?;

-        let writer = tline.writer().await;
-        writer
-            .put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))
-            .await?;
+        let writer = tline.writer();
+        writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
        writer.finish_write(Lsn(0x10));
        drop(writer);

        tline.freeze_and_flush().await?;
        tline.compact(&ctx).await?;

-        let writer = tline.writer().await;
-        writer
-            .put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))
-            .await?;
+        let writer = tline.writer();
+        writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
        writer.finish_write(Lsn(0x20));
        drop(writer);

        tline.freeze_and_flush().await?;
        tline.compact(&ctx).await?;

-        let writer = tline.writer().await;
-        writer
-            .put(*TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))
-            .await?;
+        let writer = tline.writer();
+        writer.put(*TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))?;
        writer.finish_write(Lsn(0x30));
        drop(writer);

        tline.freeze_and_flush().await?;
        tline.compact(&ctx).await?;

-        let writer = tline.writer().await;
-        writer
-            .put(*TEST_KEY, Lsn(0x40), &Value::Image(TEST_IMG("foo at 0x40")))
-            .await?;
+        let writer = tline.writer();
+        writer.put(*TEST_KEY, Lsn(0x40), &Value::Image(TEST_IMG("foo at 0x40")))?;
        writer.finish_write(Lsn(0x40));
        drop(writer);

@@ -4187,14 +4122,12 @@ mod tests {
        for _ in 0..50 {
            for _ in 0..10000 {
                test_key.field6 = blknum;
-                let writer = tline.writer().await;
-                writer
-                    .put(
-                        test_key,
-                        lsn,
-                        &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
-                    )
-                    .await?;
+                let writer = tline.writer();
+                writer.put(
+                    test_key,
+                    lsn,
+                    &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
+                )?;
                writer.finish_write(lsn);
                drop(writer);

@@ -4239,14 +4172,12 @@ mod tests {
        for blknum in 0..NUM_KEYS {
            lsn = Lsn(lsn.0 + 0x10);
            test_key.field6 = blknum as u32;
-            let writer = tline.writer().await;
-            writer
-                .put(
-                    test_key,
-                    lsn,
-                    &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
-                )
-                .await?;
+            let writer = tline.writer();
+            writer.put(
+                test_key,
+                lsn,
+                &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
+            )?;
            writer.finish_write(lsn);
            updated[blknum] = lsn;
            drop(writer);
@@ -4259,14 +4190,12 @@ mod tests {
                lsn = Lsn(lsn.0 + 0x10);
                let blknum = thread_rng().gen_range(0..NUM_KEYS);
                test_key.field6 = blknum as u32;
-                let writer = tline.writer().await;
-                writer
-                    .put(
-                        test_key,
-                        lsn,
-                        &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
-                    )
-                    .await?;
+                let writer = tline.writer();
+                writer.put(
+                    test_key,
+                    lsn,
+                    &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
+                )?;
                writer.finish_write(lsn);
                drop(writer);
                updated[blknum] = lsn;
@@ -4318,14 +4247,12 @@ mod tests {
        for blknum in 0..NUM_KEYS {
            lsn = Lsn(lsn.0 + 0x10);
            test_key.field6 = blknum as u32;
-            let writer = tline.writer().await;
-            writer
-                .put(
-                    test_key,
-                    lsn,
-                    &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
-                )
-                .await?;
+            let writer = tline.writer();
+            writer.put(
+                test_key,
+                lsn,
+                &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
+            )?;
            writer.finish_write(lsn);
            updated[blknum] = lsn;
            drop(writer);
@@ -4346,14 +4273,12 @@ mod tests {
                lsn = Lsn(lsn.0 + 0x10);
                let blknum = thread_rng().gen_range(0..NUM_KEYS);
                test_key.field6 = blknum as u32;
-                let writer = tline.writer().await;
-                writer
-                    .put(
-                        test_key,
-                        lsn,
-                        &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
-                    )
-                    .await?;
+                let writer = tline.writer();
+                writer.put(
+                    test_key,
+                    lsn,
+                    &Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
+                )?;
                println!("updating {} at {}", blknum, lsn);
                writer.finish_write(lsn);
                drop(writer);
@@ -4414,14 +4339,12 @@ mod tests {
                lsn = Lsn(lsn.0 + 0x10);
                let blknum = thread_rng().gen_range(0..NUM_KEYS);
                test_key.field6 = blknum as u32;
-                let writer = tline.writer().await;
-                writer
-                    .put(
-                        test_key,
-                        lsn,
-                        &Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
-                    )
-                    .await?;
+                let writer = tline.writer();
+                writer.put(
+                    test_key,
+                    lsn,
+                    &Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
+                )?;
                println!("updating [{}][{}] at {}", idx, blknum, lsn);
                writer.finish_write(lsn);
                drop(writer);
@@ -4492,7 +4415,6 @@ mod tests {
            .context("init_empty_test_timeline")?;
        modification
            .commit()
-            .await
            .context("commit init_empty_test_timeline modification")?;

        // Do the flush. The flush code will check the expectations that we set above.
@@ -4511,44 +4433,6 @@ mod tests {
            assert!(expect_initdb_optimization);
            assert!(initdb_optimization_count > 0);
        }
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_uninit_mark_crash() -> anyhow::Result<()> {
-        let name = "test_uninit_mark_crash";
-        let harness = TenantHarness::create(name)?;
-        {
-            let (tenant, ctx) = harness.load().await;
-            let tline =
-                tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
-            // Keeps uninit mark in place
-            std::mem::forget(tline);
-        }
-
-        let (tenant, _) = harness.load().await;
-        match tenant.get_timeline(TIMELINE_ID, false) {
-            Ok(_) => panic!("timeline should've been removed during load"),
-            Err(e) => {
-                assert_eq!(
-                    e,
-                    GetTimelineError::NotFound {
-                        tenant_id: tenant.tenant_id,
-                        timeline_id: TIMELINE_ID,
-                    }
-                )
-            }
-        }
-
-        assert!(!harness
-            .conf
-            .timeline_path(&TIMELINE_ID, &tenant.tenant_id)
-            .exists());
-
-        assert!(!harness
-            .conf
-            .timeline_uninit_mark_file_path(tenant.tenant_id, TIMELINE_ID)
-            .exists());

        Ok(())
    }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -38,8 +38,8 @@ pub mod defaults {
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
-    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
-    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
+    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
+    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
 }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -675,7 +675,7 @@ pub async fn immediate_gc(
        .get(&tenant_id)
        .map(Arc::clone)
        .with_context(|| format!("tenant {tenant_id}"))
-        .map_err(|e| ApiError::NotFound(e.into()))?;
+        .map_err(ApiError::NotFound)?;

    let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
    // Use tenant's pitr setting
@@ -724,11 +724,11 @@ pub async fn immediate_compact(
        .get(&tenant_id)
        .map(Arc::clone)
        .with_context(|| format!("tenant {tenant_id}"))
-        .map_err(|e| ApiError::NotFound(e.into()))?;
+        .map_err(ApiError::NotFound)?;

    let timeline = tenant
        .get_timeline(timeline_id, true)
-        .map_err(|e| ApiError::NotFound(e.into()))?;
+        .map_err(ApiError::NotFound)?;

    // Run in task_mgr to avoid race with tenant_detach operation
    let ctx = ctx.detached_child(TaskKind::Compaction, DownloadBehavior::Download);
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -753,18 +753,22 @@ impl RemoteTimelineClient {

        // Have a failpoint that can use the `pause` failpoint action.
        // We don't want to block the executor thread, hence, spawn_blocking + await.
-        if cfg!(feature = "testing") {
-            tokio::task::spawn_blocking({
-                let current = tracing::Span::current();
-                move || {
-                    let _entered = current.entered();
-                    tracing::info!("at failpoint persist_deleted_index_part");
-                    fail::fail_point!("persist_deleted_index_part");
-                }
-            })
-            .await
-            .expect("spawn_blocking");
-        }
+        #[cfg(feature = "testing")]
+        tokio::task::spawn_blocking({
+            let current = tracing::Span::current();
+            move || {
+                let _entered = current.entered();
+                tracing::info!(
+                    "at failpoint persist_index_part_with_deleted_flag_after_set_before_upload_pause"
+                );
+                fail::fail_point!(
+                    "persist_index_part_with_deleted_flag_after_set_before_upload_pause"
+                );
+            }
+        })
+        .await
+        .expect("spawn_blocking");
+
        upload::upload_index_part(
            self.conf,
            &self.storage_impl,
@@ -862,8 +866,10 @@ impl RemoteTimelineClient {
                "Found {} files not bound to index_file.json, proceeding with their deletion",
                remaining.len()
            );
-            warn!("About to remove {} files", remaining.len());
-            self.storage_impl.delete_objects(&remaining).await?;
+            for file in remaining {
+                warn!("Removing {}", file.object_name().unwrap_or_default());
+                self.storage_impl.delete(&file).await?;
+            }
        }

        let index_file_path = timeline_storage_path.join(Path::new(IndexPart::FILE_NAME));
@@ -1365,7 +1371,7 @@ mod tests {
    struct TestSetup {
        runtime: &'static tokio::runtime::Runtime,
        entered_runtime: EnterGuard<'static>,
-        harness: TenantHarness,
+        harness: TenantHarness<'static>,
        tenant: Arc<Tenant>,
        tenant_ctx: RequestContext,
        remote_fs_dir: PathBuf,
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -389,10 +389,10 @@ pub trait Layer: std::fmt::Debug + Send + Sync {
 }

 /// Returned by [`Layer::iter`]
-pub type LayerIter<'i> = Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + 'i + Send>;
+pub type LayerIter<'i> = Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + 'i>;

 /// Returned by [`Layer::key_iter`]
-pub type LayerKeyIter<'i> = Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'i + Send>;
+pub type LayerKeyIter<'i> = Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'i>;

 /// A Layer contains all data in a "rectangle" consisting of a range of keys and
 /// range of LSNs.
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -304,7 +304,7 @@ impl InMemoryLayer {
        Ok(())
    }

-    pub async fn put_tombstone(&self, _key_range: Range<Key>, _lsn: Lsn) -> Result<()> {
+    pub fn put_tombstone(&self, _key_range: Range<Key>, _lsn: Lsn) -> Result<()> {
        // TODO: Currently, we just leak the storage for any deleted keys

        Ok(())
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -15,7 +15,6 @@ use pageserver_api::models::{
    TimelineState,
 };
 use remote_storage::GenericRemoteStorage;
-use serde_with::serde_as;
 use storage_broker::BrokerClientChannel;
 use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError};
 use tokio_util::sync::CancellationToken;
@@ -29,7 +28,7 @@ use std::ops::{Deref, Range};
 use std::path::{Path, PathBuf};
 use std::pin::pin;
 use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
-use std::sync::{Arc, Mutex, RwLock, Weak};
+use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak};
 use std::time::{Duration, Instant, SystemTime};

 use crate::context::{DownloadBehavior, RequestContext};
@@ -48,10 +47,7 @@ use crate::tenant::{

 use crate::config::PageServerConf;
 use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum};
-use crate::metrics::{
-    TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
-    RECONSTRUCT_TIME, UNEXPECTED_ONDEMAND_DOWNLOADS,
-};
+use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
 use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
@@ -129,7 +125,7 @@ pub struct Timeline {

    pub pg_version: u32,

-    pub(crate) layers: Arc<tokio::sync::RwLock<LayerMap<dyn PersistentLayer>>>,
+    pub(super) layers: RwLock<LayerMap<dyn PersistentLayer>>,

    /// Set of key ranges which should be covered by image layers to
    /// allow GC to remove old layers. This set is created by GC and its cutoff LSN is also stored.
@@ -189,7 +185,7 @@ pub struct Timeline {
    /// Locked automatically by [`TimelineWriter`] and checkpointer.
    /// Must always be acquired before the layer map/individual layer lock
    /// to avoid deadlock.
-    write_lock: tokio::sync::Mutex<()>,
+    write_lock: Mutex<()>,

    /// Used to avoid multiple `flush_loop` tasks running
    pub(super) flush_loop_state: Mutex<FlushLoopState>,
@@ -543,7 +539,9 @@ impl Timeline {
                match cached_lsn.cmp(&lsn) {
                    Ordering::Less => {} // there might be WAL between cached_lsn and lsn, we need to check
                    Ordering::Equal => {
-                        MATERIALIZED_PAGE_CACHE_HIT_DIRECT.inc();
+                        self.metrics
+                            .materialized_page_cache_hit_upon_request_counter
+                            .inc();
                        return Ok(cached_img); // exact LSN match, return the image
                    }
                    Ordering::Greater => {
@@ -565,7 +563,8 @@ impl Timeline {
            .await?;
        timer.stop_and_record();

-        RECONSTRUCT_TIME
+        self.metrics
+            .reconstruct_time_histo
            .observe_closure_duration(|| self.reconstruct_value(key, lsn, reconstruct_state))
    }

@@ -598,8 +597,8 @@ impl Timeline {
    /// The sum of the file size of all historic layers in the layer map.
    /// This method makes no distinction between local and remote layers.
    /// Hence, the result **does not represent local filesystem usage**.
-    pub async fn layer_size_sum(&self) -> u64 {
-        let layer_map = self.layers.read().await;
+    pub fn layer_size_sum(&self) -> u64 {
+        let layer_map = self.layers.read().unwrap();
        let mut size = 0;
        for l in layer_map.iter_historic_layers() {
            size += l.file_size();
@@ -690,7 +689,7 @@ impl Timeline {
    /// Flush to disk all data that was written with the put_* functions
    #[instrument(skip(self), fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id))]
    pub async fn freeze_and_flush(&self) -> anyhow::Result<()> {
-        self.freeze_inmem_layer(false).await;
+        self.freeze_inmem_layer(false);
        self.flush_frozen_layers_and_wait().await
    }

@@ -869,10 +868,10 @@ impl Timeline {
    }

    /// Mutate the timeline with a [`TimelineWriter`].
-    pub async fn writer(&self) -> TimelineWriter<'_> {
+    pub fn writer(&self) -> TimelineWriter<'_> {
        TimelineWriter {
            tl: self,
-            _write_guard: self.write_lock.lock().await,
+            _write_guard: self.write_lock.lock().unwrap(),
        }
    }

@@ -906,10 +905,10 @@ impl Timeline {
    ///
    /// Also flush after a period of time without new data -- it helps
    /// safekeepers to regard pageserver as caught up and suspend activity.
-    pub async fn check_checkpoint_distance(self: &Arc<Timeline>) -> anyhow::Result<()> {
+    pub fn check_checkpoint_distance(self: &Arc<Timeline>) -> anyhow::Result<()> {
        let last_lsn = self.get_last_record_lsn();
        let open_layer_size = {
-            let layers = self.layers.read().await;
+            let layers = self.layers.read().unwrap();
            let Some(open_layer) = layers.open_layer.as_ref() else {
                return Ok(());
            };
@@ -933,7 +932,7 @@ impl Timeline {
                last_freeze_ts.elapsed()
            );

-            self.freeze_inmem_layer(true).await;
+            self.freeze_inmem_layer(true);
            self.last_freeze_at.store(last_lsn);
            *(self.last_freeze_ts.write().unwrap()) = Instant::now();

@@ -1039,8 +1038,8 @@ impl Timeline {
        }
    }

-    pub async fn layer_map_info(&self, reset: LayerAccessStatsReset) -> LayerMapInfo {
-        let layer_map = self.layers.read().await;
+    pub fn layer_map_info(&self, reset: LayerAccessStatsReset) -> LayerMapInfo {
+        let layer_map = self.layers.read().unwrap();
        let mut in_memory_layers = Vec::with_capacity(layer_map.frozen_layers.len() + 1);
        if let Some(open_layer) = &layer_map.open_layer {
            in_memory_layers.push(open_layer.info());
@@ -1062,7 +1061,7 @@ impl Timeline {

    #[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))]
    pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
-        let Some(layer) = self.find_layer(layer_file_name).await else { return Ok(None) };
+        let Some(layer) = self.find_layer(layer_file_name) else { return Ok(None) };
        let Some(remote_layer) = layer.downcast_remote_layer() else { return  Ok(Some(false)) };
        if self.remote_client.is_none() {
            return Ok(Some(false));
@@ -1075,7 +1074,7 @@ impl Timeline {
    /// Like [`evict_layer_batch`], but for just one layer.
    /// Additional case `Ok(None)` covers the case where the layer could not be found by its `layer_file_name`.
    pub async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
-        let Some(local_layer) = self.find_layer(layer_file_name).await else { return Ok(None) };
+        let Some(local_layer) = self.find_layer(layer_file_name) else { return Ok(None) };
        let remote_client = self
            .remote_client
            .as_ref()
@@ -1160,7 +1159,7 @@ impl Timeline {
        }

        // start the batch update
-        let mut layer_map = self.layers.write().await;
+        let mut layer_map = self.layers.write().unwrap();
        let mut batch_updates = layer_map.batch_update();

        let mut results = Vec::with_capacity(layers_to_evict.len());
@@ -1418,7 +1417,7 @@ impl Timeline {
                timeline_id,
                tenant_id,
                pg_version,
-                layers: Arc::new(tokio::sync::RwLock::new(LayerMap::default())),
+                layers: RwLock::new(LayerMap::default()),
                wanted_image_layers: Mutex::new(None),

                walredo_mgr,
@@ -1453,7 +1452,7 @@ impl Timeline {
                layer_flush_start_tx,
                layer_flush_done_tx,

-                write_lock: tokio::sync::Mutex::new(()),
+                write_lock: Mutex::new(()),
                layer_removal_cs: Default::default(),

                gc_info: std::sync::RwLock::new(GcInfo {
@@ -1599,17 +1598,15 @@ impl Timeline {
    /// Initialize with an empty layer map. Used when creating a new timeline.
    ///
    pub(super) fn init_empty_layer_map(&self, start_lsn: Lsn) {
-        let mut layers = self.layers.try_write().expect(
-            "in the context where we call this function, no other task has access to the object",
-        );
+        let mut layers = self.layers.write().unwrap();
        layers.next_open_layer_at = Some(Lsn(start_lsn.0));
    }

    ///
    /// Scan the timeline directory to populate the layer map.
    ///
-    pub(super) async fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
-        let mut layers = self.layers.write().await;
+    pub(super) fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
+        let mut layers = self.layers.write().unwrap();
        let mut updates = layers.batch_update();
        let mut num_layers = 0;

@@ -1738,7 +1735,7 @@ impl Timeline {

        // We're holding a layer map lock for a while but this
        // method is only called during init so it's fine.
-        let mut layer_map = self.layers.write().await;
+        let mut layer_map = self.layers.write().unwrap();
        let mut updates = layer_map.batch_update();
        for remote_layer_name in &index_part.timeline_layers {
            let local_layer = local_only_layers.remove(remote_layer_name);
@@ -1891,7 +1888,7 @@ impl Timeline {
        let local_layers = self
            .layers
            .read()
-            .await
+            .unwrap()
            .iter_historic_layers()
            .map(|l| (l.filename(), l))
            .collect::<HashMap<_, _>>();
@@ -2264,8 +2261,8 @@ impl Timeline {
        }
    }

-    async fn find_layer(&self, layer_file_name: &str) -> Option<Arc<dyn PersistentLayer>> {
-        for historic_layer in self.layers.read().await.iter_historic_layers() {
+    fn find_layer(&self, layer_file_name: &str) -> Option<Arc<dyn PersistentLayer>> {
+        for historic_layer in self.layers.read().unwrap().iter_historic_layers() {
            let historic_layer_name = historic_layer.filename().file_name();
            if layer_file_name == historic_layer_name {
                return Some(historic_layer);
@@ -2388,7 +2385,7 @@ impl Timeline {
                ValueReconstructResult::Continue => {
                    // If we reached an earlier cached page image, we're done.
                    if cont_lsn == cached_lsn + 1 {
-                        MATERIALIZED_PAGE_CACHE_HIT.inc_by(1);
+                        self.metrics.materialized_page_cache_hit_counter.inc_by(1);
                        return Ok(());
                    }
                    if prev_lsn <= cont_lsn {
@@ -2482,7 +2479,7 @@ impl Timeline {
            #[allow(clippy::never_loop)] // see comment at bottom of this loop
            'layer_map_search: loop {
                let remote_layer = {
-                    let layers = timeline.layers.read().await;
+                    let layers = timeline.layers.read().unwrap();

                    // Check the open and frozen in-memory layers first, in order from newest
                    // to oldest.
@@ -2664,8 +2661,8 @@ impl Timeline {
    ///
    /// Get a handle to the latest layer for appending.
    ///
-    async fn get_layer_for_write(&self, lsn: Lsn) -> anyhow::Result<Arc<InMemoryLayer>> {
-        let mut layers = self.layers.write().await;
+    fn get_layer_for_write(&self, lsn: Lsn) -> anyhow::Result<Arc<InMemoryLayer>> {
+        let mut layers = self.layers.write().unwrap();

        ensure!(lsn.is_aligned());

@@ -2714,16 +2711,17 @@ impl Timeline {
        Ok(layer)
    }

-    async fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> anyhow::Result<()> {
+    fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> anyhow::Result<()> {
        //info!("PUT: key {} at {}", key, lsn);
-        let layer = self.get_layer_for_write(lsn).await?;
+        let layer = self.get_layer_for_write(lsn)?;
        layer.put_value(key, lsn, val)?;
        Ok(())
    }

-    async fn put_tombstone(&self, key_range: Range<Key>, lsn: Lsn) -> anyhow::Result<()> {
-        let layer = self.get_layer_for_write(lsn).await?;
-        layer.put_tombstone(key_range, lsn).await?;
+    fn put_tombstone(&self, key_range: Range<Key>, lsn: Lsn) -> anyhow::Result<()> {
+        let layer = self.get_layer_for_write(lsn)?;
+        layer.put_tombstone(key_range, lsn)?;
+
        Ok(())
    }

@@ -2734,15 +2732,15 @@ impl Timeline {
        self.last_record_lsn.advance(new_lsn);
    }

-    async fn freeze_inmem_layer(&self, write_lock_held: bool) {
+    fn freeze_inmem_layer(&self, write_lock_held: bool) {
        // Freeze the current open in-memory layer. It will be written to disk on next
        // iteration.
        let _write_guard = if write_lock_held {
            None
        } else {
-            Some(self.write_lock.lock().await)
+            Some(self.write_lock.lock().unwrap())
        };
-        let mut layers = self.layers.write().await;
+        let mut layers = self.layers.write().unwrap();
        if let Some(open_layer) = &layers.open_layer {
            let open_layer_rc = Arc::clone(open_layer);
            // Does this layer need freezing?
@@ -2780,7 +2778,7 @@ impl Timeline {
            let flush_counter = *layer_flush_start_rx.borrow();
            let result = loop {
                let layer_to_flush = {
-                    let layers = self.layers.read().await;
+                    let layers = self.layers.read().unwrap();
                    layers.frozen_layers.front().cloned()
                    // drop 'layers' lock to allow concurrent reads and writes
                };
@@ -2896,7 +2894,16 @@ impl Timeline {
                    }
                }
                // normal case, write out a L0 delta layer file.
-                let (delta_path, metadata) = self.create_delta_layer(&frozen_layer).await?;
+                let this = self.clone();
+                let frozen_layer = frozen_layer.clone();
+                let span = tracing::info_span!("blocking");
+                let (delta_path, metadata) = tokio::task::spawn_blocking(move || {
+                    let _g = span.entered();
+                    this.create_delta_layer(&frozen_layer)
+                })
+                .await
+                .context("create_delta_layer spawn_blocking")
+                .and_then(|res| res)?;
                HashMap::from([(delta_path, metadata)])
            };

@@ -2905,7 +2912,7 @@ impl Timeline {
        // The new on-disk layers are now in the layer map. We can remove the
        // in-memory layer from the map now.
        {
-            let mut layers = self.layers.write().await;
+            let mut layers = self.layers.write().unwrap();
            let l = layers.frozen_layers.pop_front();

            // Only one thread may call this function at a time (for this
@@ -2999,52 +3006,34 @@ impl Timeline {
    }

    // Write out the given frozen in-memory layer as a new L0 delta file
-    async fn create_delta_layer(
+    fn create_delta_layer(
        self: &Arc<Self>,
-        frozen_layer: &Arc<InMemoryLayer>,
+        frozen_layer: &InMemoryLayer,
    ) -> anyhow::Result<(LayerFileName, LayerFileMetadata)> {
-        let span = tracing::info_span!("blocking");
-        let new_delta: DeltaLayer = tokio::task::spawn_blocking({
-            let _g = span.entered();
-            let self_clone = Arc::clone(self);
-            let frozen_layer = Arc::clone(frozen_layer);
-            move || {
-                // Write it out
-                let new_delta = frozen_layer.write_to_disk()?;
-                let new_delta_path = new_delta.path();
+        // Write it out
+        let new_delta = frozen_layer.write_to_disk()?;
+        let new_delta_path = new_delta.path();
+        let new_delta_filename = new_delta.filename();

-                // Sync it to disk.
-                //
-                // We must also fsync the timeline dir to ensure the directory entries for
-                // new layer files are durable.
-                //
-                // NB: timeline dir must be synced _after_ the file contents are durable.
-                // So, two separate fsyncs are required, they mustn't be batched.
-                //
-                // TODO: If we're running inside 'flush_frozen_layers' and there are multiple
-                // files to flush, the fsync overhead can be reduces as follows:
-                // 1. write them all to temporary file names
-                // 2. fsync them
-                // 3. rename to the final name
-                // 4. fsync the parent directory.
-                // Note that (1),(2),(3) today happen inside write_to_disk().
-                par_fsync::par_fsync(&[new_delta_path]).context("fsync of delta layer")?;
-                par_fsync::par_fsync(&[self_clone
-                    .conf
-                    .timeline_path(&self_clone.timeline_id, &self_clone.tenant_id)])
-                .context("fsync of timeline dir")?;
+        // Sync it to disk.
+        //
+        // We must also fsync the timeline dir to ensure the directory entries for
+        // new layer files are durable
+        //
+        // TODO: If we're running inside 'flush_frozen_layers' and there are multiple
+        // files to flush, it might be better to first write them all, and then fsync
+        // them all in parallel.

-                anyhow::Ok(new_delta)
-            }
-        })
-        .await
-        .context("spawn_blocking")??;
-        let new_delta_name = new_delta.filename();
-        let sz = new_delta.desc.file_size;
+        // First sync the delta layer. We still use par_fsync here to keep everything consistent. Feel free to replace
+        // this with a single fsync in future refactors.
+        par_fsync::par_fsync(&[new_delta_path.clone()]).context("fsync of delta layer")?;
+        // Then sync the parent directory.
+        par_fsync::par_fsync(&[self.conf.timeline_path(&self.timeline_id, &self.tenant_id)])
+            .context("fsync of timeline dir")?;

        // Add it to the layer map
        let l = Arc::new(new_delta);
-        let mut layers = self.layers.write().await;
+        let mut layers = self.layers.write().unwrap();
        let mut batch_updates = layers.batch_update();
        l.access_stats().record_residence_event(
            &batch_updates,
@@ -3054,12 +3043,15 @@ impl Timeline {
        batch_updates.insert_historic(l.layer_desc().clone(), l);
        batch_updates.flush();

-        // update metrics
+        // update the timeline's physical size
+        let sz = new_delta_path.metadata()?.len();
+
        self.metrics.resident_physical_size_gauge.add(sz);
+        // update metrics
        self.metrics.num_persistent_files_created.inc_by(1);
        self.metrics.persistent_bytes_written.inc_by(sz);

-        Ok((new_delta_name, LayerFileMetadata::new(sz)))
+        Ok((new_delta_filename, LayerFileMetadata::new(sz)))
    }

    async fn repartition(
@@ -3093,14 +3085,10 @@ impl Timeline {
    }

    // Is it time to create a new image layer for the given partition?
-    async fn time_for_new_image_layer(
-        &self,
-        partition: &KeySpace,
-        lsn: Lsn,
-    ) -> anyhow::Result<bool> {
+    fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> anyhow::Result<bool> {
        let threshold = self.get_image_creation_threshold();

-        let layers = self.layers.read().await;
+        let layers = self.layers.read().unwrap();

        let mut max_deltas = 0;
        {
@@ -3195,7 +3183,7 @@ impl Timeline {
        for partition in partitioning.parts.iter() {
            let img_range = start..partition.ranges.last().unwrap().end;
            start = img_range.end;
-            if force || self.time_for_new_image_layer(partition, lsn).await? {
+            if force || self.time_for_new_image_layer(partition, lsn)? {
                let mut image_layer_writer = ImageLayerWriter::new(
                    self.conf,
                    self.timeline_id,
@@ -3278,7 +3266,7 @@ impl Timeline {

        let mut layer_paths_to_upload = HashMap::with_capacity(image_layers.len());

-        let mut layers = self.layers.write().await;
+        let mut layers = self.layers.write().unwrap();
        let mut updates = layers.batch_update();
        let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);

@@ -3334,127 +3322,6 @@ impl From<anyhow::Error> for CompactionError {
    }
 }

-#[serde_as]
-#[derive(serde::Serialize)]
-struct RecordedDuration(#[serde_as(as = "serde_with::DurationMicroSeconds")] Duration);
-
-#[derive(Default)]
-enum DurationRecorder {
-    #[default]
-    NotStarted,
-    Recorded(RecordedDuration, tokio::time::Instant),
-}
-
-impl DurationRecorder {
-    pub fn till_now(&self) -> DurationRecorder {
-        match self {
-            DurationRecorder::NotStarted => {
-                panic!("must only call on recorded measurements")
-            }
-            DurationRecorder::Recorded(_, ended) => {
-                let now = tokio::time::Instant::now();
-                DurationRecorder::Recorded(RecordedDuration(now - *ended), now)
-            }
-        }
-    }
-    pub fn into_recorded(self) -> Option<RecordedDuration> {
-        match self {
-            DurationRecorder::NotStarted => None,
-            DurationRecorder::Recorded(recorded, _) => Some(recorded),
-        }
-    }
-}
-
-#[derive(Default)]
-struct CompactLevel0Phase1StatsBuilder {
-    version: Option<u64>,
-    tenant_id: Option<TenantId>,
-    timeline_id: Option<TimelineId>,
-    read_lock_acquisition_micros: DurationRecorder,
-    read_lock_held_spawn_blocking_startup_micros: DurationRecorder,
-    read_lock_held_prerequisites_micros: DurationRecorder,
-    read_lock_held_compute_holes_micros: DurationRecorder,
-    read_lock_drop_micros: DurationRecorder,
-    prepare_iterators_micros: DurationRecorder,
-    write_layer_files_micros: DurationRecorder,
-    level0_deltas_count: Option<usize>,
-    new_deltas_count: Option<usize>,
-    new_deltas_size: Option<u64>,
-}
-
-#[serde_as]
-#[derive(serde::Serialize)]
-struct CompactLevel0Phase1Stats {
-    version: u64,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    tenant_id: TenantId,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    timeline_id: TimelineId,
-    read_lock_acquisition_micros: RecordedDuration,
-    read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
-    read_lock_held_prerequisites_micros: RecordedDuration,
-    read_lock_held_compute_holes_micros: RecordedDuration,
-    read_lock_drop_micros: RecordedDuration,
-    prepare_iterators_micros: RecordedDuration,
-    write_layer_files_micros: RecordedDuration,
-    level0_deltas_count: usize,
-    new_deltas_count: usize,
-    new_deltas_size: u64,
-}
-
-impl TryFrom<CompactLevel0Phase1StatsBuilder> for CompactLevel0Phase1Stats {
-    type Error = anyhow::Error;
-
-    fn try_from(value: CompactLevel0Phase1StatsBuilder) -> Result<Self, Self::Error> {
-        Ok(Self {
-            version: value.version.ok_or_else(|| anyhow!("version not set"))?,
-            tenant_id: value
-                .tenant_id
-                .ok_or_else(|| anyhow!("tenant_id not set"))?,
-            timeline_id: value
-                .timeline_id
-                .ok_or_else(|| anyhow!("timeline_id not set"))?,
-            read_lock_acquisition_micros: value
-                .read_lock_acquisition_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("read_lock_acquisition_micros not set"))?,
-            read_lock_held_spawn_blocking_startup_micros: value
-                .read_lock_held_spawn_blocking_startup_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("read_lock_held_spawn_blocking_startup_micros not set"))?,
-            read_lock_held_prerequisites_micros: value
-                .read_lock_held_prerequisites_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("read_lock_held_prerequisites_micros not set"))?,
-            read_lock_held_compute_holes_micros: value
-                .read_lock_held_compute_holes_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("read_lock_held_compute_holes_micros not set"))?,
-            read_lock_drop_micros: value
-                .read_lock_drop_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("read_lock_drop_micros not set"))?,
-            prepare_iterators_micros: value
-                .prepare_iterators_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("prepare_iterators_micros not set"))?,
-            write_layer_files_micros: value
-                .write_layer_files_micros
-                .into_recorded()
-                .ok_or_else(|| anyhow!("write_layer_files_micros not set"))?,
-            level0_deltas_count: value
-                .level0_deltas_count
-                .ok_or_else(|| anyhow!("level0_deltas_count not set"))?,
-            new_deltas_count: value
-                .new_deltas_count
-                .ok_or_else(|| anyhow!("new_deltas_count not set"))?,
-            new_deltas_size: value
-                .new_deltas_size
-                .ok_or_else(|| anyhow!("new_deltas_size not set"))?,
-        })
-    }
-}
-
 impl Timeline {
    /// Level0 files first phase of compaction, explained in the [`compact_inner`] comment.
    ///
@@ -3462,17 +3329,15 @@ impl Timeline {
    /// returned as an error. If the `layer_removal_cs` boundary is changed not to be taken in the
    /// start of level0 files compaction, the on-demand download should be revisited as well.
    fn compact_level0_phase1(
-        self: Arc<Self>,
+        &self,
        _layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
-        layers: tokio::sync::OwnedRwLockReadGuard<LayerMap<dyn PersistentLayer>>,
-        mut stats: CompactLevel0Phase1StatsBuilder,
        target_file_size: u64,
        ctx: &RequestContext,
    ) -> Result<CompactLevel0Phase1Result, CompactionError> {
-        stats.read_lock_held_spawn_blocking_startup_micros =
-            stats.read_lock_acquisition_micros.till_now(); // set by caller
+        let layers = self.layers.read().unwrap();
        let mut level0_deltas = layers.get_level0_deltas()?;
-        stats.level0_deltas_count = Some(level0_deltas.len());
+        drop(layers);
+
        // Only compact if enough layers have accumulated.
        let threshold = self.get_compaction_threshold();
        if level0_deltas.is_empty() || level0_deltas.len() < threshold {
@@ -3550,53 +3415,6 @@ impl Timeline {
        // we don't accidentally use it later in the function.
        drop(level0_deltas);

-        stats.read_lock_held_prerequisites_micros = stats
-            .read_lock_held_spawn_blocking_startup_micros
-            .till_now();
-
-        // Determine N largest holes where N is number of compacted layers.
-        let max_holes = deltas_to_compact.len();
-        let last_record_lsn = self.get_last_record_lsn();
-        let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128;
-        let min_hole_coverage_size = 3; // TODO: something more flexible?
-
-        // min-heap (reserve space for one more element added before eviction)
-        let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);
-        let mut prev: Option<Key> = None;
-        for (next_key, _next_lsn, _size) in itertools::process_results(
-            deltas_to_compact.iter().map(|l| l.key_iter(ctx)),
-            |iter_iter| iter_iter.kmerge_by(|a, b| a.0 <= b.0),
-        )? {
-            if let Some(prev_key) = prev {
-                // just first fast filter
-                if next_key.to_i128() - prev_key.to_i128() >= min_hole_range {
-                    let key_range = prev_key..next_key;
-                    // Measuring hole by just subtraction of i128 representation of key range boundaries
-                    // has not so much sense, because largest holes will corresponds field1/field2 changes.
-                    // But we are mostly interested to eliminate holes which cause generation of excessive image layers.
-                    // That is why it is better to measure size of hole as number of covering image layers.
-                    let coverage_size = layers.image_coverage(&key_range, last_record_lsn)?.len();
-                    if coverage_size >= min_hole_coverage_size {
-                        heap.push(Hole {
-                            key_range,
-                            coverage_size,
-                        });
-                        if heap.len() > max_holes {
-                            heap.pop(); // remove smallest hole
-                        }
-                    }
-                }
-            }
-            prev = Some(next_key.next());
-        }
-        stats.read_lock_held_compute_holes_micros =
-            stats.read_lock_held_prerequisites_micros.till_now();
-        drop(layers);
-        stats.read_lock_drop_micros = stats.read_lock_held_compute_holes_micros.till_now();
-        let mut holes = heap.into_vec();
-        holes.sort_unstable_by_key(|hole| hole.key_range.start);
-        let mut next_hole = 0; // index of next hole in holes vector
-
        // This iterator walks through all key-value pairs from all the layers
        // we're compacting, in key, LSN order.
        let all_values_iter = itertools::process_results(
@@ -3636,7 +3454,46 @@ impl Timeline {
            },
        )?;

-        stats.prepare_iterators_micros = stats.read_lock_drop_micros.till_now();
+        // Determine N largest holes where N is number of compacted layers.
+        let max_holes = deltas_to_compact.len();
+        let last_record_lsn = self.get_last_record_lsn();
+        let layers = self.layers.read().unwrap(); // Isn't it better to hold the original layers lock until here?
+        let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128;
+        let min_hole_coverage_size = 3; // TODO: something more flexible?
+
+        // min-heap (reserve space for one more element added before eviction)
+        let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);
+        let mut prev: Option<Key> = None;
+        for (next_key, _next_lsn, _size) in itertools::process_results(
+            deltas_to_compact.iter().map(|l| l.key_iter(ctx)),
+            |iter_iter| iter_iter.kmerge_by(|a, b| a.0 <= b.0),
+        )? {
+            if let Some(prev_key) = prev {
+                // just first fast filter
+                if next_key.to_i128() - prev_key.to_i128() >= min_hole_range {
+                    let key_range = prev_key..next_key;
+                    // Measuring a hole by simply subtracting the i128 representations of the key range
+                    // boundaries makes little sense, because the largest holes would correspond to field1/field2 changes.
+                    // We are mostly interested in eliminating holes that cause generation of excessive image layers,
+                    // which is why it is better to measure the size of a hole as the number of covering image layers.
+                    let coverage_size = layers.image_coverage(&key_range, last_record_lsn)?.len();
+                    if coverage_size >= min_hole_coverage_size {
+                        heap.push(Hole {
+                            key_range,
+                            coverage_size,
+                        });
+                        if heap.len() > max_holes {
+                            heap.pop(); // remove smallest hole
+                        }
+                    }
+                }
+            }
+            prev = Some(next_key.next());
+        }
+        drop(layers);
+        let mut holes = heap.into_vec();
+        holes.sort_unstable_by_key(|hole| hole.key_range.start);
+        let mut next_hole = 0; // index of next hole in holes vector

        // Merge the contents of all the input delta layers into a new set
        // of delta layers, based on the current partitioning.
@@ -3796,26 +3653,8 @@ impl Timeline {
            layer_paths.pop().unwrap();
        }

-        stats.write_layer_files_micros = stats.prepare_iterators_micros.till_now();
-        stats.new_deltas_count = Some(new_layers.len());
-        stats.new_deltas_size = Some(new_layers.iter().map(|l| l.desc.file_size).sum());
-
        drop(all_keys_iter); // So that deltas_to_compact is no longer borrowed

-        match TryInto::<CompactLevel0Phase1Stats>::try_into(stats)
-            .and_then(|stats| serde_json::to_string(&stats).context("serde_json::to_string"))
-        {
-            Ok(stats_json) => {
-                info!(
-                    stats_json = stats_json.as_str(),
-                    "compact_level0_phase1 stats available"
-                )
-            }
-            Err(e) => {
-                warn!("compact_level0_phase1 stats failed to serialize: {:#}", e);
-            }
-        }
-
        Ok(CompactLevel0Phase1Result {
            new_layers,
            deltas_to_compact,
@@ -3832,39 +3671,21 @@ impl Timeline {
        target_file_size: u64,
        ctx: &RequestContext,
    ) -> Result<(), CompactionError> {
+        let this = self.clone();
+        let ctx_inner = ctx.clone();
+        let layer_removal_cs_inner = layer_removal_cs.clone();
+        let span = tracing::info_span!("blocking");
        let CompactLevel0Phase1Result {
            new_layers,
            deltas_to_compact,
-        } = {
-            let phase1_span = info_span!("compact_level0_phase1");
-            let myself = Arc::clone(self);
-            let ctx = ctx.attached_child(); // technically, the spawn_blocking can outlive this future
-            let mut stats = CompactLevel0Phase1StatsBuilder {
-                version: Some(2),
-                tenant_id: Some(self.tenant_id),
-                timeline_id: Some(self.timeline_id),
-                ..Default::default()
-            };
-
-            let begin = tokio::time::Instant::now();
-            let phase1_layers_locked = Arc::clone(&self.layers).read_owned().await;
-            let now = tokio::time::Instant::now();
-            stats.read_lock_acquisition_micros =
-                DurationRecorder::Recorded(RecordedDuration(now - begin), now);
-            let layer_removal_cs = layer_removal_cs.clone();
-            tokio::task::spawn_blocking(move || {
-                let _entered = phase1_span.enter();
-                myself.compact_level0_phase1(
-                    layer_removal_cs,
-                    phase1_layers_locked,
-                    stats,
-                    target_file_size,
-                    &ctx,
-                )
-            })
-            .await
-            .context("spawn_blocking")??
-        };
+        } = tokio::task::spawn_blocking(move || {
+            let _g = span.entered();
+            this.compact_level0_phase1(layer_removal_cs_inner, target_file_size, &ctx_inner)
+        })
+        .await
+        .context("compact_level0_phase1 spawn_blocking")
+        .map_err(CompactionError::Other)
+        .and_then(|res| res)?;

        if new_layers.is_empty() && deltas_to_compact.is_empty() {
            // nothing to do
@@ -3882,7 +3703,7 @@ impl Timeline {
                .context("wait for layer upload ops to complete")?;
        }

-        let mut layers = self.layers.write().await;
+        let mut layers = self.layers.write().unwrap();
        let mut updates = layers.batch_update();
        let mut new_layer_paths = HashMap::with_capacity(new_layers.len());
        for l in new_layers {
@@ -3969,7 +3790,6 @@ impl Timeline {
    /// for example. The caller should hold `Tenant::gc_cs` lock to ensure
    /// that.
    ///
-    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
    pub(super) async fn update_gc_info(
        &self,
        retain_lsns: Vec<Lsn>,
@@ -4142,7 +3962,7 @@ impl Timeline {
        // 4. newer on-disk image layers cover the layer's whole key range
        //
        // TODO holding a write lock is too agressive and avoidable
-        let mut layers = self.layers.write().await;
+        let mut layers = self.layers.write().unwrap();
        'outer: for l in layers.iter_historic_layers() {
            result.layers_total += 1;

@@ -4442,7 +4262,7 @@ impl Timeline {

                    // Download complete. Replace the RemoteLayer with the corresponding
                    // Delta- or ImageLayer in the layer map.
-                    let mut layers = self_clone.layers.write().await;
+                    let mut layers = self_clone.layers.write().unwrap();
                    let mut updates = layers.batch_update();
                    let new_layer = remote_layer.create_downloaded_layer(&updates, self_clone.conf, *size);
                    {
@@ -4600,7 +4420,7 @@ impl Timeline {
    ) {
        let mut downloads = Vec::new();
        {
-            let layers = self.layers.read().await;
+            let layers = self.layers.read().unwrap();
            layers
                .iter_historic_layers()
                .filter_map(|l| l.downcast_remote_layer())
@@ -4702,8 +4522,8 @@ impl LocalLayerInfoForDiskUsageEviction {
 }

 impl Timeline {
-    pub(crate) async fn get_local_layers_for_disk_usage_eviction(&self) -> DiskUsageEvictionInfo {
-        let layers = self.layers.read().await;
+    pub(crate) fn get_local_layers_for_disk_usage_eviction(&self) -> DiskUsageEvictionInfo {
+        let layers = self.layers.read().unwrap();

        let mut max_layer_size: Option<u64> = None;
        let mut resident_layers = Vec::new();
@@ -4775,7 +4595,7 @@ fn layer_traversal_error(msg: String, path: Vec<TraversalPathItem>) -> PageRecon
 // but will cause large code changes.
 pub struct TimelineWriter<'a> {
    tl: &'a Timeline,
-    _write_guard: tokio::sync::MutexGuard<'a, ()>,
+    _write_guard: MutexGuard<'a, ()>,
 }

 impl Deref for TimelineWriter<'_> {
@@ -4791,12 +4611,12 @@ impl<'a> TimelineWriter<'a> {
    ///
    /// This will implicitly extend the relation, if the page is beyond the
    /// current end-of-file.
-    pub async fn put(&self, key: Key, lsn: Lsn, value: &Value) -> anyhow::Result<()> {
-        self.tl.put_value(key, lsn, value).await
+    pub fn put(&self, key: Key, lsn: Lsn, value: &Value) -> anyhow::Result<()> {
+        self.tl.put_value(key, lsn, value)
    }

-    pub async fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> anyhow::Result<()> {
-        self.tl.put_tombstone(key_range, lsn).await
+    pub fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> anyhow::Result<()> {
+        self.tl.put_tombstone(key_range, lsn)
    }

    /// Track the end of the latest digested WAL record.
@@ -4816,14 +4636,6 @@ impl<'a> TimelineWriter<'a> {
    }
 }

-// We need TimelineWriter to be send in upcoming conversion of
-// Timeline::layers to tokio::sync::RwLock.
-#[test]
-fn is_send() {
-    fn _assert_send<T: Send>() {}
-    _assert_send::<TimelineWriter<'_>>();
-}
-
 /// Add a suffix to a layer file's name: .{num}.old
 /// Uses the first available num (starts at 0)
 fn rename_to_backup(path: &Path) -> anyhow::Result<()> {
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -197,7 +197,7 @@ impl Timeline {
        // We don't want to hold the layer map lock during eviction.
        // So, we just need to deal with this.
        let candidates: Vec<Arc<dyn PersistentLayer>> = {
-            let layers = self.layers.read().await;
+            let layers = self.layers.read().unwrap();
            let mut candidates = Vec::new();
            for hist_layer in layers.iter_historic_layers() {
                if hist_layer.is_remote_layer() {
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -1321,7 +1321,7 @@ mod tests {

    const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";

-    async fn dummy_state(harness: &TenantHarness) -> ConnectionManagerState {
+    async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState {
        let (tenant, ctx) = harness.load().await;
        let timeline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx)
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -304,15 +304,12 @@ pub(super) async fn handle_walreceiver_connection(
            }
        }

-        timeline
-            .check_checkpoint_distance()
-            .await
-            .with_context(|| {
-                format!(
-                    "Failed to check checkpoint distance for timeline {}",
-                    timeline.timeline_id
-                )
-            })?;
+        timeline.check_checkpoint_distance().with_context(|| {
+            format!(
+                "Failed to check checkpoint distance for timeline {}",
+                timeline.timeline_id
+            )
+        })?;

        if let Some(last_lsn) = status_update {
            let timeline_remote_consistent_lsn =
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -25,7 +25,7 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
 use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
 use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};

-use anyhow::{Context, Result};
+use anyhow::Result;
 use bytes::{Buf, Bytes, BytesMut};
 use tracing::*;

@@ -333,7 +333,7 @@ impl<'a> WalIngest<'a> {

        // Now that this record has been fully handled, including updating the
        // checkpoint data, let the repository know that it is up-to-date to this LSN
-        modification.commit().await?;
+        modification.commit()?;

        Ok(())
    }
@@ -1082,10 +1082,7 @@ impl<'a> WalIngest<'a> {
            .await?
        {
            // create it with 0 size initially, the logic below will extend it
-            modification
-                .put_rel_creation(rel, 0, ctx)
-                .await
-                .context("Relation Error")?;
+            modification.put_rel_creation(rel, 0, ctx).await?;
            0
        } else {
            self.timeline.get_rel_size(rel, last_lsn, true, ctx).await?
@@ -1202,7 +1199,7 @@ mod tests {
        let mut m = tline.begin_modification(Lsn(0x10));
        m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
        m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
-        m.commit().await?;
+        m.commit()?;
        let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;

        Ok(walingest)
@@ -1221,22 +1218,22 @@ mod tests {
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        let mut m = tline.begin_modification(Lsn(0x30));
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 3"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        let mut m = tline.begin_modification(Lsn(0x40));
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1 at 4"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        let mut m = tline.begin_modification(Lsn(0x50));
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;

        assert_current_logical_size(&tline, Lsn(0x50));

@@ -1322,7 +1319,7 @@ mod tests {
        walingest
            .put_rel_truncation(&mut m, TESTREL_A, 2, &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        assert_current_logical_size(&tline, Lsn(0x60));

        // Check reported size and contents after truncation
@@ -1364,7 +1361,7 @@ mod tests {
        walingest
            .put_rel_truncation(&mut m, TESTREL_A, 0, &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        assert_eq!(
            tline
                .get_rel_size(TESTREL_A, Lsn(0x68), false, &ctx)
@@ -1377,7 +1374,7 @@ mod tests {
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        assert_eq!(
            tline
                .get_rel_size(TESTREL_A, Lsn(0x70), false, &ctx)
@@ -1402,7 +1399,7 @@ mod tests {
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 1500, TEST_IMG("foo blk 1500"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        assert_eq!(
            tline
                .get_rel_size(TESTREL_A, Lsn(0x80), false, &ctx)
@@ -1441,7 +1438,7 @@ mod tests {
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;

        // Check that rel exists and size is correct
        assert_eq!(
@@ -1460,7 +1457,7 @@ mod tests {
        // Drop rel
        let mut m = tline.begin_modification(Lsn(0x30));
        walingest.put_rel_drop(&mut m, TESTREL_A, &ctx).await?;
-        m.commit().await?;
+        m.commit()?;

        // Check that rel is not visible anymore
        assert_eq!(
@@ -1478,7 +1475,7 @@ mod tests {
        walingest
            .put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 4"), &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;

        // Check that rel exists and size is correct
        assert_eq!(
@@ -1517,7 +1514,7 @@ mod tests {
                .put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
                .await?;
        }
-        m.commit().await?;
+        m.commit()?;

        // The relation was created at LSN 20, not visible at LSN 1 yet.
        assert_eq!(
@@ -1562,7 +1559,7 @@ mod tests {
        walingest
            .put_rel_truncation(&mut m, TESTREL_A, 1, &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;

        // Check reported size and contents after truncation
        assert_eq!(
@@ -1611,7 +1608,7 @@ mod tests {
                .put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
                .await?;
        }
-        m.commit().await?;
+        m.commit()?;

        assert_eq!(
            tline
@@ -1658,7 +1655,7 @@ mod tests {
            walingest
                .put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img, &ctx)
                .await?;
-            m.commit().await?;
+            m.commit()?;
        }

        assert_current_logical_size(&tline, Lsn(lsn));
@@ -1674,7 +1671,7 @@ mod tests {
        walingest
            .put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE, &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        assert_eq!(
            tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
            RELSEG_SIZE
@@ -1687,7 +1684,7 @@ mod tests {
        walingest
            .put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE - 1, &ctx)
            .await?;
-        m.commit().await?;
+        m.commit()?;
        assert_eq!(
            tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
            RELSEG_SIZE - 1
@@ -1703,7 +1700,7 @@ mod tests {
            walingest
                .put_rel_truncation(&mut m, TESTREL_A, size as BlockNumber, &ctx)
                .await?;
-            m.commit().await?;
+            m.commit()?;
            assert_eq!(
                tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
                size as BlockNumber
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -257,7 +257,7 @@ nwp_register_gucs(void)
 							"Walproposer reconnects to offline safekeepers once in this interval.",
 							NULL,
 							&wal_acceptor_reconnect_timeout,
-							1000, 0, INT_MAX,	/* default, min, max */
+							5000, 0, INT_MAX,	/* default, min, max */
 							PGC_SIGHUP, /* context */
 							GUC_UNIT_MS,	/* flags */
 							NULL, NULL, NULL);
--- a/proxy/src/http/sql_over_http.rs
+++ b/proxy/src/http/sql_over_http.rs
@@ -1,6 +1,5 @@
 use futures::pin_mut;
 use futures::StreamExt;
-use futures::TryFutureExt;
 use hyper::body::HttpBody;
 use hyper::http::HeaderName;
 use hyper::http::HeaderValue;
@@ -12,13 +11,8 @@ use serde_json::Value;
 use tokio_postgres::types::Kind;
 use tokio_postgres::types::Type;
 use tokio_postgres::Row;
-use tracing::error;
-use tracing::info;
-use tracing::instrument;
 use url::Url;

-use crate::proxy::invalidate_cache;
-use crate::proxy::NUM_RETRIES_WAKE_COMPUTE;
 use crate::{auth, config::ProxyConfig, console};

 #[derive(serde::Deserialize)]
@@ -96,17 +90,10 @@ fn json_array_to_pg_array(value: &Value) -> Result<Option<String>, serde_json::E
    }
 }

-struct ConnInfo {
-    username: String,
-    dbname: String,
-    hostname: String,
-    password: String,
-}
-
 fn get_conn_info(
    headers: &HeaderMap,
    sni_hostname: Option<String>,
-) -> Result<ConnInfo, anyhow::Error> {
+) -> Result<(String, String, String, String), anyhow::Error> {
    let connection_string = headers
        .get("Neon-Connection-String")
        .ok_or(anyhow::anyhow!("missing connection string"))?
@@ -159,12 +146,12 @@ fn get_conn_info(
        }
    }

-    Ok(ConnInfo {
-        username: username.to_owned(),
-        dbname: dbname.to_owned(),
-        hostname: hostname.to_owned(),
-        password: password.to_owned(),
-    })
+    Ok((
+        username.to_owned(),
+        dbname.to_owned(),
+        hostname.to_owned(),
+        password.to_owned(),
+    ))
 }

 // TODO: return different http error codes
@@ -177,10 +164,10 @@ pub async fn handle(
    // Determine the destination and connection params
    //
    let headers = request.headers();
-    let conn_info = get_conn_info(headers, sni_hostname)?;
+    let (username, dbname, hostname, password) = get_conn_info(headers, sni_hostname)?;
    let credential_params = StartupMessageParams::new([
-        ("user", &conn_info.username),
-        ("database", &conn_info.dbname),
+        ("user", &username),
+        ("database", &dbname),
        ("application_name", APP_NAME),
    ]);

@@ -199,20 +186,21 @@ pub async fn handle(
    let creds = config
        .auth_backend
        .as_ref()
-        .map(|_| {
-            auth::ClientCredentials::parse(
-                &credential_params,
-                Some(&conn_info.hostname),
-                common_names,
-            )
-        })
+        .map(|_| auth::ClientCredentials::parse(&credential_params, Some(&hostname), common_names))
        .transpose()?;
    let extra = console::ConsoleReqExtra {
        session_id: uuid::Uuid::new_v4(),
        application_name: Some(APP_NAME),
    };
-
-    let mut node_info = creds.wake_compute(&extra).await?.expect("msg");
+    let node = creds.wake_compute(&extra).await?.expect("msg");
+    let conf = node.value.config;
+    let port = *conf.get_ports().first().expect("no port");
+    let host = match conf.get_hosts().first().expect("no host") {
+        tokio_postgres::config::Host::Tcp(host) => host,
+        tokio_postgres::config::Host::Unix(_) => {
+            return Err(anyhow::anyhow!("unix socket is not supported"));
+        }
+    };

    let request_content_length = match request.body().size_hint().upper() {
        Some(v) => v,
@@ -232,10 +220,28 @@ pub async fn handle(
    let QueryData { query, params } = serde_json::from_slice(&body)?;
    let query_params = json_to_pg_text(params)?;

+    //
+    // Connect to the destination
+    //
+    let (client, connection) = tokio_postgres::Config::new()
+        .host(host)
+        .port(port)
+        .user(&username)
+        .password(&password)
+        .dbname(&dbname)
+        .max_backend_message_size(MAX_RESPONSE_SIZE)
+        .connect(tokio_postgres::NoTls)
+        .await?;
+
+    tokio::spawn(async move {
+        if let Err(e) = connection.await {
+            eprintln!("connection error: {}", e);
+        }
+    });
+
    //
    // Now execute the query and return the result
    //
-    let client = connect_to_compute(&mut node_info, &extra, &creds, &conn_info).await?;
    let row_stream = client.query_raw_txt(query, query_params).await?;

    // Manually drain the stream into a vector to leave row_stream hanging
@@ -274,11 +280,6 @@ pub async fn handle(
                json!({
                    "name": Value::String(c.name().to_owned()),
                    "dataTypeID": Value::Number(c.type_().oid().into()),
-                    "tableID": c.table_oid(),
-                    "columnID": c.column_id(),
-                    "dataTypeSize": c.type_size(),
-                    "dataTypeModifier": c.type_modifier(),
-                    "format": "text",
                })
            })
            .collect::<Vec<_>>()
@@ -302,70 +303,6 @@ pub async fn handle(
    }))
 }

-/// This function is a copy of `connect_to_compute` from `src/proxy.rs` with
-/// the difference that it uses `tokio_postgres` for the connection.
-#[instrument(skip_all)]
-async fn connect_to_compute(
-    node_info: &mut console::CachedNodeInfo,
-    extra: &console::ConsoleReqExtra<'_>,
-    creds: &auth::BackendType<'_, auth::ClientCredentials<'_>>,
-    conn_info: &ConnInfo,
-) -> anyhow::Result<tokio_postgres::Client> {
-    let mut num_retries: usize = NUM_RETRIES_WAKE_COMPUTE;
-
-    loop {
-        match connect_to_compute_once(node_info, conn_info).await {
-            Err(e) if num_retries > 0 => {
-                info!("compute node's state has changed; requesting a wake-up");
-                match creds.wake_compute(extra).await? {
-                    // Update `node_info` and try one more time.
-                    Some(new) => {
-                        *node_info = new;
-                    }
-                    // Link auth doesn't work that way, so we just exit.
-                    None => return Err(e),
-                }
-            }
-            other => return other,
-        }
-
-        num_retries -= 1;
-        info!("retrying after wake-up ({num_retries} attempts left)");
-    }
-}
-
-async fn connect_to_compute_once(
-    node_info: &console::CachedNodeInfo,
-    conn_info: &ConnInfo,
-) -> anyhow::Result<tokio_postgres::Client> {
-    let mut config = (*node_info.config).clone();
-
-    let (client, connection) = config
-        .user(&conn_info.username)
-        .password(&conn_info.password)
-        .dbname(&conn_info.dbname)
-        .max_backend_message_size(MAX_RESPONSE_SIZE)
-        .connect(tokio_postgres::NoTls)
-        .inspect_err(|e: &tokio_postgres::Error| {
-            error!(
-                "failed to connect to compute node hosts={:?} ports={:?}: {}",
-                node_info.config.get_hosts(),
-                node_info.config.get_ports(),
-                e
-            );
-            invalidate_cache(node_info)
-        })
-        .await?;
-
-    tokio::spawn(async move {
-        if let Err(e) = connection.await {
-            error!("connection error: {}", e);
-        }
-    });
-
-    Ok(client)
-}
-
 //
 // Convert postgres row with text-encoded values to JSON object
 //
--- a/proxy/src/http/websocket.rs
+++ b/proxy/src/http/websocket.rs
@@ -26,6 +26,7 @@ use tls_listener::TlsListener;
 use tokio::{
    io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
    net::TcpListener,
+    select,
 };
 use tokio_util::sync::CancellationToken;
 use tracing::{error, info, info_span, warn, Instrument};
@@ -192,9 +193,14 @@ async fn ws_handler(
    // TODO: that deserves a refactor as now this function also handles http json client besides websockets.
    // Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
    } else if request.uri().path() == "/sql" && request.method() == Method::POST {
-        let result = sql_over_http::handle(config, request, sni_hostname)
-            .instrument(info_span!("sql-over-http"))
-            .await;
+        let result = select! {
+            _ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
+                Err(anyhow::anyhow!("Query timed out"))
+            }
+            response = sql_over_http::handle(config, request, sni_hostname) => {
+                response
+            }
+        };
        let status_code = match result {
            Ok(_) => StatusCode::OK,
            Err(_) => StatusCode::BAD_REQUEST,
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -22,7 +22,7 @@ use tracing::{error, info, warn};
 use utils::measured_stream::MeasuredStream;

 /// Number of times we should retry the `/proxy_wake_compute` http request.
-pub const NUM_RETRIES_WAKE_COMPUTE: usize = 1;
+const NUM_RETRIES_WAKE_COMPUTE: usize = 1;

 const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
 const ERR_PROTO_VIOLATION: &str = "protocol violation";
@@ -283,35 +283,34 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    }
 }

-/// If we couldn't connect, a cached connection info might be to blame
-/// (e.g. the compute node's address might've changed at the wrong time).
-/// Invalidate the cache entry (if any) to prevent subsequent errors.
-#[tracing::instrument(name = "invalidate_cache", skip_all)]
-pub fn invalidate_cache(node_info: &console::CachedNodeInfo) {
-    let is_cached = node_info.cached();
-    if is_cached {
-        warn!("invalidating stalled compute node info cache entry");
-        node_info.invalidate();
-    }
-
-    let label = match is_cached {
-        true => "compute_cached",
-        false => "compute_uncached",
-    };
-    NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
-}
-
 /// Try to connect to the compute node once.
 #[tracing::instrument(name = "connect_once", skip_all)]
 async fn connect_to_compute_once(
    node_info: &console::CachedNodeInfo,
 ) -> Result<PostgresConnection, compute::ConnectionError> {
+    // If we couldn't connect, a cached connection info might be to blame
+    // (e.g. the compute node's address might've changed at the wrong time).
+    // Invalidate the cache entry (if any) to prevent subsequent errors.
+    let invalidate_cache = |_: &compute::ConnectionError| {
+        let is_cached = node_info.cached();
+        if is_cached {
+            warn!("invalidating stalled compute node info cache entry");
+            node_info.invalidate();
+        }
+
+        let label = match is_cached {
+            true => "compute_cached",
+            false => "compute_uncached",
+        };
+        NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
+    };
+
    let allow_self_signed_compute = node_info.allow_self_signed_compute;

    node_info
        .config
        .connect(allow_self_signed_compute)
-        .inspect_err(|_: &compute::ConnectionError| invalidate_cache(node_info))
+        .inspect_err(invalidate_cache)
        .await
 }

--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "1.70.0"
+channel = "1.68.2"
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -266,7 +266,7 @@ impl From<TimelineError> for ApiError {
    fn from(te: TimelineError) -> ApiError {
        match te {
            TimelineError::NotFound(ttid) => {
-                ApiError::NotFound(anyhow!("timeline {} not found", ttid).into())
+                ApiError::NotFound(anyhow!("timeline {} not found", ttid))
            }
            _ => ApiError::InternalServerError(anyhow!("{}", te)),
        }
--- a/scripts/comment-test-report.js
+++ b/scripts/comment-test-report.js
@@ -1,5 +1,3 @@
-#! /usr/bin/env node
-
 //
 // The script parses Allure reports and posts a comment with a summary of the test results to the PR or to the latest commit in the branch.
 //
@@ -21,7 +19,7 @@
 //       })
 //

-// Equivalent of Python's defaultdict.
+// Analog of Python's defaultdict.
 //
 // const dm = new DefaultMap(() => new DefaultMap(() => []))
 // dm["firstKey"]["secondKey"].push("value")
@@ -34,7 +32,34 @@ class DefaultMap extends Map {
    }
 }

-const parseReportJson = async ({ reportJsonUrl, fetch }) => {
+module.exports = async ({ github, context, fetch, report }) => {
+    // Marker to find the comment in the subsequent runs
+    const startMarker = `<!--AUTOMATIC COMMENT START #${context.payload.number}-->`
+    // If we run the script in the PR or in the branch (main/release/...)
+    const isPullRequest = !!context.payload.pull_request
+    // Latest commit in PR or in the branch
+    const commitSha = isPullRequest ? context.payload.pull_request.head.sha : context.sha
+    // Let users know that the comment is updated automatically
+    const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results<br>${commitSha} at ${new Date().toISOString()} :recycle:</sub></div>`
+    // GitHub bot id taken from (https://api.github.com/users/github-actions[bot])
+    const githubActionsBotId = 41898282
+    // Comment body itself
+    let commentBody = `${startMarker}\n`
+
+    // Common parameters for GitHub API requests
+    const ownerRepoParams = {
+        owner: context.repo.owner,
+        repo: context.repo.repo,
+    }
+
+    const {reportUrl, reportJsonUrl} = report
+
+    if (!reportUrl || !reportJsonUrl) {
+        commentBody += `#### No tests were run or test report is not available\n`
+        commentBody += autoupdateNotice
+        return
+    }
+
    const suites = await (await fetch(reportJsonUrl)).json()

    // Allure distinguishes "failed" (with an assertion error) and "broken" (with any other error) tests.
@@ -58,7 +83,7 @@ const parseReportJson = async ({ reportJsonUrl, fetch }) => {
                let buildType, pgVersion
                const match = test.name.match(/[\[-](?<buildType>debug|release)-pg(?<pgVersion>\d+)[-\]]/)?.groups
                if (match) {
-                    ({ buildType, pgVersion } = match)
+                    ({buildType, pgVersion} = match)
                } else {
                    // It's ok, we embed BUILD_TYPE and Postgres Version into the test name only for regress suite and do not for other suites (like performance).
                    console.info(`Cannot get BUILD_TYPE and Postgres Version from test name: "${test.name}", defaulting to "release" and "14"`)
@@ -98,68 +123,37 @@ const parseReportJson = async ({ reportJsonUrl, fetch }) => {
        }
    }

-    return {
-        failedTests,
-        failedTestsCount,
-        passedTests,
-        passedTestsCount,
-        skippedTests,
-        skippedTestsCount,
-        flakyTests,
-        flakyTestsCount,
-        retriedTests,
-        pgVersions,
-    }
-}
-
-const reportSummary = async (params) => {
-    const {
-        failedTests,
-        failedTestsCount,
-        passedTests,
-        passedTestsCount,
-        skippedTests,
-        skippedTestsCount,
-        flakyTests,
-        flakyTestsCount,
-        retriedTests,
-        pgVersions,
-        reportUrl,
-    } = params
-
-    let summary = ""
-
    const totalTestsCount = failedTestsCount + passedTestsCount + skippedTestsCount
-    summary += `### ${totalTestsCount} tests run: ${passedTestsCount} passed, ${failedTestsCount} failed, ${skippedTestsCount} skipped ([full report](${reportUrl}))\n___\n`
+    commentBody += `### ${totalTestsCount} tests run: ${passedTestsCount} passed, ${failedTestsCount} failed, ${skippedTestsCount} skipped ([full report](${reportUrl}))\n___\n`

    // Print test resuls from the newest to the oldest Postgres version for release and debug builds.
    for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
        if (Object.keys(failedTests[pgVersion]).length > 0) {
-            summary += `#### Failures on Posgres ${pgVersion}\n\n`
+            commentBody += `#### Failures on Posgres ${pgVersion}\n\n`
            for (const [testName, tests] of Object.entries(failedTests[pgVersion])) {
                const links = []
                for (const test of tests) {
                    const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}`
                    links.push(`[${test.buildType}](${allureLink})`)
                }
-                summary += `- \`${testName}\`: ${links.join(", ")}\n`
+                commentBody += `- \`${testName}\`: ${links.join(", ")}\n`
            }

            const testsToRerun = Object.values(failedTests[pgVersion]).map(x => x[0].name)
            const command = `DEFAULT_PG_VERSION=${pgVersion} scripts/pytest -k "${testsToRerun.join(" or ")}"`

-            summary += "```\n"
-            summary += `# Run failed on Postgres ${pgVersion} tests locally:\n`
-            summary += `${command}\n`
-            summary += "```\n"
+            commentBody += "```\n"
+            commentBody += `# Run failed on Postgres ${pgVersion} tests locally:\n`
+            commentBody += `${command}\n`
+            commentBody += "```\n"
        }
    }

    if (flakyTestsCount > 0) {
-        summary += `<details>\n<summary>Flaky tests (${flakyTestsCount})</summary>\n\n`
+        commentBody += `<details>\n<summary>Flaky tests (${flakyTestsCount})</summary>\n\n`
        for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
            if (Object.keys(flakyTests[pgVersion]).length > 0) {
-                summary += `#### Postgres ${pgVersion}\n\n`
+                commentBody += `#### Postgres ${pgVersion}\n\n`
                for (const [testName, tests] of Object.entries(flakyTests[pgVersion])) {
                    const links = []
                    for (const test of tests) {
@@ -167,57 +161,11 @@ const reportSummary = async (params) => {
                        const status = test.status === "passed" ? ":white_check_mark:" : ":x:"
                        links.push(`[${status} ${test.buildType}](${allureLink})`)
                    }
-                    summary += `- \`${testName}\`: ${links.join(", ")}\n`
+                    commentBody += `- \`${testName}\`: ${links.join(", ")}\n`
                }
            }
        }
-        summary += "\n</details>\n"
-    }
-
-    return summary
-}
-
-module.exports = async ({ github, context, fetch, report }) => {
-    // Marker to find the comment in the subsequent runs
-    const startMarker = `<!--AUTOMATIC COMMENT START #${context.payload.number}-->`
-    // If we run the script in the PR or in the branch (main/release/...)
-    const isPullRequest = !!context.payload.pull_request
-    // Latest commit in PR or in the branch
-    const commitSha = isPullRequest ? context.payload.pull_request.head.sha : context.sha
-    // Let users know that the comment is updated automatically
-    const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results<br>${commitSha} at ${new Date().toISOString()} :recycle:</sub></div>`
-    // GitHub bot id taken from (https://api.github.com/users/github-actions[bot])
-    const githubActionsBotId = 41898282
-    // Commend body itself
-    let commentBody = `${startMarker}\n`
-
-    // Common parameters for GitHub API requests
-    const ownerRepoParams = {
-        owner: context.repo.owner,
-        repo: context.repo.repo,
-    }
-
-    const {reportUrl, reportJsonUrl} = report
-
-    if (!reportUrl || !reportJsonUrl) {
-        commentBody += `#### No tests were run or test report is not available\n`
-        commentBody += autoupdateNotice
-        return
-    }
-
-    try {
-        const parsed = await parseReportJson({ reportJsonUrl, fetch })
-        commentBody += await reportSummary({ ...parsed, reportUrl })
-    } catch (error) {
-        commentBody += `### [full report](${reportUrl})\n___\n`
-        commentBody += `#### Failed to create a summary for the test run: \n`
-        commentBody += "```\n"
-        commentBody += `${error.stack}\n`
-        commentBody += "```\n"
-        commentBody += "\nTo reproduce and debug the error locally run:\n"
-        commentBody += "```\n"
-        commentBody += `scripts/comment-test-report.js ${reportJsonUrl}`
-        commentBody += "\n```\n"
+        commentBody += "\n</details>\n"
    }

    commentBody += autoupdateNotice
@@ -259,60 +207,3 @@ module.exports = async ({ github, context, fetch, report }) => {
        })
    }
 }
-
-// Equivalent of Python's `if __name__ == "__main__":`
-// https://nodejs.org/docs/latest/api/modules.html#accessing-the-main-module
-if (require.main === module) {
-    // Poor man's argument parsing: we expect the third argument is a JSON URL (0: node binary, 1: this script, 2: JSON url)
-    if (process.argv.length !== 3) {
-        console.error(`Unexpected number of arguments\nUsage: node ${process.argv[1]} <jsonUrl>`)
-        process.exit(1)
-    }
-    const jsonUrl = process.argv[2]
-
-    try {
-        new URL(jsonUrl)
-    } catch (error) {
-        console.error(`Invalid URL: ${jsonUrl}\nUsage: node ${process.argv[1]} <jsonUrl>`)
-        process.exit(1)
-    }
-
-    const htmlUrl = jsonUrl.replace("/data/suites.json", "/index.html")
-
-    const githubMock = {
-        rest: {
-            issues: {
-                createComment: console.log,
-                listComments: async () => ({ data: [] }),
-                updateComment: console.log
-            },
-            repos: {
-                createCommitComment: console.log,
-                listCommentsForCommit: async () => ({ data: [] }),
-                updateCommitComment: console.log
-            }
-        }
-    }
-
-    const contextMock = {
-        repo: {
-            owner: 'testOwner',
-            repo: 'testRepo'
-        },
-        payload: {
-            number: 42,
-            pull_request: null,
-        },
-        sha: '0000000000000000000000000000000000000000',
-    }
-
-    module.exports({
-        github: githubMock,
-        context: contextMock,
-        fetch: fetch,
-        report: {
-            reportUrl: htmlUrl,
-            reportJsonUrl: jsonUrl,
-        }
-    })
-}
--- a/scripts/ingest_perf_test_result.py
+++ b/scripts/ingest_perf_test_result.py
@@ -1,14 +1,12 @@
 #!/usr/bin/env python3
 import argparse
 import json
-import logging
 import os
 import sys
 from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path

-import backoff
 import psycopg2
 import psycopg2.extras

@@ -37,20 +35,9 @@ def get_connection_cursor():
    connstr = os.getenv("DATABASE_URL")
    if not connstr:
        err("DATABASE_URL environment variable is not set")
-
-    @backoff.on_exception(backoff.expo, psycopg2.OperationalError, max_time=150)
-    def connect(connstr):
-        conn = psycopg2.connect(connstr, connect_timeout=30)
-        conn.autocommit = True
-        return conn
-
-    conn = connect(connstr)
-    try:
+    with psycopg2.connect(connstr, connect_timeout=30) as conn:
        with conn.cursor() as cur:
            yield cur
-    finally:
-        if conn is not None:
-            conn.close()


 def create_table(cur):
@@ -128,7 +115,6 @@ def main():
    parser.add_argument(
        "--ingest",
        type=Path,
-        required=True,
        help="Path to perf test result file, or directory with perf test result files",
    )
    parser.add_argument("--initdb", action="store_true", help="Initialuze database")
@@ -154,5 +140,4 @@ def main():


 if __name__ == "__main__":
-    logging.getLogger("backoff").addHandler(logging.StreamHandler())
    main()
--- a/scripts/ingest_regress_test_result.py
+++ b/scripts/ingest_regress_test_result.py
@@ -1,13 +1,11 @@
 #!/usr/bin/env python3
 import argparse
-import logging
 import os
 import re
 import sys
 from contextlib import contextmanager
 from pathlib import Path

-import backoff
 import psycopg2

 CREATE_TABLE = """
@@ -31,20 +29,9 @@ def get_connection_cursor():
    connstr = os.getenv("DATABASE_URL")
    if not connstr:
        err("DATABASE_URL environment variable is not set")
-
-    @backoff.on_exception(backoff.expo, psycopg2.OperationalError, max_time=150)
-    def connect(connstr):
-        conn = psycopg2.connect(connstr, connect_timeout=30)
-        conn.autocommit = True
-        return conn
-
-    conn = connect(connstr)
-    try:
+    with psycopg2.connect(connstr, connect_timeout=30) as conn:
        with conn.cursor() as cur:
            yield cur
-    finally:
-        if conn is not None:
-            conn.close()


 def create_table(cur):
@@ -114,5 +101,4 @@ def main():


 if __name__ == "__main__":
-    logging.getLogger("backoff").addHandler(logging.StreamHandler())
    main()
--- a/test_runner/fixtures/flaky.py
+++ b/test_runner/fixtures/flaky.py
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Any, List, MutableMapping, cast
+from typing import List

 import pytest
 from _pytest.config import Config
@@ -56,15 +56,3 @@ def pytest_collection_modifyitems(config: Config, items: List[pytest.Item]):
            # Rerun 3 times = 1 original run + 2 reruns
            log.info(f"Marking {item.nodeid} as flaky. It will be rerun up to 3 times")
            item.add_marker(pytest.mark.flaky(reruns=2))
-
-            # pytest-rerunfailures is not compatible with pytest-timeout (timeout is not set for reruns),
-            #   we can workaround it by setting `timeout_func_only` to True[1].
-            # Unfortunately, setting `timeout_func_only = True` globally in pytest.ini is broken[2],
-            #   but we still can do it using pytest marker.
-            #
-            # - [1] https://github.com/pytest-dev/pytest-rerunfailures/issues/99
-            # - [2] https://github.com/pytest-dev/pytest-timeout/issues/142
-            timeout_marker = item.get_closest_marker("timeout")
-            if timeout_marker is not None:
-                kwargs = cast(MutableMapping[str, Any], timeout_marker.kwargs)
-                kwargs["func_only"] = True
--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -57,17 +57,14 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
    "libmetrics_launch_timestamp",
    "libmetrics_build_info",
    "libmetrics_tracing_event_count_total",
-    "pageserver_materialized_cache_hits_total",
-    "pageserver_materialized_cache_hits_direct_total",
-    "pageserver_getpage_reconstruct_seconds_bucket",
-    "pageserver_getpage_reconstruct_seconds_count",
-    "pageserver_getpage_reconstruct_seconds_sum",
-    *[f"pageserver_basebackup_query_seconds_{x}" for x in ["bucket", "count", "sum"]],
 )

 PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
    "pageserver_current_logical_size",
    "pageserver_resident_physical_size",
+    "pageserver_getpage_reconstruct_seconds_bucket",
+    "pageserver_getpage_reconstruct_seconds_count",
+    "pageserver_getpage_reconstruct_seconds_sum",
    "pageserver_getpage_get_reconstruct_data_seconds_bucket",
    "pageserver_getpage_get_reconstruct_data_seconds_count",
    "pageserver_getpage_get_reconstruct_data_seconds_sum",
@@ -76,6 +73,8 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
    "pageserver_io_operations_seconds_count",
    "pageserver_io_operations_seconds_sum",
    "pageserver_last_record_lsn",
+    "pageserver_materialized_cache_hits_total",
+    "pageserver_materialized_cache_hits_direct_total",
    "pageserver_read_num_fs_layers_bucket",
    "pageserver_read_num_fs_layers_count",
    "pageserver_read_num_fs_layers_sum",
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1631,8 +1631,6 @@ class NeonPageserver(PgProtocol):
            r".*ERROR.*ancestor timeline \S+ is being stopped",
            # this is expected given our collaborative shutdown approach for the UploadQueue
            ".*Compaction failed, retrying in .*: queue is in state Stopped.*",
-            # Pageserver timeline deletion should be polled until it gets 404, so ignore it globally
-            ".*Error processing HTTP request: NotFound: Timeline .* was not found",
        ]

    def start(
@@ -2415,17 +2413,6 @@ class Endpoint(PgProtocol):

        return self

-    def respec(self, **kwargs):
-        """Update the endpoint.json file used by control_plane."""
-        # Read config
-        config_path = os.path.join(self.endpoint_path(), "endpoint.json")
-        with open(config_path, "r") as f:
-            data_dict = json.load(f)
-
-        # Write it back updated
-        with open(config_path, "w") as file:
-            json.dump(dict(data_dict, **kwargs), file, indent=4)
-
    def stop(self) -> "Endpoint":
        """
        Stop the Postgres instance if it's running.
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -342,11 +342,6 @@ class PageserverHttpClient(requests.Session):
        return res_json

    def timeline_delete(self, tenant_id: TenantId, timeline_id: TimelineId, **kwargs):
-        """
-        Note that deletion is not instant, it is scheduled and performed mostly in the background.
-        So if you need to wait for it to complete use `timeline_delete_wait_completed`.
-        For longer description consult with pageserver openapi spec.
-        """
        res = self.delete(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}", **kwargs
        )
--- a/test_runner/fixtures/pageserver/utils.py
+++ b/test_runner/fixtures/pageserver/utils.py
@@ -193,30 +193,19 @@ def wait_for_upload_queue_empty(
        time.sleep(0.2)


-def wait_timeline_detail_404(
-    pageserver_http: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId
-):
-    last_exc = None
-    for _ in range(2):
-        time.sleep(0.250)
-        try:
-            data = pageserver_http.timeline_detail(tenant_id, timeline_id)
-            log.error(f"detail {data}")
-        except PageserverApiException as e:
-            log.debug(e)
-            if e.status_code == 404:
-                return
-
-            last_exc = e
-
-    raise last_exc or RuntimeError(f"Timeline wasnt deleted in time, state: {data['state']}")
-
-
-def timeline_delete_wait_completed(
+def assert_timeline_detail_404(
    pageserver_http: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
-    **delete_args,
 ):
-    pageserver_http.timeline_delete(tenant_id=tenant_id, timeline_id=timeline_id, **delete_args)
-    wait_timeline_detail_404(pageserver_http, tenant_id, timeline_id)
+    """Asserts that timeline_detail returns 404, or dumps the detail."""
+    try:
+        data = pageserver_http.timeline_detail(tenant_id, timeline_id)
+        log.error(f"detail {data}")
+    except PageserverApiException as e:
+        log.error(e)
+        if e.status_code == 404:
+            return
+        else:
+            raise
+    raise Exception("detail succeeded (it should return 404)")
--- a/test_runner/performance/test_startup.py
+++ b/test_runner/performance/test_startup.py
@@ -4,6 +4,49 @@ import pytest
 import requests
 from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
 from fixtures.neon_fixtures import NeonEnvBuilder
+from fixtures.utils import get_dir_size
+
+
+# @pytest.mark.xfail  # We currently pass a 16MB pg_wal dir instead of creating it client-side
+def test_basebackup_size(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+
+    # Start: create the "test_startup" branch and bring up a compute endpoint on it
+    env.neon_cli.create_branch("test_startup")
+    endpoint = env.endpoints.create_start("test_startup")
+
+    # Get metrics: record the basebackup_bytes value reported by the endpoint, in KB
+    metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json()
+    basebackup_bytes = metrics["basebackup_bytes"]
+    zenbenchmark.record(
+        "basebackup_size", basebackup_bytes / 1024, "KB", report=MetricReport.LOWER_IS_BETTER
+    )
+
+    # Stop so we force flush of any files and we can measure datadir sizes
+    # NOTE the order of this line is important in relation to get_dir_size
+    datadir = endpoint.pgdata_dir
+    assert datadir is not None  # for mypy
+    endpoint.stop()
+
+    # Even though we don't insert any data, this number could be larger than basebackup
+    # size because there could theoretically be compression, or postgres could create
+    # or download data during startup. Currently if we don't send any pg_wal in the
+    # basebackup, postgres will start up just fine, but during sync-safekeepers,
+    # walproposer will try to recover the missing wal from safekeepers and cause the
+    # same amount of network IO. We want to notice that if it happens.
+    datadir_bytes = get_dir_size(datadir)
+    zenbenchmark.record(
+        "datadir_size", datadir_bytes / 1024, "KB", report=MetricReport.LOWER_IS_BETTER
+    )
+
+    wal_bytes = get_dir_size(datadir + "/pg_wal")
+    zenbenchmark.record("wal_size", wal_bytes / 1024, "KB", report=MetricReport.LOWER_IS_BETTER)
+
+    # Disabled limits: they seem reasonable, but increase them if they become impossible to meet
+    # assert basebackup_bytes < 70 * 1024
+    # assert datadir_bytes < 70 * 1024
+    # assert wal_bytes < 1 * 1024


 # Just start and measure duration.
@@ -30,20 +73,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

-    tenant_id = env.initial_tenant
-    timeline_id = env.neon_cli.create_branch("test_startup")
-
-    def get_synced_lsn():
-        """Assert safekeepers are synced and get the LSN."""
-        commit_lsns = [
-            sk.http_client().timeline_status(tenant_id, timeline_id).commit_lsn
-            for sk in env.safekeepers
-        ]
-        assert len(commit_lsns) == 3
-        assert len(set(commit_lsns)) == 1
-        return commit_lsns[0]
-
-    endpoint = None
+    env.neon_cli.create_branch("test_startup")

    # We do two iterations so we can see if the second startup is faster. It should
    # be because the compute node should already be configured with roles, databases,
@@ -51,10 +81,7 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
    for i in range(2):
        # Start
        with zenbenchmark.record_duration(f"{i}_start_and_select"):
-            if endpoint:
-                endpoint.start()
-            else:
-                endpoint = env.endpoints.create_start("test_startup")
+            endpoint = env.endpoints.create_start("test_startup")
            endpoint.safe_psql("select 1;")

        # Get metrics
@@ -73,10 +100,6 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc
        # Stop so we can restart
        endpoint.stop()

-        # Imitate optimizations that console would do for the second start
-        lsn = get_synced_lsn()
-        endpoint.respec(skip_pg_catalog_updates=True, skip_sync_safekeepers=lsn.lsn_int)
-

 # This test sometimes runs for longer than the global 5 minute timeout.
@pytest.mark.timeout(600)
--- a/test_runner/pg_clients/python/pg8000/requirements.txt
+++ b/test_runner/pg_clients/python/pg8000/requirements.txt
@@ -1,2 +1,2 @@
-pg8000==1.29.8
+pg8000==1.29.4
 scramp>=1.4.3
--- a/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock
+++ b/test_runner/pg_clients/rust/tokio-postgres/Cargo.lock
@@ -396,9 +396,9 @@ checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"

 [[package]]
 name = "openssl"
-version = "0.10.55"
+version = "0.10.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
+checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
 dependencies = [
 "bitflags",
 "cfg-if",
@@ -428,9 +428,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"

 [[package]]
 name = "openssl-sys"
-version = "0.9.90"
+version = "0.9.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6"
+checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
 dependencies = [
 "cc",
 "libc",
--- a/test_runner/pg_clients/rust/tokio-postgres/Dockerfile
+++ b/test_runner/pg_clients/rust/tokio-postgres/Dockerfile
@@ -1,4 +1,4 @@
-FROM rust:1.70
+FROM rust:1.69
 WORKDIR /source

 COPY . .
--- a/test_runner/pg_clients/swift/PostgresNIOExample/Package.resolved
+++ b/test_runner/pg_clients/swift/PostgresNIOExample/Package.resolved
@@ -5,8 +5,8 @@
      "kind" : "remoteSourceControl",
      "location" : "https://github.com/vapor/postgres-nio.git",
      "state" : {
-        "revision" : "061a0836d7c1887e04a975d1d2eaa2ef5fd7dfab",
-        "version" : "1.16.0"
+        "revision" : "dbf9c2eb596df39cba8ff3f74d74b2e6a31bd937",
+        "version" : "1.14.1"
      }
    },
    {
@@ -59,8 +59,8 @@
      "kind" : "remoteSourceControl",
      "location" : "https://github.com/apple/swift-nio.git",
      "state" : {
-        "revision" : "6213ba7a06febe8fef60563a4a7d26a4085783cf",
-        "version" : "2.54.0"
+        "revision" : "d1690f85419fdac8d54e350fb6d2ab9fd95afd75",
+        "version" : "2.51.1"
      }
    },
    {
--- a/test_runner/pg_clients/swift/PostgresNIOExample/Package.swift
+++ b/test_runner/pg_clients/swift/PostgresNIOExample/Package.swift
@@ -4,7 +4,7 @@ import PackageDescription
 let package = Package(
    name: "PostgresNIOExample",
    dependencies: [
-        .package(url: "https://github.com/vapor/postgres-nio.git", from: "1.16.0")
+        .package(url: "https://github.com/vapor/postgres-nio.git", from: "1.14.1")
    ],
    targets: [
        .executableTarget(
--- a/test_runner/pg_clients/typescript/postgresql-client/package-lock.json
+++ b/test_runner/pg_clients/typescript/postgresql-client/package-lock.json
@@ -5,7 +5,23 @@
  "packages": {
    "": {
      "dependencies": {
-        "postgresql-client": "2.5.9"
+        "postgresql-client": "2.5.5"
+      }
+    },
+    "node_modules/debug": {
+      "version": "4.3.4",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
+      "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
+      "dependencies": {
+        "ms": "2.1.2"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "peerDependenciesMeta": {
+        "supports-color": {
+          "optional": true
+        }
      }
    },
    "node_modules/doublylinked": {
@@ -25,6 +41,11 @@
        "putil-promisify": "^1.8.6"
      }
    },
+    "node_modules/ms": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
+      "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
+    },
    "node_modules/obuf": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",
@@ -42,28 +63,30 @@
      }
    },
    "node_modules/postgresql-client": {
-      "version": "2.5.9",
-      "resolved": "https://registry.npmjs.org/postgresql-client/-/postgresql-client-2.5.9.tgz",
-      "integrity": "sha512-s+kgTN6TfWLzehEyxw4Im4odnxVRCbZ0DEJzWS6SLowPAmB2m1/DOiOvZC0+ZVoi5AfbGE6SBqFxKguSyVAXZg==",
+      "version": "2.5.5",
+      "resolved": "https://registry.npmjs.org/postgresql-client/-/postgresql-client-2.5.5.tgz",
+      "integrity": "sha512-2Mu3i+6NQ9cnkoZNd0XeSZo9WoUpuWf4ZSiCCoDWSj82T93py2/SKXZ1aUaP8mVaU0oKpyyGe0IwLYZ1VHShnA==",
      "dependencies": {
+        "debug": "^4.3.4",
        "doublylinked": "^2.5.2",
        "lightning-pool": "^4.2.1",
        "postgres-bytea": "^3.0.0",
-        "power-tasks": "^1.7.0",
+        "power-tasks": "^1.6.4",
        "putil-merge": "^3.10.3",
        "putil-promisify": "^1.10.0",
        "putil-varhelpers": "^1.6.5"
      },
      "engines": {
-        "node": ">=16.0",
+        "node": ">=14.0",
        "npm": ">=7.0.0"
      }
    },
    "node_modules/power-tasks": {
-      "version": "1.7.0",
-      "resolved": "https://registry.npmjs.org/power-tasks/-/power-tasks-1.7.0.tgz",
-      "integrity": "sha512-rndZXCDxhuIDjPUJJvQwBDHaYagCkjvbPF/NA+omh/Ef4rAI9KtnvdA0k98dyiGpn1zXOpc6c2c0JWzg/xAhJg==",
+      "version": "1.6.4",
+      "resolved": "https://registry.npmjs.org/power-tasks/-/power-tasks-1.6.4.tgz",
+      "integrity": "sha512-LX8GGgEIP1N7jsZqlqZ275e6f1Ehq97APCEGj8uVO0NoEoB+77QUX12BFv3LmlNKfq4fIuNSPiHhyHFjqn2gfA==",
      "dependencies": {
+        "debug": "^4.3.4",
        "doublylinked": "^2.5.2",
        "strict-typed-events": "^2.3.1"
      },
@@ -109,9 +132,9 @@
      }
    },
    "node_modules/ts-gems": {
-      "version": "2.4.0",
-      "resolved": "https://registry.npmjs.org/ts-gems/-/ts-gems-2.4.0.tgz",
-      "integrity": "sha512-SdugYAXoWvbqrxLodIObzxhEKacDxh5LfAJIiIkiH7q5thvuuCzdmkdTVQYf7uEDrEpPhfx4tokDMamdO3be9A=="
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/ts-gems/-/ts-gems-2.3.0.tgz",
+      "integrity": "sha512-bUvrwrzlct7vfaNvtgMhynDf6lAki/kTtrNsIGhX6l7GJGK3s6b8Ro7dazOLXabV0m2jyShBzDQ8X1+h/C2Cug=="
    }
  }
 }
--- a/test_runner/pg_clients/typescript/postgresql-client/package.json
+++ b/test_runner/pg_clients/typescript/postgresql-client/package.json
@@ -1,6 +1,6 @@
 {
  "type": "module",
  "dependencies": {
-    "postgresql-client": "2.5.9"
+    "postgresql-client": "2.5.5"
  }
 }
--- a/test_runner/pg_clients/typescript/serverless-driver/Dockerfile
+++ b/test_runner/pg_clients/typescript/serverless-driver/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20
+FROM node:18
 WORKDIR /source

 COPY . .
--- a/test_runner/pg_clients/typescript/serverless-driver/package-lock.json
+++ b/test_runner/pg_clients/typescript/serverless-driver/package-lock.json
@@ -5,16 +5,16 @@
  "packages": {
    "": {
      "dependencies": {
-        "@neondatabase/serverless": "0.4.18",
+        "@neondatabase/serverless": "0.4.3",
        "ws": "8.13.0"
      }
    },
    "node_modules/@neondatabase/serverless": {
-      "version": "0.4.18",
-      "resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-0.4.18.tgz",
-      "integrity": "sha512-2TZnIyRGC/+0fjZ8TKCzaSTPUD94PM7NBGuantGZbUrbWyqBwGnUoRtdZAQ95qBKVHqORLVfymlv2NE+HQMFeA==",
+      "version": "0.4.3",
+      "resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-0.4.3.tgz",
+      "integrity": "sha512-U8tpuF5f0R5WRsciR7iaJ5S2h54DWa6Z6CEW+J4KgwyvRN3q3qDz0MibdfFXU0WqnRoi/9RSf/2XN4TfeaOCbQ==",
      "dependencies": {
-        "@types/pg": "8.6.6"
+        "@types/pg": "^8.6.6"
      }
    },
    "node_modules/@types/node": {
--- a/test_runner/pg_clients/typescript/serverless-driver/package.json
+++ b/test_runner/pg_clients/typescript/serverless-driver/package.json
@@ -1,7 +1,7 @@
 {
  "type": "module",
  "dependencies": {
-    "@neondatabase/serverless": "0.4.18",
+    "@neondatabase/serverless": "0.4.3",
    "ws": "8.13.0"
  }
 }
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -2,7 +2,6 @@ import copy
 import os
 import shutil
 import subprocess
-import tempfile
 from pathlib import Path
 from typing import Any, Optional

@@ -16,11 +15,7 @@ from fixtures.neon_fixtures import (
    PortDistributor,
 )
 from fixtures.pageserver.http import PageserverHttpClient
-from fixtures.pageserver.utils import (
-    timeline_delete_wait_completed,
-    wait_for_last_record_lsn,
-    wait_for_upload,
-)
+from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
 from fixtures.pg_version import PgVersion
 from fixtures.types import Lsn
 from pytest import FixtureRequest
@@ -422,7 +417,7 @@ def check_neon_works(
    )

    shutil.rmtree(repo_dir / "local_fs_remote_storage")
-    timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
+    pageserver_http.timeline_delete(tenant_id, timeline_id)
    pageserver_http.timeline_create(pg_version, tenant_id, timeline_id)
    pg_bin.run(
        ["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
@@ -449,7 +444,7 @@ def dump_differs(first: Path, second: Path, output: Path) -> bool:
    """

    with output.open("w") as stdout:
-        res = subprocess.run(
+        rv = subprocess.run(
            [
                "diff",
                "--unified",  # Make diff output more readable
@@ -461,53 +456,4 @@ def dump_differs(first: Path, second: Path, output: Path) -> bool:
            stdout=stdout,
        )

-    differs = res.returncode != 0
-
-    # TODO: Remove after https://github.com/neondatabase/neon/pull/4425 is merged, and a couple of releases are made
-    if differs:
-        with tempfile.NamedTemporaryFile(mode="w") as tmp:
-            tmp.write(PR4425_ALLOWED_DIFF)
-            tmp.flush()
-
-            allowed = subprocess.run(
-                [
-                    "diff",
-                    "--unified",  # Make diff output more readable
-                    r"--ignore-matching-lines=^---",  # Ignore diff headers
-                    r"--ignore-matching-lines=^\+\+\+",  # Ignore diff headers
-                    "--ignore-matching-lines=^@@",  # Ignore diff blocks location
-                    "--ignore-matching-lines=^ *$",  # Ignore lines with only spaces
-                    "--ignore-matching-lines=^ --.*",  # Ignore the " --" lines for compatibility with PG14
-                    "--ignore-blank-lines",
-                    str(output),
-                    str(tmp.name),
-                ],
-            )
-
-            differs = allowed.returncode != 0
-
-    return differs
-
-
-PR4425_ALLOWED_DIFF = """
--- /tmp/test_output/test_backward_compatibility[release-pg15]/compatibility_snapshot/dump.sql 2023-06-08 18:12:45.000000000 +0000
-+++ /tmp/test_output/test_backward_compatibility[release-pg15]/dump.sql        2023-06-13 07:25:35.211733653 +0000
-@@ -13,12 +13,20 @@
-
- CREATE ROLE cloud_admin;
- ALTER ROLE cloud_admin WITH SUPERUSER INHERIT CREATEROLE CREATEDB LOGIN REPLICATION BYPASSRLS;
-+CREATE ROLE neon_superuser;
-+ALTER ROLE neon_superuser WITH NOSUPERUSER INHERIT CREATEROLE CREATEDB NOLOGIN NOREPLICATION NOBYPASSRLS;
-
- --
- -- User Configurations
- --
-
-
-+--
-+-- Role memberships
-+--
-+
-+GRANT pg_read_all_data TO neon_superuser GRANTED BY cloud_admin;
-+GRANT pg_write_all_data TO neon_superuser GRANTED BY cloud_admin;
-"""
+    return rv.returncode != 0
--- a/test_runner/regress/test_hot_standby.py
+++ b/test_runner/regress/test_hot_standby.py
@@ -1,5 +1,3 @@
-import time
-
 import pytest
 from fixtures.neon_fixtures import NeonEnv

@@ -12,10 +10,9 @@ def test_hot_standby(neon_simple_env: NeonEnv):
        branch_name="main",
        endpoint_id="primary",
    ) as primary:
-        time.sleep(1)
        with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary") as secondary:
            primary_lsn = None
-            caught_up = False
+            cought_up = False
            queries = [
                "SHOW neon.timeline_id",
                "SHOW neon.tenant_id",
@@ -59,7 +56,7 @@ def test_hot_standby(neon_simple_env: NeonEnv):
                    res = s_cur.fetchone()
                    assert res is not None

-                while not caught_up:
+                while not cought_up:
                    with s_con.cursor() as secondary_cursor:
                        secondary_cursor.execute("SELECT pg_last_wal_replay_lsn()")
                        res = secondary_cursor.fetchone()
@@ -69,7 +66,7 @@ def test_hot_standby(neon_simple_env: NeonEnv):
                        # due to e.g. autovacuum, but that shouldn't impact the content
                        # of the tables, so we check whether we've replayed up to at
                        # least after the commit of the `test` table.
-                        caught_up = secondary_lsn >= primary_lsn
+                        cought_up = secondary_lsn >= primary_lsn

                # Explicit commit to flush any transient transaction-level state.
                s_con.commit()
--- a/test_runner/regress/test_import.py
+++ b/test_runner/regress/test_import.py
@@ -14,11 +14,7 @@ from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    PgBin,
 )
-from fixtures.pageserver.utils import (
-    timeline_delete_wait_completed,
-    wait_for_last_record_lsn,
-    wait_for_upload,
-)
+from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
 from fixtures.types import Lsn, TenantId, TimelineId
 from fixtures.utils import subprocess_capture

@@ -155,7 +151,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
        ".*files not bound to index_file.json, proceeding with their deletion.*"
    )

-    timeline_delete_wait_completed(client, tenant, timeline)
+    client.timeline_delete(tenant, timeline)

    # Importing correct backup works
    import_tar(base_tar, wal_tar)
--- a/test_runner/regress/test_layer_eviction.py
+++ b/test_runner/regress/test_layer_eviction.py
@@ -24,13 +24,7 @@ def test_basic_eviction(
        test_name="test_download_remote_layers_api",
    )

-    env = neon_env_builder.init_start(
-        initial_tenant_conf={
-            # disable gc and compaction background loops because they perform on-demand downloads
-            "gc_period": "0s",
-            "compaction_period": "0s",
-        }
-    )
+    env = neon_env_builder.init_start()
    client = env.pageserver.http_client()
    endpoint = env.endpoints.create_start("main")

@@ -53,11 +47,6 @@ def test_basic_eviction(
    client.timeline_checkpoint(tenant_id, timeline_id)
    wait_for_upload(client, tenant_id, timeline_id, current_lsn)

-    # disable compute & sks to avoid on-demand downloads by walreceiver / getpage
-    endpoint.stop()
-    for sk in env.safekeepers:
-        sk.stop()
-
    timeline_path = env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
    initial_local_layers = sorted(
        list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
--- a/test_runner/regress/test_proxy.py
+++ b/test_runner/regress/test_proxy.py
@@ -163,6 +163,7 @@ def test_forward_params_to_client(static_proxy: NeonProxy):
                assert conn.get_parameter_status(name) == value


+@pytest.mark.timeout(5)
 def test_close_on_connections_exit(static_proxy: NeonProxy):
    # Open two connections, send SIGTERM, then ensure that proxy doesn't exit
    # until after connections close.
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -20,7 +20,7 @@ from fixtures.neon_fixtures import (
 )
 from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
 from fixtures.pageserver.utils import (
-    timeline_delete_wait_completed,
+    assert_timeline_detail_404,
    wait_for_last_record_lsn,
    wait_for_upload,
    wait_until_tenant_active,
@@ -597,11 +597,14 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
    env.pageserver.allowed_errors.append(
        ".* ERROR .*Error processing HTTP request: InternalServerError\\(timeline is Stopping"
    )
+    client.timeline_delete(tenant_id, timeline_id)

+    env.pageserver.allowed_errors.append(f".*Timeline {tenant_id}/{timeline_id} was not found.*")
    env.pageserver.allowed_errors.append(
        ".*files not bound to index_file.json, proceeding with their deletion.*"
    )
-    timeline_delete_wait_completed(client, tenant_id, timeline_id)
+
+    wait_until(2, 0.5, lambda: assert_timeline_detail_404(client, tenant_id, timeline_id))

    assert not timeline_path.exists()

--- a/test_runner/regress/test_tenant_size.py
+++ b/test_runner/regress/test_tenant_size.py
@@ -11,12 +11,10 @@ from fixtures.neon_fixtures import (
    wait_for_wal_insert_lsn,
 )
 from fixtures.pageserver.http import PageserverHttpClient
-from fixtures.pageserver.utils import timeline_delete_wait_completed
 from fixtures.pg_version import PgVersion, xfail_on_postgres
 from fixtures.types import Lsn, TenantId, TimelineId


-@pytest.mark.xfail
 def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path):
    env = neon_simple_env
    (tenant_id, _) = env.neon_cli.create_tenant()
@@ -45,16 +43,12 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path):
        # we've disabled the autovacuum and checkpoint
        # so background processes should not change the size.
        # If this test will flake we should probably loosen the check
-        assert (
-            size == initial_size
-        ), f"starting idle compute should not change the tenant size (Currently {size}, expected {initial_size})"
+        assert size == initial_size, "starting idle compute should not change the tenant size"

    # the size should be the same, until we increase the size over the
    # gc_horizon
    size, inputs = http_client.tenant_size_and_modelinputs(tenant_id)
-    assert (
-        size == initial_size
-    ), f"tenant_size should not be affected by shutdown of compute (Currently {size}, expected {initial_size})"
+    assert size == initial_size, "tenant_size should not be affected by shutdown of compute"

    expected_inputs = {
        "segments": [
@@ -323,7 +317,6 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
    size_debug_file.write(size_debug)


-@pytest.mark.xfail
 def test_single_branch_get_tenant_size_grows(
    neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion
 ):
@@ -339,13 +332,13 @@ def test_single_branch_get_tenant_size_grows(
    # inserts is larger than gc_horizon. for example 0x20000 here hid the fact
    # that there next_gc_cutoff could be smaller than initdb_lsn, which will
    # obviously lead to issues when calculating the size.
-    gc_horizon = 0x3BA00
+    gc_horizon = 0x38000

    # it's a bit of a hack, but different versions of postgres have different
    # amount of WAL generated for the same amount of data. so we need to
    # adjust the gc_horizon accordingly.
    if pg_version == PgVersion.V14:
-        gc_horizon = 0x4A000
+        gc_horizon = 0x40000

    neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"

@@ -366,11 +359,11 @@ def test_single_branch_get_tenant_size_grows(
        if current_lsn - initdb_lsn >= gc_horizon:
            assert (
                size >= prev_size
-            ), f"tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
+            ), "tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size"
        else:
            assert (
                size > prev_size
-            ), f"tenant_size should grow, because we continue to add WAL to initial snapshot size (Currently at: {current_lsn}, Init at: {initdb_lsn})"
+            ), "tenant_size should grow, because we continue to add WAL to initial snapshot size"

    def get_current_consistent_size(
        env: NeonEnv,
@@ -635,12 +628,12 @@ def test_get_tenant_size_with_multiple_branches(
    size_debug_file_before.write(size_debug)

    # teardown, delete branches, and the size should be going down
-    timeline_delete_wait_completed(http_client, tenant_id, first_branch_timeline_id)
+    http_client.timeline_delete(tenant_id, first_branch_timeline_id)

    size_after_deleting_first = http_client.tenant_size(tenant_id)
    assert size_after_deleting_first < size_after_thinning_branch

-    timeline_delete_wait_completed(http_client, tenant_id, second_branch_timeline_id)
+    http_client.timeline_delete(tenant_id, second_branch_timeline_id)
    size_after_deleting_second = http_client.tenant_size(tenant_id)
    assert size_after_deleting_second < size_after_deleting_first

--- a/test_runner/regress/test_tenant_tasks.py
+++ b/test_runner/regress/test_tenant_tasks.py
@@ -1,10 +1,6 @@
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnvBuilder
-from fixtures.pageserver.utils import (
-    assert_tenant_state,
-    timeline_delete_wait_completed,
-    wait_until_tenant_active,
-)
+from fixtures.pageserver.utils import assert_tenant_state, wait_until_tenant_active
 from fixtures.types import TenantId, TimelineId
 from fixtures.utils import wait_until

@@ -28,7 +24,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
    def delete_all_timelines(tenant: TenantId):
        timelines = [TimelineId(t["timeline_id"]) for t in client.timeline_list(tenant)]
        for t in timelines:
-            timeline_delete_wait_completed(client, tenant, t)
+            client.timeline_delete(tenant, t)

    # Create tenant, start compute
    tenant, _ = env.neon_cli.create_tenant()
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -21,7 +21,6 @@ from fixtures.neon_fixtures import (
    RemoteStorageKind,
    available_remote_storages,
 )
-from fixtures.pageserver.utils import timeline_delete_wait_completed
 from fixtures.types import Lsn, TenantId, TimelineId
 from fixtures.utils import wait_until
 from prometheus_client.samples import Sample
@@ -214,7 +213,7 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
    # Test (a subset of) pageserver global metrics
    for metric in PAGESERVER_GLOBAL_METRICS:
        ps_samples = ps_metrics.query_all(metric, {})
-        assert len(ps_samples) > 0, f"expected at least one sample for {metric}"
+        assert len(ps_samples) > 0
        for sample in ps_samples:
            labels = ",".join([f'{key}="{value}"' for key, value in sample.labels.items()])
            log.info(f"{sample.name}{{{labels}}} {sample.value}")
@@ -319,10 +318,9 @@ def test_pageserver_with_empty_tenants(
    client.tenant_create(tenant_with_empty_timelines)
    temp_timelines = client.timeline_list(tenant_with_empty_timelines)
    for temp_timeline in temp_timelines:
-        timeline_delete_wait_completed(
-            client, tenant_with_empty_timelines, TimelineId(temp_timeline["timeline_id"])
+        client.timeline_delete(
+            tenant_with_empty_timelines, TimelineId(temp_timeline["timeline_id"])
        )
-
    files_in_timelines_dir = sum(
        1
        for _p in Path.iterdir(
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -17,10 +17,9 @@ from fixtures.neon_fixtures import (
 )
 from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
-    timeline_delete_wait_completed,
+    assert_timeline_detail_404,
    wait_for_last_record_lsn,
    wait_for_upload,
-    wait_timeline_detail_404,
    wait_until_tenant_active,
    wait_until_timeline_state,
 )
@@ -84,7 +83,7 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
    wait_until(
        number_of_iterations=3,
        interval=0.2,
-        func=lambda: timeline_delete_wait_completed(ps_http, env.initial_tenant, leaf_timeline_id),
+        func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id),
    )

    assert not timeline_path.exists()
@@ -95,15 +94,15 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
        match=f"Timeline {env.initial_tenant}/{leaf_timeline_id} was not found",
    ) as exc:
        ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
-    assert exc.value.status_code == 404

-    wait_until(
-        number_of_iterations=3,
-        interval=0.2,
-        func=lambda: timeline_delete_wait_completed(
-            ps_http, env.initial_tenant, parent_timeline_id
-        ),
-    )
+        # FIXME leaves tenant without timelines, should we prevent deletion of root timeline?
+        wait_until(
+            number_of_iterations=3,
+            interval=0.2,
+            func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id),
+        )
+
+    assert exc.value.status_code == 404

    # Check that we didn't pick up the timeline again after restart.
    # See https://github.com/neondatabase/neon/issues/3560
@@ -144,6 +143,7 @@ def test_delete_timeline_post_rm_failure(
    ps_http.configure_failpoints((failpoint_name, "return"))

    ps_http.timeline_delete(env.initial_tenant, env.initial_timeline)
+
    timeline_info = wait_until_timeline_state(
        pageserver_http=ps_http,
        tenant_id=env.initial_tenant,
@@ -165,7 +165,13 @@ def test_delete_timeline_post_rm_failure(

    # this should succeed
    # this also checks that delete can be retried even when timeline is in Broken state
-    timeline_delete_wait_completed(ps_http, env.initial_tenant, env.initial_timeline)
+    ps_http.timeline_delete(env.initial_tenant, env.initial_timeline, timeout=2)
+    with pytest.raises(PageserverApiException) as e:
+        ps_http.timeline_detail(env.initial_tenant, env.initial_timeline)
+
+    assert e.value.status_code == 404
+
+    env.pageserver.allowed_errors.append(f".*NotFound: Timeline.*{env.initial_timeline}.*")
    env.pageserver.allowed_errors.append(
        f".*{env.initial_timeline}.*timeline directory not found, proceeding anyway.*"
    )
@@ -241,7 +247,13 @@ def test_timeline_resurrection_on_attach(
        pass

    # delete new timeline
-    timeline_delete_wait_completed(ps_http, tenant_id=tenant_id, timeline_id=branch_timeline_id)
+    ps_http.timeline_delete(tenant_id=tenant_id, timeline_id=branch_timeline_id)
+
+    env.pageserver.allowed_errors.append(
+        f".*Timeline {tenant_id}/{branch_timeline_id} was not found.*"
+    )
+
+    wait_until(2, 0.5, lambda: assert_timeline_detail_404(ps_http, tenant_id, branch_timeline_id))

    ##### Stop the pageserver instance, erase all its data
    env.endpoints.stop_all()
@@ -326,6 +338,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
    )

    ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id)
+
    timeline_info = wait_until_timeline_state(
        pageserver_http=ps_http,
        tenant_id=env.initial_tenant,
@@ -344,15 +357,12 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
    # Wait for tenant to finish loading.
    wait_until_tenant_active(ps_http, tenant_id=env.initial_tenant, iterations=10, period=1)

-    try:
-        data = ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
-        log.debug(f"detail {data}")
-    except PageserverApiException as e:
-        log.debug(e)
-        if e.status_code != 404:
-            raise
-    else:
-        raise Exception("detail succeeded (it should return 404)")
+    env.pageserver.allowed_errors.append(
+        f".*Timeline {env.initial_tenant}/{leaf_timeline_id} was not found.*"
+    )
+    wait_until(
+        2, 0.5, lambda: assert_timeline_detail_404(ps_http, env.initial_tenant, leaf_timeline_id)
+    )

    assert (
        not leaf_timeline_path.exists()
@@ -379,8 +389,13 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
    assert env.initial_timeline is not None

    for timeline_id in (intermediate_timeline_id, env.initial_timeline):
-        timeline_delete_wait_completed(
-            ps_http, tenant_id=env.initial_tenant, timeline_id=timeline_id
+        ps_http.timeline_delete(env.initial_tenant, timeline_id)
+
+        env.pageserver.allowed_errors.append(
+            f".*Timeline {env.initial_tenant}/{timeline_id} was not found.*"
+        )
+        wait_until(
+            2, 0.5, lambda: assert_timeline_detail_404(ps_http, env.initial_tenant, timeline_id)
        )

        assert_prefix_empty(
@@ -404,27 +419,23 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
    )


-@pytest.mark.parametrize(
-    "stuck_failpoint",
-    ["persist_deleted_index_part", "in_progress_delete"],
-)
-def test_concurrent_timeline_delete_stuck_on(
-    neon_env_builder: NeonEnvBuilder, stuck_failpoint: str
+def test_concurrent_timeline_delete_if_first_stuck_at_index_upload(
+    neon_env_builder: NeonEnvBuilder,
 ):
    """
-    If delete is stuck console will eventually retry deletion.
-    So we need to be sure that these requests wont interleave with each other.
-    In this tests we check two places where we can spend a lot of time.
-    This is a regression test because there was a bug when DeletionGuard wasnt propagated
-    to the background task.
-
-    Ensure that when retry comes if we're still stuck request will get an immediate error response,
-    signalling to console that it should retry later.
+    If we're stuck uploading the index file with the is_delete flag,
+    eventually console will hang up and retry.
+    If we're still stuck at the retry time, ensure that the retry
+    fails with status 500, signalling to console that it should retry
+    later.
+    Ideally, timeline_delete should return 202 Accepted and require
+    console to poll for completion, but that would require changing
+    the API contract.
    """

    neon_env_builder.enable_remote_storage(
        remote_storage_kind=RemoteStorageKind.MOCK_S3,
-        test_name=f"concurrent_timeline_delete_stuck_on_{stuck_failpoint}",
+        test_name="test_concurrent_timeline_delete_if_first_stuck_at_index_upload",
    )

    env = neon_env_builder.init_start()
@@ -434,14 +445,13 @@ def test_concurrent_timeline_delete_stuck_on(
    ps_http = env.pageserver.http_client()

    # make the first call sleep practically forever
-    ps_http.configure_failpoints((stuck_failpoint, "pause"))
+    failpoint_name = "persist_index_part_with_deleted_flag_after_set_before_upload_pause"
+    ps_http.configure_failpoints((failpoint_name, "pause"))

    def first_call(result_queue):
        try:
            log.info("first call start")
-            timeline_delete_wait_completed(
-                ps_http, env.initial_tenant, child_timeline_id, timeout=10
-            )
+            ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=10)
            log.info("first call success")
            result_queue.put("success")
        except Exception:
@@ -456,17 +466,17 @@ def test_concurrent_timeline_delete_stuck_on(

        def first_call_hit_failpoint():
            assert env.pageserver.log_contains(
-                f".*{child_timeline_id}.*at failpoint {stuck_failpoint}"
+                f".*{child_timeline_id}.*at failpoint {failpoint_name}"
            )

        wait_until(50, 0.1, first_call_hit_failpoint)

        # make the second call and assert behavior
        log.info("second call start")
-        error_msg_re = "Timeline deletion is already in progress"
+        error_msg_re = "timeline deletion is already in progress"
        with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
            ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
-        assert second_call_err.value.status_code == 409
+        assert second_call_err.value.status_code == 500
        env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*")
        # the second call will try to transition the timeline into Stopping state as well
        env.pageserver.allowed_errors.append(
@@ -474,12 +484,8 @@ def test_concurrent_timeline_delete_stuck_on(
        )
        log.info("second call failed as expected")

-        # ensure it is not 404 and stopping
-        detail = ps_http.timeline_detail(env.initial_tenant, child_timeline_id)
-        assert detail["state"] == "Stopping"
-
        # by now we know that the second call failed, let's ensure the first call will finish
-        ps_http.configure_failpoints((stuck_failpoint, "off"))
+        ps_http.configure_failpoints((failpoint_name, "off"))

        result = first_call_result.get()
        assert result == "success"
@@ -492,10 +498,8 @@ def test_concurrent_timeline_delete_stuck_on(

 def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
    """
-    If the client hangs up before we start the index part upload but after deletion is scheduled
-    we mark it
+    If the client hangs up before we start the index part upload but after we mark it
    deleted in local memory, a subsequent delete_timeline call should be able to do
-
    another delete timeline operation.

    This tests cancel safety up to the given failpoint.
@@ -511,18 +515,12 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):

    ps_http = env.pageserver.http_client()

-    failpoint_name = "persist_deleted_index_part"
+    failpoint_name = "persist_index_part_with_deleted_flag_after_set_before_upload_pause"
    ps_http.configure_failpoints((failpoint_name, "pause"))

    with pytest.raises(requests.exceptions.Timeout):
        ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2)

-    env.pageserver.allowed_errors.append(
-        f".*{child_timeline_id}.*Timeline deletion is already in progress.*"
-    )
-    with pytest.raises(PageserverApiException, match="Timeline deletion is already in progress"):
-        ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2)
-
    # make sure the timeout was due to the failpoint
    at_failpoint_log_message = f".*{child_timeline_id}.*at failpoint {failpoint_name}.*"

@@ -554,7 +552,12 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
    wait_until(50, 0.1, first_request_finished)

    # check that the timeline is gone
-    wait_timeline_detail_404(ps_http, env.initial_tenant, child_timeline_id)
+    notfound_message = f"Timeline {env.initial_tenant}/{child_timeline_id} was not found"
+    env.pageserver.allowed_errors.append(".*" + notfound_message)
+    with pytest.raises(PageserverApiException, match=notfound_message) as exc:
+        ps_http.timeline_detail(env.initial_tenant, child_timeline_id)
+
+    assert exc.value.status_code == 404


@pytest.mark.parametrize(
@@ -613,7 +616,12 @@ def test_timeline_delete_works_for_remote_smoke(
    for timeline_id in reversed(timeline_ids):
        # note that we need to finish previous deletion before scheduling next one
        # otherwise we can get an "HasChildren" error if deletion is not fast enough (real_s3)
-        timeline_delete_wait_completed(ps_http, tenant_id=tenant_id, timeline_id=timeline_id)
+        ps_http.timeline_delete(tenant_id=tenant_id, timeline_id=timeline_id)
+
+        env.pageserver.allowed_errors.append(
+            f".*Timeline {env.initial_tenant}/{timeline_id} was not found.*"
+        )
+        wait_until(2, 0.5, lambda: assert_timeline_detail_404(ps_http, tenant_id, timeline_id))

        assert_prefix_empty(
            neon_env_builder,
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -24,7 +24,6 @@ from fixtures.neon_fixtures import (
 from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
 from fixtures.pageserver.utils import (
    assert_tenant_state,
-    timeline_delete_wait_completed,
    wait_for_upload_queue_empty,
    wait_until_tenant_active,
 )
@@ -273,7 +272,7 @@ def test_timeline_initial_logical_size_calculation_cancellation(
            if deletion_method == "tenant_detach":
                client.tenant_detach(tenant_id)
            elif deletion_method == "timeline_delete":
-                timeline_delete_wait_completed(client, tenant_id, timeline_id)
+                client.timeline_delete(tenant_id, timeline_id)
            delete_timeline_success.put(True)
        except PageserverApiException:
            delete_timeline_success.put(False)
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -31,11 +31,7 @@ from fixtures.neon_fixtures import (
    SafekeeperPort,
    available_remote_storages,
 )
-from fixtures.pageserver.utils import (
-    timeline_delete_wait_completed,
-    wait_for_last_record_lsn,
-    wait_for_upload,
-)
+from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
 from fixtures.pg_version import PgVersion
 from fixtures.types import Lsn, TenantId, TimelineId
 from fixtures.utils import get_dir_size, query_scalar, start_in_background
@@ -552,15 +548,15 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
                    f"sk_id={sk.id} to flush {last_lsn}",
                )

-    ps_http = env.pageserver.http_client()
-    pageserver_lsn = Lsn(ps_http.timeline_detail(tenant_id, timeline_id)["last_record_lsn"])
+    ps_cli = env.pageserver.http_client()
+    pageserver_lsn = Lsn(ps_cli.timeline_detail(tenant_id, timeline_id)["last_record_lsn"])
    lag = last_lsn - pageserver_lsn
    log.info(
        f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb"
    )

    endpoint.stop_and_destroy()
-    timeline_delete_wait_completed(ps_http, tenant_id, timeline_id)
+    ps_cli.timeline_delete(tenant_id, timeline_id)

    # Also delete and manually create timeline on safekeepers -- this tests
    # scenario of manual recovery on different set of safekeepers.
@@ -575,21 +571,11 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re

    pg_version = sk.http_client().timeline_status(tenant_id, timeline_id).pg_version

-    # Terminate first all safekeepers to prevent communication unexpectantly
-    # advancing peer_horizon_lsn.
    for sk in env.safekeepers:
        cli = sk.http_client()
        cli.timeline_delete_force(tenant_id, timeline_id)
        # restart safekeeper to clear its in-memory state
-        sk.stop()
-    # wait all potenital in flight pushes to broker arrive before starting
-    # safekeepers (even without sleep, it is very unlikely they are not
-    # delivered yet).
-    time.sleep(1)
-
-    for sk in env.safekeepers:
-        sk.start()
-        cli = sk.http_client()
+        sk.stop().start()
        cli.timeline_create(tenant_id, timeline_id, pg_version, last_lsn)
        f_partial_path = (
            Path(sk.data_dir()) / str(tenant_id) / str(timeline_id) / f_partial_saved.name
@@ -597,7 +583,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
        shutil.copy(f_partial_saved, f_partial_path)

    # recreate timeline on pageserver from scratch
-    ps_http.timeline_create(
+    ps_cli.timeline_create(
        pg_version=PgVersion(pg_version),
        tenant_id=tenant_id,
        new_timeline_id=timeline_id,
@@ -612,7 +598,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
        if elapsed > wait_lsn_timeout:
            raise RuntimeError("Timed out waiting for WAL redo")

-        tenant_status = ps_http.tenant_status(tenant_id)
+        tenant_status = ps_cli.tenant_status(tenant_id)
        if tenant_status["state"]["slug"] == "Loading":
            log.debug(f"Tenant {tenant_id} is still loading, retrying")
        else:
--- a/test_runner/regress/test_wal_receiver.py
+++ b/test_runner/regress/test_wal_receiver.py
@@ -1,5 +1,3 @@
-import time
-
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
 from fixtures.types import Lsn, TenantId
@@ -42,10 +40,7 @@ def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
 # Kills one of the safekeepers and ensures that only the active ones are printed in the state.
 def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuilder):
    # Trigger WAL wait timeout faster
-    neon_env_builder.pageserver_config_override = """
-        wait_lsn_timeout = "1s"
-        tenant_config={walreceiver_connect_timeout = "2s", lagging_wal_timeout = "2s"}
-    """
+    neon_env_builder.pageserver_config_override = "wait_lsn_timeout = '1s'"
    # Have notable SK ids to ensure we check logs for their presence, not some other random numbers
    neon_env_builder.safekeepers_id_start = 12345
    neon_env_builder.num_safekeepers = 3
@@ -75,8 +70,6 @@ def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuil
    stopped_safekeeper_id = stopped_safekeeper.id
    log.info(f"Stopping safekeeper {stopped_safekeeper.id}")
    stopped_safekeeper.stop()
-    # sleep until stopped safekeeper is removed from candidates
-    time.sleep(2)

    # Spend some more time inserting, to ensure SKs report updated statuses and walreceiver in PS have time to update its connection stats.
    insert_test_elements(env, tenant_id, start=elements_to_insert + 1, count=elements_to_insert)
Author	SHA1	Message	Date
Konstantin Knizhnik	2a6e2249bd	Use flate2::write::GzEncoder instead of async_compression::tokio::write::GzipEncoder	2023-06-14 12:00:48 +03:00
Bojan Serafimov	1815ae72f8	WIP	2023-06-13 17:40:10 -04:00
Bojan Serafimov	46c89c4190	update comment	2023-06-12 20:49:56 -04:00
Bojan Serafimov	1fc5c23c01	More checks	2023-06-12 20:39:57 -04:00
Bojan Serafimov	ddb98d6f77	cleanup	2023-06-12 18:52:34 -04:00
Bojan Serafimov	45b71fecec	Add size metric and test	2023-06-12 18:25:11 -04:00