Compare commits

..

3 Commits

Author SHA1 Message Date
Bojan Serafimov
688f68ecba Undo whitespace 2022-08-04 09:43:27 +02:00
Bojan Serafimov
fb2ffac8b9 Ignore metrics static 2022-08-04 09:42:27 +02:00
Bojan Serafimov
8173e36a1b Find all problematic statics 2022-08-04 09:30:22 +02:00
75 changed files with 590 additions and 1820 deletions

View File

@@ -27,26 +27,6 @@ inputs:
description: 'Whether to upload the performance report'
required: false
default: 'false'
run_with_real_s3:
description: 'Whether to pass real s3 credentials to the test suite'
required: false
default: 'false'
real_s3_bucket:
description: 'Bucket name for real s3 tests'
required: false
default: ''
real_s3_region:
description: 'Region name for real s3 tests'
required: false
default: ''
real_s3_access_key_id:
description: 'Access key id'
required: false
default: ''
real_s3_secret_access_key:
description: 'Secret access key'
required: false
default: ''
runs:
using: "composite"
@@ -83,8 +63,6 @@ runs:
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
shell: bash -euxo pipefail {0}
run: |
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
@@ -99,14 +77,6 @@ runs:
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
echo "REAL S3 ENABLED"
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
mkdir -p "$PERF_REPORT_DIR"

View File

@@ -35,16 +35,6 @@ jobs:
GIT_VERSION: ${{ github.sha }}
steps:
- name: Fix git ownerwhip
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Checkout
uses: actions/checkout@v3
with:
@@ -219,11 +209,7 @@ jobs:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_others
run_with_real_s3: true
real_s3_bucket: ci-tests-s3
real_s3_region: us-west-2
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data

View File

@@ -11,15 +11,17 @@ than it was before.
## Submitting changes
1. Get at least one +1 on your PR before you push.
1. Make a PR for every change.
Even seemingly trivial patches can break things in surprising ways.
Use of common sense is OK. If you're only fixing a typo in a comment,
it's probably fine to just push it. But if in doubt, open a PR.
2. Get at least one +1 on your PR before you push.
For simple patches, it will only take a minute for someone to review
it.
2. Don't force push small changes after making the PR ready for review.
Doing so will force readers to re-read your entire PR, which will delay
the review process.
3. Always keep the CI green.
Do not push, if the CI failed on your PR. Even if you think it's not

59
Cargo.lock generated
View File

@@ -154,9 +154,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "axum"
version = "0.5.13"
version = "0.5.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9496f0c1d1afb7a2af4338bbe1d969cddfead41d87a9fb3aaa6d0bbc7af648"
checksum = "d16705af05732b7d3258ec0f7b73c03a658a28925e050d8852d5b568ee8bcf4e"
dependencies = [
"async-trait",
"axum-core",
@@ -317,6 +317,15 @@ dependencies = [
"serde",
]
[[package]]
name = "cast"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
dependencies = [
"rustc_version",
]
[[package]]
name = "cast"
version = "0.3.0"
@@ -495,8 +504,8 @@ name = "control_plane"
version = "0.1.0"
dependencies = [
"anyhow",
"lazy_static",
"nix",
"once_cell",
"pageserver",
"postgres",
"regex",
@@ -570,7 +579,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f"
dependencies = [
"atty",
"cast",
"cast 0.3.0",
"clap 2.34.0",
"criterion-plot",
"csv",
@@ -591,11 +600,11 @@ dependencies = [
[[package]]
name = "criterion-plot"
version = "0.4.5"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
dependencies = [
"cast",
"cast 0.2.7",
"itertools",
]
@@ -671,9 +680,9 @@ dependencies = [
[[package]]
name = "crypto-common"
version = "0.1.6"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
checksum = "2ccfd8c0ee4cce11e45b3fd6f9d5e69e0cc62912aa6a0cb1bf4617b0eba5a12f"
dependencies = [
"generic-array",
"typenum",
@@ -1107,9 +1116,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.26.2"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d"
checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"
[[package]]
name = "git-version"
@@ -1175,9 +1184,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.12.3"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022"
[[package]]
name = "heck"
@@ -1379,7 +1388,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
"hashbrown 0.12.2",
]
[[package]]
@@ -1591,8 +1600,8 @@ dependencies = [
name = "metrics"
version = "0.1.0"
dependencies = [
"lazy_static",
"libc",
"once_cell",
"prometheus",
"workspace_hack",
]
@@ -1842,9 +1851,9 @@ dependencies = [
[[package]]
name = "os_str_bytes"
version = "6.2.0"
version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"
checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
[[package]]
name = "pageserver"
@@ -1870,6 +1879,7 @@ dependencies = [
"humantime-serde",
"hyper",
"itertools",
"lazy_static",
"metrics",
"nix",
"once_cell",
@@ -2115,9 +2125,9 @@ dependencies = [
"crc32c",
"env_logger",
"hex",
"lazy_static",
"log",
"memoffset",
"once_cell",
"postgres",
"rand",
"regex",
@@ -2277,9 +2287,9 @@ dependencies = [
"hex",
"hmac 0.12.1",
"hyper",
"lazy_static",
"md5",
"metrics",
"once_cell",
"parking_lot 0.12.1",
"pin-project-lite",
"rand",
@@ -2725,9 +2735,9 @@ dependencies = [
[[package]]
name = "rustversion"
version = "1.0.8"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8"
checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
[[package]]
name = "ryu"
@@ -2753,6 +2763,7 @@ dependencies = [
"hex",
"humantime",
"hyper",
"lazy_static",
"metrics",
"once_cell",
"postgres",
@@ -3606,9 +3617,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
[[package]]
name = "unicode-ident"
version = "1.0.2"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
[[package]]
name = "unicode-normalization"
@@ -3669,9 +3680,9 @@ dependencies = [
"hex-literal",
"hyper",
"jsonwebtoken",
"lazy_static",
"metrics",
"nix",
"once_cell",
"pin-project-lite",
"postgres",
"postgres-protocol",

View File

@@ -9,7 +9,7 @@ postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8
serde = { version = "1.0", features = ["derive"] }
serde_with = "1.12.0"
toml = "0.5"
once_cell = "1.13.0"
lazy_static = "1.4"
regex = "1"
anyhow = "1.0"
thiserror = "1"

View File

@@ -30,14 +30,14 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_stdout_file =
fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
format!(
"Failed to create etcd stout file in directory {}",
"Failed to create ectd stout file in directory {}",
etcd_data_dir.display()
)
})?;
let etcd_stderr_file =
fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
format!(
"Failed to create etcd stderr file in directory {}",
"Failed to create ectd stderr file in directory {}",
etcd_data_dir.display()
)
})?;

View File

@@ -51,11 +51,7 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in [
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN",
] {
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}

View File

@@ -5,7 +5,7 @@
/// enough to extract a few settings we need in Zenith, assuming you don't do
/// funny stuff like include-directives or funny escaping.
use anyhow::{bail, Context, Result};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashMap;
use std::fmt;
@@ -19,7 +19,9 @@ pub struct PostgresConf {
hash: HashMap<String, String>,
}
static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
lazy_static! {
static ref CONF_LINE_RE: Regex = Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap();
}
impl PostgresConf {
pub fn new() -> PostgresConf {
@@ -137,10 +139,10 @@ fn escape_str(s: &str) -> String {
//
// This regex is a bit more conservative than the rules in guc-file.l, so we quote some
// strings that PostgreSQL would accept without quoting, but that's OK.
static UNQUOTED_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap());
lazy_static! {
static ref UNQUOTED_RE: Regex =
Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap();
}
if UNQUOTED_RE.is_match(s) {
s.to_string()
} else {

View File

@@ -247,7 +247,7 @@ impl SafekeeperNode {
// Shutting down may take a long time,
// if safekeeper flushes a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
for _ in 0..100 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
@@ -272,11 +272,9 @@ impl SafekeeperNode {
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(100));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1));
}
bail!("Failed to stop safekeeper with pid {}", pid);

View File

@@ -318,7 +318,7 @@ impl PageServerNode {
// Shutting down may take a long time,
// if pageserver checkpoints a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
for _ in 0..100 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
@@ -344,11 +344,9 @@ impl PageServerNode {
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(100));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1));
}
bail!("Failed to stop pageserver with pid {}", pid);

View File

@@ -1,8 +1,6 @@
#!/bin/sh
set -eux
pageserver_id_param="${NODE_ID:-10}"
broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
if [ "$broker_endpoints_param" != "absent" ]; then
broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
@@ -10,12 +8,10 @@ else
broker_endpoints_param=''
fi
remote_storage_param="${REMOTE_STORAGE:-}"
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=${pageserver_id_param}" $broker_endpoints_param $remote_storage_param
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data

View File

@@ -52,8 +52,10 @@
- [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [settings.md](./settings.md)
#FIXME: move these under sourcetree.md
#- [pageserver/README.md](/pageserver/README.md)
#- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)
#- [test_runner/README.md](/test_runner/README.md)
#- [safekeeper/README.md](/safekeeper/README.md)
# RFCs
@@ -79,5 +81,4 @@
- [014-storage-lsm](rfcs/014-storage-lsm.md)
- [015-storage-messaging](rfcs/015-storage-messaging.md)
- [016-connection-routing](rfcs/016-connection-routing.md)
- [017-pageserver-op-atomicity](rfcs/017-pageserver-op-atomicity.md)
- [cluster-size-limits](rfcs/cluster-size-limits.md)

View File

@@ -75,7 +75,7 @@ layer's Segment and range of LSNs.
There are two kinds of layers, in-memory and on-disk layers. In-memory
layers are used to ingest incoming WAL, and provide fast access
to the recent page versions. On-disk layers are stored as files on disk, and
are immutable. See [pageserver-storage.md](./pageserver-storage.md) for more.
are immutable. See pageserver/src/layered_repository/README.md for more.
### Layer file (on-disk layer)
@@ -111,7 +111,7 @@ PostgreSQL LSNs and functions to monitor them:
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):
Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md) for more information.
Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.

View File

@@ -68,6 +68,8 @@ There are the following implementations present:
* local filesystem — to use in tests mainly
* AWS S3 - to use in production
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and corresponding Rust file docs, parameters documentation can be found at [settings docs](../docs/settings.md).
The backup service is disabled by default and can be enabled to interact with a single remote storage.
CLI examples:
@@ -116,7 +118,7 @@ implemented by the LayeredRepository object in
`layered_repository.rs`. There is only that one implementation of the
Repository trait, but it's still a useful abstraction that keeps the
interface for the low-level storage functionality clean. The layered
storage format is described in [pageserver-storage.md](./pageserver-storage.md).
storage format is described in layered_repository/README.md.
Each repository consists of multiple Timelines. Timeline is a
workhorse that accepts page changes from the WAL, and serves

View File

@@ -1,153 +0,0 @@
# Durability and atomicity of tenant/timeline operations
The pageserver has 8 tenant/timeline operations, listed below. In
addition to that, data can be appended to a timeline by WAL receiver,
pages can be requested by the compute node, and tenant/timeline status
can be queried through the mgmt API. But these are the operations that
modify state in pageserver or in S3, and need to worry about crash
safety.
To make these operations atomic and recoverable, let's introduce a new
"tenant index file", called `tenant.json`. For each tenant, there is
one tenant index file, and it contains a list of all timelines for
that tenant:
{
tenant_id: a93a94724945e95e1a0c448004ece2ec
timelines: [
{ timeline_id: "9979cd302340a058606473912651f27f",
ancestor_id: ""
ancestor_lsn: "0/0"
},
{ timeline_id: "f0a6f3372d273dd9ca3480d19e6b565c",
ancestor_id: "9979cd302340a058606473912651f27f"
ancestor_lsn: "1/1698C48"
},
]
}
The file only contains the immutable metadata of each timeline, like
the point it was branched from. The changing parts, like
disk_consistent_lsn, are still stored in the per-timeline metadata
file.
This file allows us to resolve some ambiguous situations, like
remembering that a tenant exists when it doesn't have any timelines.
It also allows us to quickly fetch the list of all timelines of a
tenant, without having to perform S3 LIST operations.
Below is a brief description of all the pageserver tenant/timeline
operations, and how the steps of creating/deleting local files or
directories and uploads to S3 are performed. The steps are listed in
such an order that each operation can be sanely recovered or aborted,
if the pageserver crashes while the operation is being performed.
## Create tenant
Create an empty tenant. It doesn't have any timelines initially.
1. Create local tenant-directory with .temp extension
2. Create tenant.json file in the directory, with a special flag
indicating that the tenant-creation is in progress
3. Rename the local tenant directory in place
4. Upload the tenant.json file to S3, without the flag
5. Update the local file, removing the flag
At pageserver startup, if we see a tenant.json file with the special
flag, check if the tenant exists in S3. If not, remove the local directory.
Otherwise remove the flag from local file.
## Create timeline
Create a timeline for a tenant, as result of running initdb.
1. create timeline directory locally, with .temp extension
2. run initdb, creating the initial set of layers
3. upload all layer files to S3
4. upload metadata file to S3
5. update tenant.json file in S3
6. Rename local directory in place
If we crash before step 5, S3 may have a timeline metadata file and some
layer files, without corresponding entry in tenant.json file. That's OK.
Whenever we see that, we can delete the leftover timeline files.
If we want to make that less scary, we could update a tenant.json file in S3
twice. First, add the new timeline ID to the file with a flag indicating
that it's being created. Do that before uploading anything else to S3. And
then in step 5, update tenant.json to indicate that the creation is complete.
## Branch timeline
Create a new timeline with an existing timeline as parent
1. create timeline directory locally, with .temp extension
2. create metadata file in the local directory
3. upload metadata file to S3
4. update tenant.json file in S3
5. Rename local directory in place
Like with Create timeline, if we crash between steps 3 and 4, we will
leave behind a timeline metadata file with no corresponding entry in
tenant.json. That's harmless.
## Delete timeline
1. rename local timeline directory to have .temp extension
2. Update tenant.json file in S3
3. delete index file from S3
4. delete layer files from S3
5. delete local directory
Like with creation, if this is interrupted, we will leave behind
timeline files in S3 with no corresponding entry in tenant.json. If we
want to make that less scary, we can update tenant.json in step 2 with
a tombstone flag for the timeline we're removing, instead of removing
the entry for it outright.
## Delete tenant
1. rename local tenant directory to have .temp extension
2. delete tenant.json file in S3
3. delete all timeline index files from S3
4. delete all layer files from S3
5. delete local directory
Like with timeline creation, this can leave behind files with no corresponding
tenant.json file. We can make it less scary by adding tombstones.
## Attach tenant
1. create local tenant directory with .temp extension
2. Download tenant.json file
3. download index files for every timeline
4. download all layer files (in the future, skip this and download them on demand)
5. rename local tenant directory in place
## Detach tenant
1. rename local tenant directory to have .temp extension
2. delete local directory
## Load tenant
This happens automatically at pageserver startup, for every tenant that is found
in the tenants-directory. I.e. for every tenant that was attached to the pageserver
before the crash or shutdown.
1. download tenant.json file
2. for every timeline that's in remote tenant.json:
1. download remote index file
2. download all layer files that are missing locally (skip in future, and download on-demand)
3. schedule upload of all files present locally, but missing remotely
4. schedule index file upload
3. delete all locally present timeline directories that's not in tenant.json
On startup, delete everything with the .temp extension
- we could skip some of the downloads if we stored the S3 etag of the object in the local file,
and compared that

View File

@@ -28,7 +28,7 @@ The pageserver has a few different duties:
- Receive WAL from the WAL service and decode it.
- Replay WAL that's applicable to the chunks that the Page Server maintains
For more detailed info, see [pageserver-services.md](./pageserver-services.md)
For more detailed info, see [/pageserver/README](/pageserver/README.md)
`/proxy`:
@@ -57,7 +57,7 @@ PostgreSQL extension that contains functions needed for testing and debugging.
The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
It acts as a holding area and redistribution center for recently generated WAL.
For more detailed info, see [walservice.md](./walservice.md)
For more detailed info, see [/safekeeper/README](/safekeeper/README.md)
`/workspace_hack`:
The workspace_hack crate exists only to pin down some dependencies.

View File

@@ -75,8 +75,8 @@ safekeepers. The Paxos and crash recovery algorithm ensures that only
one primary node can be actively streaming WAL to the quorum of
safekeepers.
See [this section](safekeeper-protocol.md) for a more detailed description of
the consensus protocol. spec/ contains TLA+ specification of it.
See README_PROTO.md for a more detailed description of the consensus
protocol. spec/ contains TLA+ specification of it.
# Q&A

View File

@@ -9,7 +9,7 @@
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "1.12.0"
once_cell = "1.13.0"
once_cell = "1.8.0"
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -6,5 +6,5 @@ edition = "2021"
[dependencies]
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
libc = "0.2"
once_cell = "1.13.0"
lazy_static = "1.4"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -2,7 +2,7 @@
//! make sure that we use the same dep version everywhere.
//! Otherwise, we might not see all metrics registered via
//! a default registry.
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
pub use prometheus::opts;
pub use prometheus::register;
@@ -41,22 +41,19 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
prometheus::gather()
}
static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
lazy_static! {
static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
"libmetrics_disk_io_bytes_total",
"Bytes written and read from disk, grouped by the operation (read|write)",
&["io_operation"]
)
.expect("Failed to register disk i/o bytes int gauge vec")
});
static MAXRSS_KB: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
.expect("Failed to register disk i/o bytes int gauge vec");
static ref MAXRSS_KB: IntGauge = register_int_gauge!(
"libmetrics_maxrss_kb",
"Memory usage (Maximum Resident Set Size)"
)
.expect("Failed to register maxrss_kb int gauge")
});
.expect("Failed to register maxrss_kb int gauge");
}
pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,

View File

@@ -10,13 +10,13 @@ use std::io::{Read, Result, Write};
/// # use std::io::{Result, Read};
/// # use metrics::{register_int_counter, IntCounter};
/// # use metrics::CountedReader;
/// # use once_cell::sync::Lazy;
/// #
/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
/// # lazy_static::lazy_static! {
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
/// # "int_counter",
/// # "let's count something!"
/// # ).unwrap()
/// # });
/// # ).unwrap();
/// # }
/// #
/// fn do_some_reads(stream: impl Read, count: usize) -> Result<Vec<u8>> {
/// let mut reader = CountedReader::new(stream, |cnt| {
@@ -85,13 +85,13 @@ impl<T: Read> Read for CountedReader<'_, T> {
/// # use std::io::{Result, Write};
/// # use metrics::{register_int_counter, IntCounter};
/// # use metrics::CountedWriter;
/// # use once_cell::sync::Lazy;
/// #
/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
/// # lazy_static::lazy_static! {
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
/// # "int_counter",
/// # "let's count something!"
/// # ).unwrap()
/// # });
/// # ).unwrap();
/// # }
/// #
/// fn do_some_writes(stream: impl Write, payload: &[u8]) -> Result<()> {
/// let mut writer = CountedWriter::new(stream, |cnt| {

View File

@@ -12,7 +12,7 @@ byteorder = "1.4.3"
anyhow = "1.0"
crc32c = "0.6.0"
hex = "0.4.3"
once_cell = "1.13.0"
lazy_static = "1.4"
log = "0.4.14"
memoffset = "0.6.2"
thiserror = "1.0"

View File

@@ -2,7 +2,7 @@
//! Common utilities for dealing with PostgreSQL relation files.
//!
use crate::pg_constants;
use once_cell::sync::OnceCell;
use lazy_static::lazy_static;
use regex::Regex;
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
@@ -54,14 +54,11 @@ pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
/// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
///
pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
RELFILE_RE.get_or_init(|| {
Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
});
lazy_static! {
static ref RELFILE_RE: Regex =
Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
}
let caps = RELFILE_RE
.get()
.unwrap()
.captures(fname)
.ok_or(FilePathError::InvalidFileName)?;

View File

@@ -16,7 +16,7 @@ use crate::XLogRecord;
use crate::XLOG_PAGE_MAGIC;
use crate::pg_constants::WAL_SEGMENT_SIZE;
use anyhow::{anyhow, bail, ensure};
use anyhow::{bail, ensure};
use byteorder::{ByteOrder, LittleEndian};
use bytes::BytesMut;
use bytes::{Buf, Bytes};
@@ -159,7 +159,7 @@ fn find_end_of_wal_segment(
let mut buf = [0u8; XLOG_BLCKSZ];
let file_name = XLogFileName(tli, segno, wal_seg_size);
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
let mut file = File::open(data_dir.join(file_name.clone() + ".partial"))?;
let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
file.seek(SeekFrom::Start(offs as u64))?;
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
@@ -396,13 +396,10 @@ pub fn find_end_of_wal(
let mut high_tli: TimeLineID = 0;
let mut high_ispartial = false;
for entry in fs::read_dir(data_dir)?.flatten() {
for entry in fs::read_dir(data_dir).unwrap().flatten() {
let ispartial: bool;
let entry_name = entry.file_name();
let fname = entry_name
.to_str()
.ok_or_else(|| anyhow!("Invalid file name"))?;
let fname = entry_name.to_str().unwrap();
/*
* Check if the filename looks like an xlog file, or a .partial file.
*/
@@ -414,7 +411,7 @@ pub fn find_end_of_wal(
continue;
}
let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
if !ispartial && entry.metadata()?.len() != wal_seg_size as u64 {
if !ispartial && entry.metadata().unwrap().len() != wal_seg_size as u64 {
continue;
}
if segno > high_segno

View File

@@ -10,7 +10,7 @@ anyhow = "1.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
once_cell = "1.13.0"
once_cell = "1.8.0"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres_ffi = { path = "../" }
tempfile = "3.2"

View File

@@ -7,7 +7,7 @@ edition = "2021"
anyhow = { version = "1.0", features = ["backtrace"] }
async-trait = "0.1"
metrics = { version = "0.1", path = "../metrics" }
once_cell = "1.13.0"
once_cell = "1.8.0"
rusoto_core = "0.48"
rusoto_s3 = "0.48"
serde = { version = "1.0", features = ["derive"] }

View File

@@ -66,9 +66,6 @@ pub trait RemoteStorage: Send + Sync {
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
/// so this method doesnt need to.
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,

View File

@@ -116,7 +116,7 @@ impl RemoteStorage for LocalFs {
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(prefix),
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
None => Cow::Borrowed(&self.storage_root),
};
get_all_files(path.as_ref(), false).await

View File

@@ -171,25 +171,17 @@ impl S3Bucket {
let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
// session token is used when authorizing through sso
// which is typically the case when testing locally on developer machine
let session_token = std::env::var("AWS_SESSION_TOKEN").ok();
let client = if access_key_id.is_none() && secret_access_key.is_none() {
debug!("Using IAM-based AWS access");
S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
} else {
debug!(
"Using credentials-based AWS access. Session token is set: {}",
session_token.is_some()
);
debug!("Using credentials-based AWS access");
S3Client::new_with(
request_dispatcher,
StaticProvider::new(
StaticProvider::new_minimal(
access_key_id.unwrap_or_default(),
secret_access_key.unwrap_or_default(),
session_token,
None,
),
region,
)
@@ -312,24 +304,32 @@ impl RemoteStorage for S3Bucket {
Ok(document_keys)
}
/// See the doc for `RemoteStorage::list_prefixes`
/// Note: it wont include empty "directories"
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
// get the passed prefix or if it is not set use prefix_in_bucket value
let list_prefix = prefix
.map(|p| p.0)
.or_else(|| self.prefix_in_bucket.clone())
.map(|mut p| {
let list_prefix = match prefix {
Some(prefix) => {
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
// if there is no trailing / in default prefix and
// supplied prefix does not start with "/" insert it
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
{
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
prefix_in_bucket.push_str(&prefix.0);
// required to end with a separator
// otherwise request will return only the entry of a prefix
if !p.ends_with(S3_PREFIX_SEPARATOR) {
p.push(S3_PREFIX_SEPARATOR);
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
p
});
Some(prefix_in_bucket)
}
None => self.prefix_in_bucket.clone(),
};
let mut document_keys = Vec::new();

View File

@@ -8,6 +8,7 @@ anyhow = "1.0"
bincode = "1.3"
bytes = "1.0.1"
hyper = { version = "0.14.7", features = ["full"] }
lazy_static = "1.4.0"
pin-project-lite = "0.2.7"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
@@ -27,8 +28,6 @@ rustls = "0.20.2"
rustls-split = "0.3.0"
git-version = "0.3.5"
serde_with = "1.12.0"
once_cell = "1.13.0"
metrics = { path = "../metrics" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -4,8 +4,8 @@ use crate::zid::ZTenantId;
use anyhow::anyhow;
use hyper::header::AUTHORIZATION;
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
use lazy_static::lazy_static;
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
use once_cell::sync::Lazy;
use routerify::ext::RequestExt;
use routerify::RequestInfo;
use routerify::{Middleware, Router, RouterBuilder, RouterService};
@@ -16,13 +16,13 @@ use std::net::TcpListener;
use super::error::ApiError;
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
lazy_static! {
static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
"libmetrics_metric_handler_requests_total",
"Number of metric requests made"
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
info!("{} {} {}", info.method(), info.uri().path(), res.status(),);

View File

@@ -7,7 +7,7 @@ use std::{
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use utils::postgres_backend::{AuthType, Handler, PostgresBackend};
@@ -19,15 +19,16 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
(server_stream, client_stream)
}
static KEY: Lazy<rustls::PrivateKey> = Lazy::new(|| {
let mut cursor = Cursor::new(include_bytes!("key.pem"));
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
});
static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
});
lazy_static! {
static ref KEY: rustls::PrivateKey = {
let mut cursor = Cursor::new(include_bytes!("key.pem"));
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
};
static ref CERT: rustls::Certificate = {
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
};
}
#[test]
fn ssl() {

View File

@@ -884,7 +884,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
match sub_match.subcommand() {
Some(("start", start_match)) => {
if let Err(e) = pageserver.start(&pageserver_config_overrides(start_match)) {
eprintln!("pageserver start failed: {e}");
eprintln!("pageserver start failed: {}", e);
exit(1);
}
}
@@ -906,19 +906,10 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
if let Err(e) = pageserver.start(&pageserver_config_overrides(restart_match)) {
eprintln!("pageserver start failed: {e}");
eprintln!("pageserver start failed: {}", e);
exit(1);
}
}
Some(("status", _)) => match PageServerNode::from_env(env).check_status() {
Ok(_) => println!("Page server is up and running"),
Err(err) => {
eprintln!("Page server is not available: {}", err);
exit(1);
}
},
Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
None => bail!("no pageserver subcommand provided"),
}

View File

@@ -21,6 +21,7 @@ futures = "0.3.13"
hex = "0.4.3"
hyper = "0.14"
itertools = "0.10.3"
lazy_static = "1.4.0"
clap = "3.0"
daemonize = "0.4.1"
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
@@ -47,7 +48,7 @@ tracing = "0.1.27"
signal-hook = "0.3.10"
url = "2"
nix = "0.23"
once_cell = "1.13.0"
once_cell = "1.8.0"
crossbeam-utils = "0.8.5"
fail = "0.5.0"
git-version = "0.3.5"

View File

@@ -37,7 +37,7 @@ pub fn import_timeline_from_postgres_datadir<T: DatadirTimeline>(
// TODO this shoud be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
// Then fishing out pg_control would be unnecessary
let mut modification = tline.begin_modification(lsn);
let mut modification = tline.begin_modification();
modification.init_empty()?;
// Import all but pg_wal
@@ -56,12 +56,12 @@ pub fn import_timeline_from_postgres_datadir<T: DatadirTimeline>(
if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
pg_control = Some(control_file);
}
modification.flush()?;
modification.flush(lsn)?;
}
}
// We're done importing all the data files.
modification.commit()?;
modification.commit(lsn)?;
// We expect the Postgres server to be shut down cleanly.
let pg_control = pg_control.context("pg_control file not found")?;
@@ -267,7 +267,7 @@ fn import_wal<T: DatadirTimeline>(
waldecoder.feed_bytes(&buf);
let mut nrecords = 0;
let mut modification = tline.begin_modification(endpoint);
let mut modification = tline.begin_modification();
let mut decoded = DecodedWALRecord::default();
while last_lsn <= endpoint {
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
@@ -301,7 +301,7 @@ pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
base_lsn: Lsn,
) -> Result<()> {
info!("importing base at {}", base_lsn);
let mut modification = tline.begin_modification(base_lsn);
let mut modification = tline.begin_modification();
modification.init_empty()?;
let mut pg_control: Option<ControlFileData> = None;
@@ -319,7 +319,7 @@ pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
// We found the pg_control file.
pg_control = Some(res);
}
modification.flush()?;
modification.flush(base_lsn)?;
}
tar::EntryType::Directory => {
debug!("directory {:?}", file_path);
@@ -333,7 +333,7 @@ pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
// sanity check: ensure that pg_control is loaded
let _pg_control = pg_control.context("pg_control file not found")?;
modification.commit()?;
modification.commit(base_lsn)?;
Ok(())
}
@@ -385,7 +385,7 @@ pub fn import_wal_from_tar<T: DatadirTimeline, Reader: Read>(
waldecoder.feed_bytes(&bytes[offset..]);
let mut modification = tline.begin_modification(end_lsn);
let mut modification = tline.begin_modification();
let mut decoded = DecodedWALRecord::default();
while last_lsn <= end_lsn {
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {

View File

@@ -5,7 +5,7 @@
//! get/put call, walking back the timeline branching history as needed.
//!
//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
//! directory. See docs/pageserver-storage.md for how the files are managed.
//! directory. See layered_repository/README for how the files are managed.
//! In addition to the layer files, there is a metadata file in the same
//! directory that contains information about the timeline, in particular its
//! parent timeline, and the last LSN that has been written to disk.

View File

@@ -5,7 +5,7 @@
use crate::page_cache;
use crate::page_cache::{ReadBufResult, PAGE_SZ};
use bytes::Bytes;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::ops::{Deref, DerefMut};
use std::os::unix::fs::FileExt;
use std::sync::atomic::AtomicU64;
@@ -117,7 +117,9 @@ where
}
}
static NEXT_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
lazy_static! {
static ref NEXT_ID: AtomicU64 = AtomicU64::new(1);
}
/// An adapter for reading a (virtual) file using the page cache.
///

View File

@@ -8,7 +8,7 @@ use crate::page_cache;
use crate::page_cache::PAGE_SZ;
use crate::page_cache::{ReadBufResult, WriteBufResult};
use crate::virtual_file::VirtualFile;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::cmp::min;
use std::collections::HashMap;
use std::fs::OpenOptions;
@@ -21,15 +21,15 @@ use utils::zid::{ZTenantId, ZTimelineId};
use std::os::unix::fs::FileExt;
///
/// This is the global cache of file descriptors (File objects).
///
static EPHEMERAL_FILES: Lazy<RwLock<EphemeralFiles>> = Lazy::new(|| {
RwLock::new(EphemeralFiles {
lazy_static! {
///
/// This is the global cache of file descriptors (File objects).
///
static ref EPHEMERAL_FILES: RwLock<EphemeralFiles> = RwLock::new(EphemeralFiles {
next_file_id: 1,
files: HashMap::new(),
})
});
});
}
pub struct EphemeralFiles {
next_file_id: u64,

View File

@@ -15,18 +15,19 @@ use crate::layered_repository::storage_layer::Layer;
use crate::layered_repository::storage_layer::{range_eq, range_overlaps};
use crate::repository::Key;
use anyhow::Result;
use lazy_static::lazy_static;
use metrics::{register_int_gauge, IntGauge};
use once_cell::sync::Lazy;
use std::collections::VecDeque;
use std::ops::Range;
use std::sync::Arc;
use tracing::*;
use utils::lsn::Lsn;
static NUM_ONDISK_LAYERS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
.expect("failed to define a metric")
});
lazy_static! {
static ref NUM_ONDISK_LAYERS: IntGauge =
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
.expect("failed to define a metric");
}
///
/// LayerMap tracks what layers exist on a timeline.

View File

@@ -4,11 +4,11 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::Bytes;
use fail::fail_point;
use itertools::Itertools;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use tracing::*;
use std::cmp::{max, min, Ordering};
use std::collections::{hash_map::Entry, HashMap, HashSet};
use std::collections::HashSet;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io::Write;
@@ -38,9 +38,7 @@ use crate::layered_repository::{
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::pgdatadir_mapping::BlockNumber;
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::reltag::RelTag;
use crate::tenant_config::TenantConfOpt;
use crate::DatadirTimeline;
@@ -60,102 +58,76 @@ use crate::walredo::WalRedoManager;
use crate::CheckpointConfig;
use crate::{page_cache, storage_sync};
/// Prometheus histogram buckets (in seconds) that capture the majority of
/// latencies in the microsecond range but also extend far enough up to distinguish
/// "bad" from "really bad".
fn get_buckets_for_critical_operations() -> Vec<f64> {
let buckets_per_digit = 5;
let min_exponent = -6;
let max_exponent = 2;
let mut buckets = vec![];
// Compute 10^(exp / buckets_per_digit) instead of 10^(1/buckets_per_digit)^exp
// because it's more numerically stable and doesn't result in numbers like 9.999999
for exp in (min_exponent * buckets_per_digit)..=(max_exponent * buckets_per_digit) {
buckets.push(10_f64.powf(exp as f64 / buckets_per_digit as f64))
}
buckets
// Metrics collected on operations on the storage repository.
lazy_static! {
pub static ref STORAGE_TIME: HistogramVec = register_histogram_vec!(
"pageserver_storage_operations_seconds",
"Time spent on storage operations",
&["operation", "tenant_id", "timeline_id"]
)
.expect("failed to define a metric");
}
// Metrics collected on operations on the storage repository.
pub static STORAGE_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_storage_operations_seconds",
"Time spent on storage operations",
&["operation", "tenant_id", "timeline_id"],
get_buckets_for_critical_operations(),
)
.expect("failed to define a metric")
});
// Metrics collected on operations on the storage repository.
static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref RECONSTRUCT_TIME: HistogramVec = register_histogram_vec!(
"pageserver_getpage_reconstruct_seconds",
"Time spent in reconstruct_value",
&["tenant_id", "timeline_id"],
get_buckets_for_critical_operations(),
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
lazy_static! {
static ref MATERIALIZED_PAGE_CACHE_HIT: IntCounterVec = register_int_counter_vec!(
"pageserver_materialized_cache_hits_total",
"Number of cache hits from materialized page cache",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("failed to define a metric");
static ref WAIT_LSN_TIME: HistogramVec = register_histogram_vec!(
"pageserver_wait_lsn_seconds",
"Time spent waiting for WAL to arrive",
&["tenant_id", "timeline_id"],
get_buckets_for_critical_operations(),
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
lazy_static! {
static ref LAST_RECORD_LSN: IntGaugeVec = register_int_gauge_vec!(
"pageserver_last_record_lsn",
"Last record LSN grouped by timeline",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
// Metrics for determining timeline's physical size.
// A layered timeline's physical is defined as the total size of
// (delta/image) layer files on disk.
static CURRENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
lazy_static! {
static ref CURRENT_PHYSICAL_SIZE: UIntGaugeVec = register_uint_gauge_vec!(
"pageserver_current_physical_size",
"Current physical size grouped by timeline",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
// or in testing they estimate how much we would upload if we did.
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
lazy_static! {
static ref NUM_PERSISTENT_FILES_CREATED: IntCounter = register_int_counter!(
"pageserver_created_persistent_files_total",
"Number of files created that are meant to be uploaded to cloud storage",
)
.expect("failed to define a metric")
});
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.expect("failed to define a metric");
static ref PERSISTENT_BYTES_WRITTEN: IntCounter = register_int_counter!(
"pageserver_written_persistent_bytes_total",
"Total bytes written that are meant to be uploaded to cloud storage",
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
#[derive(Clone)]
pub enum LayeredTimelineEntry {
@@ -323,9 +295,6 @@ pub struct LayeredTimeline {
/// or None if WAL receiver has not received anything for this timeline
/// yet.
pub last_received_wal: Mutex<Option<WalReceiverInfo>>,
/// Relation size cache
rel_size_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,
}
pub struct WalReceiverInfo {
@@ -337,42 +306,7 @@ pub struct WalReceiverInfo {
/// Inherit all the functions from DatadirTimeline, to provide the
/// functionality to store PostgreSQL relations, SLRUs, etc. in a
/// LayeredTimeline.
impl DatadirTimeline for LayeredTimeline {
fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber> {
let rel_size_cache = self.rel_size_cache.read().unwrap();
if let Some((cached_lsn, nblocks)) = rel_size_cache.get(tag) {
if lsn >= *cached_lsn {
return Some(*nblocks);
}
}
None
}
fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {
let mut rel_size_cache = self.rel_size_cache.write().unwrap();
match rel_size_cache.entry(tag) {
Entry::Occupied(mut entry) => {
let cached_lsn = entry.get_mut();
if lsn >= cached_lsn.0 {
*cached_lsn = (lsn, nblocks);
}
}
Entry::Vacant(entry) => {
entry.insert((lsn, nblocks));
}
}
}
fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {
let mut rel_size_cache = self.rel_size_cache.write().unwrap();
rel_size_cache.insert(tag, (lsn, nblocks));
}
fn remove_cached_rel_size(&self, tag: &RelTag) {
let mut rel_size_cache = self.rel_size_cache.write().unwrap();
rel_size_cache.remove(tag);
}
}
impl DatadirTimeline for LayeredTimeline {}
///
/// Information about how much history needs to be retained, needed by
@@ -443,6 +377,8 @@ impl Timeline for LayeredTimeline {
/// Look up the value with the given a key
fn get(&self, key: Key, lsn: Lsn) -> Result<Bytes> {
debug_assert!(lsn <= self.get_last_record_lsn());
// Check the page cache. We will get back the most recent page with lsn <= `lsn`.
// The cached image can be returned directly if there is no WAL between the cached image
// and requested LSN. The cached image can also be used to reduce the amount of WAL needed
@@ -682,7 +618,6 @@ impl LayeredTimeline {
repartition_threshold: 0,
last_received_wal: Mutex::new(None),
rel_size_cache: RwLock::new(HashMap::new()),
};
result.repartition_threshold = result.get_checkpoint_distance() / 10;
result

View File

@@ -22,7 +22,7 @@ pub mod walreceiver;
pub mod walrecord;
pub mod walredo;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use tracing::info;
use crate::thread_mgr::ThreadKind;
@@ -42,14 +42,14 @@ pub const STORAGE_FORMAT_VERSION: u16 = 3;
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
lazy_static! {
static ref LIVE_CONNECTIONS_COUNT: IntGaugeVec = register_int_gauge_vec!(
"pageserver_live_connections",
"Number of live network connections",
&["pageserver_connection_kind"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
pub const LOG_FILE_NAME: &str = "pageserver.log";

View File

@@ -55,6 +55,7 @@ use utils::{
use crate::layered_repository::writeback_ephemeral_file;
use crate::repository::Key;
// TODO move ownership into a new PageserverState struct
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
const TEST_PAGE_CACHE_SIZE: usize = 50;

View File

@@ -11,7 +11,7 @@
use anyhow::{bail, ensure, Context, Result};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use regex::Regex;
use std::io::{self, Read};
use std::net::TcpListener;
@@ -434,15 +434,15 @@ const TIME_BUCKETS: &[f64] = &[
0.1, // 1/10 s
];
static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref SMGR_QUERY_TIME: HistogramVec = register_histogram_vec!(
"pageserver_smgr_query_seconds",
"Time spent on smgr query handling",
&["smgr_query_type", "tenant_id", "timeline_id"],
TIME_BUCKETS.into()
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
impl PageServerHandler {
pub fn new(conf: &'static PageServerConf, auth: Option<Arc<JwtAuth>>) -> Self {

View File

@@ -56,16 +56,13 @@ pub trait DatadirTimeline: Timeline {
/// This provides a transaction-like interface to perform a bunch
/// of modifications atomically.
///
/// To ingest a WAL record, call begin_modification(lsn) to get a
/// To ingest a WAL record, call begin_modification() to get a
/// DatadirModification object. Use the functions in the object to
/// modify the repository state, updating all the pages and metadata
/// that the WAL record affects. When you're done, call commit() to
/// commit the changes.
/// that the WAL record affects. When you're done, call commit(lsn) to
/// commit the changes. All the changes will be stamped with the specified LSN.
///
/// Lsn stored in modification is advanced by `ingest_record` and
/// is used by `commit()` to update `last_record_lsn`.
///
/// Calling commit() will flush all the changes and reset the state,
/// Calling commit(lsn) will flush all the changes and reset the state,
/// so the `DatadirModification` struct can be reused to perform the next modification.
///
/// Note that any pending modifications you make through the
@@ -73,7 +70,7 @@ pub trait DatadirTimeline: Timeline {
/// functions of the timeline until you finish! And if you update the
/// same page twice, the last update wins.
///
fn begin_modification(&self, lsn: Lsn) -> DatadirModification<Self>
fn begin_modification(&self) -> DatadirModification<Self>
where
Self: Sized,
{
@@ -82,7 +79,6 @@ pub trait DatadirTimeline: Timeline {
pending_updates: HashMap::new(),
pending_deletions: Vec::new(),
pending_nblocks: 0,
lsn,
}
}
@@ -124,10 +120,6 @@ pub trait DatadirTimeline: Timeline {
fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
ensure!(tag.relnode != 0, "invalid relnode");
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
return Ok(nblocks);
}
if (tag.forknum == pg_constants::FSM_FORKNUM
|| tag.forknum == pg_constants::VISIBILITYMAP_FORKNUM)
&& !self.get_rel_exists(tag, lsn)?
@@ -141,21 +133,13 @@ pub trait DatadirTimeline: Timeline {
let key = rel_size_to_key(tag);
let mut buf = self.get(key, lsn)?;
let nblocks = buf.get_u32_le();
// Update relation size cache
self.update_cached_rel_size(tag, lsn, nblocks);
Ok(nblocks)
Ok(buf.get_u32_le())
}
/// Does relation exist?
fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool> {
ensure!(tag.relnode != 0, "invalid relnode");
// first try to lookup relation in cache
if let Some(_nblocks) = self.get_cached_rel_size(&tag, lsn) {
return Ok(true);
}
// fetch directory listing
let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
let buf = self.get(key, lsn)?;
@@ -461,18 +445,6 @@ pub trait DatadirTimeline: Timeline {
Ok(result.to_keyspace())
}
/// Get cached size of relation if it not updated after specified LSN
fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber>;
/// Update cached relation size if there is no more recent update
fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
/// Store cached relation size
fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
/// Remove cached relation size
fn remove_cached_rel_size(&self, tag: &RelTag);
}
/// DatadirModification represents an operation to ingest an atomic set of
@@ -485,9 +457,6 @@ pub struct DatadirModification<'a, T: DatadirTimeline> {
/// in the state in 'tline' yet.
pub tline: &'a T,
/// Lsn assigned by begin_modification
pub lsn: Lsn,
// The modifications are not applied directly to the underlying key-value store.
// The put-functions add the modifications here, and they are flushed to the
// underlying key-value store by the 'finish' function.
@@ -697,11 +666,9 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
self.pending_nblocks += nblocks as isize;
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Even if nblocks > 0, we don't insert any actual blocks here. That's up to the
// caller.
Ok(())
}
@@ -717,9 +684,6 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Update logical database size.
self.pending_nblocks -= old_size as isize - nblocks as isize;
Ok(())
@@ -739,9 +703,6 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
self.pending_nblocks += nblocks as isize - old_size as isize;
}
Ok(())
@@ -767,9 +728,6 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let old_size = self.get(size_key)?.get_u32_le();
self.pending_nblocks -= old_size as isize;
// Remove enty from relation size cache
self.tline.remove_cached_rel_size(&rel);
// Delete size entry, as well as all blocks
self.delete(rel_key_range(rel));
@@ -884,7 +842,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
/// retains all the metadata, but data pages are flushed. That's again OK
/// for bulk import, where you are just loading data pages and won't try to
/// modify the same pages twice.
pub fn flush(&mut self) -> Result<()> {
pub fn flush(&mut self, lsn: Lsn) -> Result<()> {
// Unless we have accumulated a decent amount of changes, it's not worth it
// to scan through the pending_updates list.
let pending_nblocks = self.pending_nblocks;
@@ -898,7 +856,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let mut result: Result<()> = Ok(());
self.pending_updates.retain(|&key, value| {
if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
result = writer.put(key, self.lsn, value);
result = writer.put(key, lsn, value);
false
} else {
true
@@ -919,9 +877,9 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
/// underlying timeline.
/// All the modifications in this atomic update are stamped by the specified LSN.
///
pub fn commit(&mut self) -> Result<()> {
pub fn commit(&mut self, lsn: Lsn) -> Result<()> {
let writer = self.tline.writer();
let lsn = self.lsn;
let pending_nblocks = self.pending_nblocks;
self.pending_nblocks = 0;
@@ -1366,9 +1324,9 @@ pub fn create_test_timeline<R: Repository>(
timeline_id: utils::zid::ZTimelineId,
) -> Result<std::sync::Arc<R::Timeline>> {
let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?;
let mut m = tline.begin_modification(Lsn(8));
let mut m = tline.begin_modification();
m.init_empty()?;
m.commit()?;
m.commit(Lsn(8))?;
Ok(tline)
}

View File

@@ -408,7 +408,7 @@ pub trait TimelineWriter<'a> {
#[cfg(test)]
pub mod repo_harness {
use bytes::BytesMut;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::{fs, path::PathBuf};
@@ -439,7 +439,9 @@ pub mod repo_harness {
buf.freeze()
}
static LOCK: Lazy<RwLock<()>> = Lazy::new(|| RwLock::new(()));
lazy_static! {
static ref LOCK: RwLock<()> = RwLock::new(());
}
impl From<TenantConf> for TenantConfOpt {
fn from(tenant_conf: TenantConf) -> Self {
@@ -587,10 +589,11 @@ mod tests {
//use std::sync::Arc;
use bytes::BytesMut;
use hex_literal::hex;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
static TEST_KEY: Lazy<Key> =
Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001")));
lazy_static! {
static ref TEST_KEY: Key = Key::from_slice(&hex!("112222222233333333444444445500000001"));
}
#[test]
fn test_basic() -> Result<()> {

View File

@@ -155,7 +155,8 @@ use std::{
use anyhow::{anyhow, bail, Context};
use futures::stream::{FuturesUnordered, StreamExt};
use once_cell::sync::{Lazy, OnceCell};
use lazy_static::lazy_static;
use once_cell::sync::OnceCell;
use remote_storage::{GenericRemoteStorage, RemoteStorage};
use tokio::{
fs,
@@ -183,8 +184,8 @@ use crate::{
};
use metrics::{
register_histogram_vec, register_int_counter_vec, register_int_gauge, HistogramVec,
IntCounterVec, IntGauge,
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
HistogramVec, IntCounter, IntCounterVec, IntGauge,
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
@@ -192,34 +193,34 @@ use self::download::download_index_parts;
pub use self::download::gather_tenant_timelines_index_parts;
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
static REMAINING_SYNC_ITEMS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
lazy_static! {
static ref REMAINING_SYNC_ITEMS: IntGauge = register_int_gauge!(
"pageserver_remote_storage_remaining_sync_items",
"Number of storage sync items left in the queue"
)
.expect("failed to register pageserver remote storage remaining sync items int gauge")
});
static IMAGE_SYNC_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("failed to register pageserver remote storage remaining sync items int gauge");
static ref FATAL_TASK_FAILURES: IntCounter = register_int_counter!(
"pageserver_remote_storage_fatal_task_failures_total",
"Number of critically failed tasks"
)
.expect("failed to register pageserver remote storage remaining sync items int gauge");
static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
"pageserver_remote_storage_image_sync_seconds",
"Time took to synchronize (download or upload) a whole pageserver image. \
Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
&["tenant_id", "timeline_id", "operation_kind", "status"],
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
)
.expect("failed to register pageserver image sync time histogram vec")
});
static REMOTE_INDEX_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
.expect("failed to register pageserver image sync time histogram vec");
static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_remote_index_uploads_total",
"Number of remote index uploads",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver remote index upload vec")
});
.expect("failed to register pageserver remote index upload vec");
}
// TODO move ownership into a new PageserverState struct
static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
/// A timeline status to share with pageserver's sync counterpart,

View File

@@ -130,7 +130,6 @@ where
tenant_path.display()
)
})?;
let timelines = storage
.list_prefixes(Some(tenant_storage_path))
.await
@@ -141,13 +140,6 @@ where
)
})?;
if timelines.is_empty() {
anyhow::bail!(
"no timelines found on the remote storage for tenant {}",
tenant_id
)
}
let mut sync_ids = HashSet::new();
for timeline_remote_storage_key in timelines {

View File

@@ -4,7 +4,7 @@ use std::{fmt::Debug, path::PathBuf};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use remote_storage::RemoteStorage;
use tokio::fs;
use tracing::{debug, error, info, warn};
@@ -20,14 +20,14 @@ use crate::{
};
use metrics::{register_int_counter_vec, IntCounterVec};
static NO_LAYERS_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
lazy_static! {
static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_no_layers_uploads_total",
"Number of skipped uploads due to no layers",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver no layers upload vec")
});
.expect("failed to register pageserver no layers upload vec");
}
/// Serializes and uploads the given index part data to the remote storage.
pub(super) async fn upload_index_part<P, S>(

View File

@@ -25,27 +25,26 @@ use utils::lsn::Lsn;
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
// TODO move ownership into a new PageserverState struct
mod tenants_state {
use anyhow::ensure;
use once_cell::sync::Lazy;
use std::{
collections::HashMap,
sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
};
use tokio::sync::mpsc;
use tracing::{debug, error};
use utils::zid::ZTenantId;
use crate::tenant_mgr::{LocalTimelineUpdate, Tenant};
static TENANTS: Lazy<RwLock<HashMap<ZTenantId, Tenant>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
/// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
/// so that it can enable/disable corresponding processes.
static TIMELINE_UPDATE_SENDER: Lazy<
RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>>,
> = Lazy::new(|| RwLock::new(None));
lazy_static::lazy_static! {
static ref TENANTS: RwLock<HashMap<ZTenantId, Tenant>> = RwLock::new(HashMap::new());
/// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
/// so that it can enable/disable corresponding processes.
static ref TIMELINE_UPDATE_SENDER: RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>> = RwLock::new(None);
}
pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<ZTenantId, Tenant>> {
TENANTS

View File

@@ -87,6 +87,7 @@ async fn compaction_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
);
}
// TODO move ownership into a new PageserverState struct
static START_GC_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
static START_COMPACTION_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();

View File

@@ -45,20 +45,22 @@ use tokio::sync::watch;
use tracing::{debug, error, info, warn};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use utils::zid::{ZTenantId, ZTimelineId};
use crate::shutdown_pageserver;
/// Each thread that we track is associated with a "thread ID". It's just
/// an increasing number that we assign, not related to any system thread
/// id.
static NEXT_THREAD_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
// TODO move ownership into a new PageserverState struct
lazy_static! {
/// Each thread that we track is associated with a "thread ID". It's just
/// an increasing number that we assign, not related to any system thread
/// id.
static ref NEXT_THREAD_ID: AtomicU64 = AtomicU64::new(1);
/// Global registry of threads
static THREADS: Lazy<Mutex<HashMap<u64, Arc<PageServerThread>>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
/// Global registry of threads
static ref THREADS: Mutex<HashMap<u64, Arc<PageServerThread>>> = Mutex::new(HashMap::new());
}
// There is a Tokio watch channel for each thread, which can be used to signal the
// thread that it needs to shut down. This thread local variable holds the receiving

View File

@@ -10,7 +10,7 @@
//! This is similar to PostgreSQL's virtual file descriptor facility in
//! src/backend/storage/file/fd.c
//!
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use once_cell::sync::OnceCell;
use std::fs::{File, OpenOptions};
use std::io::{Error, ErrorKind, Read, Seek, SeekFrom, Write};
@@ -32,24 +32,23 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1.0, // 1 sec
];
static STORAGE_IO_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref STORAGE_IO_TIME: HistogramVec = register_histogram_vec!(
"pageserver_io_operations_seconds",
"Time spent in IO operations",
&["operation", "tenant_id", "timeline_id"],
STORAGE_IO_TIME_BUCKETS.into()
)
.expect("failed to define a metric")
});
static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
.expect("failed to define a metric");
}
lazy_static! {
static ref STORAGE_IO_SIZE: IntGaugeVec = register_int_gauge_vec!(
"pageserver_io_operations_bytes_total",
"Total amount of bytes read/written in IO operations",
&["operation", "tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
///
/// A virtual file descriptor. You can use this just like std::fs::File, but internally

View File

@@ -30,6 +30,8 @@ use anyhow::Result;
use bytes::{Buf, Bytes, BytesMut};
use tracing::*;
use std::collections::HashMap;
use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::walrecord::*;
@@ -46,6 +48,8 @@ pub struct WalIngest<'a, T: DatadirTimeline> {
checkpoint: CheckPoint,
checkpoint_modified: bool,
relsize_cache: HashMap<RelTag, BlockNumber>,
}
impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
@@ -60,13 +64,13 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
timeline,
checkpoint,
checkpoint_modified: false,
relsize_cache: HashMap::new(),
})
}
///
/// Decode a PostgreSQL WAL record and store it in the repository, in the given timeline.
///
/// This function updates `lsn` field of `DatadirModification`
///
/// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
/// relations/pages that the record affects.
@@ -78,7 +82,6 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
modification: &mut DatadirModification<T>,
decoded: &mut DecodedWALRecord,
) -> Result<()> {
modification.lsn = lsn;
decode_wal_record(recdata, decoded).context("failed decoding wal record")?;
let mut buf = decoded.record.clone();
@@ -257,7 +260,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
// Now that this record has been fully handled, including updating the
// checkpoint data, let the repository know that it is up-to-date to this LSN
modification.commit()?;
modification.commit(lsn)?;
Ok(())
}
@@ -405,7 +408,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if it doesn't.
let vm_size = self.get_relsize(vm_rel, modification.lsn)?;
let vm_size = self.get_relsize(vm_rel)?;
if let Some(blknum) = new_vm_blk {
if blknum >= vm_size {
new_vm_blk = None;
@@ -877,6 +880,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
modification: &mut DatadirModification<T>,
rel: RelTag,
) -> Result<()> {
self.relsize_cache.insert(rel, 0);
modification.put_rel_creation(rel, 0)?;
Ok(())
}
@@ -912,6 +916,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
nblocks: BlockNumber,
) -> Result<()> {
modification.put_rel_truncation(rel, nblocks)?;
self.relsize_cache.insert(rel, nblocks);
Ok(())
}
@@ -921,16 +926,23 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
rel: RelTag,
) -> Result<()> {
modification.put_rel_drop(rel)?;
self.relsize_cache.remove(&rel);
Ok(())
}
fn get_relsize(&mut self, rel: RelTag, lsn: Lsn) -> Result<BlockNumber> {
let nblocks = if !self.timeline.get_rel_exists(rel, lsn)? {
0
fn get_relsize(&mut self, rel: RelTag) -> Result<BlockNumber> {
if let Some(nblocks) = self.relsize_cache.get(&rel) {
Ok(*nblocks)
} else {
self.timeline.get_rel_size(rel, lsn)?
};
Ok(nblocks)
let last_lsn = self.timeline.get_last_record_lsn();
let nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
0
} else {
self.timeline.get_rel_size(rel, last_lsn)?
};
self.relsize_cache.insert(rel, nblocks);
Ok(nblocks)
}
}
fn handle_rel_extend(
@@ -940,16 +952,22 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
blknum: BlockNumber,
) -> Result<()> {
let new_nblocks = blknum + 1;
// Check if the relation exists. We implicitly create relations on first
// record.
// TODO: would be nice if to be more explicit about it
let last_lsn = modification.lsn;
let old_nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
// create it with 0 size initially, the logic below will extend it
modification.put_rel_creation(rel, 0)?;
0
let old_nblocks = if let Some(nblocks) = self.relsize_cache.get(&rel) {
*nblocks
} else {
self.timeline.get_rel_size(rel, last_lsn)?
// Check if the relation exists. We implicitly create relations on first
// record.
// TODO: would be nice if to be more explicit about it
let last_lsn = self.timeline.get_last_record_lsn();
let nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
// create it with 0 size initially, the logic below will extend it
modification.put_rel_creation(rel, 0)?;
0
} else {
self.timeline.get_rel_size(rel, last_lsn)?
};
self.relsize_cache.insert(rel, nblocks);
nblocks
};
if new_nblocks > old_nblocks {
@@ -960,6 +978,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
for gap_blknum in old_nblocks..blknum {
modification.put_rel_page_image(rel, gap_blknum, ZERO_PAGE.clone())?;
}
self.relsize_cache.insert(rel, new_nblocks);
}
Ok(())
}
@@ -1050,10 +1069,10 @@ mod tests {
static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
fn init_walingest_test<T: DatadirTimeline>(tline: &T) -> Result<WalIngest<T>> {
let mut m = tline.begin_modification(Lsn(0x10));
let mut m = tline.begin_modification();
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
m.put_relmap_file(0, 111, Bytes::from(""))?; // dummy relmapper file
m.commit()?;
m.commit(Lsn(0x10))?;
let walingest = WalIngest::new(tline, Lsn(0x10))?;
Ok(walingest)
@@ -1065,19 +1084,19 @@ mod tests {
let tline = create_test_timeline(repo, TIMELINE_ID)?;
let mut walingest = init_walingest_test(&*tline)?;
let mut m = tline.begin_modification(Lsn(0x20));
let mut m = tline.begin_modification();
walingest.put_rel_creation(&mut m, TESTREL_A)?;
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"))?;
m.commit()?;
let mut m = tline.begin_modification(Lsn(0x30));
m.commit(Lsn(0x20))?;
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 3"))?;
m.commit()?;
let mut m = tline.begin_modification(Lsn(0x40));
m.commit(Lsn(0x30))?;
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1 at 4"))?;
m.commit()?;
let mut m = tline.begin_modification(Lsn(0x50));
m.commit(Lsn(0x40))?;
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"))?;
m.commit()?;
m.commit(Lsn(0x50))?;
assert_current_logical_size(&*tline, Lsn(0x50));
@@ -1123,9 +1142,9 @@ mod tests {
);
// Truncate last block
let mut m = tline.begin_modification(Lsn(0x60));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, 2)?;
m.commit()?;
m.commit(Lsn(0x60))?;
assert_current_logical_size(&*tline, Lsn(0x60));
// Check reported size and contents after truncation
@@ -1147,15 +1166,15 @@ mod tests {
);
// Truncate to zero length
let mut m = tline.begin_modification(Lsn(0x68));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, 0)?;
m.commit()?;
m.commit(Lsn(0x68))?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x68))?, 0);
// Extend from 0 to 2 blocks, leaving a gap
let mut m = tline.begin_modification(Lsn(0x70));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1"))?;
m.commit()?;
m.commit(Lsn(0x70))?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x70))?, 2);
assert_eq!(
tline.get_rel_page_at_lsn(TESTREL_A, 0, Lsn(0x70))?,
@@ -1167,9 +1186,9 @@ mod tests {
);
// Extend a lot more, leaving a big gap that spans across segments
let mut m = tline.begin_modification(Lsn(0x80));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 1500, TEST_IMG("foo blk 1500"))?;
m.commit()?;
m.commit(Lsn(0x80))?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x80))?, 1501);
for blk in 2..1500 {
assert_eq!(
@@ -1193,18 +1212,18 @@ mod tests {
let tline = create_test_timeline(repo, TIMELINE_ID)?;
let mut walingest = init_walingest_test(&*tline)?;
let mut m = tline.begin_modification(Lsn(0x20));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"))?;
m.commit()?;
m.commit(Lsn(0x20))?;
// Check that rel exists and size is correct
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x20))?, true);
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x20))?, 1);
// Drop rel
let mut m = tline.begin_modification(Lsn(0x30));
let mut m = tline.begin_modification();
walingest.put_rel_drop(&mut m, TESTREL_A)?;
m.commit()?;
m.commit(Lsn(0x30))?;
// Check that rel is not visible anymore
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x30))?, false);
@@ -1213,9 +1232,9 @@ mod tests {
//assert!(tline.get_rel_size(TESTREL_A, Lsn(0x30))?.is_none());
// Re-create it
let mut m = tline.begin_modification(Lsn(0x40));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 4"))?;
m.commit()?;
m.commit(Lsn(0x40))?;
// Check that rel exists and size is correct
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x40))?, true);
@@ -1235,12 +1254,12 @@ mod tests {
// Create a 20 MB relation (the size is arbitrary)
let relsize = 20 * 1024 * 1024 / 8192;
let mut m = tline.begin_modification(Lsn(0x20));
let mut m = tline.begin_modification();
for blkno in 0..relsize {
let data = format!("foo blk {} at {}", blkno, Lsn(0x20));
walingest.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data))?;
}
m.commit()?;
m.commit(Lsn(0x20))?;
// The relation was created at LSN 20, not visible at LSN 1 yet.
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x10))?, false);
@@ -1261,9 +1280,9 @@ mod tests {
// Truncate relation so that second segment was dropped
// - only leave one page
let mut m = tline.begin_modification(Lsn(0x60));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, 1)?;
m.commit()?;
m.commit(Lsn(0x60))?;
// Check reported size and contents after truncation
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x60))?, 1);
@@ -1291,12 +1310,12 @@ mod tests {
// Extend relation again.
// Add enough blocks to create second segment
let lsn = Lsn(0x80);
let mut m = tline.begin_modification(lsn);
let mut m = tline.begin_modification();
for blkno in 0..relsize {
let data = format!("foo blk {} at {}", blkno, lsn);
walingest.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data))?;
}
m.commit()?;
m.commit(lsn)?;
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x80))?, true);
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x80))?, relsize);
@@ -1324,10 +1343,10 @@ mod tests {
let mut lsn = 0x10;
for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
walingest.put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img)?;
m.commit()?;
m.commit(Lsn(lsn))?;
}
assert_current_logical_size(&*tline, Lsn(lsn));
@@ -1339,9 +1358,9 @@ mod tests {
// Truncate one block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE)?;
m.commit()?;
m.commit(Lsn(lsn))?;
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE
@@ -1350,9 +1369,9 @@ mod tests {
// Truncate another block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE - 1)?;
m.commit()?;
m.commit(Lsn(lsn))?;
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE - 1
@@ -1364,9 +1383,9 @@ mod tests {
let mut size: i32 = 3000;
while size >= 0 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, size as BlockNumber)?;
m.commit()?;
m.commit(Lsn(lsn))?;
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
size as BlockNumber

View File

@@ -66,7 +66,7 @@ pub fn init_wal_receiver_main_thread(
);
let broker_prefix = &conf.broker_etcd_prefix;
info!(
"Starting wal receiver main thread, etcd endpoints: {}",
"Starting wal receiver main thread, etdc endpoints: {}",
etcd_endpoints.iter().map(Url::to_string).join(", ")
);

View File

@@ -154,7 +154,7 @@ pub async fn handle_walreceiver_connection(
{
let mut decoded = DecodedWALRecord::default();
let mut modification = timeline.begin_modification(endlsn);
let mut modification = timeline.begin_modification();
while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
// let _enter = info_span!("processing record", lsn = %lsn).entered();

View File

@@ -20,8 +20,8 @@
//!
use byteorder::{ByteOrder, LittleEndian};
use bytes::{BufMut, Bytes, BytesMut};
use lazy_static::lazy_static;
use nix::poll::*;
use once_cell::sync::Lazy;
use serde::Serialize;
use std::fs;
use std::fs::OpenOptions;
@@ -105,27 +105,21 @@ impl crate::walredo::WalRedoManager for DummyRedoManager {
// We collect the time spent in actual WAL redo ('redo'), and time waiting
// for access to the postgres process ('wait') since there is only one for
// each tenant.
static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo")
.expect("failed to define a metric")
});
static WAL_REDO_WAIT_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
lazy_static! {
static ref WAL_REDO_TIME: Histogram =
register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo")
.expect("failed to define a metric");
static ref WAL_REDO_WAIT_TIME: Histogram = register_histogram!(
"pageserver_wal_redo_wait_seconds",
"Time spent waiting for access to the WAL redo process"
)
.expect("failed to define a metric")
});
static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.expect("failed to define a metric");
static ref WAL_REDO_RECORD_COUNTER: IntCounter = register_int_counter!(
"pageserver_replayed_wal_records_total",
"Number of WAL records replayed in WAL redo process"
)
.unwrap()
});
.unwrap();
}
///
/// This is the real implementation that uses a Postgres process to

View File

@@ -14,7 +14,7 @@ hashbrown = "0.11.2"
hex = "0.4.3"
hmac = "0.12.1"
hyper = "0.14"
once_cell = "1.13.0"
lazy_static = "1.4.0"
md5 = "0.7.0"
parking_lot = "0.12"
pin-project-lite = "0.2.7"

View File

@@ -12,12 +12,13 @@ use crate::{
stream::PqStream,
waiters::{self, Waiter, Waiters},
};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use tokio::io::{AsyncRead, AsyncWrite};
static CPLANE_WAITERS: Lazy<Waiters<mgmt::ComputeReady>> = Lazy::new(Default::default);
lazy_static! {
static ref CPLANE_WAITERS: Waiters<mgmt::ComputeReady> = Default::default();
}
/// Give caller an opportunity to wait for the cloud's reply.
pub async fn with_waiter<R, T, E>(

View File

@@ -4,8 +4,8 @@ use crate::config::{ProxyConfig, TlsConfig};
use crate::stream::{MetricsStream, PqStream, Stream};
use anyhow::{bail, Context};
use futures::TryFutureExt;
use lazy_static::lazy_static;
use metrics::{register_int_counter, IntCounter};
use once_cell::sync::Lazy;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use utils::pq_proto::{BeMessage as Be, *};
@@ -13,29 +13,23 @@ use utils::pq_proto::{BeMessage as Be, *};
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
const ERR_PROTO_VIOLATION: &str = "protocol violation";
static NUM_CONNECTIONS_ACCEPTED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
lazy_static! {
static ref NUM_CONNECTIONS_ACCEPTED_COUNTER: IntCounter = register_int_counter!(
"proxy_accepted_connections_total",
"Number of TCP client connections accepted."
)
.unwrap()
});
static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.unwrap();
static ref NUM_CONNECTIONS_CLOSED_COUNTER: IntCounter = register_int_counter!(
"proxy_closed_connections_total",
"Number of TCP client connections closed."
)
.unwrap()
});
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.unwrap();
static ref NUM_BYTES_PROXIED_COUNTER: IntCounter = register_int_counter!(
"proxy_io_bytes_total",
"Number of bytes sent/received between any client and backend."
)
.unwrap()
});
.unwrap();
}
/// A small combinator for pluggable error logging.
async fn log_error<R, F>(future: F) -> F::Output

View File

@@ -1,8 +1,4 @@
[pytest]
filterwarnings =
error::pytest.PytestUnhandledThreadExceptionWarning
error::UserWarning
ignore:record_property is incompatible with junit_family:pytest.PytestWarning
addopts =
-m 'not remote_cluster'
markers =

View File

@@ -9,6 +9,7 @@ bytes = "1.0.1"
byteorder = "1.4.3"
hyper = "0.14"
fs2 = "0.4.3"
lazy_static = "1.4.0"
serde_json = "1"
tracing = "0.1.27"
clap = "3.0"
@@ -28,7 +29,7 @@ const_format = "0.2.21"
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
git-version = "0.3.5"
async-trait = "0.1"
once_cell = "1.13.0"
once_cell = "1.10.0"
toml_edit = { version = "0.13", features = ["easy"] }
postgres_ffi = { path = "../libs/postgres_ffi" }

View File

@@ -2,7 +2,7 @@
use anyhow::{bail, ensure, Context, Result};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::fs::{self, File, OpenOptions};
use std::io::{Read, Write};
@@ -26,15 +26,15 @@ const CONTROL_FILE_NAME: &str = "safekeeper.control";
const CONTROL_FILE_NAME_PARTIAL: &str = "safekeeper.control.partial";
pub const CHECKSUM_SIZE: usize = std::mem::size_of::<u32>();
static PERSIST_CONTROL_FILE_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref PERSIST_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
"safekeeper_persist_control_file_seconds",
"Seconds to persist and sync control file, grouped by timeline",
&["tenant_id", "timeline_id"],
DISK_WRITE_SECONDS_BUCKETS.to_vec()
)
.expect("Failed to register safekeeper_persist_control_file_seconds histogram vec")
});
.expect("Failed to register safekeeper_persist_control_file_seconds histogram vec");
}
/// Storage should keep actual state inside of it. It should implement Deref
/// trait to access state fields and have persist method for updating that state.

View File

@@ -4,7 +4,7 @@
use anyhow::{bail, Context, Result};
use etcd_broker::subscription_value::SkTimelineInfo;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use postgres_ffi::xlog_utils::XLogSegNo;
use serde::Serialize;
@@ -559,12 +559,12 @@ struct GlobalTimelinesState {
wal_backup_launcher_tx: Option<Sender<ZTenantTimelineId>>,
}
static TIMELINES_STATE: Lazy<Mutex<GlobalTimelinesState>> = Lazy::new(|| {
Mutex::new(GlobalTimelinesState {
lazy_static! {
static ref TIMELINES_STATE: Mutex<GlobalTimelinesState> = Mutex::new(GlobalTimelinesState {
timelines: HashMap::new(),
wal_backup_launcher_tx: None,
})
});
});
}
#[derive(Clone, Copy, Serialize)]
pub struct TimelineDeleteForceResult {

View File

@@ -12,7 +12,7 @@ use std::io::{self, Seek, SeekFrom};
use std::pin::Pin;
use tokio::io::AsyncRead;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use postgres_ffi::xlog_utils::{
find_end_of_wal, IsPartialXLogFileName, IsXLogFileName, XLogFromFileName, XLogSegNo, PG_TLI,
};
@@ -38,44 +38,31 @@ use metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECOND
use tokio::io::{AsyncReadExt, AsyncSeekExt};
// The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
// i64 is faster than f64, so update to u64 when available.
static WRITE_WAL_BYTES: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
// The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
// i64 is faster than f64, so update to u64 when available.
static ref WRITE_WAL_BYTES: HistogramVec = register_histogram_vec!(
"safekeeper_write_wal_bytes",
"Bytes written to WAL in a single request, grouped by timeline",
&["tenant_id", "timeline_id"],
vec![
1.0,
10.0,
100.0,
1024.0,
8192.0,
128.0 * 1024.0,
1024.0 * 1024.0,
10.0 * 1024.0 * 1024.0
]
vec![1.0, 10.0, 100.0, 1024.0, 8192.0, 128.0 * 1024.0, 1024.0 * 1024.0, 10.0 * 1024.0 * 1024.0]
)
.expect("Failed to register safekeeper_write_wal_bytes histogram vec")
});
static WRITE_WAL_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("Failed to register safekeeper_write_wal_bytes histogram vec");
static ref WRITE_WAL_SECONDS: HistogramVec = register_histogram_vec!(
"safekeeper_write_wal_seconds",
"Seconds spent writing and syncing WAL to a disk in a single request, grouped by timeline",
&["tenant_id", "timeline_id"],
DISK_WRITE_SECONDS_BUCKETS.to_vec()
)
.expect("Failed to register safekeeper_write_wal_seconds histogram vec")
});
static FLUSH_WAL_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("Failed to register safekeeper_write_wal_seconds histogram vec");
static ref FLUSH_WAL_SECONDS: HistogramVec = register_histogram_vec!(
"safekeeper_flush_wal_seconds",
"Seconds spent syncing WAL to a disk, grouped by timeline",
&["tenant_id", "timeline_id"],
DISK_WRITE_SECONDS_BUCKETS.to_vec()
)
.expect("Failed to register safekeeper_flush_wal_seconds histogram vec")
});
.expect("Failed to register safekeeper_flush_wal_seconds histogram vec");
}
struct WalStorageMetrics {
write_wal_bytes: Histogram,

View File

@@ -1,708 +0,0 @@
#
# Script to export tenants from one pageserver and import them into another page server.
#
# Outline of steps:
# 1. Get `(last_lsn, prev_lsn)` from old pageserver
# 2. Get `fullbackup` from old pageserver, which creates a basebackup tar file
# 3. This tar file might be missing relation files for empty relations, if the pageserver
# is old enough (we didn't always store those). So to recreate them, we start a local
# vanilla postgres on this basebackup and ask it what relations should exist, then touch
# any missing files and re-pack the tar.
# TODO This functionality is no longer needed, so we can delete it later if we don't
# end up using the same utils for the pg 15 upgrade. Not sure.
# 4. We import the patched basebackup into a new pageserver
# 5. We export again via fullbackup, now from the new pageserver and compare the returned
# tar file with the one we imported. This confirms that we imported everything that was
# exported, but doesn't guarantee correctness (what if we didn't **export** everything
# initially?)
# 6. We wait for the new pageserver's remote_consistent_lsn to catch up
#
# For more context on how to use this, see:
# https://github.com/neondatabase/cloud/wiki/Storage-format-migration
import os
from os import path
import shutil
from pathlib import Path
import tempfile
from contextlib import closing
import psycopg2
import subprocess
import argparse
import time
import requests
import uuid
from psycopg2.extensions import connection as PgConnection
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
###############################################
### client-side utils copied from test fixtures
###############################################
Env = Dict[str, str]
_global_counter = 0


def global_counter() -> int:
    """Return the next value of a process-wide counter (starting at 1).

    Handy for giving output files a unique number, so repeated runs of the
    same command keep their captured output separate.
    """
    global _global_counter
    _global_counter = _global_counter + 1
    return _global_counter
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
    """Run a process with stdout/stderr captured to files.

    Output goes to "<prog>_NNN.stdout" and "<prog>_NNN.stderr" in
    *capture_dir*, where <prog> is the program's basename and NNN is a
    unique incrementing counter. Pre-existing files are overwritten.

    Returns the common base path (without extension) of the capture files.
    """
    assert type(cmd) is list
    base = f"{os.path.basename(cmd[0])}_{global_counter()}"
    basepath = os.path.join(capture_dir, base)

    with open(basepath + '.stdout', 'w') as stdout_f, \
            open(basepath + '.stderr', 'w') as stderr_f:
        print(f'(capturing output to "{base}.stdout")')
        subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)

    return basepath
class PgBin:
    """A helper for executing PostgreSQL binaries from a distribution dir.

    Resolves bare program names against the distribution's bin/ directory
    and runs them with LD_LIBRARY_PATH pointing at its lib/ directory.
    """
    def __init__(self, log_dir: Path, pg_distrib_dir):
        self.log_dir = log_dir
        distrib = str(pg_distrib_dir)
        self.pg_bin_path = os.path.join(distrib, 'bin')
        self.env = os.environ.copy()
        self.env['LD_LIBRARY_PATH'] = os.path.join(distrib, 'lib')

    def _fixpath(self, command: List[str]):
        # Prefix a bare program name with the pg bin directory; anything
        # containing a '/' is treated as an explicit path and left alone.
        if '/' not in command[0]:
            command[0] = os.path.join(self.pg_bin_path, command[0])

    def _build_env(self, env_add: Optional[Env]) -> Env:
        # Overlay caller-supplied variables on a copy of the captured env;
        # with no additions, reuse the shared env object as-is.
        if env_add is None:
            return self.env
        merged = self.env.copy()
        merged.update(env_add)
        return merged

    def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None):
        """Run one of the postgres binaries, output passing through.

        *command* is list form, e.g. ['pgbench', '-p', '55432']; if its
        first element has no '/' it is resolved against the pg bin dir.
        Use `run_capture` to capture stdout/stderr to files instead.
        """
        self._fixpath(command)
        print('Running command "{}"'.format(' '.join(command)))
        subprocess.run(command, env=self._build_env(env), cwd=cwd, check=True)

    def run_capture(self,
                    command: List[str],
                    env: Optional[Env] = None,
                    cwd: Optional[str] = None,
                    **kwargs: Any) -> str:
        """Like `run`, but stdout/stderr are redirected to capture files.

        Returns the base path of the files holding the captured output.
        """
        self._fixpath(command)
        print('Running command "{}"'.format(' '.join(command)))
        return subprocess_capture(str(self.log_dir),
                                  command,
                                  env=self._build_env(env),
                                  cwd=cwd,
                                  check=True,
                                  **kwargs)
class PgProtocol:
""" Reusable connection logic """
def __init__(self, **kwargs):
self.default_options = kwargs
def conn_options(self, **kwargs):
conn_options = self.default_options.copy()
if 'dsn' in kwargs:
conn_options.update(parse_dsn(kwargs['dsn']))
conn_options.update(kwargs)
# Individual statement timeout in seconds. 2 minutes should be
# enough for our tests, but if you need a longer, you can
# change it by calling "SET statement_timeout" after
# connecting.
if 'options' in conn_options:
conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options']
else:
conn_options['options'] = "-cstatement_timeout=120s"
return conn_options
# autocommit=True here by default because that's what we need most of the time
def connect(self, autocommit=True, **kwargs) -> PgConnection:
"""
Connect to the node.
Returns psycopg2's connection object.
This method passes all extra params to connstr.
"""
conn = psycopg2.connect(**self.conn_options(**kwargs))
# WARNING: this setting affects *all* tests!
conn.autocommit = autocommit
return conn
def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]:
"""
Execute query against the node and return all rows.
This method passes all extra params to connstr.
"""
return self.safe_psql_many([query], **kwargs)[0]
def safe_psql_many(self, queries: List[str], **kwargs: Any) -> List[List[Tuple[Any, ...]]]:
"""
Execute queries against the node and return all rows.
This method passes all extra params to connstr.
"""
result: List[List[Any]] = []
with closing(self.connect(**kwargs)) as conn:
with conn.cursor() as cur:
for query in queries:
print(f"Executing query: {query}")
cur.execute(query)
if cur.description is None:
result.append([]) # query didn't return data
else:
result.append(cast(List[Any], cur.fetchall()))
return result
class VanillaPostgres(PgProtocol):
    """A locally initdb'ed vanilla Postgres instance, managed via pg_ctl.

    Usable as a context manager: the server is stopped on exit if it is
    still running.
    """
    def __init__(self, pgdatadir: Path, pg_bin: "PgBin", port: int, init=True):
        super().__init__(host='localhost', port=port, dbname='postgres')
        self.pgdatadir = pgdatadir
        self.pg_bin = pg_bin
        self.running = False
        if init:
            # Create a fresh data directory unless the caller brings one.
            self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
        self.configure([f"port = {port}\n"])

    def configure(self, options: List[str]):
        """Append lines into postgresql.conf file."""
        assert not self.running
        conf_path = os.path.join(self.pgdatadir, 'postgresql.conf')
        with open(conf_path, 'a') as conf_file:
            conf_file.write("\n".join(options))

    def start(self, log_path: Optional[str] = None):
        assert not self.running
        self.running = True

        if log_path is None:
            log_path = os.path.join(self.pgdatadir, "pg.log")

        self.pg_bin.run_capture(
            ['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])

    def stop(self):
        assert self.running
        self.running = False
        self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Best-effort shutdown when leaving the context.
        if self.running:
            self.stop()
class NeonPageserverApiException(Exception):
    """Raised when the pageserver management API returns an error response."""
class NeonPageserverHttpClient(requests.Session):
    """HTTP client for the pageserver management API (v1 endpoints)."""
    def __init__(self, host, port):
        super().__init__()
        self.host = host
        self.port = port

    def verbose_error(self, res: requests.Response):
        """Raise NeonPageserverApiException for HTTP error responses,
        carrying the server's 'msg' field when the body provides one."""
        try:
            res.raise_for_status()
        except requests.RequestException as e:
            try:
                msg = res.json()['msg']
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; narrow to Exception.
            except Exception:
                msg = ''
            raise NeonPageserverApiException(msg) from e

    def check_status(self):
        self.get(f"http://{self.host}:{self.port}/v1/status").raise_for_status()

    def tenant_list(self):
        res = self.get(f"http://{self.host}:{self.port}/v1/tenant")
        self.verbose_error(res)
        res_json = res.json()
        assert isinstance(res_json, list)
        return res_json

    def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
        """Create a tenant; a 409 is tolerated iff *ok_if_exists*."""
        res = self.post(
            f"http://{self.host}:{self.port}/v1/tenant",
            json={
                'new_tenant_id': new_tenant_id.hex,
            },
        )
        if res.status_code == 409:
            if ok_if_exists:
                print(f'could not create tenant: already exists for id {new_tenant_id}')
            else:
                res.raise_for_status()
        elif res.status_code == 201:
            print(f'created tenant {new_tenant_id}')
        else:
            self.verbose_error(res)

        return new_tenant_id

    def timeline_list(self, tenant_id: uuid.UUID):
        res = self.get(f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
        self.verbose_error(res)
        res_json = res.json()
        assert isinstance(res_json, list)
        return res_json

    def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
        # BUG FIX: this endpoint previously hard-coded "localhost" instead
        # of self.host, unlike every other method in this class.
        res = self.get(
            f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}?include-non-incremental-logical-size=1"
        )
        self.verbose_error(res)
        res_json = res.json()
        assert isinstance(res_json, dict)
        return res_json
def lsn_to_hex(num: int) -> str:
    """ Convert lsn from int to standard hex notation. """
    high, low = divmod(num, 1 << 32)
    return f"{high:X}/{low:X}"
def lsn_from_hex(lsn_hex: str) -> int:
    """ Convert lsn from hex notation to int. """
    high, low = (int(part, 16) for part in lsn_hex.split('/'))
    return (high << 32) + low
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
                          tenant: uuid.UUID,
                          timeline: uuid.UUID) -> int:
    """Return the timeline's remote_consistent_lsn, or 0 if nothing has
    been uploaded to remote storage yet."""
    remote_info = pageserver_http_client.timeline_detail(tenant, timeline)['remote']
    if remote_info is None:
        # Right after timeline creation, before any part of it has been
        # uploaded, there is no remote information at all.
        return 0
    lsn_str = remote_info['remote_consistent_lsn']
    assert isinstance(lsn_str, str)
    return lsn_from_hex(lsn_str)
def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
                    tenant: uuid.UUID,
                    timeline: uuid.UUID,
                    lsn: int):
    """waits for local timeline upload up to specified lsn"""
    max_attempts = 10
    current_lsn = 0
    for attempt in range(1, max_attempts + 1):
        current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline)
        if current_lsn >= lsn:
            return
        print("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format(
            lsn_to_hex(lsn), lsn_to_hex(current_lsn), attempt))
        time.sleep(1)
    raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format(
        lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
##############
# End of utils
##############
def pack_base(log_dir, restored_dir, output_tar):
    """Create tar file from basebackup, being careful to produce relative filenames."""
    # We cd into the directory and invoke tar from there: calling tar from
    # outside would encode absolute filenames, which don't parse well on
    # import.
    tmp_tar_name = "tmp.tar"
    cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir)
    subprocess_capture(log_dir, cmd, cwd=restored_dir)
    shutil.move(os.path.join(restored_dir, tmp_tar_name), output_tar)
def reconstruct_paths(log_dir, pg_bin, base_tar):
    """Reconstruct what relation files should exist in the datadir by querying postgres.

    Generator: unpacks base_tar into a temporary directory, starts a vanilla
    postgres on it, and yields the datadir-relative path of every relation
    file postgres reports via pg_relation_filepath(). template0 cannot be
    connected to directly, so a `template0copy` database is created from it
    and its paths are rewritten to point at template0's oid.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])

        # Start a vanilla postgres from the given datadir and query it to find
        # what relfiles should exist, but possibly don't.
        # NOTE(review): port is a str here although VanillaPostgres annotates
        # int; it is only interpolated into config/connection strings.
        port = "55439"  # Probably free
        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
            vanilla_pg.configure([f"port={port}"])
            vanilla_pg.start(log_path=os.path.join(log_dir, "tmp_pg.log"))

            # Create database based on template0 because we can't connect to template0
            query = "create database template0copy template template0"
            vanilla_pg.safe_psql(query, user="cloud_admin")
            vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")

            # Get all databases
            query = "select oid, datname from pg_database"
            oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
            template0_oid = [
                oid for (oid, database) in oid_dbname_pairs if database == "template0"
            ][0]

            # Get rel paths for each database
            for oid, database in oid_dbname_pairs:
                if database == "template0":
                    # We can't connect to template0
                    continue

                query = "select relname, pg_relation_filepath(oid) from pg_class"
                result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
                for relname, filepath in result:
                    # filepath is None for relations without storage (e.g. views)
                    if filepath is not None:
                        if database == "template0copy":
                            # Add all template0copy paths to template0
                            prefix = f"base/{oid}/"
                            if filepath.startswith(prefix):
                                suffix = filepath[len(prefix):]
                                yield f"base/{template0_oid}/{suffix}"
                            elif filepath.startswith("global"):
                                print(f"skipping {database} global file {filepath}")
                            else:
                                raise AssertionError
                        else:
                            yield filepath
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
    """Create empty files for the given paths inside a basebackup tar.

    Unpacks corrupt_tar, touches every path in `paths` that is absent, and
    repackages the result into output_tar.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])

        # Touch files that don't exist
        for rel_path in paths:
            absolute_path = os.path.join(restored_dir, rel_path)
            if not os.path.exists(absolute_path):
                print(f"File {absolute_path} didn't exist. Creating..")
                Path(absolute_path).touch()

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)
# HACK This is a workaround for exporting from old pageservers that
# can't export empty relations. In this case we need to start
# a vanilla postgres from the exported datadir, and query it
# to see what empty relations are missing, and then create
# those empty files before importing.
def add_missing_rels(base_tar, output_tar, log_dir, pg_bin):
    """Write output_tar: base_tar plus empty files for any relations postgres expects."""
    expected_paths = set(reconstruct_paths(log_dir, pg_bin, base_tar))
    touch_missing_rels(log_dir, base_tar, output_tar, expected_paths)
def get_rlsn(pageserver_connstr, tenant_id, timeline_id):
    """Query the pageserver for a timeline's last record LSN.

    Returns (last_lsn, prev_lsn) as reported by the pageserver's
    `get_last_record_rlsn` command.
    """
    conn = psycopg2.connect(pageserver_connstr)
    conn.autocommit = True
    try:
        with conn.cursor() as cur:
            cmd = f"get_last_record_rlsn {tenant_id} {timeline_id}"
            cur.execute(cmd)
            res = cur.fetchone()
            prev_lsn = res[0]
            last_lsn = res[1]
    finally:
        # Close even when the query raises, so we don't leak a connection.
        conn.close()

    return last_lsn, prev_lsn
def import_timeline(args,
                    psql_path,
                    pageserver_connstr,
                    pageserver_http,
                    tenant_id,
                    timeline_id,
                    last_lsn,
                    prev_lsn,
                    tar_filename):
    """Import a timeline basebackup tar into a pageserver, then wait until
    the pageserver has persisted it up to last_lsn.

    NOTE(review): the import command passes last_lsn twice and prev_lsn is
    unused here -- confirm against the pageserver's `import basebackup`
    argument list.
    """
    # Import timelines to new pageserver
    import_cmd = f"import basebackup {tenant_id} {timeline_id} {last_lsn} {last_lsn}"
    full_cmd = rf"""cat {tar_filename} | {psql_path} {pageserver_connstr} -c '{import_cmd}' """

    stderr_filename2 = path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stderr")
    stdout_filename = path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stdout")

    print(f"Running: {full_cmd}")

    with open(stdout_filename, 'w') as stdout_f:
        with open(stderr_filename2, 'w') as stderr_f:
            print(f"(capturing output to {stdout_filename})")
            pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
            subprocess.run(full_cmd,
                           stdout=stdout_f,
                           stderr=stderr_f,
                           env=pg_bin._build_env(None),
                           shell=True,
                           check=True)

    print("Done import")

    # Wait until pageserver persists the files
    wait_for_upload(pageserver_http,
                    uuid.UUID(tenant_id),
                    uuid.UUID(timeline_id),
                    lsn_from_hex(last_lsn))
def export_timeline(args,
                    psql_path,
                    pageserver_connstr,
                    tenant_id,
                    timeline_id,
                    last_lsn,
                    prev_lsn,
                    tar_filename):
    """Export a timeline from a pageserver as a fullbackup tar.

    The raw export is written to `<tar_filename>.incomplete`; any relation
    files the old pageserver failed to include are then added and the final
    result is written to tar_filename.
    """
    # Choose filenames
    incomplete_filename = tar_filename + ".incomplete"
    stderr_filename = path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr")

    # Construct export command
    query = f"fullbackup {tenant_id} {timeline_id} {last_lsn} {prev_lsn}"
    cmd = [psql_path, "--no-psqlrc", pageserver_connstr, "-c", query]

    # One PgBin serves both steps below (it was constructed twice before).
    pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)

    # Run export command
    print(f"Running: {cmd}")
    with open(incomplete_filename, 'w') as stdout_f:
        with open(stderr_filename, 'w') as stderr_f:
            print(f"(capturing output to {incomplete_filename})")
            subprocess.run(cmd,
                           stdout=stdout_f,
                           stderr=stderr_f,
                           env=pg_bin._build_env(None),
                           check=True)

    # Add missing rels
    add_missing_rels(incomplete_filename, tar_filename, args.work_dir, pg_bin)

    # Log more info
    file_size = os.path.getsize(tar_filename)
    print(f"Done export: {tar_filename}, size {file_size}")
def main(args: argparse.Namespace):
    """Migrate tenants between pageservers via export/import of basebackups.

    For each tenant: list its timelines on the old pageserver, export each
    selected timeline (unless --only-import), import it into the new
    pageserver, then re-export from the new pageserver and compare file
    sizes as a sanity check.
    """
    psql_path = str(Path(args.pg_distrib_dir) / "bin" / "psql")

    old_pageserver_host = args.old_pageserver_host
    new_pageserver_host = args.new_pageserver_host

    old_http_client = NeonPageserverHttpClient(old_pageserver_host, args.old_pageserver_http_port)
    old_http_client.check_status()
    old_pageserver_connstr = f"postgresql://{old_pageserver_host}:{args.old_pageserver_pg_port}"

    new_http_client = NeonPageserverHttpClient(new_pageserver_host, args.new_pageserver_http_port)
    new_http_client.check_status()
    new_pageserver_connstr = f"postgresql://{new_pageserver_host}:{args.new_pageserver_pg_port}"

    for tenant_id in args.tenants:
        print(f"Tenant: {tenant_id}")
        timelines = old_http_client.timeline_list(uuid.UUID(tenant_id))
        print(f"Timelines: {timelines}")

        # Create tenant in new pageserver
        if args.only_import is False and not args.timelines:
            new_http_client.tenant_create(uuid.UUID(tenant_id), args.ok_if_exists)

        for timeline in timelines:

            # Skip timelines we don't need to export
            if args.timelines and timeline['timeline_id'] not in args.timelines:
                print(f"Skipping timeline {timeline['timeline_id']}")
                continue

            # Choose filenames
            tar_filename = path.join(args.work_dir,
                                     f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")

            # Always query the LSNs from the old pageserver: the import step
            # needs them even when export is skipped. (Previously this was
            # done only when exporting, so --only-import crashed with a
            # NameError on last_lsn.)
            last_lsn, prev_lsn = get_rlsn(
                old_pageserver_connstr,
                timeline['tenant_id'],
                timeline['timeline_id'],
            )

            # Export timeline from old pageserver
            if args.only_import is False:
                export_timeline(
                    args,
                    psql_path,
                    old_pageserver_connstr,
                    timeline['tenant_id'],
                    timeline['timeline_id'],
                    last_lsn,
                    prev_lsn,
                    tar_filename,
                )

            # Import into new pageserver
            import_timeline(
                args,
                psql_path,
                new_pageserver_connstr,
                new_http_client,
                timeline['tenant_id'],
                timeline['timeline_id'],
                last_lsn,
                prev_lsn,
                tar_filename,
            )

            # Re-export and compare
            re_export_filename = tar_filename + ".reexport"
            export_timeline(args,
                            psql_path,
                            new_pageserver_connstr,
                            timeline['tenant_id'],
                            timeline['timeline_id'],
                            last_lsn,
                            prev_lsn,
                            re_export_filename)

            # Check the size is the same. (No trailing commas here: they
            # previously turned old_size/new_size into accidental 1-tuples.)
            old_size = os.path.getsize(tar_filename)
            new_size = os.path.getsize(re_export_filename)
            if old_size != new_size:
                raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}")
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--tenant-id',
        dest='tenants',
        required=True,
        nargs='+',
        help='Id of the tenant to migrate. You can pass multiple arguments',
    )
    parser.add_argument(
        '--timeline-id',
        dest='timelines',
        required=False,
        nargs='+',
        help='Id of the timeline to migrate. You can pass multiple arguments',
    )
    parser.add_argument(
        '--from-host',
        dest='old_pageserver_host',
        required=True,
        help='Host of the pageserver to migrate data from',
    )
    parser.add_argument(
        '--from-http-port',
        dest='old_pageserver_http_port',
        required=False,
        type=int,
        default=9898,
        help='HTTP port of the pageserver to migrate data from. Default: 9898',
    )
    parser.add_argument(
        '--from-pg-port',
        dest='old_pageserver_pg_port',
        required=False,
        type=int,
        default=6400,
        help='pg port of the pageserver to migrate data from. Default: 6400',
    )
    parser.add_argument(
        '--to-host',
        dest='new_pageserver_host',
        required=True,
        help='Host of the pageserver to migrate data to',
    )
    parser.add_argument(
        '--to-http-port',
        dest='new_pageserver_http_port',
        required=False,
        default=9898,
        type=int,
        help='HTTP port of the pageserver to migrate data to. Default: 9898',
    )
    parser.add_argument(
        '--to-pg-port',
        dest='new_pageserver_pg_port',
        required=False,
        default=6400,
        type=int,
        help='pg port of the pageserver to migrate data to. Default: 6400',
    )
    parser.add_argument(
        '--ignore-tenant-exists',
        dest='ok_if_exists',
        required=False,
        # This is used as a boolean flag; without an action, argparse would
        # demand a value after the option.
        action='store_true',
        help=
        'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.',
    )
    parser.add_argument(
        '--pg-distrib-dir',
        dest='pg_distrib_dir',
        required=False,
        default='/usr/local/',
        help='Path where postgres binaries are installed. Default: /usr/local/',
    )
    # NOTE(review): main() derives the psql path from --pg-distrib-dir and
    # never reads args.psql_path; the option is kept for CLI compatibility.
    parser.add_argument(
        '--psql-path',
        dest='psql_path',
        required=False,
        default='/usr/local/bin/psql',
        help='Path to the psql binary. Default: /usr/local/bin/psql',
    )
    parser.add_argument(
        '--only-import',
        dest='only_import',
        required=False,
        default=False,
        action='store_true',
        help='Skip export and tenant creation part',
    )
    parser.add_argument(
        '--work-dir',
        dest='work_dir',
        required=True,
        # default removed: it is meaningless together with required=True
        help='directory where temporary tar files are stored',
    )
    args = parser.parse_args()
    main(args)

View File

@@ -1,5 +1,6 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
from fixtures.utils import query_scalar

View File

@@ -167,5 +167,3 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn"):
env.neon_cli.create_branch('b1', 'b0', tenant_id=tenant, ancestor_start_lsn=lsn)
thread.join()

View File

@@ -60,38 +60,17 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
client = env.pageserver.http_client()
timeline_details = client.timeline_detail(tenant_id=tenant_id,
timeline_id=timeline_id,
include_non_incremental_logical_size=True)
tenant_id, timeline_id = env.neon_cli.create_tenant()
assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
timeline_details = client.timeline_detail(tenant_id=tenant_id,
timeline_id=timeline_id,
include_non_incremental_logical_size=True)
def expect_updated_msg_lsn(client: NeonPageserverHttpClient,
tenant_id: UUID,
timeline_id: UUID,
prev_msg_lsn: Optional[int]) -> int:
timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id)
# a successful `timeline_details` response must contain the below fields
local_timeline_details = timeline_details['local']
assert "wal_source_connstr" in local_timeline_details.keys()
assert "last_received_msg_lsn" in local_timeline_details.keys()
assert "last_received_msg_ts" in local_timeline_details.keys()
assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty"
last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"])
assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \
f"the last received message's LSN {last_msg_lsn} hasn't been updated \
compared to the previous message's LSN {prev_msg_lsn}"
return last_msg_lsn
assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
# Test the WAL-receiver related fields in the response to `timeline_details` API call
@@ -100,29 +79,44 @@ def expect_updated_msg_lsn(client: NeonPageserverHttpClient,
# `timeline_details` now.
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
client = env.pageserver.http_client()
# Wait to make sure that we get a latest WAL receiver data.
# We need to wait here because it's possible that we don't have access to
# the latest WAL yet, when the `timeline_detail` API is first called.
# See: https://github.com/neondatabase/neon/issues/1768.
lsn = wait_until(number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None))
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
# Make a DB modification then expect getting a new WAL receiver's data.
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
wait_until(number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn))
def expect_updated_msg_lsn(prev_msg_lsn: Optional[int]) -> int:
timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id)
# a successful `timeline_details` response must contain the below fields
local_timeline_details = timeline_details['local']
assert "wal_source_connstr" in local_timeline_details.keys()
assert "last_received_msg_lsn" in local_timeline_details.keys()
assert "last_received_msg_ts" in local_timeline_details.keys()
assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty"
last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"])
assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \
f"the last received message's LSN {last_msg_lsn} hasn't been updated \
compared to the previous message's LSN {prev_msg_lsn}"
return last_msg_lsn
# Wait to make sure that we get a latest WAL receiver data.
# We need to wait here because it's possible that we don't have access to
# the latest WAL yet, when the `timeline_detail` API is first called.
# See: https://github.com/neondatabase/neon/issues/1768.
lsn = wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(None))
# Make a DB modification then expect getting a new WAL receiver's data.
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(lsn))
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
check_client(client, env.initial_tenant)
client = env.pageserver.http_client()
check_client(client, env.initial_tenant)
def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilder):
@@ -131,5 +125,5 @@ def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilde
management_token = env.auth_keys.generate_management_token()
with env.pageserver.http_client(auth_token=management_token) as client:
check_client(client, env.initial_tenant)
client = env.pageserver.http_client(auth_token=management_token)
check_client(client, env.initial_tenant)

View File

@@ -2,10 +2,11 @@
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ......
import shutil, os
from contextlib import closing
from pathlib import Path
import time
from uuid import UUID
from fixtures.neon_fixtures import NeonEnvBuilder, RemoteStorageKind, assert_timeline_local, available_remote_storages, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, assert_timeline_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, query_scalar
import pytest
@@ -28,19 +29,18 @@ import pytest
# * queries the specific data, ensuring that it matches the one stored before
#
# The tests are done for all types of remote storage pageserver supports.
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_remote_storage_backup_and_restore(
neon_env_builder: NeonEnvBuilder,
remote_storatge_kind: RemoteStorageKind,
):
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, storage_type: str):
# Use this test to check more realistic SK ids: some etcd key parsing bugs were related,
# and this test needs SK to write data to pageserver, so it will be visible
neon_env_builder.safekeepers_id_start = 12
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_remote_storage_backup_and_restore',
)
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
data_id = 1
data_secret = 'very secret secret'
@@ -110,7 +110,7 @@ def test_remote_storage_backup_and_restore(
client.tenant_attach(UUID(tenant_id))
log.info("waiting for timeline redownload")
wait_until(number_of_iterations=20,
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)))

View File

@@ -1,19 +1,10 @@
from threading import Thread
from uuid import uuid4
import uuid
import psycopg2
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
def do_gc_target(env: NeonEnv, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
try:
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')
except Exception as e:
log.error("do_gc failed: %s", e)
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
@@ -45,7 +36,8 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0')
# try to concurrently run gc and detach
gc_thread = Thread(target=lambda: do_gc_target(env, tenant_id, timeline_id))
gc_thread = Thread(
target=lambda: env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0'), )
gc_thread.start()
last_error = None

View File

@@ -229,7 +229,7 @@ def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path
# basebackup and importing it into the new pageserver.
# This kind of migration can tolerate breaking changes
# to storage format
'major',
pytest.param('major', marks=pytest.mark.xfail(reason="Not implemented")),
])
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
@@ -345,8 +345,6 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# Migrate either by attaching from s3 or import/export basebackup
if method == "major":
cmd = [
"poetry",
"run",
"python",
os.path.join(base_dir, "scripts/export_import_between_pageservers.py"),
"--tenant-id",
@@ -363,12 +361,12 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
str(new_pageserver_http_port),
"--to-pg-port",
str(new_pageserver_pg_port),
"--pg-distrib-dir",
pg_distrib_dir,
"--psql-path",
os.path.join(pg_distrib_dir, "bin", "psql"),
"--work-dir",
os.path.join(test_output_dir),
]
subprocess_capture(test_output_dir, cmd, check=True)
subprocess_capture(str(env.repo_dir), cmd, check=True)
elif method == "minor":
# call to attach timeline to new pageserver
new_pageserver_http.tenant_attach(tenant_id)
@@ -429,22 +427,6 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
post_migration_check(pg_main, 500500, old_local_path_main)
post_migration_check(pg_second, 1001000, old_local_path_second)
# ensure that we can successfully read all relations on the new pageserver
with pg_cur(pg_second) as cur:
cur.execute('''
DO $$
DECLARE
r RECORD;
BEGIN
FOR r IN
SELECT relname FROM pg_class WHERE relkind='r'
LOOP
RAISE NOTICE '%', r.relname;
EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname;
END LOOP;
END$$;
''')
if with_load == 'with_load':
assert load_ok_event.wait(3)
log.info('stopping load thread')

View File

@@ -13,7 +13,7 @@ from uuid import UUID
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, RemoteStorageKind, available_remote_storages, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import lsn_from_hex
@@ -38,7 +38,7 @@ async def tenant_workload(env: NeonEnv, pg: Postgres):
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
workers = []
for _, pg in tenants_pgs:
for tenant, pg in tenants_pgs:
worker = tenant_workload(env, pg)
workers.append(asyncio.create_task(worker))
@@ -46,18 +46,23 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs):
await asyncio.gather(*workers)
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_tenants_many',
)
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_tenants_many(neon_env_builder: NeonEnvBuilder, storage_type: str):
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
tenants_pgs: List[Tuple[UUID, Postgres]] = []
for _ in range(1, 5):
for i in range(1, 5):
# Use a tiny checkpoint distance, to create a lot of layers quickly
tenant, _ = env.neon_cli.create_tenant(
conf={

View File

@@ -12,8 +12,9 @@ import uuid
from contextlib import closing
from dataclasses import dataclass, field
from multiprocessing import Process, Value
from pathlib import Path
from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, available_remote_storages, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex, query_scalar
from fixtures.log_helper import log
from typing import List, Optional, Any
@@ -350,7 +351,7 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end):
if lsn_from_hex(tli_status.backup_lsn) >= lsn_from_hex(seg_end):
break
elapsed = time.time() - started_at
if elapsed > 30:
if elapsed > 20:
raise RuntimeError(
f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded")
time.sleep(0.5)
@@ -376,15 +377,15 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size):
time.sleep(0.5)
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
def test_wal_backup(neon_env_builder: NeonEnvBuilder, storage_type: str):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_safekeepers_wal_backup',
)
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = neon_env_builder.init_start()
@@ -424,15 +425,15 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo
wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], '0/5000000')
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, storage_type: str):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_s3_wal_replay',
)
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_s3_wal_replay')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = neon_env_builder.init_start()

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
from dataclasses import field
from contextlib import contextmanager
from enum import Flag, auto
import enum
import textwrap
from cached_property import cached_property
import abc
@@ -222,7 +221,7 @@ def can_bind(host: str, port: int) -> bool:
# moment. If that changes, we should use start using SO_REUSEADDR here
# too, to allow reusing ports more quickly.
# See https://github.com/neondatabase/neon/issues/801
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
#sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
sock.bind((host, port))
@@ -231,8 +230,6 @@ def can_bind(host: str, port: int) -> bool:
except socket.error:
log.info(f"Port {port} is in use, skipping")
return False
finally:
sock.close()
class PortDistributor:
@@ -265,11 +262,6 @@ def default_broker(request: Any, port_distributor: PortDistributor):
broker.stop()
@pytest.fixture(scope='session')
def run_id():
yield uuid.uuid4()
@pytest.fixture(scope='session')
def mock_s3_server(port_distributor: PortDistributor):
mock_s3_server = MockS3Server(port_distributor.get_port())
@@ -299,9 +291,7 @@ class PgProtocol:
# change it by calling "SET statement_timeout" after
# connecting.
options = result.get('options', '')
if "statement_timeout" not in options:
options = f'-cstatement_timeout=120s {options}'
result['options'] = options
result['options'] = f'-cstatement_timeout=120s {options}'
return result
# autocommit=True here by default because that's what we need most of the time
@@ -448,46 +438,26 @@ class MockS3Server:
def secret_key(self) -> str:
return 'test'
def access_env_vars(self) -> Dict[Any, Any]:
return {
'AWS_ACCESS_KEY_ID': self.access_key(),
'AWS_SECRET_ACCESS_KEY': self.secret_key(),
}
def kill(self):
self.subprocess.kill()
@enum.unique
class RemoteStorageKind(enum.Enum):
LOCAL_FS = "local_fs"
MOCK_S3 = "mock_s3"
REAL_S3 = "real_s3"
def available_remote_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.LOCAL_FS, RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE") is not None:
remote_storages.append(RemoteStorageKind.REAL_S3)
log.info("Enabling real s3 storage for tests")
else:
log.info("Using mock implementations to test remote storage")
return remote_storages
@dataclass
class LocalFsStorage:
root: Path
local_path: Path
@dataclass
class S3Storage:
bucket_name: str
bucket_region: str
access_key: str
secret_key: str
endpoint: Optional[str] = None
prefix_in_bucket: Optional[str] = None
def access_env_vars(self) -> Dict[str, str]:
return {
'AWS_ACCESS_KEY_ID': self.access_key,
'AWS_SECRET_ACCESS_KEY': self.secret_key,
}
endpoint: Optional[str]
RemoteStorage = Union[LocalFsStorage, S3Storage]
@@ -496,20 +466,16 @@ RemoteStorage = Union[LocalFsStorage, S3Storage]
# serialize as toml inline table
def remote_storage_to_toml_inline_table(remote_storage):
if isinstance(remote_storage, LocalFsStorage):
remote_storage_config = f"local_path='{remote_storage.root}'"
res = f"local_path='{remote_storage.local_path}'"
elif isinstance(remote_storage, S3Storage):
remote_storage_config = f"bucket_name='{remote_storage.bucket_name}',\
bucket_region='{remote_storage.bucket_region}'"
if remote_storage.prefix_in_bucket is not None:
remote_storage_config += f",prefix_in_bucket='{remote_storage.prefix_in_bucket}'"
res = f"bucket_name='{remote_storage.bucket_name}', bucket_region='{remote_storage.bucket_region}'"
if remote_storage.endpoint is not None:
remote_storage_config += f",endpoint='{remote_storage.endpoint}'"
res += f", endpoint='{remote_storage.endpoint}'"
else:
raise Exception(f'Unknown storage configuration {remote_storage}')
else:
raise Exception("invalid remote storage type")
return f"{{{remote_storage_config}}}"
return f"{{{res}}}"
class RemoteStorageUsers(Flag):
@@ -527,31 +493,28 @@ class NeonEnvBuilder:
cleaned up after the test has finished.
"""
def __init__(
self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
run_id: uuid.UUID,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
# fsync is disabled by default to make the tests go faster
safekeepers_enable_fsync: bool = False,
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME,
):
self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
# fsync is disabled by default to make the tests go faster
safekeepers_enable_fsync: bool = False,
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
self.port_distributor = port_distributor
self.remote_storage = remote_storage
self.remote_storage_users = remote_storage_users
self.broker = broker
self.run_id = run_id
self.mock_s3_server = mock_s3_server
self.pageserver_config_override = pageserver_config_override
self.num_safekeepers = num_safekeepers
@@ -560,8 +523,6 @@ class NeonEnvBuilder:
self.auth_enabled = auth_enabled
self.default_branch_name = default_branch_name
self.env: Optional[NeonEnv] = None
self.remote_storage_prefix: Optional[str] = None
self.keep_remote_storage_contents: bool = True
def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
@@ -577,143 +538,41 @@ class NeonEnvBuilder:
self.start()
return env
def enable_remote_storage(
self,
remote_storage_kind: RemoteStorageKind,
test_name: str,
force_enable: bool = True,
):
if remote_storage_kind == RemoteStorageKind.LOCAL_FS:
self.enable_local_fs_remote_storage(force_enable=force_enable)
elif remote_storage_kind == RemoteStorageKind.MOCK_S3:
self.enable_mock_s3_remote_storage(bucket_name=test_name, force_enable=force_enable)
elif remote_storage_kind == RemoteStorageKind.REAL_S3:
self.enable_real_s3_remote_storage(test_name=test_name, force_enable=force_enable)
else:
raise RuntimeError(f'Unknown storage type: {remote_storage_kind}')
"""
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
"""
def enable_local_fs_remote_storage(self, force_enable=True):
"""
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
self.remote_storage = LocalFsStorage(Path(self.repo_dir / 'local_fs_remote_storage'))
def enable_mock_s3_remote_storage(self, bucket_name: str, force_enable=True):
"""
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
Starts up the mock server, if that does not run yet.
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
"""
"""
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
Starts up the mock server, if that does not run yet.
Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`.
"""
def enable_s3_mock_remote_storage(self, bucket_name: str, force_enable=True):
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
mock_endpoint = self.mock_s3_server.endpoint()
mock_region = self.mock_s3_server.region()
self.remote_storage_client = boto3.client(
boto3.client(
's3',
endpoint_url=mock_endpoint,
region_name=mock_region,
aws_access_key_id=self.mock_s3_server.access_key(),
aws_secret_access_key=self.mock_s3_server.secret_key(),
)
self.remote_storage_client.create_bucket(Bucket=bucket_name)
self.remote_storage = S3Storage(
bucket_name=bucket_name,
endpoint=mock_endpoint,
bucket_region=mock_region,
access_key=self.mock_s3_server.access_key(),
secret_key=self.mock_s3_server.secret_key(),
)
def enable_real_s3_remote_storage(self, test_name: str, force_enable=True):
"""
Sets up configuration to use real s3 endpoint without mock server
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
access_key = os.getenv("AWS_ACCESS_KEY_ID")
assert access_key, "no aws access key provided"
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
assert secret_key, "no aws access key provided"
# session token is needed for local runs with sso auth
session_token = os.getenv("AWS_SESSION_TOKEN")
bucket_name = os.getenv("REMOTE_STORAGE_S3_BUCKET")
assert bucket_name, "no remote storage bucket name provided"
region = os.getenv("REMOTE_STORAGE_S3_REGION")
assert region, "no remote storage region provided"
# do not leave data in real s3
self.keep_remote_storage_contents = False
# construct a prefix inside bucket for the particular test case and test run
self.remote_storage_prefix = f'{self.run_id}/{test_name}'
self.remote_storage_client = boto3.client(
's3',
region_name=region,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
aws_session_token=session_token,
)
).create_bucket(Bucket=bucket_name)
self.remote_storage = S3Storage(bucket_name=bucket_name,
bucket_region=region,
access_key=access_key,
secret_key=secret_key,
prefix_in_bucket=self.remote_storage_prefix)
def cleanup_remote_storage(self):
# here wee check for true remote storage, no the local one
# local cleanup is not needed after test because in ci all env will be destroyed anyway
if self.remote_storage_prefix is None:
log.info("no remote storage was set up, skipping cleanup")
return
if self.keep_remote_storage_contents:
log.info("keep_remote_storage_contents skipping remote storage cleanup")
return
log.info("removing data from test s3 bucket %s by prefix %s",
self.remote_storage.bucket_name,
self.remote_storage_prefix)
paginator = self.remote_storage_client.get_paginator('list_objects_v2')
pages = paginator.paginate(
Bucket=self.remote_storage.bucket_name,
Prefix=self.remote_storage_prefix,
)
objects_to_delete = {'Objects': []}
cnt = 0
for item in pages.search('Contents'):
# weirdly when nothing is found it returns [None]
if item is None:
break
objects_to_delete['Objects'].append({'Key': item['Key']})
# flush once aws limit reached
if len(objects_to_delete['Objects']) >= 1000:
self.remote_storage_client.delete_objects(
Bucket=self.remote_storage.bucket_name,
Delete=objects_to_delete,
)
objects_to_delete = dict(Objects=[])
cnt += 1
# flush rest
if len(objects_to_delete['Objects']):
self.remote_storage_client.delete_objects(Bucket=self.remote_storage.bucket_name,
Delete=objects_to_delete)
log.info("deleted %s objects from remote storage", cnt)
endpoint=mock_endpoint,
bucket_region=mock_region)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
# Stop all the nodes.
if self.env:
log.info('Cleaning up all storage and compute nodes')
@@ -722,8 +581,6 @@ class NeonEnvBuilder:
sk.stop(immediate=True)
self.env.pageserver.stop(immediate=True)
self.cleanup_remote_storage()
class NeonEnv:
"""
@@ -856,13 +713,10 @@ class NeonEnv:
@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
request: Any,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd,
run_id: uuid.UUID,
) -> Iterator[NeonEnv]:
def _shared_simple_env(request: Any,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[NeonEnv]:
"""
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `neon_simple_env`.
@@ -876,13 +730,8 @@ def _shared_simple_env(
repo_dir = os.path.join(str(top_output_dir), "shared_repo")
shutil.rmtree(repo_dir, ignore_errors=True)
with NeonEnvBuilder(
repo_dir=Path(repo_dir),
port_distributor=port_distributor,
broker=default_broker,
mock_s3_server=mock_s3_server,
run_id=run_id,
) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
env = builder.init_start()
# For convenience in tests, create a branch from the freshly-initialized cluster.
@@ -907,13 +756,10 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
@pytest.fixture(scope='function')
def neon_env_builder(
test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd,
run_id: uuid.UUID,
) -> Iterator[NeonEnvBuilder]:
def neon_env_builder(test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Neon environment for test.
@@ -931,13 +777,8 @@ def neon_env_builder(
repo_dir = os.path.join(test_output_dir, "repo")
# Return the builder to the caller
with NeonEnvBuilder(
repo_dir=Path(repo_dir),
port_distributor=port_distributor,
mock_s3_server=mock_s3_server,
broker=default_broker,
run_id=run_id,
) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
yield builder
@@ -1342,10 +1183,7 @@ class NeonCli(AbstractNeonCli):
remote_storage_users=self.env.remote_storage_users,
pageserver_config_override=self.env.pageserver.config_override)
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
s3_env_vars = self.env.s3_mock_server.access_env_vars() if self.env.s3_mock_server else None
return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
def pageserver_stop(self, immediate=False) -> 'subprocess.CompletedProcess[str]':
@@ -1357,10 +1195,7 @@ class NeonCli(AbstractNeonCli):
return self.raw_cli(cmd)
def safekeeper_start(self, id: int) -> 'subprocess.CompletedProcess[str]':
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
s3_env_vars = self.env.s3_mock_server.access_env_vars() if self.env.s3_mock_server else None
return self.raw_cli(['safekeeper', 'start', str(id)], extra_env_vars=s3_env_vars)
def safekeeper_stop(self,
@@ -1502,7 +1337,7 @@ class NeonPageserver(PgProtocol):
return self
def __exit__(self, exc_type, exc, tb):
self.stop(immediate=True)
self.stop(True)
def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
return NeonPageserverHttpClient(
@@ -1519,7 +1354,6 @@ def append_pageserver_param_overrides(
):
if bool(remote_storage_users & RemoteStorageUsers.PAGESERVER) and remote_storage is not None:
remote_storage_toml_table = remote_storage_to_toml_inline_table(remote_storage)
params_to_update.append(
f'--pageserver-config-override=remote_storage={remote_storage_toml_table}')
@@ -2026,8 +1860,8 @@ class Safekeeper:
started_at = time.time()
while True:
try:
with self.http_client() as http_cli:
http_cli.check_status()
http_cli = self.http_client()
http_cli.check_status()
except Exception as e:
elapsed = time.time() - started_at
if elapsed > 3:
@@ -2178,9 +2012,9 @@ class Etcd:
return f'http://127.0.0.1:{self.port}'
def check_status(self):
with requests.Session() as s:
s.mount('http://', requests.adapters.HTTPAdapter(max_retries=1)) # do not retry
s.get(f"{self.client_url()}/health").raise_for_status()
s = requests.Session()
s.mount('http://', requests.adapters.HTTPAdapter(max_retries=1)) # do not retry
s.get(f"{self.client_url()}/health").raise_for_status()
def try_start(self):
if self.handle is not None:

View File

@@ -146,7 +146,7 @@ def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, durat
record_thread.join()
def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_event: threading.Event):
def start_pgbench_intensive_initialization(env: PgCompare, scale: int):
with env.record_duration("run_duration"):
# Needs to increase the statement timeout (default: 120s) because the
# initialization step can be slow with a large scale.
@@ -155,11 +155,9 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_even
f'-s{scale}',
'-i',
'-Idtg',
env.pg.connstr(options='-cstatement_timeout=600s')
env.pg.connstr(options='-cstatement_timeout=300s')
])
done_event.set()
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("scale", get_scales_matrix(1000))
@@ -168,17 +166,15 @@ def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int):
with env.pg.connect().cursor() as cur:
cur.execute("CREATE TABLE foo as select generate_series(1,100000)")
workload_done_event = threading.Event()
workload_thread = threading.Thread(target=start_pgbench_intensive_initialization,
args=(env, scale, workload_done_event))
args=(env, scale))
workload_thread.start()
record_thread = threading.Thread(target=record_lsn_write_lag,
args=(env, lambda: not workload_done_event.is_set()))
args=(env, lambda: workload_thread.is_alive()))
record_thread.start()
record_read_latency(env, lambda: not workload_done_event.is_set(), "SELECT count(*) from foo")
record_read_latency(env, lambda: workload_thread.is_alive(), "SELECT count(*) from foo")
workload_thread.join()
record_thread.join()