Compare commits


3 Commits

Author           SHA1        Message                       Date
Bojan Serafimov  688f68ecba  Undo whitespace               2022-08-04 09:43:27 +02:00
Bojan Serafimov  fb2ffac8b9  Ignore metrics static         2022-08-04 09:42:27 +02:00
Bojan Serafimov  8173e36a1b  Find all problematic statics  2022-08-04 09:30:22 +02:00
89 changed files with 833 additions and 2223 deletions


@@ -27,26 +27,6 @@ inputs:
description: 'Whether to upload the performance report'
required: false
default: 'false'
run_with_real_s3:
description: 'Whether to pass real s3 credentials to the test suite'
required: false
default: 'false'
real_s3_bucket:
description: 'Bucket name for real s3 tests'
required: false
default: ''
real_s3_region:
description: 'Region name for real s3 tests'
required: false
default: ''
real_s3_access_key_id:
description: 'Access key id'
required: false
default: ''
real_s3_secret_access_key:
description: 'Secret access key'
required: false
default: ''
runs:
using: "composite"
@@ -83,9 +63,6 @@ runs:
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
BUILD_TYPE: ${{ inputs.build_type }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
shell: bash -euxo pipefail {0}
run: |
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
@@ -100,14 +77,6 @@ runs:
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
echo "REAL S3 ENABLED"
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
mkdir -p "$PERF_REPORT_DIR"


@@ -35,16 +35,6 @@ jobs:
GIT_VERSION: ${{ github.sha }}
steps:
- name: Fix git ownerwhip
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Checkout
uses: actions/checkout@v3
with:
@@ -219,11 +209,7 @@ jobs:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_others
run_with_real_s3: true
real_s3_bucket: ci-tests-s3
real_s3_region: us-west-2
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data


@@ -11,15 +11,17 @@ than it was before.
## Submitting changes
1. Get at least one +1 on your PR before you push.
1. Make a PR for every change.
Even seemingly trivial patches can break things in surprising ways.
Use of common sense is OK. If you're only fixing a typo in a comment,
it's probably fine to just push it. But if in doubt, open a PR.
2. Get at least one +1 on your PR before you push.
For simple patches, it will only take a minute for someone to review
it.
2. Don't force push small changes after making the PR ready for review.
Doing so will force readers to re-read your entire PR, which will delay
the review process.
3. Always keep the CI green.
Do not push, if the CI failed on your PR. Even if you think it's not

Cargo.lock (generated)

@@ -154,9 +154,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "axum"
version = "0.5.13"
version = "0.5.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9496f0c1d1afb7a2af4338bbe1d969cddfead41d87a9fb3aaa6d0bbc7af648"
checksum = "d16705af05732b7d3258ec0f7b73c03a658a28925e050d8852d5b568ee8bcf4e"
dependencies = [
"async-trait",
"axum-core",
@@ -317,6 +317,15 @@ dependencies = [
"serde",
]
[[package]]
name = "cast"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
dependencies = [
"rustc_version",
]
[[package]]
name = "cast"
version = "0.3.0"
@@ -495,8 +504,8 @@ name = "control_plane"
version = "0.1.0"
dependencies = [
"anyhow",
"lazy_static",
"nix",
"once_cell",
"pageserver",
"postgres",
"regex",
@@ -570,7 +579,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f"
dependencies = [
"atty",
"cast",
"cast 0.3.0",
"clap 2.34.0",
"criterion-plot",
"csv",
@@ -591,11 +600,11 @@ dependencies = [
[[package]]
name = "criterion-plot"
version = "0.4.5"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
dependencies = [
"cast",
"cast 0.2.7",
"itertools",
]
@@ -671,9 +680,9 @@ dependencies = [
[[package]]
name = "crypto-common"
version = "0.1.6"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
checksum = "2ccfd8c0ee4cce11e45b3fd6f9d5e69e0cc62912aa6a0cb1bf4617b0eba5a12f"
dependencies = [
"generic-array",
"typenum",
@@ -1107,9 +1116,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.26.2"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d"
checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"
[[package]]
name = "git-version"
@@ -1175,9 +1184,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.12.3"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022"
[[package]]
name = "heck"
@@ -1379,7 +1388,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
"hashbrown 0.12.2",
]
[[package]]
@@ -1591,8 +1600,8 @@ dependencies = [
name = "metrics"
version = "0.1.0"
dependencies = [
"lazy_static",
"libc",
"once_cell",
"prometheus",
"workspace_hack",
]
@@ -1842,9 +1851,9 @@ dependencies = [
[[package]]
name = "os_str_bytes"
version = "6.2.0"
version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"
checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
[[package]]
name = "pageserver"
@@ -1870,6 +1879,7 @@ dependencies = [
"humantime-serde",
"hyper",
"itertools",
"lazy_static",
"metrics",
"nix",
"once_cell",
@@ -2115,9 +2125,9 @@ dependencies = [
"crc32c",
"env_logger",
"hex",
"lazy_static",
"log",
"memoffset",
"once_cell",
"postgres",
"rand",
"regex",
@@ -2277,9 +2287,9 @@ dependencies = [
"hex",
"hmac 0.12.1",
"hyper",
"lazy_static",
"md5",
"metrics",
"once_cell",
"parking_lot 0.12.1",
"pin-project-lite",
"rand",
@@ -2725,9 +2735,9 @@ dependencies = [
[[package]]
name = "rustversion"
version = "1.0.8"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8"
checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
[[package]]
name = "ryu"
@@ -2753,6 +2763,7 @@ dependencies = [
"hex",
"humantime",
"hyper",
"lazy_static",
"metrics",
"once_cell",
"postgres",
@@ -3606,9 +3617,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
[[package]]
name = "unicode-ident"
version = "1.0.2"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
[[package]]
name = "unicode-normalization"
@@ -3669,9 +3680,9 @@ dependencies = [
"hex-literal",
"hyper",
"jsonwebtoken",
"lazy_static",
"metrics",
"nix",
"once_cell",
"pin-project-lite",
"postgres",
"postgres-protocol",


@@ -9,7 +9,7 @@ postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8
serde = { version = "1.0", features = ["derive"] }
serde_with = "1.12.0"
toml = "0.5"
once_cell = "1.13.0"
lazy_static = "1.4"
regex = "1"
anyhow = "1.0"
thiserror = "1"


@@ -30,14 +30,14 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_stdout_file =
fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
format!(
"Failed to create etcd stout file in directory {}",
"Failed to create ectd stout file in directory {}",
etcd_data_dir.display()
)
})?;
let etcd_stderr_file =
fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
format!(
"Failed to create etcd stderr file in directory {}",
"Failed to create ectd stderr file in directory {}",
etcd_data_dir.display()
)
})?;


@@ -51,11 +51,7 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in [
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN",
] {
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}


@@ -5,7 +5,7 @@
/// enough to extract a few settings we need in Zenith, assuming you don't do
/// funny stuff like include-directives or funny escaping.
use anyhow::{bail, Context, Result};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashMap;
use std::fmt;
@@ -19,7 +19,9 @@ pub struct PostgresConf {
hash: HashMap<String, String>,
}
static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
lazy_static! {
static ref CONF_LINE_RE: Regex = Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap();
}
impl PostgresConf {
pub fn new() -> PostgresConf {
@@ -137,10 +139,10 @@ fn escape_str(s: &str) -> String {
//
// This regex is a bit more conservative than the rules in guc-file.l, so we quote some
// strings that PostgreSQL would accept without quoting, but that's OK.
static UNQUOTED_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap());
lazy_static! {
static ref UNQUOTED_RE: Regex =
Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap();
}
if UNQUOTED_RE.is_match(s) {
s.to_string()
} else {

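Both spellings of `CONF_LINE_RE` above compile the same pattern. As a minimal, self-contained illustration (the `once_cell` spelling, with a hypothetical `parse_conf_line` helper that is not part of the diff), this is how one `postgresql.conf` line is split into key and value:

```rust
use once_cell::sync::Lazy;
use regex::Regex;

// Same pattern as CONF_LINE_RE above: `key = value`, where the key may contain dots.
static CONF_LINE_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());

/// Split one configuration line into (key, value); lines that don't match are skipped.
fn parse_conf_line(line: &str) -> Option<(&str, &str)> {
    let caps = CONF_LINE_RE.captures(line)?;
    Some((caps.get(1)?.as_str(), caps.get(2)?.as_str()))
}

fn main() {
    assert_eq!(
        parse_conf_line("shared_buffers=128MB"),
        Some(("shared_buffers", "128MB"))
    );
    assert_eq!(parse_conf_line("# just a comment"), None);
}
```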

@@ -247,7 +247,7 @@ impl SafekeeperNode {
// Shutting down may take a long time,
// if safekeeper flushes a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
for _ in 0..100 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
@@ -272,11 +272,9 @@ impl SafekeeperNode {
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(100));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1));
}
bail!("Failed to stop safekeeper with pid {}", pid);


@@ -318,7 +318,7 @@ impl PageServerNode {
// Shutting down may take a long time,
// if pageserver checkpoints a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
for _ in 0..100 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
@@ -344,11 +344,9 @@ impl PageServerNode {
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(100));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1));
}
bail!("Failed to stop pageserver with pid {}", pid);
@@ -401,7 +399,6 @@ impl PageServerNode {
.get("checkpoint_distance")
.map(|x| x.parse::<u64>())
.transpose()?,
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
compaction_target_size: settings
.get("compaction_target_size")
.map(|x| x.parse::<u64>())
@@ -456,7 +453,6 @@ impl PageServerNode {
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'checkpoint_distance' as an integer")?,
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
compaction_target_size: settings
.get("compaction_target_size")
.map(|x| x.parse::<u64>())

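Both stop loops above (safekeeper and pageserver) poll the daemon's TCP port until shutdown completes; the two sides of the hunks differ only in granularity, 600 iterations of 100 ms printing a dot every second versus 100 iterations of one second. A stripped-down sketch of the finer-grained variant, with the real TCP and process checks replaced by a stub:

```rust
use std::io::{self, Write};
use std::thread;
use std::time::Duration;

/// Stand-in for the real check (TcpStream::connect plus a liveness probe in the diff).
fn still_running() -> bool {
    false
}

fn wait_for_shutdown() -> bool {
    for i in 0..600 {
        if !still_running() {
            return true;
        }
        // One progress dot per second, i.e. every 10th 100 ms tick.
        if i % 10 == 0 {
            print!(".");
            io::stdout().flush().unwrap();
        }
        thread::sleep(Duration::from_millis(100));
    }
    false
}

fn main() {
    if wait_for_shutdown() {
        println!("stopped");
    } else {
        eprintln!("timed out after 60 s");
    }
}
```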

@@ -1,8 +1,6 @@
#!/bin/sh
set -eux
pageserver_id_param="${NODE_ID:-10}"
broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
if [ "$broker_endpoints_param" != "absent" ]; then
broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
@@ -10,12 +8,10 @@ else
broker_endpoints_param=''
fi
remote_storage_param="${REMOTE_STORAGE:-}"
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=${pageserver_id_param}" $broker_endpoints_param $remote_storage_param
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data


@@ -52,8 +52,10 @@
- [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [settings.md](./settings.md)
#FIXME: move these under sourcetree.md
#- [pageserver/README.md](/pageserver/README.md)
#- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)
#- [test_runner/README.md](/test_runner/README.md)
#- [safekeeper/README.md](/safekeeper/README.md)
# RFCs


@@ -75,7 +75,7 @@ layer's Segment and range of LSNs.
There are two kinds of layers, in-memory and on-disk layers. In-memory
layers are used to ingest incoming WAL, and provide fast access
to the recent page versions. On-disk layers are stored as files on disk, and
are immutable. See [pageserver-storage.md](./pageserver-storage.md) for more.
are immutable. See pageserver/src/layered_repository/README.md for more.
### Layer file (on-disk layer)
@@ -111,7 +111,7 @@ PostgreSQL LSNs and functions to monitor them:
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):
Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md) for more information.
Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.


@@ -68,6 +68,8 @@ There are the following implementations present:
* local filesystem — to use in tests mainly
* AWS S3 - to use in production
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and corresponding Rust file docs, parameters documentation can be found at [settings docs](../docs/settings.md).
The backup service is disabled by default and can be enabled to interact with a single remote storage.
CLI examples:
@@ -116,7 +118,7 @@ implemented by the LayeredRepository object in
`layered_repository.rs`. There is only that one implementation of the
Repository trait, but it's still a useful abstraction that keeps the
interface for the low-level storage functionality clean. The layered
storage format is described in [pageserver-storage.md](./pageserver-storage.md).
storage format is described in layered_repository/README.md.
Each repository consists of multiple Timelines. Timeline is a
workhorse that accepts page changes from the WAL, and serves


@@ -15,7 +15,7 @@ listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
checkpoint_distance = '268435456' # in bytes
checkpoint_timeout = '10m'
checkpoint_period = '1 s'
gc_period = '100 s'
gc_horizon = '67108864'
@@ -46,7 +46,7 @@ Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#ta
All values can be passed as an argument to the pageserver binary, using the `-c` parameter and specified as a valid TOML string. All tables should be passed in the inline form.
Example: `${PAGESERVER_BIN} -c "checkpoint_timeout = '10 m'" -c "remote_storage={local_path='/some/local/path/'}"`
Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage={local_path='/some/local/path/'}"`
Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.
@@ -82,14 +82,6 @@ S3.
The unit is # of bytes.
#### checkpoint_timeout
Apart from `checkpoint_distance`, open layer flushing is also triggered
`checkpoint_timeout` after the last flush. This makes WAL eventually uploaded to
s3 when activity is stopped.
The default is 10m.
#### compaction_period
Every `compaction_period` seconds, the page server checks if


@@ -28,7 +28,7 @@ The pageserver has a few different duties:
- Receive WAL from the WAL service and decode it.
- Replay WAL that's applicable to the chunks that the Page Server maintains
For more detailed info, see [pageserver-services.md](./pageserver-services.md)
For more detailed info, see [/pageserver/README](/pageserver/README.md)
`/proxy`:
@@ -57,7 +57,7 @@ PostgreSQL extension that contains functions needed for testing and debugging.
The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
It acts as a holding area and redistribution center for recently generated WAL.
For more detailed info, see [walservice.md](./walservice.md)
For more detailed info, see [/safekeeper/README](/safekeeper/README.md)
`/workspace_hack`:
The workspace_hack crate exists only to pin down some dependencies.


@@ -75,8 +75,8 @@ safekeepers. The Paxos and crash recovery algorithm ensures that only
one primary node can be actively streaming WAL to the quorum of
safekeepers.
See [this section](safekeeper-protocol.md) for a more detailed description of
the consensus protocol. spec/ contains TLA+ specification of it.
See README_PROTO.md for a more detailed description of the consensus
protocol. spec/ contains TLA+ specification of it.
# Q&A


@@ -9,7 +9,7 @@
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "1.12.0"
once_cell = "1.13.0"
once_cell = "1.8.0"
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }


@@ -6,5 +6,5 @@ edition = "2021"
[dependencies]
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
libc = "0.2"
once_cell = "1.13.0"
lazy_static = "1.4"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }


@@ -2,7 +2,7 @@
//! make sure that we use the same dep version everywhere.
//! Otherwise, we might not see all metrics registered via
//! a default registry.
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
pub use prometheus::opts;
pub use prometheus::register;
@@ -41,22 +41,19 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
prometheus::gather()
}
static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
lazy_static! {
static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
"libmetrics_disk_io_bytes_total",
"Bytes written and read from disk, grouped by the operation (read|write)",
&["io_operation"]
)
.expect("Failed to register disk i/o bytes int gauge vec")
});
static MAXRSS_KB: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
.expect("Failed to register disk i/o bytes int gauge vec");
static ref MAXRSS_KB: IntGauge = register_int_gauge!(
"libmetrics_maxrss_kb",
"Memory usage (Maximum Resident Set Size)"
)
.expect("Failed to register maxrss_kb int gauge")
});
.expect("Failed to register maxrss_kb int gauge");
}
pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,

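Most of this compare is the mechanical conversion shown in the hunk above. A minimal, self-contained sketch of the two equivalent spellings, using a hypothetical counter rather than the gauges registered in `metrics`:

```rust
use lazy_static::lazy_static;
use once_cell::sync::Lazy;
use prometheus::{register_int_counter, IntCounter};

// Spelling 1: `lazy_static!` generates a hidden type that derefs to IntCounter.
lazy_static! {
    static ref REQUESTS_A: IntCounter =
        register_int_counter!("requests_a_total", "Requests, lazy_static spelling")
            .expect("failed to define a metric");
}

// Spelling 2: `once_cell::sync::Lazy` is an ordinary generic type; the closure
// runs on first access, just like the lazy_static version.
static REQUESTS_B: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!("requests_b_total", "Requests, once_cell spelling")
        .expect("failed to define a metric")
});

fn main() {
    REQUESTS_A.inc();
    REQUESTS_B.inc();
    println!("{} {}", REQUESTS_A.get(), REQUESTS_B.get());
}
```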

@@ -10,13 +10,13 @@ use std::io::{Read, Result, Write};
/// # use std::io::{Result, Read};
/// # use metrics::{register_int_counter, IntCounter};
/// # use metrics::CountedReader;
/// # use once_cell::sync::Lazy;
/// #
/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
/// # lazy_static::lazy_static! {
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
/// # "int_counter",
/// # "let's count something!"
/// # ).unwrap()
/// # });
/// # ).unwrap();
/// # }
/// #
/// fn do_some_reads(stream: impl Read, count: usize) -> Result<Vec<u8>> {
/// let mut reader = CountedReader::new(stream, |cnt| {
@@ -85,13 +85,13 @@ impl<T: Read> Read for CountedReader<'_, T> {
/// # use std::io::{Result, Write};
/// # use metrics::{register_int_counter, IntCounter};
/// # use metrics::CountedWriter;
/// # use once_cell::sync::Lazy;
/// #
/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
/// # lazy_static::lazy_static! {
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
/// # "int_counter",
/// # "let's count something!"
/// # ).unwrap()
/// # });
/// # ).unwrap();
/// # }
/// #
/// fn do_some_writes(stream: impl Write, payload: &[u8]) -> Result<()> {
/// let mut writer = CountedWriter::new(stream, |cnt| {


@@ -12,7 +12,7 @@ byteorder = "1.4.3"
anyhow = "1.0"
crc32c = "0.6.0"
hex = "0.4.3"
once_cell = "1.13.0"
lazy_static = "1.4"
log = "0.4.14"
memoffset = "0.6.2"
thiserror = "1.0"


@@ -2,7 +2,7 @@
//! Common utilities for dealing with PostgreSQL relation files.
//!
use crate::pg_constants;
use once_cell::sync::OnceCell;
use lazy_static::lazy_static;
use regex::Regex;
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
@@ -54,14 +54,11 @@ pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
/// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
///
pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
RELFILE_RE.get_or_init(|| {
Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
});
lazy_static! {
static ref RELFILE_RE: Regex =
Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
}
let caps = RELFILE_RE
.get()
.unwrap()
.captures(fname)
.ok_or(FilePathError::InvalidFileName)?;


@@ -13,30 +13,24 @@ use super::xlog_utils::*;
use super::XLogLongPageHeaderData;
use super::XLogPageHeaderData;
use super::XLogRecord;
use super::XLOG_PAGE_MAGIC;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use crc32c::*;
use log::*;
use std::cmp::min;
use std::num::NonZeroU32;
use thiserror::Error;
use utils::lsn::Lsn;
enum State {
WaitingForRecord,
ReassemblingRecord {
recordbuf: BytesMut,
contlen: NonZeroU32,
},
SkippingEverything {
skip_until_lsn: Lsn,
},
}
pub struct WalStreamDecoder {
lsn: Lsn,
startlsn: Lsn, // LSN where this record starts
contlen: u32,
padlen: u32,
inputbuf: BytesMut,
state: State,
/// buffer used to reassemble records that cross page boundaries.
recordbuf: BytesMut,
}
#[derive(Error, Debug, Clone)]
@@ -54,8 +48,13 @@ impl WalStreamDecoder {
pub fn new(lsn: Lsn) -> WalStreamDecoder {
WalStreamDecoder {
lsn,
startlsn: Lsn(0),
contlen: 0,
padlen: 0,
inputbuf: BytesMut::new(),
state: State::WaitingForRecord,
recordbuf: BytesMut::new(),
}
}
@@ -68,58 +67,6 @@ impl WalStreamDecoder {
self.inputbuf.extend_from_slice(buf);
}
fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError> {
let validate_impl = || {
if hdr.xlp_magic != XLOG_PAGE_MAGIC as u16 {
return Err(format!(
"invalid xlog page header: xlp_magic={}, expected {}",
hdr.xlp_magic, XLOG_PAGE_MAGIC
));
}
if hdr.xlp_pageaddr != self.lsn.0 {
return Err(format!(
"invalid xlog page header: xlp_pageaddr={}, expected {}",
hdr.xlp_pageaddr, self.lsn
));
}
match self.state {
State::WaitingForRecord => {
if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {
return Err(
"invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD".into(),
);
}
if hdr.xlp_rem_len != 0 {
return Err(format!(
"invalid xlog page header: xlp_rem_len={}, but it's not a contrecord",
hdr.xlp_rem_len
));
}
}
State::ReassemblingRecord { contlen, .. } => {
if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD == 0 {
return Err(
"invalid xlog page header: XLP_FIRST_IS_CONTRECORD expected, not found"
.into(),
);
}
if hdr.xlp_rem_len != contlen.get() {
return Err(format!(
"invalid xlog page header: xlp_rem_len={}, expected {}",
hdr.xlp_rem_len,
contlen.get()
));
}
}
State::SkippingEverything { .. } => {
panic!("Should not be validating page header in the SkippingEverything state");
}
};
Ok(())
};
validate_impl().map_err(|msg| WalDecodeError { msg, lsn: self.lsn })
}
/// Attempt to decode another WAL record from the input that has been fed to the
/// decoder so far.
///
@@ -129,121 +76,128 @@ impl WalStreamDecoder {
/// Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
///
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
let recordbuf;
// Run state machine that validates page headers, and reassembles records
// that cross page boundaries.
loop {
// parse and verify page boundaries as we go
// However, we may have to skip some page headers if we're processing the XLOG_SWITCH record or skipping padding for whatever reason.
match self.state {
State::WaitingForRecord | State::ReassemblingRecord { .. } => {
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
// parse long header
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
return Ok(None);
}
let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(
|e| WalDecodeError {
msg: format!("long header deserialization failed {}", e),
lsn: self.lsn,
},
)?;
self.validate_page_header(&hdr.std)?;
self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
} else if self.lsn.block_offset() == 0 {
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
return Ok(None);
}
let hdr =
XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
WalDecodeError {
msg: format!("header deserialization failed {}", e),
lsn: self.lsn,
}
})?;
self.validate_page_header(&hdr)?;
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
}
if self.padlen > 0 {
// We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
State::SkippingEverything { .. } => {}
}
match &mut self.state {
State::WaitingForRecord => {
// need to have at least the xl_tot_len field
if self.inputbuf.remaining() < 4 {
return Ok(None);
}
// peek xl_tot_len at the beginning of the record.
// FIXME: assumes little-endian
let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
return Err(WalDecodeError {
msg: format!("invalid xl_tot_len {}", xl_tot_len),
lsn: self.lsn,
});
}
// Fast path for the common case that the whole record fits on the page.
let pageleft = self.lsn.remaining_in_block() as u32;
if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
self.lsn += xl_tot_len as u64;
let recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
return Ok(Some(self.complete_record(recordbuf)?));
} else {
// Need to assemble the record from pieces. Remember the size of the
// record, and loop back. On next iteration, we will reach the 'else'
// branch below, and copy the part of the record that was on this page
// to 'recordbuf'. Subsequent iterations will skip page headers, and
// append the continuations from the next pages to 'recordbuf'.
self.state = State::ReassemblingRecord {
recordbuf: BytesMut::with_capacity(xl_tot_len as usize),
contlen: NonZeroU32::new(xl_tot_len).unwrap(),
}
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
// parse long header
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
return Ok(None);
}
State::ReassemblingRecord { recordbuf, contlen } => {
// we're continuing a record, possibly from previous page.
let pageleft = self.lsn.remaining_in_block() as u32;
// read the rest of the record, or as much as fits on this page.
let n = min(contlen.get(), pageleft) as usize;
if self.inputbuf.remaining() < n {
return Ok(None);
let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
WalDecodeError {
msg: format!("long header deserialization failed {}", e),
lsn: self.lsn,
}
})?;
recordbuf.put(self.inputbuf.split_to(n));
self.lsn += n as u64;
*contlen = match NonZeroU32::new(contlen.get() - n as u32) {
Some(x) => x,
None => {
// The record is now complete.
let recordbuf = std::mem::replace(recordbuf, BytesMut::new()).freeze();
return Ok(Some(self.complete_record(recordbuf)?));
}
}
if hdr.std.xlp_pageaddr != self.lsn.0 {
return Err(WalDecodeError {
msg: "invalid xlog segment header".into(),
lsn: self.lsn,
});
}
State::SkippingEverything { skip_until_lsn } => {
assert!(*skip_until_lsn >= self.lsn);
let n = skip_until_lsn.0 - self.lsn.0;
if self.inputbuf.remaining() < n as usize {
return Ok(None);
}
self.inputbuf.advance(n as usize);
self.lsn += n;
self.state = State::WaitingForRecord;
// TODO: verify the remaining fields in the header
self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
continue;
} else if self.lsn.block_offset() == 0 {
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
return Ok(None);
}
let hdr = XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
WalDecodeError {
msg: format!("header deserialization failed {}", e),
lsn: self.lsn,
}
})?;
if hdr.xlp_pageaddr != self.lsn.0 {
return Err(WalDecodeError {
msg: "invalid xlog page header".into(),
lsn: self.lsn,
});
}
// TODO: verify the remaining fields in the header
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
continue;
} else if self.contlen == 0 {
assert!(self.recordbuf.is_empty());
// need to have at least the xl_tot_len field
if self.inputbuf.remaining() < 4 {
return Ok(None);
}
// peek xl_tot_len at the beginning of the record.
// FIXME: assumes little-endian
self.startlsn = self.lsn;
let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
return Err(WalDecodeError {
msg: format!("invalid xl_tot_len {}", xl_tot_len),
lsn: self.lsn,
});
}
// Fast path for the common case that the whole record fits on the page.
let pageleft = self.lsn.remaining_in_block() as u32;
if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
// Take the record from the 'inputbuf', and validate it.
recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
self.lsn += xl_tot_len as u64;
break;
} else {
// Need to assemble the record from pieces. Remember the size of the
// record, and loop back. On next iteration, we will reach the 'else'
// branch below, and copy the part of the record that was on this page
// to 'recordbuf'. Subsequent iterations will skip page headers, and
// append the continuations from the next pages to 'recordbuf'.
self.recordbuf.reserve(xl_tot_len as usize);
self.contlen = xl_tot_len;
continue;
}
} else {
// we're continuing a record, possibly from previous page.
let pageleft = self.lsn.remaining_in_block() as u32;
// read the rest of the record, or as much as fits on this page.
let n = min(self.contlen, pageleft) as usize;
if self.inputbuf.remaining() < n {
return Ok(None);
}
self.recordbuf.put(self.inputbuf.split_to(n));
self.lsn += n as u64;
self.contlen -= n as u32;
if self.contlen == 0 {
// The record is now complete.
recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new()).freeze();
break;
}
continue;
}
}
}
fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError> {
// We now have a record in the 'recordbuf' local variable.
let xlogrec =
XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]).map_err(|e| {
@@ -265,20 +219,18 @@ impl WalStreamDecoder {
// XLOG_SWITCH records are special. If we see one, we need to skip
// to the next WAL segment.
let next_lsn = if xlogrec.is_xlog_switch_record() {
if xlogrec.is_xlog_switch_record() {
trace!("saw xlog switch record at {}", self.lsn);
self.lsn + self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64)
self.padlen = self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
} else {
// Pad to an 8-byte boundary
self.lsn.align()
};
self.state = State::SkippingEverything {
skip_until_lsn: next_lsn,
};
self.padlen = self.lsn.calc_padding(8u32) as u32;
}
// We should return LSN of the next record, not the last byte of this record or
// the byte immediately after. Note that this handles both XLOG_SWITCH and usual
// records, the former "spans" until the next WAL segment (see test_xlog_switch).
Ok((next_lsn, recordbuf))
let result = (self.lsn + self.padlen as u64, recordbuf);
Ok(Some(result))
}
}

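The enum-based side of this hunk models the decoder as an explicit state machine (`WaitingForRecord` → `ReassemblingRecord` → back, with `SkippingEverything` for padding and XLOG_SWITCH); the other side keeps `contlen`/`padlen`/`recordbuf` fields. A heavily reduced sketch of the enum shape, with page-header parsing, validation, and LSN tracking elided:

```rust
use bytes::{Buf, BufMut, Bytes, BytesMut};
use std::num::NonZeroU32;

enum State {
    WaitingForRecord,
    ReassemblingRecord { recordbuf: BytesMut, contlen: NonZeroU32 },
    // In the real decoder this carries a skip_until_lsn for padding / XLOG_SWITCH.
    SkippingEverything,
}

struct Decoder {
    state: State,
    inputbuf: BytesMut,
}

impl Decoder {
    /// Try to pull one complete length-prefixed record out of `inputbuf`;
    /// returns None if more input is needed.
    fn poll(&mut self) -> Option<Bytes> {
        loop {
            match &mut self.state {
                State::WaitingForRecord => {
                    if self.inputbuf.remaining() < 4 {
                        return None;
                    }
                    // Peek the total record length (xl_tot_len in the real decoder);
                    // the length prefix is part of the record and is not consumed here.
                    let tot_len = (&self.inputbuf[0..4]).get_u32_le();
                    let contlen = NonZeroU32::new(tot_len)?; // real code rejects bad lengths
                    self.state = State::ReassemblingRecord {
                        recordbuf: BytesMut::with_capacity(tot_len as usize),
                        contlen,
                    };
                }
                State::ReassemblingRecord { recordbuf, contlen } => {
                    let n = (contlen.get() as usize).min(self.inputbuf.remaining());
                    if n == 0 {
                        return None;
                    }
                    recordbuf.put(self.inputbuf.split_to(n));
                    match NonZeroU32::new(contlen.get() - n as u32) {
                        Some(rest) => *contlen = rest,
                        None => {
                            // Record complete: hand it out and go back to waiting.
                            let rec = std::mem::take(recordbuf).freeze();
                            self.state = State::WaitingForRecord;
                            return Some(rec);
                        }
                    }
                }
                State::SkippingEverything => {
                    // The real decoder advances its LSN up to skip_until_lsn here.
                    self.state = State::WaitingForRecord;
                }
            }
        }
    }
}

fn main() {
    let mut d = Decoder { state: State::WaitingForRecord, inputbuf: BytesMut::new() };
    // An 8-byte "record": 4-byte little-endian length prefix plus 4 payload bytes, fed in two chunks.
    d.inputbuf.extend_from_slice(&8u32.to_le_bytes());
    assert!(d.poll().is_none()); // only half of the record has arrived
    d.inputbuf.extend_from_slice(b"data");
    let rec = d.poll().expect("complete record");
    assert_eq!(rec.len(), 8);
}
```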

@@ -16,7 +16,7 @@ use crate::XLogRecord;
use crate::XLOG_PAGE_MAGIC;
use crate::pg_constants::WAL_SEGMENT_SIZE;
use anyhow::{anyhow, bail, ensure};
use anyhow::{bail, ensure};
use byteorder::{ByteOrder, LittleEndian};
use bytes::BytesMut;
use bytes::{Buf, Bytes};
@@ -159,7 +159,7 @@ fn find_end_of_wal_segment(
let mut buf = [0u8; XLOG_BLCKSZ];
let file_name = XLogFileName(tli, segno, wal_seg_size);
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
let mut file = File::open(data_dir.join(file_name.clone() + ".partial"))?;
let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
file.seek(SeekFrom::Start(offs as u64))?;
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
@@ -396,13 +396,10 @@ pub fn find_end_of_wal(
let mut high_tli: TimeLineID = 0;
let mut high_ispartial = false;
for entry in fs::read_dir(data_dir)?.flatten() {
for entry in fs::read_dir(data_dir).unwrap().flatten() {
let ispartial: bool;
let entry_name = entry.file_name();
let fname = entry_name
.to_str()
.ok_or_else(|| anyhow!("Invalid file name"))?;
let fname = entry_name.to_str().unwrap();
/*
* Check if the filename looks like an xlog file, or a .partial file.
*/
@@ -414,7 +411,7 @@ pub fn find_end_of_wal(
continue;
}
let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
if !ispartial && entry.metadata()?.len() != wal_seg_size as u64 {
if !ispartial && entry.metadata().unwrap().len() != wal_seg_size as u64 {
continue;
}
if segno > high_segno

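One side of this hunk unwraps I/O and UTF-8 failures, the other propagates them through `anyhow`. A tiny, self-contained sketch of the propagating style, using a hypothetical `wal_file_names` helper rather than the real `find_end_of_wal`:

```rust
use anyhow::anyhow;
use std::fs;
use std::path::Path;

/// List file names in a directory, returning an error instead of panicking
/// on unreadable directories or non-UTF-8 names.
fn wal_file_names(dir: &Path) -> anyhow::Result<Vec<String>> {
    let mut names = Vec::new();
    for entry in fs::read_dir(dir)?.flatten() {
        let entry_name = entry.file_name();
        let fname = entry_name
            .to_str()
            .ok_or_else(|| anyhow!("Invalid file name"))?;
        names.push(fname.to_string());
    }
    Ok(names)
}

fn main() -> anyhow::Result<()> {
    for name in wal_file_names(Path::new("."))? {
        println!("{name}");
    }
    Ok(())
}
```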

@@ -10,7 +10,7 @@ anyhow = "1.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
once_cell = "1.13.0"
once_cell = "1.8.0"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres_ffi = { path = "../" }
tempfile = "3.2"


@@ -7,7 +7,7 @@ edition = "2021"
anyhow = { version = "1.0", features = ["backtrace"] }
async-trait = "0.1"
metrics = { version = "0.1", path = "../metrics" }
once_cell = "1.13.0"
once_cell = "1.8.0"
rusoto_core = "0.48"
rusoto_s3 = "0.48"
serde = { version = "1.0", features = ["derive"] }


@@ -66,9 +66,6 @@ pub trait RemoteStorage: Send + Sync {
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
/// so this method doesnt need to.
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,


@@ -116,7 +116,7 @@ impl RemoteStorage for LocalFs {
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(prefix),
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
None => Cow::Borrowed(&self.storage_root),
};
get_all_files(path.as_ref(), false).await


@@ -171,25 +171,17 @@ impl S3Bucket {
let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
// session token is used when authorizing through sso
// which is typically the case when testing locally on developer machine
let session_token = std::env::var("AWS_SESSION_TOKEN").ok();
let client = if access_key_id.is_none() && secret_access_key.is_none() {
debug!("Using IAM-based AWS access");
S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
} else {
debug!(
"Using credentials-based AWS access. Session token is set: {}",
session_token.is_some()
);
debug!("Using credentials-based AWS access");
S3Client::new_with(
request_dispatcher,
StaticProvider::new(
StaticProvider::new_minimal(
access_key_id.unwrap_or_default(),
secret_access_key.unwrap_or_default(),
session_token,
None,
),
region,
)
@@ -312,24 +304,32 @@ impl RemoteStorage for S3Bucket {
Ok(document_keys)
}
/// See the doc for `RemoteStorage::list_prefixes`
/// Note: it wont include empty "directories"
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
// get the passed prefix or if it is not set use prefix_in_bucket value
let list_prefix = prefix
.map(|p| p.0)
.or_else(|| self.prefix_in_bucket.clone())
.map(|mut p| {
let list_prefix = match prefix {
Some(prefix) => {
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
// if there is no trailing / in default prefix and
// supplied prefix does not start with "/" insert it
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
{
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
prefix_in_bucket.push_str(&prefix.0);
// required to end with a separator
// otherwise request will return only the entry of a prefix
if !p.ends_with(S3_PREFIX_SEPARATOR) {
p.push(S3_PREFIX_SEPARATOR);
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
p
});
Some(prefix_in_bucket)
}
None => self.prefix_in_bucket.clone(),
};
let mut document_keys = Vec::new();

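The fiddly part of the `list_prefixes` hunk is how the bucket-level prefix and the caller-supplied prefix are joined with `/` and forced to end with a separator; without the trailing separator, S3 would return only the entry for the prefix itself. A standalone sketch of that joining rule as a pure function with hypothetical names:

```rust
const S3_PREFIX_SEPARATOR: char = '/';

/// Join an optional bucket-level prefix with an optional caller prefix,
/// ensuring a trailing separator so the listing returns the prefix's
/// children rather than the prefix entry itself.
fn list_prefix(prefix_in_bucket: Option<&str>, prefix: Option<&str>) -> Option<String> {
    match prefix {
        Some(p) => {
            let mut joined = prefix_in_bucket.unwrap_or_default().to_string();
            // Insert a separator unless one side already provides it.
            if !(joined.ends_with(S3_PREFIX_SEPARATOR) || p.starts_with(S3_PREFIX_SEPARATOR)) {
                joined.push(S3_PREFIX_SEPARATOR);
            }
            joined.push_str(p);
            if !joined.ends_with(S3_PREFIX_SEPARATOR) {
                joined.push(S3_PREFIX_SEPARATOR);
            }
            Some(joined)
        }
        None => prefix_in_bucket.map(str::to_string),
    }
}

fn main() {
    assert_eq!(
        list_prefix(Some("pageserver"), Some("tenants")),
        Some("pageserver/tenants/".to_string())
    );
    assert_eq!(list_prefix(None, Some("/tenants")), Some("/tenants/".to_string()));
    assert_eq!(list_prefix(Some("pageserver/"), None), Some("pageserver/".to_string()));
    assert_eq!(list_prefix(None, None), None);
}
```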

@@ -8,6 +8,7 @@ anyhow = "1.0"
bincode = "1.3"
bytes = "1.0.1"
hyper = { version = "0.14.7", features = ["full"] }
lazy_static = "1.4.0"
pin-project-lite = "0.2.7"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
@@ -27,8 +28,6 @@ rustls = "0.20.2"
rustls-split = "0.3.0"
git-version = "0.3.5"
serde_with = "1.12.0"
once_cell = "1.13.0"
metrics = { path = "../metrics" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }


@@ -4,8 +4,8 @@ use crate::zid::ZTenantId;
use anyhow::anyhow;
use hyper::header::AUTHORIZATION;
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
use lazy_static::lazy_static;
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
use once_cell::sync::Lazy;
use routerify::ext::RequestExt;
use routerify::RequestInfo;
use routerify::{Middleware, Router, RouterBuilder, RouterService};
@@ -16,13 +16,13 @@ use std::net::TcpListener;
use super::error::ApiError;
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
lazy_static! {
static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
"libmetrics_metric_handler_requests_total",
"Number of metric requests made"
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
info!("{} {} {}", info.method(), info.uri().path(), res.status(),);


@@ -7,7 +7,7 @@ use std::{
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use utils::postgres_backend::{AuthType, Handler, PostgresBackend};
@@ -19,15 +19,16 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
(server_stream, client_stream)
}
static KEY: Lazy<rustls::PrivateKey> = Lazy::new(|| {
let mut cursor = Cursor::new(include_bytes!("key.pem"));
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
});
static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
});
lazy_static! {
static ref KEY: rustls::PrivateKey = {
let mut cursor = Cursor::new(include_bytes!("key.pem"));
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
};
static ref CERT: rustls::Certificate = {
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
};
}
#[test]
fn ssl() {


@@ -884,7 +884,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
match sub_match.subcommand() {
Some(("start", start_match)) => {
if let Err(e) = pageserver.start(&pageserver_config_overrides(start_match)) {
eprintln!("pageserver start failed: {e}");
eprintln!("pageserver start failed: {}", e);
exit(1);
}
}
@@ -906,19 +906,10 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
}
if let Err(e) = pageserver.start(&pageserver_config_overrides(restart_match)) {
eprintln!("pageserver start failed: {e}");
eprintln!("pageserver start failed: {}", e);
exit(1);
}
}
Some(("status", _)) => match PageServerNode::from_env(env).check_status() {
Ok(_) => println!("Page server is up and running"),
Err(err) => {
eprintln!("Page server is not available: {}", err);
exit(1);
}
},
Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
None => bail!("no pageserver subcommand provided"),
}


@@ -21,6 +21,7 @@ futures = "0.3.13"
hex = "0.4.3"
hyper = "0.14"
itertools = "0.10.3"
lazy_static = "1.4.0"
clap = "3.0"
daemonize = "0.4.1"
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
@@ -47,7 +48,7 @@ tracing = "0.1.27"
signal-hook = "0.3.10"
url = "2"
nix = "0.23"
once_cell = "1.13.0"
once_cell = "1.8.0"
crossbeam-utils = "0.8.5"
fail = "0.5.0"
git-version = "0.3.5"


@@ -59,7 +59,6 @@ pub mod defaults {
# [tenant_config]
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
@@ -453,13 +452,6 @@ impl PageServerConf {
Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
}
if let Some(checkpoint_timeout) = item.get("checkpoint_timeout") {
t_conf.checkpoint_timeout = Some(parse_toml_duration(
"checkpoint_timeout",
checkpoint_timeout,
)?);
}
if let Some(compaction_target_size) = item.get("compaction_target_size") {
t_conf.compaction_target_size = Some(parse_toml_u64(
"compaction_target_size",


@@ -32,7 +32,6 @@ pub struct TenantCreateRequest {
#[serde_as(as = "Option<DisplayFromStr>")]
pub new_tenant_id: Option<ZTenantId>,
pub checkpoint_distance: Option<u64>,
pub checkpoint_timeout: Option<String>,
pub compaction_target_size: Option<u64>,
pub compaction_period: Option<String>,
pub compaction_threshold: Option<usize>,
@@ -71,7 +70,6 @@ pub struct TenantConfigRequest {
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub checkpoint_distance: Option<u64>,
pub checkpoint_timeout: Option<String>,
pub compaction_target_size: Option<u64>,
pub compaction_period: Option<String>,
pub compaction_threshold: Option<usize>,
@@ -89,7 +87,6 @@ impl TenantConfigRequest {
TenantConfigRequest {
tenant_id,
checkpoint_distance: None,
checkpoint_timeout: None,
compaction_target_size: None,
compaction_period: None,
compaction_threshold: None,


@@ -560,8 +560,6 @@ components:
type: string
checkpoint_distance:
type: integer
checkpoint_timeout:
type: string
compaction_period:
type: string
compaction_threshold:
@@ -580,8 +578,6 @@ components:
type: string
checkpoint_distance:
type: integer
checkpoint_timeout:
type: string
compaction_period:
type: string
compaction_threshold:


@@ -623,11 +623,6 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
}
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
tenant_conf.checkpoint_timeout =
Some(humantime::parse_duration(&checkpoint_timeout).map_err(ApiError::from_err)?);
}
tenant_conf.compaction_target_size = request_data.compaction_target_size;
tenant_conf.compaction_threshold = request_data.compaction_threshold;
@@ -688,10 +683,6 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
}
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
tenant_conf.checkpoint_timeout =
Some(humantime::parse_duration(&checkpoint_timeout).map_err(ApiError::from_err)?);
}
tenant_conf.compaction_target_size = request_data.compaction_target_size;
tenant_conf.compaction_threshold = request_data.compaction_threshold;

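In the hunk above, `checkpoint_timeout` travels through the HTTP API as a human-readable string and is turned into a `Duration` with `humantime::parse_duration`. A one-off sketch of that conversion:

```rust
use std::time::Duration;

fn main() {
    // "10m" is the same format the settings documentation above uses for checkpoint_timeout.
    let timeout = humantime::parse_duration("10m").expect("invalid duration string");
    assert_eq!(timeout, Duration::from_secs(600));
}
```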

@@ -37,7 +37,7 @@ pub fn import_timeline_from_postgres_datadir<T: DatadirTimeline>(
// TODO this shoud be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
// Then fishing out pg_control would be unnecessary
let mut modification = tline.begin_modification(lsn);
let mut modification = tline.begin_modification();
modification.init_empty()?;
// Import all but pg_wal
@@ -56,12 +56,12 @@ pub fn import_timeline_from_postgres_datadir<T: DatadirTimeline>(
if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
pg_control = Some(control_file);
}
modification.flush()?;
modification.flush(lsn)?;
}
}
// We're done importing all the data files.
modification.commit()?;
modification.commit(lsn)?;
// We expect the Postgres server to be shut down cleanly.
let pg_control = pg_control.context("pg_control file not found")?;
@@ -267,7 +267,7 @@ fn import_wal<T: DatadirTimeline>(
waldecoder.feed_bytes(&buf);
let mut nrecords = 0;
let mut modification = tline.begin_modification(endpoint);
let mut modification = tline.begin_modification();
let mut decoded = DecodedWALRecord::default();
while last_lsn <= endpoint {
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
@@ -301,7 +301,7 @@ pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
base_lsn: Lsn,
) -> Result<()> {
info!("importing base at {}", base_lsn);
let mut modification = tline.begin_modification(base_lsn);
let mut modification = tline.begin_modification();
modification.init_empty()?;
let mut pg_control: Option<ControlFileData> = None;
@@ -319,7 +319,7 @@ pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
// We found the pg_control file.
pg_control = Some(res);
}
modification.flush()?;
modification.flush(base_lsn)?;
}
tar::EntryType::Directory => {
debug!("directory {:?}", file_path);
@@ -333,7 +333,7 @@ pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
// sanity check: ensure that pg_control is loaded
let _pg_control = pg_control.context("pg_control file not found")?;
modification.commit()?;
modification.commit(base_lsn)?;
Ok(())
}
@@ -385,7 +385,7 @@ pub fn import_wal_from_tar<T: DatadirTimeline, Reader: Read>(
waldecoder.feed_bytes(&bytes[offset..]);
let mut modification = tline.begin_modification(end_lsn);
let mut modification = tline.begin_modification();
let mut decoded = DecodedWALRecord::default();
while last_lsn <= end_lsn {
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {


@@ -5,7 +5,7 @@
//! get/put call, walking back the timeline branching history as needed.
//!
//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
//! directory. See docs/pageserver-storage.md for how the files are managed.
//! directory. See layered_repository/README for how the files are managed.
//! In addition to the layer files, there is a metadata file in the same
//! directory that contains information about the timeline, in particular its
//! parent timeline, and the last LSN that has been written to disk.
@@ -433,13 +433,6 @@ impl LayeredRepository {
.unwrap_or(self.conf.default_tenant_conf.checkpoint_distance)
}
pub fn get_checkpoint_timeout(&self) -> Duration {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.checkpoint_timeout
.unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout)
}
pub fn get_compaction_target_size(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf


@@ -5,7 +5,7 @@
use crate::page_cache;
use crate::page_cache::{ReadBufResult, PAGE_SZ};
use bytes::Bytes;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::ops::{Deref, DerefMut};
use std::os::unix::fs::FileExt;
use std::sync::atomic::AtomicU64;
@@ -117,7 +117,9 @@ where
}
}
static NEXT_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
lazy_static! {
static ref NEXT_ID: AtomicU64 = AtomicU64::new(1);
}
/// An adapter for reading a (virtual) file using the page cache.
///


@@ -8,7 +8,7 @@ use crate::page_cache;
use crate::page_cache::PAGE_SZ;
use crate::page_cache::{ReadBufResult, WriteBufResult};
use crate::virtual_file::VirtualFile;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::cmp::min;
use std::collections::HashMap;
use std::fs::OpenOptions;
@@ -21,15 +21,15 @@ use utils::zid::{ZTenantId, ZTimelineId};
use std::os::unix::fs::FileExt;
///
/// This is the global cache of file descriptors (File objects).
///
static EPHEMERAL_FILES: Lazy<RwLock<EphemeralFiles>> = Lazy::new(|| {
RwLock::new(EphemeralFiles {
lazy_static! {
///
/// This is the global cache of file descriptors (File objects).
///
static ref EPHEMERAL_FILES: RwLock<EphemeralFiles> = RwLock::new(EphemeralFiles {
next_file_id: 1,
files: HashMap::new(),
})
});
});
}
pub struct EphemeralFiles {
next_file_id: u64,


@@ -15,18 +15,19 @@ use crate::layered_repository::storage_layer::Layer;
use crate::layered_repository::storage_layer::{range_eq, range_overlaps};
use crate::repository::Key;
use anyhow::Result;
use lazy_static::lazy_static;
use metrics::{register_int_gauge, IntGauge};
use once_cell::sync::Lazy;
use std::collections::VecDeque;
use std::ops::Range;
use std::sync::Arc;
use tracing::*;
use utils::lsn::Lsn;
static NUM_ONDISK_LAYERS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
.expect("failed to define a metric")
});
lazy_static! {
static ref NUM_ONDISK_LAYERS: IntGauge =
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
.expect("failed to define a metric");
}
///
/// LayerMap tracks what layers exist on a timeline.


@@ -4,11 +4,11 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::Bytes;
use fail::fail_point;
use itertools::Itertools;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use tracing::*;
use std::cmp::{max, min, Ordering};
use std::collections::{hash_map::Entry, HashMap, HashSet};
use std::collections::HashSet;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io::Write;
@@ -16,7 +16,7 @@ use std::ops::{Deref, Range};
use std::path::PathBuf;
use std::sync::atomic::{self, AtomicBool, AtomicIsize, Ordering as AtomicOrdering};
use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, TryLockError};
use std::time::{Duration, Instant, SystemTime};
use std::time::{Duration, SystemTime};
use metrics::{
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge_vec,
@@ -38,9 +38,7 @@ use crate::layered_repository::{
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::pgdatadir_mapping::BlockNumber;
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::reltag::RelTag;
use crate::tenant_config::TenantConfOpt;
use crate::DatadirTimeline;
@@ -60,102 +58,76 @@ use crate::walredo::WalRedoManager;
use crate::CheckpointConfig;
use crate::{page_cache, storage_sync};
/// Prometheus histogram buckets (in seconds) that capture the majority of
/// latencies in the microsecond range but also extend far enough up to distinguish
/// "bad" from "really bad".
fn get_buckets_for_critical_operations() -> Vec<f64> {
let buckets_per_digit = 5;
let min_exponent = -6;
let max_exponent = 2;
let mut buckets = vec![];
// Compute 10^(exp / buckets_per_digit) instead of 10^(1/buckets_per_digit)^exp
// because it's more numerically stable and doesn't result in numbers like 9.999999
for exp in (min_exponent * buckets_per_digit)..=(max_exponent * buckets_per_digit) {
buckets.push(10_f64.powf(exp as f64 / buckets_per_digit as f64))
}
buckets
// Metrics collected on operations on the storage repository.
lazy_static! {
pub static ref STORAGE_TIME: HistogramVec = register_histogram_vec!(
"pageserver_storage_operations_seconds",
"Time spent on storage operations",
&["operation", "tenant_id", "timeline_id"]
)
.expect("failed to define a metric");
}
// Metrics collected on operations on the storage repository.
pub static STORAGE_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_storage_operations_seconds",
"Time spent on storage operations",
&["operation", "tenant_id", "timeline_id"],
get_buckets_for_critical_operations(),
)
.expect("failed to define a metric")
});
// Metrics collected on operations on the storage repository.
static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref RECONSTRUCT_TIME: HistogramVec = register_histogram_vec!(
"pageserver_getpage_reconstruct_seconds",
"Time spent in reconstruct_value",
&["tenant_id", "timeline_id"],
get_buckets_for_critical_operations(),
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
lazy_static! {
static ref MATERIALIZED_PAGE_CACHE_HIT: IntCounterVec = register_int_counter_vec!(
"pageserver_materialized_cache_hits_total",
"Number of cache hits from materialized page cache",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("failed to define a metric");
static ref WAIT_LSN_TIME: HistogramVec = register_histogram_vec!(
"pageserver_wait_lsn_seconds",
"Time spent waiting for WAL to arrive",
&["tenant_id", "timeline_id"],
get_buckets_for_critical_operations(),
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
lazy_static! {
static ref LAST_RECORD_LSN: IntGaugeVec = register_int_gauge_vec!(
"pageserver_last_record_lsn",
"Last record LSN grouped by timeline",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
// Metrics for determining timeline's physical size.
// A layered timeline's physical is defined as the total size of
// (delta/image) layer files on disk.
static CURRENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
lazy_static! {
static ref CURRENT_PHYSICAL_SIZE: UIntGaugeVec = register_uint_gauge_vec!(
"pageserver_current_physical_size",
"Current physical size grouped by timeline",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
// or in testing they estimate how much we would upload if we did.
static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
lazy_static! {
static ref NUM_PERSISTENT_FILES_CREATED: IntCounter = register_int_counter!(
"pageserver_created_persistent_files_total",
"Number of files created that are meant to be uploaded to cloud storage",
)
.expect("failed to define a metric")
});
static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.expect("failed to define a metric");
static ref PERSISTENT_BYTES_WRITTEN: IntCounter = register_int_counter!(
"pageserver_written_persistent_bytes_total",
"Total bytes written that are meant to be uploaded to cloud storage",
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
#[derive(Clone)]
pub enum LayeredTimelineEntry {
@@ -233,8 +205,6 @@ pub struct LayeredTimeline {
pub layers: RwLock<LayerMap>,
last_freeze_at: AtomicLsn,
// Atomic would be more appropriate here.
last_freeze_ts: RwLock<Instant>,
// WAL redo manager
walredo_mgr: Arc<dyn WalRedoManager + Sync + Send>,
@@ -325,9 +295,6 @@ pub struct LayeredTimeline {
/// or None if WAL receiver has not received anything for this timeline
/// yet.
pub last_received_wal: Mutex<Option<WalReceiverInfo>>,
/// Relation size cache
rel_size_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,
}
pub struct WalReceiverInfo {
@@ -339,42 +306,7 @@ pub struct WalReceiverInfo {
/// Inherit all the functions from DatadirTimeline, to provide the
/// functionality to store PostgreSQL relations, SLRUs, etc. in a
/// LayeredTimeline.
impl DatadirTimeline for LayeredTimeline {
fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber> {
let rel_size_cache = self.rel_size_cache.read().unwrap();
if let Some((cached_lsn, nblocks)) = rel_size_cache.get(tag) {
if lsn >= *cached_lsn {
return Some(*nblocks);
}
}
None
}
fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {
let mut rel_size_cache = self.rel_size_cache.write().unwrap();
match rel_size_cache.entry(tag) {
Entry::Occupied(mut entry) => {
let cached_lsn = entry.get_mut();
if lsn >= cached_lsn.0 {
*cached_lsn = (lsn, nblocks);
}
}
Entry::Vacant(entry) => {
entry.insert((lsn, nblocks));
}
}
}
fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) {
let mut rel_size_cache = self.rel_size_cache.write().unwrap();
rel_size_cache.insert(tag, (lsn, nblocks));
}
fn remove_cached_rel_size(&self, tag: &RelTag) {
let mut rel_size_cache = self.rel_size_cache.write().unwrap();
rel_size_cache.remove(tag);
}
}
impl DatadirTimeline for LayeredTimeline {}
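The fuller impl above (one side of this diff) gates the relation-size cache on LSN: a cached (lsn, nblocks) entry only answers lookups at or after the LSN it was recorded at, because the relation may have had a different size earlier in history, and updates never overwrite a newer entry with an older one. A compact stand-alone sketch of that rule, with u32 standing in for RelTag and u64 for Lsn (assumed simplifications, illustrative only):

use std::collections::HashMap;

struct RelSizeCache {
    // relation -> (lsn the size was recorded at, size in blocks)
    entries: HashMap<u32, (u64, u32)>,
}

impl RelSizeCache {
    // Serve a lookup only if the request is at or after the cached LSN.
    fn get(&self, rel: u32, lsn: u64) -> Option<u32> {
        match self.entries.get(&rel) {
            Some((cached_lsn, nblocks)) if lsn >= *cached_lsn => Some(*nblocks),
            _ => None,
        }
    }

    // Keep the newest known size: ignore updates older than what is cached.
    fn update(&mut self, rel: u32, lsn: u64, nblocks: u32) {
        let entry = self.entries.entry(rel).or_insert((lsn, nblocks));
        if lsn >= entry.0 {
            *entry = (lsn, nblocks);
        }
    }
}

fn main() {
    let mut cache = RelSizeCache { entries: HashMap::new() };
    cache.update(1, 100, 8);
    assert_eq!(cache.get(1, 120), Some(8)); // at or after the recorded LSN: hit
    assert_eq!(cache.get(1, 90), None);     // earlier LSN: the size may have differed, miss
}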
///
/// Information about how much history needs to be retained, needed by
@@ -445,6 +377,8 @@ impl Timeline for LayeredTimeline {
/// Look up the value with the given a key
fn get(&self, key: Key, lsn: Lsn) -> Result<Bytes> {
debug_assert!(lsn <= self.get_last_record_lsn());
// Check the page cache. We will get back the most recent page with lsn <= `lsn`.
// The cached image can be returned directly if there is no WAL between the cached image
// and requested LSN. The cached image can also be used to reduce the amount of WAL needed
@@ -562,13 +496,6 @@ impl LayeredTimeline {
.unwrap_or(self.conf.default_tenant_conf.checkpoint_distance)
}
fn get_checkpoint_timeout(&self) -> Duration {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.checkpoint_timeout
.unwrap_or(self.conf.default_tenant_conf.checkpoint_timeout)
}
fn get_compaction_target_size(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
@@ -658,7 +585,6 @@ impl LayeredTimeline {
disk_consistent_lsn: AtomicLsn::new(metadata.disk_consistent_lsn().0),
last_freeze_at: AtomicLsn::new(metadata.disk_consistent_lsn().0),
last_freeze_ts: RwLock::new(Instant::now()),
ancestor_timeline: ancestor,
ancestor_lsn: metadata.ancestor_lsn(),
@@ -692,7 +618,6 @@ impl LayeredTimeline {
repartition_threshold: 0,
last_received_wal: Mutex::new(None),
rel_size_cache: RwLock::new(HashMap::new()),
};
result.repartition_threshold = result.get_checkpoint_distance() / 10;
result
@@ -1104,11 +1029,8 @@ impl LayeredTimeline {
}
///
/// Check if more than 'checkpoint_distance' of WAL has been accumulated in
/// the in-memory layer, and initiate flushing it if so.
///
/// Also flush after a period of time without new data -- it helps
/// safekeepers to regard pageserver as caught up and suspend activity.
/// Check if more than 'checkpoint_distance' of WAL has been accumulated
/// in the in-memory layer, and initiate flushing it if so.
///
pub fn check_checkpoint_distance(self: &Arc<LayeredTimeline>) -> Result<()> {
let last_lsn = self.get_last_record_lsn();
@@ -1116,27 +1038,21 @@ impl LayeredTimeline {
if let Some(open_layer) = &layers.open_layer {
let open_layer_size = open_layer.size()?;
drop(layers);
let last_freeze_at = self.last_freeze_at.load();
let last_freeze_ts = *(self.last_freeze_ts.read().unwrap());
let distance = last_lsn.widening_sub(last_freeze_at);
let distance = last_lsn.widening_sub(self.last_freeze_at.load());
// Checkpointing the open layer can be triggered by layer size or LSN range.
// S3 has a 5 GB limit on the size of one upload (without multi-part upload), and
// we want to stay below that with a big margin. The LSN distance determines how
// much WAL the safekeepers need to store.
if distance >= self.get_checkpoint_distance().into()
|| open_layer_size > self.get_checkpoint_distance()
|| (distance > 0 && last_freeze_ts.elapsed() >= self.get_checkpoint_timeout())
{
info!(
"check_checkpoint_distance {}, layer size {}, elapsed since last flush {:?}",
distance,
open_layer_size,
last_freeze_ts.elapsed()
"check_checkpoint_distance {}, layer size {}",
distance, open_layer_size
);
self.freeze_inmem_layer(true);
self.last_freeze_at.store(last_lsn);
*(self.last_freeze_ts.write().unwrap()) = Instant::now();
// Launch a thread to flush the frozen layer to disk, unless
// a thread was already running. (If the thread was running
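For orientation, the freeze decision built up in the hunk above reduces to a single predicate; the checkpoint_timeout clause is the part present on only one side of this diff. A hedged stand-alone restatement (the signed wide result of widening_sub and the exact parameter types are assumptions):

use std::time::Duration;

fn should_freeze(
    distance: i128,           // last_lsn - last_freeze_at, may be negative
    open_layer_size: u64,     // bytes currently in the open in-memory layer
    elapsed: Duration,        // time since the last freeze
    checkpoint_distance: u64,
    checkpoint_timeout: Duration,
) -> bool {
    distance >= checkpoint_distance as i128
        || open_layer_size > checkpoint_distance
        // Time-based clause (one side of this diff only): flush quiet timelines
        // eventually so safekeepers can regard the pageserver as caught up.
        || (distance > 0 && elapsed >= checkpoint_timeout)
}

fn main() {
    let quarter_gib: u64 = 256 * 1024 * 1024;
    assert!(should_freeze(quarter_gib as i128, 0, Duration::ZERO, quarter_gib, Duration::from_secs(600)));
    assert!(!should_freeze(1, 1, Duration::from_secs(1), quarter_gib, Duration::from_secs(600)));
}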

View File

@@ -22,7 +22,7 @@ pub mod walreceiver;
pub mod walrecord;
pub mod walredo;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use tracing::info;
use crate::thread_mgr::ThreadKind;
@@ -42,14 +42,14 @@ pub const STORAGE_FORMAT_VERSION: u16 = 3;
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
lazy_static! {
static ref LIVE_CONNECTIONS_COUNT: IntGaugeVec = register_int_gauge_vec!(
"pageserver_live_connections",
"Number of live network connections",
&["pageserver_connection_kind"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
pub const LOG_FILE_NAME: &str = "pageserver.log";
@@ -93,56 +93,3 @@ pub fn shutdown_pageserver(exit_code: i32) {
info!("Shut down successfully completed");
std::process::exit(exit_code);
}
const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;
const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;
async fn exponential_backoff(n: u32, base_increment: f64, max_seconds: f64) {
let backoff_duration_seconds =
exponential_backoff_duration_seconds(n, base_increment, max_seconds);
if backoff_duration_seconds > 0.0 {
info!(
"Backoff: waiting {backoff_duration_seconds} seconds before processing with the task",
);
tokio::time::sleep(std::time::Duration::from_secs_f64(backoff_duration_seconds)).await;
}
}
fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 {
if n == 0 {
0.0
} else {
(1.0 + base_increment).powf(f64::from(n)).min(max_seconds)
}
}
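With the defaults above (base increment 0.1, cap 3.0 seconds) the helper produces a gently growing wait curve. A tiny runnable illustration — the formula is copied verbatim so the sketch stands alone:

fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 {
    if n == 0 {
        0.0
    } else {
        (1.0 + base_increment).powf(f64::from(n)).min(max_seconds)
    }
}

fn main() {
    // attempt 0 -> no wait, attempt 1 -> 1.1 s, attempt 2 -> 1.21 s, ...,
    // attempt 11 -> ~2.85 s, attempt 12 and later -> capped at 3.0 s.
    for n in 0..=13 {
        println!("attempt {n}: wait {:.3} s", exponential_backoff_duration_seconds(n, 0.1, 3.0));
    }
}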
#[cfg(test)]
mod backoff_defaults_tests {
use super::*;
#[test]
fn backoff_defaults_produce_growing_backoff_sequence() {
let mut current_backoff_value = None;
for i in 0..10_000 {
let new_backoff_value = exponential_backoff_duration_seconds(
i,
DEFAULT_BASE_BACKOFF_SECONDS,
DEFAULT_MAX_BACKOFF_SECONDS,
);
if let Some(old_backoff_value) = current_backoff_value.replace(new_backoff_value) {
assert!(
old_backoff_value <= new_backoff_value,
"{i}th backoff value {new_backoff_value} is smaller than the previous one {old_backoff_value}"
)
}
}
assert_eq!(
current_backoff_value.expect("Should have produced backoff values to compare"),
DEFAULT_MAX_BACKOFF_SECONDS,
"Given big enough of retries, backoff should reach its allowed max value"
);
}
}

View File

@@ -55,6 +55,7 @@ use utils::{
use crate::layered_repository::writeback_ephemeral_file;
use crate::repository::Key;
// TODO move ownership into a new PageserverState struct
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
const TEST_PAGE_CACHE_SIZE: usize = 50;

View File

@@ -11,7 +11,7 @@
use anyhow::{bail, ensure, Context, Result};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use regex::Regex;
use std::io::{self, Read};
use std::net::TcpListener;
@@ -434,15 +434,15 @@ const TIME_BUCKETS: &[f64] = &[
0.1, // 1/10 s
];
static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref SMGR_QUERY_TIME: HistogramVec = register_histogram_vec!(
"pageserver_smgr_query_seconds",
"Time spent on smgr query handling",
&["smgr_query_type", "tenant_id", "timeline_id"],
TIME_BUCKETS.into()
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
impl PageServerHandler {
pub fn new(conf: &'static PageServerConf, auth: Option<Arc<JwtAuth>>) -> Self {
@@ -1044,7 +1044,6 @@ impl postgres_backend::Handler for PageServerHandler {
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
pgb.write_message_noflush(&BeMessage::RowDescription(&[
RowDescriptor::int8_col(b"checkpoint_distance"),
RowDescriptor::int8_col(b"checkpoint_timeout"),
RowDescriptor::int8_col(b"compaction_target_size"),
RowDescriptor::int8_col(b"compaction_period"),
RowDescriptor::int8_col(b"compaction_threshold"),
@@ -1055,12 +1054,6 @@ impl postgres_backend::Handler for PageServerHandler {
]))?
.write_message_noflush(&BeMessage::DataRow(&[
Some(repo.get_checkpoint_distance().to_string().as_bytes()),
Some(
repo.get_checkpoint_timeout()
.as_secs()
.to_string()
.as_bytes(),
),
Some(repo.get_compaction_target_size().to_string().as_bytes()),
Some(
repo.get_compaction_period()

View File

@@ -56,16 +56,13 @@ pub trait DatadirTimeline: Timeline {
/// This provides a transaction-like interface to perform a bunch
/// of modifications atomically.
///
/// To ingest a WAL record, call begin_modification(lsn) to get a
/// To ingest a WAL record, call begin_modification() to get a
/// DatadirModification object. Use the functions in the object to
/// modify the repository state, updating all the pages and metadata
/// that the WAL record affects. When you're done, call commit() to
/// commit the changes.
/// that the WAL record affects. When you're done, call commit(lsn) to
/// commit the changes. All the changes will be stamped with the specified LSN.
///
/// Lsn stored in modification is advanced by `ingest_record` and
/// is used by `commit()` to update `last_record_lsn`.
///
/// Calling commit() will flush all the changes and reset the state,
/// Calling commit(lsn) will flush all the changes and reset the state,
/// so the `DatadirModification` struct can be reused to perform the next modification.
///
/// Note that any pending modifications you make through the
@@ -73,7 +70,7 @@ pub trait DatadirTimeline: Timeline {
/// functions of the timeline until you finish! And if you update the
/// same page twice, the last update wins.
///
fn begin_modification(&self, lsn: Lsn) -> DatadirModification<Self>
fn begin_modification(&self) -> DatadirModification<Self>
where
Self: Sized,
{
@@ -82,7 +79,6 @@ pub trait DatadirTimeline: Timeline {
pending_updates: HashMap::new(),
pending_deletions: Vec::new(),
pending_nblocks: 0,
lsn,
}
}
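A hypothetical usage sketch of the interface described above, written against the begin_modification()/commit(lsn) variant that appears in this hunk. RelTag, Bytes, Lsn and the put_* methods come from the surrounding code; the helper itself is illustrative and not part of the diff, and the exact sequence of put_* calls needed for a well-formed relation is more involved (see handle_rel_extend further down):

// Stage changes in memory, then stamp the whole batch with a single LSN.
fn ingest_one_page<T: DatadirTimeline>(
    tline: &T,
    rel: RelTag,
    img: Bytes,
    lsn: Lsn,
) -> anyhow::Result<()> {
    let mut modification = tline.begin_modification(); // nothing is written yet
    modification.put_rel_creation(rel, 0)?;            // queued in pending_updates
    modification.put_rel_page_image(rel, 0, img)?;     // still only queued
    modification.commit(lsn)?;                         // flushed and stamped with `lsn`
    Ok(())
}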
@@ -124,10 +120,6 @@ pub trait DatadirTimeline: Timeline {
fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
ensure!(tag.relnode != 0, "invalid relnode");
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
return Ok(nblocks);
}
if (tag.forknum == pg_constants::FSM_FORKNUM
|| tag.forknum == pg_constants::VISIBILITYMAP_FORKNUM)
&& !self.get_rel_exists(tag, lsn)?
@@ -141,21 +133,13 @@ pub trait DatadirTimeline: Timeline {
let key = rel_size_to_key(tag);
let mut buf = self.get(key, lsn)?;
let nblocks = buf.get_u32_le();
// Update relation size cache
self.update_cached_rel_size(tag, lsn, nblocks);
Ok(nblocks)
Ok(buf.get_u32_le())
}
/// Does relation exist?
fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool> {
ensure!(tag.relnode != 0, "invalid relnode");
// first try to lookup relation in cache
if let Some(_nblocks) = self.get_cached_rel_size(&tag, lsn) {
return Ok(true);
}
// fetch directory listing
let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
let buf = self.get(key, lsn)?;
@@ -461,18 +445,6 @@ pub trait DatadirTimeline: Timeline {
Ok(result.to_keyspace())
}
/// Get cached size of relation if it not updated after specified LSN
fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber>;
/// Update cached relation size if there is no more recent update
fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
/// Store cached relation size
fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
/// Remove cached relation size
fn remove_cached_rel_size(&self, tag: &RelTag);
}
/// DatadirModification represents an operation to ingest an atomic set of
@@ -485,9 +457,6 @@ pub struct DatadirModification<'a, T: DatadirTimeline> {
/// in the state in 'tline' yet.
pub tline: &'a T,
/// Lsn assigned by begin_modification
pub lsn: Lsn,
// The modifications are not applied directly to the underlying key-value store.
// The put-functions add the modifications here, and they are flushed to the
// underlying key-value store by the 'finish' function.
@@ -697,36 +666,26 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
self.pending_nblocks += nblocks as isize;
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Even if nblocks > 0, we don't insert any actual blocks here. That's up to the
// caller.
Ok(())
}
/// Truncate relation
pub fn put_rel_truncation(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> {
ensure!(rel.relnode != 0, "invalid relnode");
let last_lsn = self.tline.get_last_record_lsn();
if self.tline.get_rel_exists(rel, last_lsn)? {
let size_key = rel_size_to_key(rel);
// Fetch the old size first
let old_size = self.get(size_key)?.get_u32_le();
let size_key = rel_size_to_key(rel);
// Update the entry with the new size.
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
// Fetch the old size first
let old_size = self.get(size_key)?.get_u32_le();
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Update the entry with the new size.
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Update logical database size.
self.pending_nblocks -= old_size as isize - nblocks as isize;
}
// Update logical database size.
self.pending_nblocks -= old_size as isize - nblocks as isize;
Ok(())
}
@@ -744,9 +703,6 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
self.pending_nblocks += nblocks as isize - old_size as isize;
}
Ok(())
@@ -772,9 +728,6 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let old_size = self.get(size_key)?.get_u32_le();
self.pending_nblocks -= old_size as isize;
// Remove enty from relation size cache
self.tline.remove_cached_rel_size(&rel);
// Delete size entry, as well as all blocks
self.delete(rel_key_range(rel));
@@ -889,7 +842,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
/// retains all the metadata, but data pages are flushed. That's again OK
/// for bulk import, where you are just loading data pages and won't try to
/// modify the same pages twice.
pub fn flush(&mut self) -> Result<()> {
pub fn flush(&mut self, lsn: Lsn) -> Result<()> {
// Unless we have accumulated a decent amount of changes, it's not worth it
// to scan through the pending_updates list.
let pending_nblocks = self.pending_nblocks;
@@ -903,7 +856,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
let mut result: Result<()> = Ok(());
self.pending_updates.retain(|&key, value| {
if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
result = writer.put(key, self.lsn, value);
result = writer.put(key, lsn, value);
false
} else {
true
@@ -924,9 +877,9 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
/// underlying timeline.
/// All the modifications in this atomic update are stamped by the specified LSN.
///
pub fn commit(&mut self) -> Result<()> {
pub fn commit(&mut self, lsn: Lsn) -> Result<()> {
let writer = self.tline.writer();
let lsn = self.lsn;
let pending_nblocks = self.pending_nblocks;
self.pending_nblocks = 0;
@@ -966,8 +919,8 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
bail!("unexpected pending WAL record");
}
} else {
let lsn = Lsn::max(self.tline.get_last_record_lsn(), self.lsn);
self.tline.get(key, lsn)
let last_lsn = self.tline.get_last_record_lsn();
self.tline.get(key, last_lsn)
}
}
@@ -1371,9 +1324,9 @@ pub fn create_test_timeline<R: Repository>(
timeline_id: utils::zid::ZTimelineId,
) -> Result<std::sync::Arc<R::Timeline>> {
let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?;
let mut m = tline.begin_modification(Lsn(8));
let mut m = tline.begin_modification();
m.init_empty()?;
m.commit()?;
m.commit(Lsn(8))?;
Ok(tline)
}

View File

@@ -408,7 +408,7 @@ pub trait TimelineWriter<'a> {
#[cfg(test)]
pub mod repo_harness {
use bytes::BytesMut;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::{fs, path::PathBuf};
@@ -439,13 +439,14 @@ pub mod repo_harness {
buf.freeze()
}
static LOCK: Lazy<RwLock<()>> = Lazy::new(|| RwLock::new(()));
lazy_static! {
static ref LOCK: RwLock<()> = RwLock::new(());
}
impl From<TenantConf> for TenantConfOpt {
fn from(tenant_conf: TenantConf) -> Self {
Self {
checkpoint_distance: Some(tenant_conf.checkpoint_distance),
checkpoint_timeout: Some(tenant_conf.checkpoint_timeout),
compaction_target_size: Some(tenant_conf.compaction_target_size),
compaction_period: Some(tenant_conf.compaction_period),
compaction_threshold: Some(tenant_conf.compaction_threshold),
@@ -588,10 +589,11 @@ mod tests {
//use std::sync::Arc;
use bytes::BytesMut;
use hex_literal::hex;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
static TEST_KEY: Lazy<Key> =
Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001")));
lazy_static! {
static ref TEST_KEY: Key = Key::from_slice(&hex!("112222222233333333444444445500000001"));
}
#[test]
fn test_basic() -> Result<()> {

View File

@@ -155,7 +155,8 @@ use std::{
use anyhow::{anyhow, bail, Context};
use futures::stream::{FuturesUnordered, StreamExt};
use once_cell::sync::{Lazy, OnceCell};
use lazy_static::lazy_static;
use once_cell::sync::OnceCell;
use remote_storage::{GenericRemoteStorage, RemoteStorage};
use tokio::{
fs,
@@ -172,7 +173,6 @@ use self::{
};
use crate::{
config::PageServerConf,
exponential_backoff,
layered_repository::{
ephemeral_file::is_ephemeral_file,
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
@@ -184,8 +184,8 @@ use crate::{
};
use metrics::{
register_histogram_vec, register_int_counter_vec, register_int_gauge, HistogramVec,
IntCounterVec, IntGauge,
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
HistogramVec, IntCounter, IntCounterVec, IntGauge,
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
@@ -193,34 +193,34 @@ use self::download::download_index_parts;
pub use self::download::gather_tenant_timelines_index_parts;
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
static REMAINING_SYNC_ITEMS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
lazy_static! {
static ref REMAINING_SYNC_ITEMS: IntGauge = register_int_gauge!(
"pageserver_remote_storage_remaining_sync_items",
"Number of storage sync items left in the queue"
)
.expect("failed to register pageserver remote storage remaining sync items int gauge")
});
static IMAGE_SYNC_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("failed to register pageserver remote storage remaining sync items int gauge");
static ref FATAL_TASK_FAILURES: IntCounter = register_int_counter!(
"pageserver_remote_storage_fatal_task_failures_total",
"Number of critically failed tasks"
)
.expect("failed to register pageserver remote storage remaining sync items int gauge");
static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
"pageserver_remote_storage_image_sync_seconds",
"Time took to synchronize (download or upload) a whole pageserver image. \
Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
&["tenant_id", "timeline_id", "operation_kind", "status"],
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
)
.expect("failed to register pageserver image sync time histogram vec")
});
static REMOTE_INDEX_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
.expect("failed to register pageserver image sync time histogram vec");
static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_remote_index_uploads_total",
"Number of remote index uploads",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver remote index upload vec")
});
.expect("failed to register pageserver remote index upload vec");
}
// TODO move ownership into a new PageserverState struct
static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
/// A timeline status to share with pageserver's sync counterpart,
@@ -970,19 +970,14 @@ fn storage_sync_loop<P, S>(
}
}
// needed to check whether the download happened
// more informative than just a bool
#[derive(Debug)]
enum DownloadStatus {
enum DownloadMarker {
Downloaded,
Nothing,
}
#[derive(Debug)]
enum UploadStatus {
Uploaded,
Failed,
Nothing,
}
async fn process_batches<P, S>(
conf: &'static PageServerConf,
max_sync_errors: NonZeroU32,
@@ -1022,7 +1017,7 @@ where
"Finished storage sync task for sync id {sync_id} download marker {:?}",
download_marker
);
if matches!(download_marker, DownloadStatus::Downloaded) {
if matches!(download_marker, DownloadMarker::Downloaded) {
downloaded_timelines.insert(sync_id.tenant_id);
}
}
@@ -1036,7 +1031,7 @@ async fn process_sync_task_batch<P, S>(
max_sync_errors: NonZeroU32,
sync_id: ZTenantTimelineId,
batch: SyncTaskBatch,
) -> DownloadStatus
) -> DownloadMarker
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1053,7 +1048,7 @@ where
// When operating in a system without tasks failing over the error threshold,
// current batching and task processing systems aim to update the layer set and metadata files (remote and local),
// without "losing" such layer files.
let (upload_status, download_status) = tokio::join!(
let (upload_result, status_update) = tokio::join!(
async {
if let Some(upload_data) = upload_data {
match validate_task_retries(upload_data, max_sync_errors)
@@ -1071,7 +1066,7 @@ where
"upload",
)
.await;
UploadStatus::Uploaded
return Some(());
}
ControlFlow::Break(failed_upload_data) => {
if let Err(e) = update_remote_data(
@@ -1088,13 +1083,10 @@ where
{
error!("Failed to update remote timeline {sync_id}: {e:?}");
}
UploadStatus::Failed
}
}
} else {
UploadStatus::Nothing
}
None
}
.instrument(info_span!("upload_timeline_data")),
async {
@@ -1124,53 +1116,51 @@ where
}
}
}
DownloadStatus::Nothing
DownloadMarker::Nothing
}
.instrument(info_span!("download_timeline_data")),
);
if let Some(delete_data) = batch.delete {
match upload_status {
UploadStatus::Uploaded | UploadStatus::Nothing => {
match validate_task_retries(delete_data, max_sync_errors)
.instrument(info_span!("retries_validation"))
if let Some(mut delete_data) = batch.delete {
if upload_result.is_some() {
match validate_task_retries(delete_data, max_sync_errors)
.instrument(info_span!("retries_validation"))
.await
{
ControlFlow::Continue(new_delete_data) => {
delete_timeline_data(
conf,
(storage.as_ref(), &index, sync_queue),
sync_id,
new_delete_data,
sync_start,
"delete",
)
.instrument(info_span!("delete_timeline_data"))
.await;
}
ControlFlow::Break(failed_delete_data) => {
if let Err(e) = update_remote_data(
conf,
storage.as_ref(),
&index,
sync_id,
RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
)
.await
{
ControlFlow::Continue(new_delete_data) => {
delete_timeline_data(
conf,
(storage.as_ref(), &index, sync_queue),
sync_id,
new_delete_data,
sync_start,
"delete",
)
.instrument(info_span!("delete_timeline_data"))
.await;
}
ControlFlow::Break(failed_delete_data) => {
if let Err(e) = update_remote_data(
conf,
storage.as_ref(),
&index,
sync_id,
RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
)
.await
{
error!("Failed to update remote timeline {sync_id}: {e:?}");
}
{
error!("Failed to update remote timeline {sync_id}: {e:?}");
}
}
}
UploadStatus::Failed => {
warn!("Skipping delete task due to failed upload tasks, reenqueuing");
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
}
} else {
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
warn!("Skipping delete task due to failed upload tasks, reenqueuing");
}
}
download_status
status_update
}
async fn download_timeline_data<P, S>(
@@ -1181,7 +1171,7 @@ async fn download_timeline_data<P, S>(
new_download_data: SyncData<LayersDownload>,
sync_start: Instant,
task_name: &str,
) -> DownloadStatus
) -> DownloadMarker
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1210,7 +1200,7 @@ where
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
Ok(()) => {
register_sync_status(sync_id, sync_start, task_name, Some(true));
return DownloadStatus::Downloaded;
return DownloadMarker::Downloaded;
}
Err(e) => {
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
@@ -1226,7 +1216,7 @@ where
}
}
DownloadStatus::Nothing
DownloadMarker::Nothing
}
async fn update_local_metadata(
@@ -1504,7 +1494,11 @@ async fn validate_task_retries<T>(
return ControlFlow::Break(sync_data);
}
exponential_backoff(current_attempt, 1.0, 30.0).await;
if current_attempt > 0 {
let seconds_to_wait = 2.0_f64.powf(current_attempt as f64 - 1.0).min(30.0);
info!("Waiting {seconds_to_wait} seconds before starting the task");
tokio::time::sleep(Duration::from_secs_f64(seconds_to_wait)).await;
}
ControlFlow::Continue(sync_data)
}
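The two sides of this hunk wait according to slightly different curves: the shared exponential_backoff helper, called with a base increment of 1.0, doubles starting from 2 seconds, while the inlined variant doubles starting from 1 second; both cap at 30. A small comparison with the formulas copied from the respective sides (illustrative, not part of the diff):

fn helper_wait(n: u32) -> f64 {
    // exponential_backoff(n, 1.0, 30.0): (1.0 + 1.0)^n, capped at 30, no wait for n == 0
    if n == 0 { 0.0 } else { 2.0_f64.powf(f64::from(n)).min(30.0) }
}

fn inline_wait(n: u32) -> f64 {
    // the inlined variant: 2^(n - 1), capped at 30, no wait for n == 0
    if n == 0 { 0.0 } else { 2.0_f64.powf(f64::from(n) - 1.0).min(30.0) }
}

fn main() {
    for n in 0..=6 {
        println!("attempt {n}: helper {:>4.1} s, inline {:>4.1} s", helper_wait(n), inline_wait(n));
    }
}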

View File

@@ -130,7 +130,6 @@ where
tenant_path.display()
)
})?;
let timelines = storage
.list_prefixes(Some(tenant_storage_path))
.await
@@ -141,13 +140,6 @@ where
)
})?;
if timelines.is_empty() {
anyhow::bail!(
"no timelines found on the remote storage for tenant {}",
tenant_id
)
}
let mut sync_ids = HashSet::new();
for timeline_remote_storage_key in timelines {

View File

@@ -4,7 +4,7 @@ use std::{fmt::Debug, path::PathBuf};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use remote_storage::RemoteStorage;
use tokio::fs;
use tracing::{debug, error, info, warn};
@@ -20,14 +20,14 @@ use crate::{
};
use metrics::{register_int_counter_vec, IntCounterVec};
static NO_LAYERS_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
lazy_static! {
static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_no_layers_uploads_total",
"Number of skipped uploads due to no layers",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver no layers upload vec")
});
.expect("failed to register pageserver no layers upload vec");
}
/// Serializes and uploads the given index part data to the remote storage.
pub(super) async fn upload_index_part<P, S>(

View File

@@ -23,7 +23,6 @@ pub mod defaults {
// which is good for now to trigger bugs.
// This parameter actually determines L0 layer file size.
pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
// Target file size, when creating image and delta layers.
// This parameter determines L1 layer file size.
@@ -49,9 +48,6 @@ pub struct TenantConf {
// page server crashes.
// This parameter actually determines L0 layer file size.
pub checkpoint_distance: u64,
// Inmemory layer is also flushed at least once in checkpoint_timeout to
// eventually upload WAL after activity is stopped.
pub checkpoint_timeout: Duration,
// Target file size, when creating image and delta layers.
// This parameter determines L1 layer file size.
pub compaction_target_size: u64,
@@ -94,7 +90,6 @@ pub struct TenantConf {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct TenantConfOpt {
pub checkpoint_distance: Option<u64>,
pub checkpoint_timeout: Option<Duration>,
pub compaction_target_size: Option<u64>,
#[serde(with = "humantime_serde")]
pub compaction_period: Option<Duration>,
@@ -118,9 +113,6 @@ impl TenantConfOpt {
checkpoint_distance: self
.checkpoint_distance
.unwrap_or(global_conf.checkpoint_distance),
checkpoint_timeout: self
.checkpoint_timeout
.unwrap_or(global_conf.checkpoint_timeout),
compaction_target_size: self
.compaction_target_size
.unwrap_or(global_conf.compaction_target_size),
@@ -150,9 +142,6 @@ impl TenantConfOpt {
if let Some(checkpoint_distance) = other.checkpoint_distance {
self.checkpoint_distance = Some(checkpoint_distance);
}
if let Some(checkpoint_timeout) = other.checkpoint_timeout {
self.checkpoint_timeout = Some(checkpoint_timeout);
}
if let Some(compaction_target_size) = other.compaction_target_size {
self.compaction_target_size = Some(compaction_target_size);
}
@@ -192,8 +181,6 @@ impl TenantConf {
TenantConf {
checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
.expect("cannot parse default checkpoint timeout"),
compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
.expect("cannot parse default compaction period"),
@@ -225,7 +212,6 @@ impl TenantConf {
pub fn dummy_conf() -> Self {
TenantConf {
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
checkpoint_timeout: Duration::from_secs(600),
compaction_target_size: 4 * 1024 * 1024,
compaction_period: Duration::from_secs(10),
compaction_threshold: defaults::DEFAULT_COMPACTION_THRESHOLD,

View File

@@ -25,27 +25,26 @@ use utils::lsn::Lsn;
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
// TODO move ownership into a new PageserverState struct
mod tenants_state {
use anyhow::ensure;
use once_cell::sync::Lazy;
use std::{
collections::HashMap,
sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
};
use tokio::sync::mpsc;
use tracing::{debug, error};
use utils::zid::ZTenantId;
use crate::tenant_mgr::{LocalTimelineUpdate, Tenant};
static TENANTS: Lazy<RwLock<HashMap<ZTenantId, Tenant>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
/// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
/// so that it can enable/disable corresponding processes.
static TIMELINE_UPDATE_SENDER: Lazy<
RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>>,
> = Lazy::new(|| RwLock::new(None));
lazy_static::lazy_static! {
static ref TENANTS: RwLock<HashMap<ZTenantId, Tenant>> = RwLock::new(HashMap::new());
/// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
/// so that it can enable/disable corresponding processes.
static ref TIMELINE_UPDATE_SENDER: RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>> = RwLock::new(None);
}
pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<ZTenantId, Tenant>> {
TENANTS

View File

@@ -87,6 +87,7 @@ async fn compaction_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
);
}
// TODO move ownership into a new PageserverState struct
static START_GC_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
static START_COMPACTION_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();

View File

@@ -45,20 +45,22 @@ use tokio::sync::watch;
use tracing::{debug, error, info, warn};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use utils::zid::{ZTenantId, ZTimelineId};
use crate::shutdown_pageserver;
/// Each thread that we track is associated with a "thread ID". It's just
/// an increasing number that we assign, not related to any system thread
/// id.
static NEXT_THREAD_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
// TODO move ownership into a new PageserverState struct
lazy_static! {
/// Each thread that we track is associated with a "thread ID". It's just
/// an increasing number that we assign, not related to any system thread
/// id.
static ref NEXT_THREAD_ID: AtomicU64 = AtomicU64::new(1);
/// Global registry of threads
static THREADS: Lazy<Mutex<HashMap<u64, Arc<PageServerThread>>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
/// Global registry of threads
static ref THREADS: Mutex<HashMap<u64, Arc<PageServerThread>>> = Mutex::new(HashMap::new());
}
// There is a Tokio watch channel for each thread, which can be used to signal the
// thread that it needs to shut down. This thread local variable holds the receiving

View File

@@ -232,7 +232,7 @@ pub(crate) fn create_timeline(
return Ok(None);
}
match ancestor_timeline_id {
let _new_timeline = match ancestor_timeline_id {
Some(ancestor_timeline_id) => {
let ancestor_timeline = repo
.get_timeline_load(ancestor_timeline_id)

View File

@@ -10,7 +10,7 @@
//! This is similar to PostgreSQL's virtual file descriptor facility in
//! src/backend/storage/file/fd.c
//!
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use once_cell::sync::OnceCell;
use std::fs::{File, OpenOptions};
use std::io::{Error, ErrorKind, Read, Seek, SeekFrom, Write};
@@ -32,24 +32,23 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1.0, // 1 sec
];
static STORAGE_IO_TIME: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref STORAGE_IO_TIME: HistogramVec = register_histogram_vec!(
"pageserver_io_operations_seconds",
"Time spent in IO operations",
&["operation", "tenant_id", "timeline_id"],
STORAGE_IO_TIME_BUCKETS.into()
)
.expect("failed to define a metric")
});
static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
.expect("failed to define a metric");
}
lazy_static! {
static ref STORAGE_IO_SIZE: IntGaugeVec = register_int_gauge_vec!(
"pageserver_io_operations_bytes_total",
"Total amount of bytes read/written in IO operations",
&["operation", "tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
.expect("failed to define a metric");
}
///
/// A virtual file descriptor. You can use this just like std::fs::File, but internally

View File

@@ -30,6 +30,8 @@ use anyhow::Result;
use bytes::{Buf, Bytes, BytesMut};
use tracing::*;
use std::collections::HashMap;
use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::walrecord::*;
@@ -46,6 +48,8 @@ pub struct WalIngest<'a, T: DatadirTimeline> {
checkpoint: CheckPoint,
checkpoint_modified: bool,
relsize_cache: HashMap<RelTag, BlockNumber>,
}
impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
@@ -60,13 +64,13 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
timeline,
checkpoint,
checkpoint_modified: false,
relsize_cache: HashMap::new(),
})
}
///
/// Decode a PostgreSQL WAL record and store it in the repository, in the given timeline.
///
/// This function updates `lsn` field of `DatadirModification`
///
/// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
/// relations/pages that the record affects.
@@ -78,7 +82,6 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
modification: &mut DatadirModification<T>,
decoded: &mut DecodedWALRecord,
) -> Result<()> {
modification.lsn = lsn;
decode_wal_record(recdata, decoded).context("failed decoding wal record")?;
let mut buf = decoded.record.clone();
@@ -257,7 +260,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
// Now that this record has been fully handled, including updating the
// checkpoint data, let the repository know that it is up-to-date to this LSN
modification.commit()?;
modification.commit(lsn)?;
Ok(())
}
@@ -405,7 +408,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if it doesn't.
let vm_size = self.get_relsize(vm_rel, modification.lsn)?;
let vm_size = self.get_relsize(vm_rel)?;
if let Some(blknum) = new_vm_blk {
if blknum >= vm_size {
new_vm_blk = None;
@@ -877,6 +880,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
modification: &mut DatadirModification<T>,
rel: RelTag,
) -> Result<()> {
self.relsize_cache.insert(rel, 0);
modification.put_rel_creation(rel, 0)?;
Ok(())
}
@@ -912,6 +916,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
nblocks: BlockNumber,
) -> Result<()> {
modification.put_rel_truncation(rel, nblocks)?;
self.relsize_cache.insert(rel, nblocks);
Ok(())
}
@@ -921,16 +926,23 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
rel: RelTag,
) -> Result<()> {
modification.put_rel_drop(rel)?;
self.relsize_cache.remove(&rel);
Ok(())
}
fn get_relsize(&mut self, rel: RelTag, lsn: Lsn) -> Result<BlockNumber> {
let nblocks = if !self.timeline.get_rel_exists(rel, lsn)? {
0
fn get_relsize(&mut self, rel: RelTag) -> Result<BlockNumber> {
if let Some(nblocks) = self.relsize_cache.get(&rel) {
Ok(*nblocks)
} else {
self.timeline.get_rel_size(rel, lsn)?
};
Ok(nblocks)
let last_lsn = self.timeline.get_last_record_lsn();
let nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
0
} else {
self.timeline.get_rel_size(rel, last_lsn)?
};
self.relsize_cache.insert(rel, nblocks);
Ok(nblocks)
}
}
fn handle_rel_extend(
@@ -940,16 +952,22 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
blknum: BlockNumber,
) -> Result<()> {
let new_nblocks = blknum + 1;
// Check if the relation exists. We implicitly create relations on first
// record.
// TODO: would be nice to be more explicit about it
let last_lsn = modification.lsn;
let old_nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
// create it with 0 size initially, the logic below will extend it
modification.put_rel_creation(rel, 0)?;
0
let old_nblocks = if let Some(nblocks) = self.relsize_cache.get(&rel) {
*nblocks
} else {
self.timeline.get_rel_size(rel, last_lsn)?
// Check if the relation exists. We implicitly create relations on first
// record.
// TODO: would be nice to be more explicit about it
let last_lsn = self.timeline.get_last_record_lsn();
let nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
// create it with 0 size initially, the logic below will extend it
modification.put_rel_creation(rel, 0)?;
0
} else {
self.timeline.get_rel_size(rel, last_lsn)?
};
self.relsize_cache.insert(rel, nblocks);
nblocks
};
if new_nblocks > old_nblocks {
@@ -960,6 +978,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
for gap_blknum in old_nblocks..blknum {
modification.put_rel_page_image(rel, gap_blknum, ZERO_PAGE.clone())?;
}
self.relsize_cache.insert(rel, new_nblocks);
}
Ok(())
}
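The relsize_cache added to WalIngest above is a plain read-through cache keyed by relation: consult the map first, fall back to the (more expensive) timeline lookup only on a miss, and remember the answer; the diff also updates the entry on truncation and removes it on drop so stale sizes are never served within an ingest pass. Stripped of the surrounding types, the pattern looks like this (u32 standing in for RelTag/BlockNumber so the sketch compiles on its own):

use std::collections::HashMap;

struct RelSizes {
    cache: HashMap<u32, u32>,
}

impl RelSizes {
    fn get_or_load(&mut self, rel: u32, load: impl FnOnce() -> u32) -> u32 {
        if let Some(n) = self.cache.get(&rel) {
            return *n;      // hit: skip the timeline lookup entirely
        }
        let n = load();     // miss: ask the timeline (or storage) once
        self.cache.insert(rel, n);
        n
    }
}

fn main() {
    let mut sizes = RelSizes { cache: HashMap::new() };
    let first = sizes.get_or_load(7, || 42);  // loader runs, result cached
    let second = sizes.get_or_load(7, || 99); // served from cache, loader not called
    assert_eq!((first, second), (42, 42));
}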
@@ -1050,10 +1069,10 @@ mod tests {
static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
fn init_walingest_test<T: DatadirTimeline>(tline: &T) -> Result<WalIngest<T>> {
let mut m = tline.begin_modification(Lsn(0x10));
let mut m = tline.begin_modification();
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
m.put_relmap_file(0, 111, Bytes::from(""))?; // dummy relmapper file
m.commit()?;
m.commit(Lsn(0x10))?;
let walingest = WalIngest::new(tline, Lsn(0x10))?;
Ok(walingest)
@@ -1065,19 +1084,19 @@ mod tests {
let tline = create_test_timeline(repo, TIMELINE_ID)?;
let mut walingest = init_walingest_test(&*tline)?;
let mut m = tline.begin_modification(Lsn(0x20));
let mut m = tline.begin_modification();
walingest.put_rel_creation(&mut m, TESTREL_A)?;
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"))?;
m.commit()?;
let mut m = tline.begin_modification(Lsn(0x30));
m.commit(Lsn(0x20))?;
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 3"))?;
m.commit()?;
let mut m = tline.begin_modification(Lsn(0x40));
m.commit(Lsn(0x30))?;
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1 at 4"))?;
m.commit()?;
let mut m = tline.begin_modification(Lsn(0x50));
m.commit(Lsn(0x40))?;
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"))?;
m.commit()?;
m.commit(Lsn(0x50))?;
assert_current_logical_size(&*tline, Lsn(0x50));
@@ -1123,9 +1142,9 @@ mod tests {
);
// Truncate last block
let mut m = tline.begin_modification(Lsn(0x60));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, 2)?;
m.commit()?;
m.commit(Lsn(0x60))?;
assert_current_logical_size(&*tline, Lsn(0x60));
// Check reported size and contents after truncation
@@ -1147,15 +1166,15 @@ mod tests {
);
// Truncate to zero length
let mut m = tline.begin_modification(Lsn(0x68));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, 0)?;
m.commit()?;
m.commit(Lsn(0x68))?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x68))?, 0);
// Extend from 0 to 2 blocks, leaving a gap
let mut m = tline.begin_modification(Lsn(0x70));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1"))?;
m.commit()?;
m.commit(Lsn(0x70))?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x70))?, 2);
assert_eq!(
tline.get_rel_page_at_lsn(TESTREL_A, 0, Lsn(0x70))?,
@@ -1167,9 +1186,9 @@ mod tests {
);
// Extend a lot more, leaving a big gap that spans across segments
let mut m = tline.begin_modification(Lsn(0x80));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 1500, TEST_IMG("foo blk 1500"))?;
m.commit()?;
m.commit(Lsn(0x80))?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x80))?, 1501);
for blk in 2..1500 {
assert_eq!(
@@ -1193,18 +1212,18 @@ mod tests {
let tline = create_test_timeline(repo, TIMELINE_ID)?;
let mut walingest = init_walingest_test(&*tline)?;
let mut m = tline.begin_modification(Lsn(0x20));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"))?;
m.commit()?;
m.commit(Lsn(0x20))?;
// Check that rel exists and size is correct
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x20))?, true);
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x20))?, 1);
// Drop rel
let mut m = tline.begin_modification(Lsn(0x30));
let mut m = tline.begin_modification();
walingest.put_rel_drop(&mut m, TESTREL_A)?;
m.commit()?;
m.commit(Lsn(0x30))?;
// Check that rel is not visible anymore
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x30))?, false);
@@ -1213,9 +1232,9 @@ mod tests {
//assert!(tline.get_rel_size(TESTREL_A, Lsn(0x30))?.is_none());
// Re-create it
let mut m = tline.begin_modification(Lsn(0x40));
let mut m = tline.begin_modification();
walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 4"))?;
m.commit()?;
m.commit(Lsn(0x40))?;
// Check that rel exists and size is correct
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x40))?, true);
@@ -1235,12 +1254,12 @@ mod tests {
// Create a 20 MB relation (the size is arbitrary)
let relsize = 20 * 1024 * 1024 / 8192;
let mut m = tline.begin_modification(Lsn(0x20));
let mut m = tline.begin_modification();
for blkno in 0..relsize {
let data = format!("foo blk {} at {}", blkno, Lsn(0x20));
walingest.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data))?;
}
m.commit()?;
m.commit(Lsn(0x20))?;
// The relation was created at LSN 20, not visible at LSN 1 yet.
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x10))?, false);
@@ -1261,9 +1280,9 @@ mod tests {
// Truncate relation so that second segment was dropped
// - only leave one page
let mut m = tline.begin_modification(Lsn(0x60));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, 1)?;
m.commit()?;
m.commit(Lsn(0x60))?;
// Check reported size and contents after truncation
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x60))?, 1);
@@ -1291,12 +1310,12 @@ mod tests {
// Extend relation again.
// Add enough blocks to create second segment
let lsn = Lsn(0x80);
let mut m = tline.begin_modification(lsn);
let mut m = tline.begin_modification();
for blkno in 0..relsize {
let data = format!("foo blk {} at {}", blkno, lsn);
walingest.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data))?;
}
m.commit()?;
m.commit(lsn)?;
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x80))?, true);
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x80))?, relsize);
@@ -1324,10 +1343,10 @@ mod tests {
let mut lsn = 0x10;
for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
walingest.put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img)?;
m.commit()?;
m.commit(Lsn(lsn))?;
}
assert_current_logical_size(&*tline, Lsn(lsn));
@@ -1339,9 +1358,9 @@ mod tests {
// Truncate one block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE)?;
m.commit()?;
m.commit(Lsn(lsn))?;
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE
@@ -1350,9 +1369,9 @@ mod tests {
// Truncate another block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE - 1)?;
m.commit()?;
m.commit(Lsn(lsn))?;
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE - 1
@@ -1364,9 +1383,9 @@ mod tests {
let mut size: i32 = 3000;
while size >= 0 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let mut m = tline.begin_modification();
walingest.put_rel_truncation(&mut m, TESTREL_A, size as BlockNumber)?;
m.commit()?;
m.commit(Lsn(lsn))?;
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
size as BlockNumber

View File

@@ -66,7 +66,7 @@ pub fn init_wal_receiver_main_thread(
);
let broker_prefix = &conf.broker_etcd_prefix;
info!(
"Starting wal receiver main thread, etcd endpoints: {}",
"Starting wal receiver main thread, etdc endpoints: {}",
etcd_endpoints.iter().map(Url::to_string).join(", ")
);

View File

@@ -25,11 +25,7 @@ use etcd_broker::{
use tokio::select;
use tracing::*;
use crate::{
exponential_backoff,
repository::{Repository, Timeline},
DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
};
use crate::repository::{Repository, Timeline};
use crate::{RepositoryImpl, TimelineImpl};
use utils::{
lsn::Lsn,
@@ -234,6 +230,18 @@ async fn subscribe_for_timeline_updates(
}
}
const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;
const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;
async fn exponential_backoff(n: u32, base: f64, max_seconds: f64) {
if n == 0 {
return;
}
let seconds_to_wait = base.powf(f64::from(n) - 1.0).min(max_seconds);
info!("Backoff: waiting {seconds_to_wait} seconds before proceeding with the task");
tokio::time::sleep(Duration::from_secs_f64(seconds_to_wait)).await;
}
/// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible.
struct WalreceiverState {
id: ZTenantTimelineId,

View File

@@ -154,7 +154,7 @@ pub async fn handle_walreceiver_connection(
{
let mut decoded = DecodedWALRecord::default();
let mut modification = timeline.begin_modification(endlsn);
let mut modification = timeline.begin_modification();
while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
// let _enter = info_span!("processing record", lsn = %lsn).entered();
@@ -178,6 +178,16 @@ pub async fn handle_walreceiver_connection(
caught_up = true;
}
let timeline_to_check = Arc::clone(&timeline);
tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
.await
.with_context(|| {
format!("Spawned checkpoint check task panicked for timeline {id}")
})?
.with_context(|| {
format!("Failed to check checkpoint distance for timeline {id}")
})?;
Some(endlsn)
}
@@ -198,12 +208,6 @@ pub async fn handle_walreceiver_connection(
_ => None,
};
let timeline_to_check = Arc::clone(&timeline);
tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
.await
.with_context(|| format!("Spawned checkpoint check task panicked for timeline {id}"))?
.with_context(|| format!("Failed to check checkpoint distance for timeline {id}"))?;
if let Some(last_lsn) = status_update {
let remote_index = repo.get_remote_index();
let timeline_remote_consistent_lsn = remote_index

View File

@@ -20,8 +20,8 @@
//!
use byteorder::{ByteOrder, LittleEndian};
use bytes::{BufMut, Bytes, BytesMut};
use lazy_static::lazy_static;
use nix::poll::*;
use once_cell::sync::Lazy;
use serde::Serialize;
use std::fs;
use std::fs::OpenOptions;
@@ -105,27 +105,21 @@ impl crate::walredo::WalRedoManager for DummyRedoManager {
// We collect the time spent in actual WAL redo ('redo'), and time waiting
// for access to the postgres process ('wait') since there is only one for
// each tenant.
static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo")
.expect("failed to define a metric")
});
static WAL_REDO_WAIT_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
lazy_static! {
static ref WAL_REDO_TIME: Histogram =
register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo")
.expect("failed to define a metric");
static ref WAL_REDO_WAIT_TIME: Histogram = register_histogram!(
"pageserver_wal_redo_wait_seconds",
"Time spent waiting for access to the WAL redo process"
)
.expect("failed to define a metric")
});
static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.expect("failed to define a metric");
static ref WAL_REDO_RECORD_COUNTER: IntCounter = register_int_counter!(
"pageserver_replayed_wal_records_total",
"Number of WAL records replayed in WAL redo process"
)
.unwrap()
});
.unwrap();
}
///
/// This is the real implementation that uses a Postgres process to

View File

@@ -14,7 +14,7 @@ hashbrown = "0.11.2"
hex = "0.4.3"
hmac = "0.12.1"
hyper = "0.14"
once_cell = "1.13.0"
lazy_static = "1.4.0"
md5 = "0.7.0"
parking_lot = "0.12"
pin-project-lite = "0.2.7"

View File

@@ -12,12 +12,13 @@ use crate::{
stream::PqStream,
waiters::{self, Waiter, Waiters},
};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use tokio::io::{AsyncRead, AsyncWrite};
static CPLANE_WAITERS: Lazy<Waiters<mgmt::ComputeReady>> = Lazy::new(Default::default);
lazy_static! {
static ref CPLANE_WAITERS: Waiters<mgmt::ComputeReady> = Default::default();
}
/// Give caller an opportunity to wait for the cloud's reply.
pub async fn with_waiter<R, T, E>(

View File

@@ -4,8 +4,8 @@ use crate::config::{ProxyConfig, TlsConfig};
use crate::stream::{MetricsStream, PqStream, Stream};
use anyhow::{bail, Context};
use futures::TryFutureExt;
use lazy_static::lazy_static;
use metrics::{register_int_counter, IntCounter};
use once_cell::sync::Lazy;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use utils::pq_proto::{BeMessage as Be, *};
@@ -13,29 +13,23 @@ use utils::pq_proto::{BeMessage as Be, *};
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
const ERR_PROTO_VIOLATION: &str = "protocol violation";
static NUM_CONNECTIONS_ACCEPTED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
lazy_static! {
static ref NUM_CONNECTIONS_ACCEPTED_COUNTER: IntCounter = register_int_counter!(
"proxy_accepted_connections_total",
"Number of TCP client connections accepted."
)
.unwrap()
});
static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.unwrap();
static ref NUM_CONNECTIONS_CLOSED_COUNTER: IntCounter = register_int_counter!(
"proxy_closed_connections_total",
"Number of TCP client connections closed."
)
.unwrap()
});
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
.unwrap();
static ref NUM_BYTES_PROXIED_COUNTER: IntCounter = register_int_counter!(
"proxy_io_bytes_total",
"Number of bytes sent/received between any client and backend."
)
.unwrap()
});
.unwrap();
}
/// A small combinator for pluggable error logging.
async fn log_error<R, F>(future: F) -> F::Output

View File

@@ -1,8 +1,4 @@
[pytest]
filterwarnings =
error::pytest.PytestUnhandledThreadExceptionWarning
error::UserWarning
ignore:record_property is incompatible with junit_family:pytest.PytestWarning
addopts =
-m 'not remote_cluster'
markers =

View File

@@ -9,6 +9,7 @@ bytes = "1.0.1"
byteorder = "1.4.3"
hyper = "0.14"
fs2 = "0.4.3"
lazy_static = "1.4.0"
serde_json = "1"
tracing = "0.1.27"
clap = "3.0"
@@ -28,7 +29,7 @@ const_format = "0.2.21"
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
git-version = "0.3.5"
async-trait = "0.1"
once_cell = "1.13.0"
once_cell = "1.10.0"
toml_edit = { version = "0.13", features = ["easy"] }
postgres_ffi = { path = "../libs/postgres_ffi" }

View File

@@ -2,7 +2,7 @@
use anyhow::{bail, ensure, Context, Result};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use std::fs::{self, File, OpenOptions};
use std::io::{Read, Write};
@@ -26,15 +26,15 @@ const CONTROL_FILE_NAME: &str = "safekeeper.control";
const CONTROL_FILE_NAME_PARTIAL: &str = "safekeeper.control.partial";
pub const CHECKSUM_SIZE: usize = std::mem::size_of::<u32>();
static PERSIST_CONTROL_FILE_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
static ref PERSIST_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
"safekeeper_persist_control_file_seconds",
"Seconds to persist and sync control file, grouped by timeline",
&["tenant_id", "timeline_id"],
DISK_WRITE_SECONDS_BUCKETS.to_vec()
)
.expect("Failed to register safekeeper_persist_control_file_seconds histogram vec")
});
.expect("Failed to register safekeeper_persist_control_file_seconds histogram vec");
}
/// Storage should keep actual state inside of it. It should implement Deref
/// trait to access state fields and have persist method for updating that state.

View File

@@ -727,7 +727,7 @@ where
info!("setting local_start_lsn to {:?}", state.local_start_lsn);
}
// Initializing commit_lsn before acking first flushed record is
// important to let find_end_of_wal skip the hole in the beginning
// important to let find_end_of_wal skip the whole in the beginning
// of the first segment.
//
// NB: on new clusters, this happens at the same time as
@@ -738,10 +738,6 @@ where
// Initializing backup_lsn is useful to avoid making backup think it should upload 0 segment.
self.inmem.backup_lsn = max(self.inmem.backup_lsn, state.timeline_start_lsn);
// Initializing remote_consistent_lsn sets that we have nothing to
// stream to pageserver(s) immediately after creation.
self.inmem.remote_consistent_lsn =
max(self.inmem.remote_consistent_lsn, state.timeline_start_lsn);
state.acceptor_state.term_history = msg.term_history.clone();
self.persist_control_file(state)?;

View File

@@ -4,7 +4,7 @@
use anyhow::{bail, Context, Result};
use etcd_broker::subscription_value::SkTimelineInfo;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use postgres_ffi::xlog_utils::XLogSegNo;
use serde::Serialize;
@@ -137,7 +137,7 @@ impl SharedState {
self.is_wal_backup_required()
// FIXME: add tracking of relevant pageservers and check them here individually,
// otherwise migration won't work (we suspend too early).
|| self.sk.inmem.remote_consistent_lsn < self.sk.inmem.commit_lsn
|| self.sk.inmem.remote_consistent_lsn <= self.sk.inmem.commit_lsn
}
/// Mark timeline active/inactive and return whether s3 offloading requires
@@ -559,12 +559,12 @@ struct GlobalTimelinesState {
wal_backup_launcher_tx: Option<Sender<ZTenantTimelineId>>,
}
static TIMELINES_STATE: Lazy<Mutex<GlobalTimelinesState>> = Lazy::new(|| {
Mutex::new(GlobalTimelinesState {
lazy_static! {
static ref TIMELINES_STATE: Mutex<GlobalTimelinesState> = Mutex::new(GlobalTimelinesState {
timelines: HashMap::new(),
wal_backup_launcher_tx: None,
})
});
});
}
#[derive(Clone, Copy, Serialize)]
pub struct TimelineDeleteForceResult {

View File

@@ -12,7 +12,7 @@ use std::io::{self, Seek, SeekFrom};
use std::pin::Pin;
use tokio::io::AsyncRead;
use once_cell::sync::Lazy;
use lazy_static::lazy_static;
use postgres_ffi::xlog_utils::{
find_end_of_wal, IsPartialXLogFileName, IsXLogFileName, XLogFromFileName, XLogSegNo, PG_TLI,
};
@@ -38,44 +38,31 @@ use metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECOND
use tokio::io::{AsyncReadExt, AsyncSeekExt};
// The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
// i64 is faster than f64, so update to u64 when available.
static WRITE_WAL_BYTES: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
lazy_static! {
// The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
// i64 is faster than f64, so update to u64 when available.
static ref WRITE_WAL_BYTES: HistogramVec = register_histogram_vec!(
"safekeeper_write_wal_bytes",
"Bytes written to WAL in a single request, grouped by timeline",
&["tenant_id", "timeline_id"],
vec![
1.0,
10.0,
100.0,
1024.0,
8192.0,
128.0 * 1024.0,
1024.0 * 1024.0,
10.0 * 1024.0 * 1024.0
]
vec![1.0, 10.0, 100.0, 1024.0, 8192.0, 128.0 * 1024.0, 1024.0 * 1024.0, 10.0 * 1024.0 * 1024.0]
)
.expect("Failed to register safekeeper_write_wal_bytes histogram vec")
});
static WRITE_WAL_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("Failed to register safekeeper_write_wal_bytes histogram vec");
static ref WRITE_WAL_SECONDS: HistogramVec = register_histogram_vec!(
"safekeeper_write_wal_seconds",
"Seconds spent writing and syncing WAL to a disk in a single request, grouped by timeline",
&["tenant_id", "timeline_id"],
DISK_WRITE_SECONDS_BUCKETS.to_vec()
)
.expect("Failed to register safekeeper_write_wal_seconds histogram vec")
});
static FLUSH_WAL_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
.expect("Failed to register safekeeper_write_wal_seconds histogram vec");
static ref FLUSH_WAL_SECONDS: HistogramVec = register_histogram_vec!(
"safekeeper_flush_wal_seconds",
"Seconds spent syncing WAL to a disk, grouped by timeline",
&["tenant_id", "timeline_id"],
DISK_WRITE_SECONDS_BUCKETS.to_vec()
)
.expect("Failed to register safekeeper_flush_wal_seconds histogram vec")
});
.expect("Failed to register safekeeper_flush_wal_seconds histogram vec");
}
struct WalStorageMetrics {
write_wal_bytes: Histogram,

View File

@@ -1,708 +0,0 @@
#
# Script to export tenants from one pageserver and import them into another page server.
#
# Outline of steps:
# 1. Get `(last_lsn, prev_lsn)` from old pageserver
# 2. Get `fullbackup` from old pageserver, which creates a basebackup tar file
# 3. This tar file might be missing relation files for empty relations, if the pageserver
# is old enough (we didn't always store those). So to recreate them, we start a local
# vanilla postgres on this basebackup and ask it what relations should exist, then touch
# any missing files and re-pack the tar.
# TODO This functionality is no longer needed, so we can delete it later if we don't
# end up using the same utils for the pg 15 upgrade. Not sure.
# 4. We import the patched basebackup into a new pageserver
# 5. We export again via fullbackup, now from the new pageserver and compare the returned
# tar file with the one we imported. This confirms that we imported everything that was
# exported, but doesn't guarantee correctness (what if we didn't **export** everything
# initially?)
# 6. We wait for the new pageserver's remote_consistent_lsn to catch up
#
# For more context on how to use this, see:
# https://github.com/neondatabase/cloud/wiki/Storage-format-migration
import os
from os import path
import shutil
from pathlib import Path
import tempfile
from contextlib import closing
import psycopg2
import subprocess
import argparse
import time
import requests
import uuid
from psycopg2.extensions import connection as PgConnection
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
###############################################
### client-side utils copied from test fixtures
###############################################
Env = Dict[str, str]
_global_counter = 0
def global_counter() -> int:
""" A really dumb global counter.
This is useful for giving output files a unique number, so if we run the
same command multiple times we can keep their output separate.
"""
global _global_counter
_global_counter += 1
return _global_counter
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
""" Run a process and capture its output
Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
where "cmd" is the name of the program and NNN is an incrementing
counter.
If those files already exist, we will overwrite them.
Returns basepath for files with captured output.
"""
assert type(cmd) is list
base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
basepath = os.path.join(capture_dir, base)
stdout_filename = basepath + '.stdout'
stderr_filename = basepath + '.stderr'
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
print('(capturing output to "{}.stdout")'.format(base))
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
return basepath
class PgBin:
""" A helper class for executing postgres binaries """
def __init__(self, log_dir: Path, pg_distrib_dir):
self.log_dir = log_dir
self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin')
self.env = os.environ.copy()
self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib')
def _fixpath(self, command: List[str]):
if '/' not in command[0]:
command[0] = os.path.join(self.pg_bin_path, command[0])
def _build_env(self, env_add: Optional[Env]) -> Env:
if env_add is None:
return self.env
env = self.env.copy()
env.update(env_add)
return env
def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None):
"""
Run one of the postgres binaries.
The command should be in list form, e.g. ['pgbench', '-p', '55432']
All the necessary environment variables will be set.
If the first argument (the command name) doesn't include a path (no '/'
characters present), then it will be edited to include the correct path.
If you want stdout/stderr captured to files, use `run_capture` instead.
"""
self._fixpath(command)
print('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
subprocess.run(command, env=env, cwd=cwd, check=True)
def run_capture(self,
command: List[str],
env: Optional[Env] = None,
cwd: Optional[str] = None,
**kwargs: Any) -> str:
"""
Run one of the postgres binaries, with stderr and stdout redirected to a file.
This is just like `run`, but for chatty programs. Returns basepath for files
with captured output.
"""
self._fixpath(command)
print('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
return subprocess_capture(str(self.log_dir),
command,
env=env,
cwd=cwd,
check=True,
**kwargs)
class PgProtocol:
""" Reusable connection logic """
def __init__(self, **kwargs):
self.default_options = kwargs
def conn_options(self, **kwargs):
conn_options = self.default_options.copy()
if 'dsn' in kwargs:
conn_options.update(parse_dsn(kwargs['dsn']))
conn_options.update(kwargs)
# Individual statement timeout in seconds. 2 minutes should be
# enough for our tests, but if you need a longer, you can
# change it by calling "SET statement_timeout" after
# connecting.
if 'options' in conn_options:
conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options']
else:
conn_options['options'] = "-cstatement_timeout=120s"
return conn_options
# autocommit=True here by default because that's what we need most of the time
def connect(self, autocommit=True, **kwargs) -> PgConnection:
"""
Connect to the node.
Returns psycopg2's connection object.
This method passes all extra params to connstr.
"""
conn = psycopg2.connect(**self.conn_options(**kwargs))
# WARNING: this setting affects *all* tests!
conn.autocommit = autocommit
return conn
def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]:
"""
Execute query against the node and return all rows.
This method passes all extra params to connstr.
"""
return self.safe_psql_many([query], **kwargs)[0]
def safe_psql_many(self, queries: List[str], **kwargs: Any) -> List[List[Tuple[Any, ...]]]:
"""
Execute queries against the node and return all rows.
This method passes all extra params to connstr.
"""
result: List[List[Any]] = []
with closing(self.connect(**kwargs)) as conn:
with conn.cursor() as cur:
for query in queries:
print(f"Executing query: {query}")
cur.execute(query)
if cur.description is None:
result.append([]) # query didn't return data
else:
result.append(cast(List[Any], cur.fetchall()))
return result
class VanillaPostgres(PgProtocol):
def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
super().__init__(host='localhost', port=port, dbname='postgres')
self.pgdatadir = pgdatadir
self.pg_bin = pg_bin
self.running = False
if init:
self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
"""Append lines into postgresql.conf file."""
assert not self.running
with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
conf_file.write("\n".join(options))
def start(self, log_path: Optional[str] = None):
assert not self.running
self.running = True
if log_path is None:
log_path = os.path.join(self.pgdatadir, "pg.log")
self.pg_bin.run_capture(
['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])
def stop(self):
assert self.running
self.running = False
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
if self.running:
self.stop()
class NeonPageserverApiException(Exception):
pass
class NeonPageserverHttpClient(requests.Session):
def __init__(self, host, port):
super().__init__()
self.host = host
self.port = port
def verbose_error(self, res: requests.Response):
try:
res.raise_for_status()
except requests.RequestException as e:
try:
msg = res.json()['msg']
except:
msg = ''
raise NeonPageserverApiException(msg) from e
def check_status(self):
self.get(f"http://{self.host}:{self.port}/v1/status").raise_for_status()
def tenant_list(self):
res = self.get(f"http://{self.host}:{self.port}/v1/tenant")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
res = self.post(
f"http://{self.host}:{self.port}/v1/tenant",
json={
'new_tenant_id': new_tenant_id.hex,
},
)
if res.status_code == 409:
if ok_if_exists:
print(f'could not create tenant: already exists for id {new_tenant_id}')
else:
res.raise_for_status()
elif res.status_code == 201:
print(f'created tenant {new_tenant_id}')
else:
self.verbose_error(res)
return new_tenant_id
def timeline_list(self, tenant_id: uuid.UUID):
res = self.get(f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}?include-non-incremental-logical-size=1"
)
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def lsn_to_hex(num: int) -> str:
""" Convert lsn from int to standard hex notation. """
return "{:X}/{:X}".format(num >> 32, num & 0xffffffff)
def lsn_from_hex(lsn_hex: str) -> int:
""" Convert lsn from hex notation to int. """
l, r = lsn_hex.split('/')
return (int(l, 16) << 32) + int(r, 16)
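A quick worked example for the two LSN helpers above (illustrative only, not part of the deleted script); the hex values are arbitrary:

    # lsn_from_hex / lsn_to_hex split a 64-bit LSN into the usual "high/low" hex form.
    assert lsn_from_hex("1/0") == 1 << 32                          # 4294967296
    assert lsn_from_hex("0/2000000") == 0x2000000                  # 33554432
    assert lsn_to_hex((0x16 << 32) + 0xB374D848) == "16/B374D848"
    assert lsn_to_hex(lsn_from_hex("16/B374D848")) == "16/B374D848"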
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:
detail = pageserver_http_client.timeline_detail(tenant, timeline)
if detail['remote'] is None:
# No remote information at all. This happens right after creating
# a timeline, before any part of it has been uploaded to remote
# storage yet.
return 0
else:
lsn_str = detail['remote']['remote_consistent_lsn']
assert isinstance(lsn_str, str)
return lsn_from_hex(lsn_str)
def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int):
"""waits for local timeline upload up to specified lsn"""
for i in range(10):
current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline)
if current_lsn >= lsn:
return
print("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1))
time.sleep(1)
raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
##############
# End of utils
##############
def pack_base(log_dir, restored_dir, output_tar):
"""Create tar file from basebackup, being careful to produce relative filenames."""
tmp_tar_name = "tmp.tar"
tmp_tar_path = os.path.join(restored_dir, tmp_tar_name)
cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir)
# We actually cd into the dir and call tar from there. If we call tar from
# outside we won't encode filenames as relative, and they won't parse well
# on import.
subprocess_capture(log_dir, cmd, cwd=restored_dir)
shutil.move(tmp_tar_path, output_tar)
def reconstruct_paths(log_dir, pg_bin, base_tar):
"""Reconstruct what relation files should exist in the datadir by querying postgres."""
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
# Start a vanilla postgres from the given datadir and query it to find
# what relfiles should exist, but possibly don't.
port = "55439" # Probably free
with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
vanilla_pg.configure([f"port={port}"])
vanilla_pg.start(log_path=os.path.join(log_dir, "tmp_pg.log"))
# Create database based on template0 because we can't connect to template0
query = "create database template0copy template template0"
vanilla_pg.safe_psql(query, user="cloud_admin")
vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")
# Get all databases
query = "select oid, datname from pg_database"
oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
template0_oid = [
oid for (oid, database) in oid_dbname_pairs if database == "template0"
][0]
# Get rel paths for each database
for oid, database in oid_dbname_pairs:
if database == "template0":
# We can't connect to template0
continue
query = "select relname, pg_relation_filepath(oid) from pg_class"
result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
for relname, filepath in result:
if filepath is not None:
if database == "template0copy":
# Add all template0copy paths to template0
prefix = f"base/{oid}/"
if filepath.startswith(prefix):
suffix = filepath[len(prefix):]
yield f"base/{template0_oid}/{suffix}"
elif filepath.startswith("global"):
print(f"skipping {database} global file {filepath}")
else:
raise AssertionError
else:
yield filepath
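To make the template0copy-to-template0 path rewrite above concrete, a small worked example (the oids are made-up placeholders):

    # Suppose template0copy was created with oid 16390 and template0 has oid 13679.
    # A relation file reported as base/16390/16388 must be recreated as base/13679/16388.
    template0_oid, oid, filepath = 13679, 16390, "base/16390/16388"
    prefix = f"base/{oid}/"
    assert filepath.startswith(prefix)
    suffix = filepath[len(prefix):]                                # "16388"
    assert f"base/{template0_oid}/{suffix}" == "base/13679/16388"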
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
"""Add the appropriate empty files to a basebadkup tar."""
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])
# Touch files that don't exist
for path in paths:
absolute_path = os.path.join(restored_dir, path)
exists = os.path.exists(absolute_path)
if not exists:
print(f"File {absolute_path} didn't exist. Creating..")
Path(absolute_path).touch()
# Repackage
pack_base(log_dir, restored_dir, output_tar)
# HACK This is a workaround for exporting from old pageservers that
# can't export empty relations. In this case we need to start
# a vanilla postgres from the exported datadir, and query it
# to see what empty relations are missing, and then create
# those empty files before importing.
def add_missing_rels(base_tar, output_tar, log_dir, pg_bin):
reconstructed_paths = set(reconstruct_paths(log_dir, pg_bin, base_tar))
touch_missing_rels(log_dir, base_tar, output_tar, reconstructed_paths)
def get_rlsn(pageserver_connstr, tenant_id, timeline_id):
conn = psycopg2.connect(pageserver_connstr)
conn.autocommit = True
with conn.cursor() as cur:
cmd = f"get_last_record_rlsn {tenant_id} {timeline_id}"
cur.execute(cmd)
res = cur.fetchone()
prev_lsn = res[0]
last_lsn = res[1]
conn.close()
return last_lsn, prev_lsn
def import_timeline(args,
psql_path,
pageserver_connstr,
pageserver_http,
tenant_id,
timeline_id,
last_lsn,
prev_lsn,
tar_filename):
# Import timelines to new pageserver
import_cmd = f"import basebackup {tenant_id} {timeline_id} {last_lsn} {last_lsn}"
full_cmd = rf"""cat {tar_filename} | {psql_path} {pageserver_connstr} -c '{import_cmd}' """
stderr_filename2 = path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stderr")
stdout_filename = path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stdout")
print(f"Running: {full_cmd}")
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename2, 'w') as stderr_f:
print(f"(capturing output to {stdout_filename})")
pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
subprocess.run(full_cmd,
stdout=stdout_f,
stderr=stderr_f,
env=pg_bin._build_env(None),
shell=True,
check=True)
print(f"Done import")
# Wait until pageserver persists the files
wait_for_upload(pageserver_http,
uuid.UUID(tenant_id),
uuid.UUID(timeline_id),
lsn_from_hex(last_lsn))
def export_timeline(args,
psql_path,
pageserver_connstr,
tenant_id,
timeline_id,
last_lsn,
prev_lsn,
tar_filename):
# Choose filenames
incomplete_filename = tar_filename + ".incomplete"
stderr_filename = path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr")
# Construct export command
query = f"fullbackup {tenant_id} {timeline_id} {last_lsn} {prev_lsn}"
cmd = [psql_path, "--no-psqlrc", pageserver_connstr, "-c", query]
# Run export command
print(f"Running: {cmd}")
with open(incomplete_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
print(f"(capturing output to {incomplete_filename})")
pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
subprocess.run(cmd,
stdout=stdout_f,
stderr=stderr_f,
env=pg_bin._build_env(None),
check=True)
# Add missing rels
pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
add_missing_rels(incomplete_filename, tar_filename, args.work_dir, pg_bin)
# Log more info
file_size = os.path.getsize(tar_filename)
print(f"Done export: {tar_filename}, size {file_size}")
def main(args: argparse.Namespace):
psql_path = str(Path(args.pg_distrib_dir) / "bin" / "psql")
old_pageserver_host = args.old_pageserver_host
new_pageserver_host = args.new_pageserver_host
old_http_client = NeonPageserverHttpClient(old_pageserver_host, args.old_pageserver_http_port)
old_http_client.check_status()
old_pageserver_connstr = f"postgresql://{old_pageserver_host}:{args.old_pageserver_pg_port}"
new_http_client = NeonPageserverHttpClient(new_pageserver_host, args.new_pageserver_http_port)
new_http_client.check_status()
new_pageserver_connstr = f"postgresql://{new_pageserver_host}:{args.new_pageserver_pg_port}"
for tenant_id in args.tenants:
print(f"Tenant: {tenant_id}")
timelines = old_http_client.timeline_list(uuid.UUID(tenant_id))
print(f"Timelines: {timelines}")
# Create tenant in new pageserver
if args.only_import is False and not args.timelines:
new_http_client.tenant_create(uuid.UUID(tenant_id), args.ok_if_exists)
for timeline in timelines:
# Skip timelines we don't need to export
if args.timelines and timeline['timeline_id'] not in args.timelines:
print(f"Skipping timeline {timeline['timeline_id']}")
continue
# Choose filenames
tar_filename = path.join(args.work_dir,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
# Export timeline from old pageserver
if args.only_import is False:
last_lsn, prev_lsn = get_rlsn(
old_pageserver_connstr,
timeline['tenant_id'],
timeline['timeline_id'],
)
export_timeline(
args,
psql_path,
old_pageserver_connstr,
timeline['tenant_id'],
timeline['timeline_id'],
last_lsn,
prev_lsn,
tar_filename,
)
# Import into new pageserver
import_timeline(
args,
psql_path,
new_pageserver_connstr,
new_http_client,
timeline['tenant_id'],
timeline['timeline_id'],
last_lsn,
prev_lsn,
tar_filename,
)
# Re-export and compare
re_export_filename = tar_filename + ".reexport"
export_timeline(args,
psql_path,
new_pageserver_connstr,
timeline['tenant_id'],
timeline['timeline_id'],
last_lsn,
prev_lsn,
re_export_filename)
# Check the size is the same
old_size = os.path.getsize(tar_filename),
new_size = os.path.getsize(re_export_filename),
if old_size != new_size:
raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}")
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--tenant-id',
dest='tenants',
required=True,
nargs='+',
help='Id of the tenant to migrate. You can pass multiple arguments',
)
parser.add_argument(
'--timeline-id',
dest='timelines',
required=False,
nargs='+',
help='Id of the timeline to migrate. You can pass multiple arguments',
)
parser.add_argument(
'--from-host',
dest='old_pageserver_host',
required=True,
help='Host of the pageserver to migrate data from',
)
parser.add_argument(
'--from-http-port',
dest='old_pageserver_http_port',
required=False,
type=int,
default=9898,
help='HTTP port of the pageserver to migrate data from. Default: 9898',
)
parser.add_argument(
'--from-pg-port',
dest='old_pageserver_pg_port',
required=False,
type=int,
default=6400,
help='pg port of the pageserver to migrate data from. Default: 6400',
)
parser.add_argument(
'--to-host',
dest='new_pageserver_host',
required=True,
help='Host of the pageserver to migrate data to',
)
parser.add_argument(
'--to-http-port',
dest='new_pageserver_http_port',
required=False,
default=9898,
type=int,
help='HTTP port of the pageserver to migrate data to. Default: 9898',
)
parser.add_argument(
'--to-pg-port',
dest='new_pageserver_pg_port',
required=False,
default=6400,
type=int,
help='pg port of the pageserver to migrate data to. Default: 6400',
)
parser.add_argument(
'--ignore-tenant-exists',
dest='ok_if_exists',
required=False,
help=
'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.',
)
parser.add_argument(
'--pg-distrib-dir',
dest='pg_distrib_dir',
required=False,
default='/usr/local/',
help='Path where postgres binaries are installed. Default: /usr/local/',
)
parser.add_argument(
'--psql-path',
dest='psql_path',
required=False,
default='/usr/local/bin/psql',
help='Path to the psql binary. Default: /usr/local/bin/psql',
)
parser.add_argument(
'--only-import',
dest='only_import',
required=False,
default=False,
action='store_true',
help='Skip export and tenant creation part',
)
parser.add_argument(
'--work-dir',
dest='work_dir',
required=True,
default=False,
help='directory where temporary tar files are stored',
)
args = parser.parse_args()
main(args)
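For reference, a hypothetical invocation of the script above, built from the argparse flags it defines; host names, the tenant id, and the work directory are placeholders (test_tenant_relocation later in this view drives it the same way via subprocess_capture):

    cmd = [
        "python", "scripts/export_import_between_pageservers.py",
        "--tenant-id", "0123456789abcdef0123456789abcdef",
        "--from-host", "old-pageserver.example", "--from-http-port", "9898", "--from-pg-port", "6400",
        "--to-host", "new-pageserver.example", "--to-http-port", "9898", "--to-pg-port", "6400",
        "--pg-distrib-dir", "/usr/local/",
        "--work-dir", "/tmp/pageserver-migration",
    ]
    # subprocess_capture(log_dir, cmd, check=True)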

View File

@@ -1,5 +1,6 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
from fixtures.utils import query_scalar

View File

@@ -167,5 +167,3 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn"):
env.neon_cli.create_branch('b1', 'b0', tenant_id=tenant, ancestor_start_lsn=lsn)
thread.join()

View File

@@ -1,11 +0,0 @@
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
import pytest
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_fsm_truncate")
pg = env.postgres.create_start('test_fsm_truncate')
pg.safe_psql(
'CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;')

View File

@@ -1,10 +1,9 @@
import re
import pytest
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin, Postgres, wait_for_upload, wait_for_last_record_lsn
from fixtures.utils import lsn_from_hex
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_upload, wait_for_last_record_lsn
from fixtures.utils import lsn_from_hex, lsn_to_hex
from uuid import UUID, uuid4
import os
import tarfile
import os
import shutil
from pathlib import Path
import json
@@ -106,63 +105,20 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
@pytest.mark.timeout(600)
def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch('test_import_from_pageserver_small')
pg = env.postgres.create_start('test_import_from_pageserver_small')
def test_import_from_pageserver(test_output_dir, pg_bin, vanilla_pg, neon_env_builder):
num_rows = 3000
lsn = _generate_data(num_rows, pg)
_import(num_rows, lsn, env, pg_bin, timeline)
@pytest.mark.timeout(1800)
# TODO: temporarily disable `test_import_from_pageserver_multisegment` test, enable
# the test back after finding the failure cause.
# @pytest.mark.skipif(os.environ.get('BUILD_TYPE') == "debug", reason="only run with release build")
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/2255")
def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch('test_import_from_pageserver_multisegment')
pg = env.postgres.create_start('test_import_from_pageserver_multisegment')
env.neon_cli.create_branch('test_import_from_pageserver')
pgmain = env.postgres.create_start('test_import_from_pageserver')
log.info("postgres is running on 'test_import_from_pageserver' branch")
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
# is large enough to create multi-segment files. Typically, a segment file's size is
# at most 1GB. A large number of inserted rows (`30000000`) is used to increase the
# DB size to above 1GB. Related: https://github.com/neondatabase/neon/issues/2097.
num_rows = 30000000
lsn = _generate_data(num_rows, pg)
timeline = pgmain.safe_psql("SHOW neon.timeline_id")[0][0]
logical_size = env.pageserver.http_client().timeline_detail(
env.initial_tenant, timeline)['local']['current_logical_size']
log.info(f"timeline logical size = {logical_size / (1024 ** 2)}MB")
assert logical_size > 1024**3 # = 1GB
tar_output_file = _import(num_rows, lsn, env, pg_bin, timeline)
# Check if the backup data contains multiple segment files
cnt_seg_files = 0
segfile_re = re.compile('[0-9]+\\.[0-9]+')
with tarfile.open(tar_output_file, "r") as tar_f:
for f in tar_f.getnames():
if segfile_re.search(f) is not None:
cnt_seg_files += 1
log.info(f"Found a segment file: {f} in the backup archive file")
assert cnt_seg_files > 0
def _generate_data(num_rows: int, pg: Postgres) -> str:
"""Generate a table with `num_rows` rows.
Returns:
the latest insert WAL's LSN"""
with closing(pg.connect()) as conn:
with closing(pgmain.connect()) as conn:
with conn.cursor() as cur:
# data loading may take a while, so increase statement timeout
cur.execute("SET statement_timeout='300s'")
@@ -171,28 +127,15 @@ def _generate_data(num_rows: int, pg: Postgres) -> str:
cur.execute("CHECKPOINT")
cur.execute('SELECT pg_current_wal_insert_lsn()')
res = cur.fetchone()
assert res is not None and isinstance(res[0], str)
return res[0]
def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timeline: UUID) -> str:
"""Test importing backup data to the pageserver.
Args:
expected_num_rows: the expected number of rows of the test table in the backup data
lsn: the backup's base LSN
Returns:
path to the backup archive file"""
log.info(f"start_backup_lsn = {lsn}")
lsn = cur.fetchone()[0]
log.info(f"start_backup_lsn = {lsn}")
# Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
# PgBin sets it automatically, but here we need to pipe psql output to the tar command.
psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
# Get a fullbackup from pageserver
query = f"fullbackup { env.initial_tenant.hex} {timeline.hex} {lsn}"
query = f"fullbackup { env.initial_tenant.hex} {timeline} {lsn}"
cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
result_basepath = pg_bin.run_capture(cmd, env=psql_env)
tar_output_file = result_basepath + ".stdout"
@@ -209,7 +152,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
env.pageserver.start()
# Import using another tenantid, because we use the same pageserver.
# TODO Create another pageserver to make test more realistic.
# TODO Create another pageserver to maeke test more realistic.
tenant = uuid4()
# Import to pageserver
@@ -222,7 +165,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
"--tenant-id",
tenant.hex,
"--timeline-id",
timeline.hex,
timeline,
"--node-name",
node_name,
"--base-lsn",
@@ -232,15 +175,15 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
])
# Wait for data to land in s3
wait_for_last_record_lsn(client, tenant, timeline, lsn_from_hex(lsn))
wait_for_upload(client, tenant, timeline, lsn_from_hex(lsn))
wait_for_last_record_lsn(client, tenant, UUID(timeline), lsn_from_hex(lsn))
wait_for_upload(client, tenant, UUID(timeline), lsn_from_hex(lsn))
# Check it worked
pg = env.postgres.create_start(node_name, tenant_id=tenant)
assert pg.safe_psql('select count(*) from tbl') == [(expected_num_rows, )]
assert pg.safe_psql('select count(*) from tbl') == [(num_rows, )]
# Take another fullbackup
query = f"fullbackup { tenant.hex} {timeline.hex} {lsn}"
query = f"fullbackup { tenant.hex} {timeline} {lsn}"
cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
result_basepath = pg_bin.run_capture(cmd, env=psql_env)
new_tar_output_file = result_basepath + ".stdout"
@@ -252,6 +195,4 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
# Check that gc works
psconn = env.pageserver.connect()
pscur = psconn.cursor()
pscur.execute(f"do_gc {tenant.hex} {timeline.hex} 0")
return tar_output_file
pscur.execute(f"do_gc {tenant.hex} {timeline} 0")

View File

@@ -60,38 +60,17 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
client = env.pageserver.http_client()
timeline_details = client.timeline_detail(tenant_id=tenant_id,
timeline_id=timeline_id,
include_non_incremental_logical_size=True)
tenant_id, timeline_id = env.neon_cli.create_tenant()
assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
timeline_details = client.timeline_detail(tenant_id=tenant_id,
timeline_id=timeline_id,
include_non_incremental_logical_size=True)
def expect_updated_msg_lsn(client: NeonPageserverHttpClient,
tenant_id: UUID,
timeline_id: UUID,
prev_msg_lsn: Optional[int]) -> int:
timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id)
# a successful `timeline_details` response must contain the below fields
local_timeline_details = timeline_details['local']
assert "wal_source_connstr" in local_timeline_details.keys()
assert "last_received_msg_lsn" in local_timeline_details.keys()
assert "last_received_msg_ts" in local_timeline_details.keys()
assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty"
last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"])
assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \
f"the last received message's LSN {last_msg_lsn} hasn't been updated \
compared to the previous message's LSN {prev_msg_lsn}"
return last_msg_lsn
assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
# Test the WAL-receiver related fields in the response to `timeline_details` API call
@@ -100,29 +79,44 @@ def expect_updated_msg_lsn(client: NeonPageserverHttpClient,
# `timeline_details` now.
def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
client = env.pageserver.http_client()
# Wait to make sure that we get a latest WAL receiver data.
# We need to wait here because it's possible that we don't have access to
# the latest WAL yet, when the `timeline_detail` API is first called.
# See: https://github.com/neondatabase/neon/issues/1768.
lsn = wait_until(number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None))
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start(DEFAULT_BRANCH_NAME, tenant_id=tenant_id)
# Make a DB modification then expect getting a new WAL receiver's data.
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
wait_until(number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn))
def expect_updated_msg_lsn(prev_msg_lsn: Optional[int]) -> int:
timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id)
# a successful `timeline_details` response must contain the below fields
local_timeline_details = timeline_details['local']
assert "wal_source_connstr" in local_timeline_details.keys()
assert "last_received_msg_lsn" in local_timeline_details.keys()
assert "last_received_msg_ts" in local_timeline_details.keys()
assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty"
last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"])
assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \
f"the last received message's LSN {last_msg_lsn} hasn't been updated \
compared to the previous message's LSN {prev_msg_lsn}"
return last_msg_lsn
# Wait to make sure that we get a latest WAL receiver data.
# We need to wait here because it's possible that we don't have access to
# the latest WAL yet, when the `timeline_detail` API is first called.
# See: https://github.com/neondatabase/neon/issues/1768.
lsn = wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(None))
# Make a DB modification then expect getting a new WAL receiver's data.
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
wait_until(number_of_iterations=5, interval=1, func=lambda: expect_updated_msg_lsn(lsn))
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
check_client(client, env.initial_tenant)
client = env.pageserver.http_client()
check_client(client, env.initial_tenant)
def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilder):
@@ -131,5 +125,5 @@ def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilde
management_token = env.auth_keys.generate_management_token()
with env.pageserver.http_client(auth_token=management_token) as client:
check_client(client, env.initial_tenant)
client = env.pageserver.http_client(auth_token=management_token)
check_client(client, env.initial_tenant)

View File

@@ -2,16 +2,6 @@ from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
# Test that the pageserver fixture is implemented correctly, allowing quick restarts.
# This is a regression test, see https://github.com/neondatabase/neon/issues/2247
def test_fixture_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
for i in range(3):
env.pageserver.stop()
env.pageserver.start()
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):

View File

@@ -2,10 +2,11 @@
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ......
import shutil, os
from contextlib import closing
from pathlib import Path
import time
from uuid import UUID
from fixtures.neon_fixtures import NeonEnvBuilder, RemoteStorageKind, assert_timeline_local, available_remote_storages, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, assert_timeline_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, query_scalar
import pytest
@@ -28,19 +29,18 @@ import pytest
# * queries the specific data, ensuring that it matches the one stored before
#
# The tests are done for all types of remote storage pageserver supports.
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_remote_storage_backup_and_restore(
neon_env_builder: NeonEnvBuilder,
remote_storatge_kind: RemoteStorageKind,
):
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, storage_type: str):
# Use this test to check more realistic SK ids: some etcd key parsing bugs were related,
# and this test needs SK to write data to pageserver, so it will be visible
neon_env_builder.safekeepers_id_start = 12
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_remote_storage_backup_and_restore',
)
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
data_id = 1
data_secret = 'very secret secret'
@@ -110,7 +110,7 @@ def test_remote_storage_backup_and_restore(
client.tenant_attach(UUID(tenant_id))
log.info("waiting for timeline redownload")
wait_until(number_of_iterations=20,
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)))
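The hunk above swaps a hand-rolled storage_type parametrization for the RemoteStorageKind helpers changed later in this view; a minimal sketch of that pattern with a made-up test name (the parameter keeps the spelling used in the source):

    import pytest
    from fixtures.neon_fixtures import NeonEnvBuilder, RemoteStorageKind, available_remote_storages

    @pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
    def test_uses_remote_storage(neon_env_builder: NeonEnvBuilder,
                                 remote_storatge_kind: RemoteStorageKind):
        neon_env_builder.enable_remote_storage(
            remote_storage_kind=remote_storatge_kind,
            test_name='test_uses_remote_storage',
        )
        env = neon_env_builder.init_start()
        # ... exercise env against local_fs, mock_s3 and (when enabled) real_s3 backends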

View File

@@ -1,19 +1,10 @@
from threading import Thread
from uuid import uuid4
import uuid
import psycopg2
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
def do_gc_target(env: NeonEnv, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
try:
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')
except Exception as e:
log.error("do_gc failed: %s", e)
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
@@ -45,7 +36,8 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0')
# try to concurrently run gc and detach
gc_thread = Thread(target=lambda: do_gc_target(env, tenant_id, timeline_id))
gc_thread = Thread(
target=lambda: env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0'), )
gc_thread.start()
last_error = None

View File

@@ -229,7 +229,7 @@ def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path
# basebackup and importing it into the new pageserver.
# This kind of migration can tolerate breaking changes
# to storage format
'major',
pytest.param('major', marks=pytest.mark.xfail(reason="Not implemented")),
])
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
@@ -345,8 +345,6 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# Migrate either by attaching from s3 or import/export basebackup
if method == "major":
cmd = [
"poetry",
"run",
"python",
os.path.join(base_dir, "scripts/export_import_between_pageservers.py"),
"--tenant-id",
@@ -363,12 +361,12 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
str(new_pageserver_http_port),
"--to-pg-port",
str(new_pageserver_pg_port),
"--pg-distrib-dir",
pg_distrib_dir,
"--psql-path",
os.path.join(pg_distrib_dir, "bin", "psql"),
"--work-dir",
os.path.join(test_output_dir),
]
subprocess_capture(test_output_dir, cmd, check=True)
subprocess_capture(str(env.repo_dir), cmd, check=True)
elif method == "minor":
# call to attach timeline to new pageserver
new_pageserver_http.tenant_attach(tenant_id)
@@ -429,22 +427,6 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
post_migration_check(pg_main, 500500, old_local_path_main)
post_migration_check(pg_second, 1001000, old_local_path_second)
# ensure that we can successfully read all relations on the new pageserver
with pg_cur(pg_second) as cur:
cur.execute('''
DO $$
DECLARE
r RECORD;
BEGIN
FOR r IN
SELECT relname FROM pg_class WHERE relkind='r'
LOOP
RAISE NOTICE '%', r.relname;
EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname;
END LOOP;
END$$;
''')
if with_load == 'with_load':
assert load_ok_event.wait(3)
log.info('stopping load thread')

View File

@@ -13,7 +13,7 @@ from uuid import UUID
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, RemoteStorageKind, available_remote_storages, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import lsn_from_hex
@@ -38,7 +38,7 @@ async def tenant_workload(env: NeonEnv, pg: Postgres):
async def all_tenants_workload(env: NeonEnv, tenants_pgs):
workers = []
for _, pg in tenants_pgs:
for tenant, pg in tenants_pgs:
worker = tenant_workload(env, pg)
workers.append(asyncio.create_task(worker))
@@ -46,18 +46,23 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs):
await asyncio.gather(*workers)
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_tenants_many',
)
@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
def test_tenants_many(neon_env_builder: NeonEnvBuilder, storage_type: str):
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
tenants_pgs: List[Tuple[UUID, Postgres]] = []
for _ in range(1, 5):
for i in range(1, 5):
# Use a tiny checkpoint distance, to create a lot of layers quickly
tenant, _ = env.neon_cli.create_tenant(
conf={

View File

@@ -4,7 +4,7 @@ from uuid import UUID
import re
import psycopg2.extras
import psycopg2.errors
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local, wait_for_last_flush_lsn
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local
from fixtures.log_helper import log
import time
@@ -192,8 +192,6 @@ def test_timeline_physical_size_init(neon_simple_env: NeonEnv):
FROM generate_series(1, 1000) g""",
])
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
# restart the pageserer to force calculating timeline's initial physical size
env.pageserver.stop()
env.pageserver.start()
@@ -213,9 +211,7 @@ def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv):
FROM generate_series(1, 1000) g""",
])
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
assert_physical_size(env, env.initial_tenant, new_timeline_id)
@@ -236,10 +232,8 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder
FROM generate_series(1, 100000) g""",
])
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
env.pageserver.safe_psql(f"compact {env.initial_tenant.hex} {new_timeline_id.hex}")
assert_physical_size(env, env.initial_tenant, new_timeline_id)
@@ -260,21 +254,15 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
])
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
pg.safe_psql("""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
""")
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
env.pageserver.safe_psql(f"do_gc {env.initial_tenant.hex} {new_timeline_id.hex} 0")
assert_physical_size(env, env.initial_tenant, new_timeline_id)
@@ -291,7 +279,6 @@ def test_timeline_physical_size_metric(neon_simple_env: NeonEnv):
FROM generate_series(1, 100000) g""",
])
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
# get the metrics and parse the metric for the current timeline's physical size
@@ -332,7 +319,6 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv):
f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g",
])
wait_for_last_flush_lsn(env, pg, tenant, timeline)
env.pageserver.safe_psql(f"checkpoint {tenant.hex} {timeline.hex}")
timeline_total_size += get_timeline_physical_size(timeline)

View File

@@ -12,8 +12,9 @@ import uuid
from contextlib import closing
from dataclasses import dataclass, field
from multiprocessing import Process, Value
from pathlib import Path
from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, available_remote_storages, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex, query_scalar
from fixtures.log_helper import log
from typing import List, Optional, Any
@@ -284,12 +285,9 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env.neon_cli.create_branch('test_safekeepers_wal_removal')
pg = env.postgres.create_start('test_safekeepers_wal_removal')
# Note: it is important to insert at least two segments, as currently
# control file is synced roughly once in segment range and WAL is not
# removed until all horizons are persisted.
pg.safe_psql_many([
'CREATE TABLE t(key int primary key, value text)',
"INSERT INTO t SELECT generate_series(1,200000), 'payload'",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
])
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
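A rough back-of-the-envelope for the "at least two segments" comment above, assuming the stock 16 MiB WAL segment size (an assumption, not stated in the diff):

    WAL_SEGMENT_SIZE = 16 * 1024 * 1024            # Postgres default segment size
    min_wal_before_removal = 2 * WAL_SEGMENT_SIZE  # control file syncs roughly once per segment,
                                                   # so at least two segments of WAL are needed
                                                   # before any old segment can be removed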
@@ -353,7 +351,7 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end):
if lsn_from_hex(tli_status.backup_lsn) >= lsn_from_hex(seg_end):
break
elapsed = time.time() - started_at
if elapsed > 30:
if elapsed > 20:
raise RuntimeError(
f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded")
time.sleep(0.5)
@@ -379,15 +377,15 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size):
time.sleep(0.5)
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
def test_wal_backup(neon_env_builder: NeonEnvBuilder, storage_type: str):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_safekeepers_wal_backup',
)
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_safekeepers_wal_backup')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = neon_env_builder.init_start()
@@ -427,15 +425,15 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo
wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], '0/5000000')
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
@pytest.mark.parametrize('storage_type', ['mock_s3', 'local_fs'])
def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, storage_type: str):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_s3_wal_replay',
)
if storage_type == 'local_fs':
neon_env_builder.enable_local_fs_remote_storage()
elif storage_type == 'mock_s3':
neon_env_builder.enable_s3_mock_remote_storage('test_s3_wal_replay')
else:
raise RuntimeError(f'Unknown storage type: {storage_type}')
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = neon_env_builder.init_start()

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
from dataclasses import field
from contextlib import contextmanager
from enum import Flag, auto
import enum
import textwrap
from cached_property import cached_property
import abc
@@ -222,7 +221,7 @@ def can_bind(host: str, port: int) -> bool:
# moment. If that changes, we should use start using SO_REUSEADDR here
# too, to allow reusing ports more quickly.
# See https://github.com/neondatabase/neon/issues/801
# sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
#sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
sock.bind((host, port))
@@ -231,8 +230,6 @@ def can_bind(host: str, port: int) -> bool:
except socket.error:
log.info(f"Port {port} is in use, skipping")
return False
finally:
sock.close()
class PortDistributor:
@@ -265,11 +262,6 @@ def default_broker(request: Any, port_distributor: PortDistributor):
broker.stop()
@pytest.fixture(scope='session')
def run_id():
yield uuid.uuid4()
@pytest.fixture(scope='session')
def mock_s3_server(port_distributor: PortDistributor):
mock_s3_server = MockS3Server(port_distributor.get_port())
@@ -299,9 +291,7 @@ class PgProtocol:
# change it by calling "SET statement_timeout" after
# connecting.
options = result.get('options', '')
if "statement_timeout" not in options:
options = f'-cstatement_timeout=120s {options}'
result['options'] = options
result['options'] = f'-cstatement_timeout=120s {options}'
return result
# autocommit=True here by default because that's what we need most of the time
@@ -448,46 +438,26 @@ class MockS3Server:
def secret_key(self) -> str:
return 'test'
def access_env_vars(self) -> Dict[Any, Any]:
return {
'AWS_ACCESS_KEY_ID': self.access_key(),
'AWS_SECRET_ACCESS_KEY': self.secret_key(),
}
def kill(self):
self.subprocess.kill()
@enum.unique
class RemoteStorageKind(enum.Enum):
LOCAL_FS = "local_fs"
MOCK_S3 = "mock_s3"
REAL_S3 = "real_s3"
def available_remote_storages() -> List[RemoteStorageKind]:
remote_storages = [RemoteStorageKind.LOCAL_FS, RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE") is not None:
remote_storages.append(RemoteStorageKind.REAL_S3)
log.info("Enabling real s3 storage for tests")
else:
log.info("Using mock implementations to test remote storage")
return remote_storages
@dataclass
class LocalFsStorage:
root: Path
local_path: Path
@dataclass
class S3Storage:
bucket_name: str
bucket_region: str
access_key: str
secret_key: str
endpoint: Optional[str] = None
prefix_in_bucket: Optional[str] = None
def access_env_vars(self) -> Dict[str, str]:
return {
'AWS_ACCESS_KEY_ID': self.access_key,
'AWS_SECRET_ACCESS_KEY': self.secret_key,
}
endpoint: Optional[str]
RemoteStorage = Union[LocalFsStorage, S3Storage]
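Both credential helpers above return plain environment-variable maps; a sketch of how such a map is typically consumed, by merging it into the environment of a spawned process (the child command and the s3_storage object are assumptions, not taken from the diff):

    import os
    import subprocess

    env = os.environ.copy()
    env.update(s3_storage.access_env_vars())  # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
    subprocess.run(["aws", "s3", "ls"], env=env, check=True)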
@@ -496,20 +466,16 @@ RemoteStorage = Union[LocalFsStorage, S3Storage]
# serialize as toml inline table
def remote_storage_to_toml_inline_table(remote_storage):
if isinstance(remote_storage, LocalFsStorage):
remote_storage_config = f"local_path='{remote_storage.root}'"
res = f"local_path='{remote_storage.local_path}'"
elif isinstance(remote_storage, S3Storage):
remote_storage_config = f"bucket_name='{remote_storage.bucket_name}',\
bucket_region='{remote_storage.bucket_region}'"
if remote_storage.prefix_in_bucket is not None:
remote_storage_config += f",prefix_in_bucket='{remote_storage.prefix_in_bucket}'"
res = f"bucket_name='{remote_storage.bucket_name}', bucket_region='{remote_storage.bucket_region}'"
if remote_storage.endpoint is not None:
remote_storage_config += f",endpoint='{remote_storage.endpoint}'"
res += f", endpoint='{remote_storage.endpoint}'"
else:
raise Exception(f'Unknown storage configuration {remote_storage}')
else:
raise Exception("invalid remote storage type")
return f"{{{remote_storage_config}}}"
return f"{{{res}}}"
class RemoteStorageUsers(Flag):
@@ -527,31 +493,28 @@ class NeonEnvBuilder:
cleaned up after the test has finished.
"""
def __init__(
self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
run_id: uuid.UUID,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
# fsync is disabled by default to make the tests go faster
safekeepers_enable_fsync: bool = False,
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME,
):
self,
repo_dir: Path,
port_distributor: PortDistributor,
broker: Etcd,
mock_s3_server: MockS3Server,
remote_storage: Optional[RemoteStorage] = None,
remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
# Use non-standard SK ids to check for various parsing bugs
safekeepers_id_start: int = 0,
# fsync is disabled by default to make the tests go faster
safekeepers_enable_fsync: bool = False,
auth_enabled: bool = False,
rust_log_override: Optional[str] = None,
default_branch_name=DEFAULT_BRANCH_NAME):
self.repo_dir = repo_dir
self.rust_log_override = rust_log_override
self.port_distributor = port_distributor
self.remote_storage = remote_storage
self.remote_storage_users = remote_storage_users
self.broker = broker
self.run_id = run_id
self.mock_s3_server = mock_s3_server
self.pageserver_config_override = pageserver_config_override
self.num_safekeepers = num_safekeepers
@@ -560,8 +523,6 @@ class NeonEnvBuilder:
self.auth_enabled = auth_enabled
self.default_branch_name = default_branch_name
self.env: Optional[NeonEnv] = None
self.remote_storage_prefix: Optional[str] = None
self.keep_remote_storage_contents: bool = True
def init(self) -> NeonEnv:
# Cannot create more than one environment from one builder
@@ -577,143 +538,41 @@ class NeonEnvBuilder:
self.start()
return env
def enable_remote_storage(
self,
remote_storage_kind: RemoteStorageKind,
test_name: str,
force_enable: bool = True,
):
if remote_storage_kind == RemoteStorageKind.LOCAL_FS:
self.enable_local_fs_remote_storage(force_enable=force_enable)
elif remote_storage_kind == RemoteStorageKind.MOCK_S3:
self.enable_mock_s3_remote_storage(bucket_name=test_name, force_enable=force_enable)
elif remote_storage_kind == RemoteStorageKind.REAL_S3:
self.enable_real_s3_remote_storage(test_name=test_name, force_enable=force_enable)
else:
raise RuntimeError(f'Unknown storage type: {remote_storage_kind}')
"""
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
Errors if the pageserver already has a remote storage configuration, unless `force_enable` is set to `True`.
"""
def enable_local_fs_remote_storage(self, force_enable=True):
"""
Sets up the pageserver to use the local fs at the `test_dir/local_fs_remote_storage` path.
Errors if the pageserver already has a remote storage configuration, unless `force_enable` is set to `True`.
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
self.remote_storage = LocalFsStorage(Path(self.repo_dir / 'local_fs_remote_storage'))
def enable_mock_s3_remote_storage(self, bucket_name: str, force_enable=True):
"""
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
Starts up the mock server, if that does not run yet.
Errors if the pageserver already has a remote storage configuration, unless `force_enable` is set to `True`.
"""
"""
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
Starts up the mock server, if that does not run yet.
Errors if the pageserver already has a remote storage configuration, unless `force_enable` is set to `True`.
"""
def enable_s3_mock_remote_storage(self, bucket_name: str, force_enable=True):
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
mock_endpoint = self.mock_s3_server.endpoint()
mock_region = self.mock_s3_server.region()
self.remote_storage_client = boto3.client(
boto3.client(
's3',
endpoint_url=mock_endpoint,
region_name=mock_region,
aws_access_key_id=self.mock_s3_server.access_key(),
aws_secret_access_key=self.mock_s3_server.secret_key(),
)
self.remote_storage_client.create_bucket(Bucket=bucket_name)
self.remote_storage = S3Storage(
bucket_name=bucket_name,
endpoint=mock_endpoint,
bucket_region=mock_region,
access_key=self.mock_s3_server.access_key(),
secret_key=self.mock_s3_server.secret_key(),
)
def enable_real_s3_remote_storage(self, test_name: str, force_enable=True):
"""
Sets up the configuration to use a real S3 endpoint, without the mock server
"""
assert force_enable or self.remote_storage is None, "remote storage is enabled already"
access_key = os.getenv("AWS_ACCESS_KEY_ID")
assert access_key, "no aws access key provided"
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
assert secret_key, "no aws secret access key provided"
# a session token is needed for local runs with SSO auth
session_token = os.getenv("AWS_SESSION_TOKEN")
bucket_name = os.getenv("REMOTE_STORAGE_S3_BUCKET")
assert bucket_name, "no remote storage bucket name provided"
region = os.getenv("REMOTE_STORAGE_S3_REGION")
assert region, "no remote storage region provided"
# do not leave data in real s3
self.keep_remote_storage_contents = False
# construct a prefix inside bucket for the particular test case and test run
self.remote_storage_prefix = f'{self.run_id}/{test_name}'
self.remote_storage_client = boto3.client(
's3',
region_name=region,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
aws_session_token=session_token,
)
).create_bucket(Bucket=bucket_name)
self.remote_storage = S3Storage(bucket_name=bucket_name,
bucket_region=region,
access_key=access_key,
secret_key=secret_key,
prefix_in_bucket=self.remote_storage_prefix)
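For anyone trying to run the real-S3 path locally: the asserts above spell out the environment this helper expects. A hedged sketch (the variable names come straight from those asserts; the values are placeholders):

    import os

    os.environ.setdefault('AWS_ACCESS_KEY_ID', '<access key>')
    os.environ.setdefault('AWS_SECRET_ACCESS_KEY', '<secret key>')
    os.environ.setdefault('AWS_SESSION_TOKEN', '<session token>')  # only needed for SSO-based local runs
    os.environ.setdefault('REMOTE_STORAGE_S3_BUCKET', '<bucket name>')
    os.environ.setdefault('REMOTE_STORAGE_S3_REGION', '<region>')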
def cleanup_remote_storage(self):
# here we check for true remote storage, not the local one
# local cleanup is not needed after the test because in CI the whole environment is destroyed anyway
if self.remote_storage_prefix is None:
log.info("no remote storage was set up, skipping cleanup")
return
if self.keep_remote_storage_contents:
log.info("keep_remote_storage_contents skipping remote storage cleanup")
return
log.info("removing data from test s3 bucket %s by prefix %s",
self.remote_storage.bucket_name,
self.remote_storage_prefix)
paginator = self.remote_storage_client.get_paginator('list_objects_v2')
pages = paginator.paginate(
Bucket=self.remote_storage.bucket_name,
Prefix=self.remote_storage_prefix,
)
objects_to_delete = {'Objects': []}
cnt = 0
for item in pages.search('Contents'):
# weirdly when nothing is found it returns [None]
if item is None:
break
objects_to_delete['Objects'].append({'Key': item['Key']})
# flush once the AWS limit of 1000 keys per DeleteObjects request is reached
if len(objects_to_delete['Objects']) >= 1000:
self.remote_storage_client.delete_objects(
Bucket=self.remote_storage.bucket_name,
Delete=objects_to_delete,
)
objects_to_delete = dict(Objects=[])
cnt += 1
# flush rest
if len(objects_to_delete['Objects']):
self.remote_storage_client.delete_objects(Bucket=self.remote_storage.bucket_name,
Delete=objects_to_delete)
log.info("deleted %s objects from remote storage", cnt)
endpoint=mock_endpoint,
bucket_region=mock_region)
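The batching in `cleanup_remote_storage` exists because S3's DeleteObjects call accepts at most 1000 keys per request. A condensed, self-contained sketch of the same idea (the `s3_client` argument is assumed to be a boto3 S3 client):

    def delete_in_batches(s3_client, bucket: str, keys) -> None:
        # DeleteObjects accepts at most 1000 keys per call, so chunk the input
        keys = list(keys)
        for i in range(0, len(keys), 1000):
            s3_client.delete_objects(
                Bucket=bucket,
                Delete={'Objects': [{'Key': key} for key in keys[i:i + 1000]]},
            )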
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
# Stop all the nodes.
if self.env:
log.info('Cleaning up all storage and compute nodes')
@@ -722,8 +581,6 @@ class NeonEnvBuilder:
sk.stop(immediate=True)
self.env.pageserver.stop(immediate=True)
self.cleanup_remote_storage()
class NeonEnv:
"""
@@ -856,13 +713,10 @@ class NeonEnv:
@pytest.fixture(scope=shareable_scope)
def _shared_simple_env(
request: Any,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd,
run_id: uuid.UUID,
) -> Iterator[NeonEnv]:
def _shared_simple_env(request: Any,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[NeonEnv]:
"""
Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
is set, this is shared by all tests using `neon_simple_env`.
@@ -876,13 +730,8 @@ def _shared_simple_env(
repo_dir = os.path.join(str(top_output_dir), "shared_repo")
shutil.rmtree(repo_dir, ignore_errors=True)
with NeonEnvBuilder(
repo_dir=Path(repo_dir),
port_distributor=port_distributor,
broker=default_broker,
mock_s3_server=mock_s3_server,
run_id=run_id,
) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
env = builder.init_start()
# For convenience in tests, create a branch from the freshly-initialized cluster.
@@ -907,13 +756,10 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
@pytest.fixture(scope='function')
def neon_env_builder(
test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd,
run_id: uuid.UUID,
) -> Iterator[NeonEnvBuilder]:
def neon_env_builder(test_output_dir,
port_distributor: PortDistributor,
mock_s3_server: MockS3Server,
default_broker: Etcd) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Neon environment for test.
@@ -931,13 +777,8 @@ def neon_env_builder(
repo_dir = os.path.join(test_output_dir, "repo")
# Return the builder to the caller
with NeonEnvBuilder(
repo_dir=Path(repo_dir),
port_distributor=port_distributor,
mock_s3_server=mock_s3_server,
broker=default_broker,
run_id=run_id,
) as builder:
with NeonEnvBuilder(Path(repo_dir), port_distributor, default_broker,
mock_s3_server) as builder:
yield builder
@@ -1342,10 +1183,7 @@ class NeonCli(AbstractNeonCli):
remote_storage_users=self.env.remote_storage_users,
pageserver_config_override=self.env.pageserver.config_override)
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
s3_env_vars = self.env.s3_mock_server.access_env_vars() if self.env.s3_mock_server else None
return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
def pageserver_stop(self, immediate=False) -> 'subprocess.CompletedProcess[str]':
@@ -1357,10 +1195,7 @@ class NeonCli(AbstractNeonCli):
return self.raw_cli(cmd)
def safekeeper_start(self, id: int) -> 'subprocess.CompletedProcess[str]':
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
s3_env_vars = self.env.s3_mock_server.access_env_vars() if self.env.s3_mock_server else None
return self.raw_cli(['safekeeper', 'start', str(id)], extra_env_vars=s3_env_vars)
def safekeeper_stop(self,
@@ -1488,17 +1323,6 @@ class NeonPageserver(PgProtocol):
self.running = True
return self
def _wait_for_death(self):
"""Wait for pageserver to die. Assumes kill signal is sent."""
pid_path = pathlib.Path(self.env.repo_dir) / "pageserver.pid"
pid = read_pid(pid_path)
retries_left = 20
while check_pid(pid):
time.sleep(0.2)
retries_left -= 1
if retries_left == 0:
raise AssertionError("Pageserver failed to die")
def stop(self, immediate=False) -> 'NeonPageserver':
"""
Stop the page server.
@@ -1506,7 +1330,6 @@ class NeonPageserver(PgProtocol):
"""
if self.running:
self.env.neon_cli.pageserver_stop(immediate)
self._wait_for_death()
self.running = False
return self
@@ -1514,7 +1337,7 @@ class NeonPageserver(PgProtocol):
return self
def __exit__(self, exc_type, exc, tb):
self.stop(immediate=True)
self.stop(True)
def http_client(self, auth_token: Optional[str] = None) -> NeonPageserverHttpClient:
return NeonPageserverHttpClient(
@@ -1531,7 +1354,6 @@ def append_pageserver_param_overrides(
):
if bool(remote_storage_users & RemoteStorageUsers.PAGESERVER) and remote_storage is not None:
remote_storage_toml_table = remote_storage_to_toml_inline_table(remote_storage)
params_to_update.append(
f'--pageserver-config-override=remote_storage={remote_storage_toml_table}')
@@ -2016,17 +1838,6 @@ def read_pid(path: Path) -> int:
return int(path.read_text())
def check_pid(pid):
"""Check whether pid is running."""
try:
# If sig is 0, then no signal is sent, but error checking is still performed.
os.kill(pid, 0)
except OSError:
return False
else:
return True
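For context on the `_wait_for_death`/`check_pid` helpers in this hunk: `os.kill(pid, 0)` sends no signal at all; it only asks the kernel whether the process still exists (or whether we lack permission to signal it). A small standalone sketch of the same polling idea:

    import os
    import time

    def wait_until_dead(pid: int, attempts: int = 20, delay: float = 0.2) -> bool:
        # probe with signal 0 until the process disappears or we give up
        for _ in range(attempts):
            try:
                os.kill(pid, 0)
            except OSError:
                return True
            time.sleep(delay)
        return False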
@dataclass
class SafekeeperPort:
pg: int
@@ -2049,8 +1860,8 @@ class Safekeeper:
started_at = time.time()
while True:
try:
with self.http_client() as http_cli:
http_cli.check_status()
http_cli = self.http_client()
http_cli.check_status()
except Exception as e:
elapsed = time.time() - started_at
if elapsed > 3:
@@ -2201,9 +2012,9 @@ class Etcd:
return f'http://127.0.0.1:{self.port}'
def check_status(self):
with requests.Session() as s:
s.mount('http://', requests.adapters.HTTPAdapter(max_retries=1)) # do not retry
s.get(f"{self.client_url()}/health").raise_for_status()
s = requests.Session()
s.mount('http://', requests.adapters.HTTPAdapter(max_retries=1)) # do not retry
s.get(f"{self.client_url()}/health").raise_for_status()
def try_start(self):
if self.handle is not None:
@@ -2498,9 +2309,3 @@ def wait_for_last_record_lsn(pageserver_http_client: NeonPageserverHttpClient,
time.sleep(1)
raise Exception("timed out while waiting for last_record_lsn to reach {}, was {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
def wait_for_last_flush_lsn(env: NeonEnv, pg: Postgres, tenant: uuid.UUID, timeline: uuid.UUID):
"""Wait for pageserver to catch up the latest flush LSN"""
last_flush_lsn = lsn_from_hex(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
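A usage sketch for the `wait_for_last_flush_lsn` helper in this hunk, so its intent is not lost: the `env`, `pg`, `tenant` and `timeline` names are assumed to come from the surrounding fixtures, and the table is a placeholder.

    pg.safe_psql("INSERT INTO t SELECT generate_series(1, 1000)")
    # block until the pageserver has ingested everything flushed so far
    wait_for_last_flush_lsn(env, pg, tenant, timeline)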

View File

@@ -146,7 +146,7 @@ def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, durat
record_thread.join()
def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_event: threading.Event):
def start_pgbench_intensive_initialization(env: PgCompare, scale: int):
with env.record_duration("run_duration"):
# The statement timeout (default: 120s) needs to be increased because the
# initialization step can be slow at a large scale.
@@ -155,11 +155,9 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_even
f'-s{scale}',
'-i',
'-Idtg',
env.pg.connstr(options='-cstatement_timeout=600s')
env.pg.connstr(options='-cstatement_timeout=300s')
])
done_event.set()
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("scale", get_scales_matrix(1000))
@@ -168,17 +166,15 @@ def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int):
with env.pg.connect().cursor() as cur:
cur.execute("CREATE TABLE foo as select generate_series(1,100000)")
workload_done_event = threading.Event()
workload_thread = threading.Thread(target=start_pgbench_intensive_initialization,
args=(env, scale, workload_done_event))
args=(env, scale))
workload_thread.start()
record_thread = threading.Thread(target=record_lsn_write_lag,
args=(env, lambda: not workload_done_event.is_set()))
args=(env, lambda: workload_thread.is_alive()))
record_thread.start()
record_read_latency(env, lambda: not workload_done_event.is_set(), "SELECT count(*) from foo")
record_read_latency(env, lambda: workload_thread.is_alive(), "SELECT count(*) from foo")
workload_thread.join()
record_thread.join()
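The two variants above differ only in how the reader threads learn that the workload has finished: an explicit `threading.Event` set by the workload versus polling `workload_thread.is_alive()`. A self-contained sketch of the Event-based handshake, with sleeps standing in for the real work:

    import threading
    import time

    def workload(done: threading.Event) -> None:
        time.sleep(1.0)  # stand-in for the pgbench initialization step
        done.set()       # tell the readers the workload is finished

    def reader(done: threading.Event) -> None:
        while not done.is_set():
            time.sleep(0.1)  # stand-in for one latency measurement

    done = threading.Event()
    threads = [threading.Thread(target=workload, args=(done,)),
               threading.Thread(target=reader, args=(done,))]
    for t in threads:
        t.start()
    for t in threads:
        t.join()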