Make clippy happy

Calculate postgres checksum for FPI stored in page server
Add vanilla pg baseline tests (#1275 )
2026-02-10 14:10:37 +00:00 · 2022-02-16 13:26:22 +03:00 · 2022-02-16 13:16:35 +03:00 · 2022-02-15 13:44:22 -05:00 · 2022-02-15 17:45:23 +02:00 · 2022-02-15 17:01:22 +02:00
116 changed files with 6671 additions and 2706 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,29 +1,28 @@
 version: 2.1

 executors:
-  zenith-build-executor:
+  zenith-xlarge-executor:
    resource_class: xlarge
    docker:
      # NB: when changed, do not forget to update rust image tag in all Dockerfiles
-      - image: cimg/rust:1.56.1
-  zenith-python-executor:
+      - image: zimg/rust:1.56
+  zenith-executor:
    docker:
-      - image: cimg/python:3.7.10  # Oldest available 3.7 with Ubuntu 20.04 (for GLIBC and Rust) at CirlceCI
+      - image: zimg/rust:1.56

 jobs:
  check-codestyle-rust:
-    executor: zenith-build-executor
+    executor: zenith-xlarge-executor
    steps:
      - checkout
      - run:
          name: rustfmt
          when: always
-          command: |
-            cargo fmt --all -- --check
+          command: cargo fmt --all -- --check

  # A job to build postgres
  build-postgres:
-    executor: zenith-build-executor
+    executor: zenith-xlarge-executor
    parameters:
      build_type:
        type: enum
@@ -38,8 +37,7 @@ jobs:
        # Note this works even though the submodule hasn't been checkout out yet.
      - run:
          name: Get postgres cache key
-          command: |
-            git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
+          command: git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres

      - restore_cache:
          name: Restore postgres cache
@@ -47,15 +45,6 @@ jobs:
            # Restore ONLY if the rev key matches exactly
            - v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}

-        # FIXME We could cache our own docker container, instead of installing packages every time.
-      - run:
-          name: apt install dependencies
-          command: |
-            if [ ! -e tmp_install/bin/postgres ]; then
-              sudo apt update
-              sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
-            fi
-
        # Build postgres if the restore_cache didn't find a build.
        # `make` can't figure out whether the cache is valid, since
        # it only compares file timestamps.
@@ -65,7 +54,8 @@ jobs:
            if [ ! -e tmp_install/bin/postgres ]; then
              # "depth 1" saves some time by not cloning the whole repo
              git submodule update --init --depth 1
-              make postgres -j8
+              # bail out on any warnings
+              COPT='-Werror' mold -run make postgres -j$(nproc)
            fi

      - save_cache:
@@ -76,7 +66,7 @@ jobs:

  # A job to build zenith rust code
  build-zenith:
-    executor: zenith-build-executor
+    executor: zenith-xlarge-executor
    parameters:
      build_type:
        type: enum
@@ -84,12 +74,6 @@ jobs:
    environment:
      BUILD_TYPE: << parameters.build_type >>
    steps:
-      - run:
-          name: apt install dependencies
-          command: |
-            sudo apt update
-            sudo apt install libssl-dev clang
-
        # Checkout the git repo (without submodules)
      - checkout

@@ -127,7 +111,7 @@ jobs:
            fi

            export CARGO_INCREMENTAL=0
-            "${cov_prefix[@]}" cargo build $CARGO_FLAGS --bins --tests
+            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --bins --tests

      - save_cache:
          name: Save rust cache
@@ -211,6 +195,14 @@ jobs:
          command: |
            cp -a tmp_install /tmp/zenith/pg_install

+      - run:
+          name: Merge coverage data
+          command: |
+            # This will speed up workspace uploads
+            if [[ $BUILD_TYPE == "debug" ]]; then
+              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
+            fi
+
        # Save the rust binaries and coverage data for other jobs in this workflow.
      - persist_to_workspace:
          root: /tmp/zenith
@@ -218,23 +210,30 @@ jobs:
            - "*"

  check-codestyle-python:
-    executor: zenith-python-executor
+    executor: zenith-executor
    steps:
      - checkout
+      - restore_cache:
+          keys:
+            - v1-python-deps-{{ checksum "poetry.lock" }}
      - run:
          name: Install deps
-          command: pipenv --python 3.7 install --dev
+          command: ./scripts/pysync
+      - save_cache:
+          key: v1-python-deps-{{ checksum "poetry.lock" }}
+          paths:
+            - /home/circleci/.cache/pypoetry/virtualenvs
      - run:
          name: Run yapf to ensure code format
          when: always
-          command: pipenv run yapf --recursive --diff .
+          command: poetry run yapf --recursive --diff .
      - run:
          name: Run mypy to check types
          when: always
-          command: pipenv run mypy .
+          command: poetry run mypy .

  run-pytest:
-    executor: zenith-python-executor
+    executor: zenith-executor
    parameters:
      # pytest args to specify the tests to run.
      #
@@ -273,9 +272,16 @@ jobs:
          condition: << parameters.needs_postgres_source >>
          steps:
            - run: git submodule update --init --depth 1
+      - restore_cache:
+          keys:
+            - v1-python-deps-{{ checksum "poetry.lock" }}
      - run:
          name: Install deps
-          command: pipenv --python 3.7 install
+          command: ./scripts/pysync
+      - save_cache:
+          key: v1-python-deps-{{ checksum "poetry.lock" }}
+          paths:
+            - /home/circleci/.cache/pypoetry/virtualenvs
      - run:
          name: Run pytest
          # pytest doesn't output test logs in real time, so CI job may fail with
@@ -292,6 +298,7 @@ jobs:
            - PLATFORM: zenith-local-ci
          command: |
            PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
+            rm -rf $PERF_REPORT_DIR

            TEST_SELECTION="test_runner/<< parameters.test_selection >>"
            EXTRA_PARAMS="<< parameters.extra_params >>"
@@ -327,7 +334,7 @@ jobs:
            # -n4 uses four processes to run tests via pytest-xdist
            # -s is not used to prevent pytest from capturing output, because tests are running
            # in parallel and logs are mixed between different tests
-            "${cov_prefix[@]}" pipenv run pytest \
+            "${cov_prefix[@]}" ./scripts/pytest \
              --junitxml=$TEST_OUTPUT/junit.xml \
              --tb=short \
              --verbose \
@@ -336,7 +343,6 @@ jobs:

            if << parameters.save_perf_report >>; then
              if [[ $CIRCLE_BRANCH == "main" ]]; then
-                # TODO: reuse scripts/git-upload
                export REPORT_FROM="$PERF_REPORT_DIR"
                export REPORT_TO=local
                scripts/generate_and_push_perf_report.sh
@@ -357,6 +363,13 @@ jobs:
      # The store_test_results step tells CircleCI where to find the junit.xml file.
      - store_test_results:
          path: /tmp/test_output
+      - run:
+          name: Merge coverage data
+          command: |
+            # This will speed up workspace uploads
+            if [[ $BUILD_TYPE == "debug" ]]; then
+              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
+            fi
      # Save coverage data (if any)
      - persist_to_workspace:
          root: /tmp/zenith
@@ -364,7 +377,7 @@ jobs:
            - "*"

  coverage-report:
-    executor: zenith-build-executor
+    executor: zenith-xlarge-executor
    steps:
      - attach_workspace:
          at: /tmp/zenith
@@ -376,12 +389,6 @@ jobs:
            # there's no way to clean out old packages, so the cache grows every time something
            # changes.
            - v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
-      - run:
-          name: Install llvm-tools
-          command: |
-            # TODO: install a proper symbol demangler, e.g. rustfilt
-            # TODO: we should embed this into a docker image
-            rustup component add llvm-tools-preview
      - run:
          name: Build coverage report
          command: |
@@ -591,6 +598,7 @@ workflows:
            - build-postgres-<< matrix.build_type >>
      - run-pytest:
          name: pg_regress-tests-<< matrix.build_type >>
+          context: PERF_TEST_RESULT_CONNSTR
          matrix:
            parameters:
              build_type: ["debug", "release"]
@@ -608,6 +616,7 @@ workflows:
            - build-zenith-<< matrix.build_type >>
      - run-pytest:
          name: benchmarks
+          context: PERF_TEST_RESULT_CONNSTR
          build_type: release
          test_selection: performance
          run_in_parallel: false
--- a/.circleci/proxy.staging.yaml
+++ b/.circleci/proxy.staging.yaml
@@ -5,6 +5,13 @@ settings:
  authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
  uri: "https://console.stage.zenith.tech/psql_session/"

+# -- Additional labels for zenith-proxy pods
+podLabels:
+  zenith_service: proxy
+  zenith_env: staging
+  zenith_region: us-east-1
+  zenith_region_slug: virginia
+
 exposedService:
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
@@ -17,4 +24,4 @@ metrics:
  serviceMonitor:
    enabled: true
    selector:
-      prometheus: zenith
+      release: kube-prometheus-stack
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -3,7 +3,7 @@ name: benchmarking
 on:
  # uncomment to run on push for debugging your PR
  # push:
-  #   branches: [ mybranch ]
+  #   branches: [ your branch ]
  schedule:
    # * is a special character in YAML so you have to quote this string
    #          ┌───────────── minute (0 - 59)
@@ -36,20 +36,20 @@ jobs:
    # see https://github.com/actions/setup-python/issues/162
    # and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
    # so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
-    # there is Python 3.7.10 already installed on the machine so use it to install pipenv and then use pipenv's virtuealenvs
-    - name: Install pipenv & deps
+    # there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtualenvs
+    - name: Install poetry & deps
      run: |
-        python3 -m pip install --upgrade pipenv wheel
-        # since pip/pipenv caches are reused there shouldn't be any troubles with install every time
-        pipenv install
+        python3 -m pip install --upgrade poetry wheel
+        # since pip/poetry caches are reused there shouldn't be any troubles with install every time
+        ./scripts/pysync

    - name: Show versions
      run: |
        echo Python
        python3 --version
-        pipenv run python3 --version
+        poetry run python3 --version
        echo Pipenv
-        pipenv --version
+        poetry --version
        echo Pgbench
        $PG_BIN/pgbench --version

@@ -89,11 +89,15 @@ jobs:
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
        REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
      run: |
+        # just to be sure that no data was cached on a self-hosted runner
+        # since it might generate duplicates when calling ingest_perf_test_result.py
+        rm -rf perf-report-staging
        mkdir -p perf-report-staging
-        pipenv run pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
+        ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging

    - name: Submit result
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
      run: |
        REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/30
+++ b/30
@@ -1,30 +0,0 @@
-[[source]]
-url = "https://pypi.python.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-pytest = ">=6.0.0"
-typing-extensions = "*"
-pyjwt = {extras = ["crypto"], version = "*"}
-requests = "*"
-pytest-xdist = "*"
-asyncpg = "*"
-cached-property = "*"
-psycopg2-binary = "*"
-jinja2 = "*"
-
-[dev-packages]
-# Behavior may change slightly between versions. These are run continuously,
-# so we pin exact versions to avoid suprising breaks. Update if comfortable.
-yapf = "==0.31.0"
-mypy = "==0.910"
-# Non-pinned packages follow.
-pipenv = "*"
-flake8 = "*"
-types-requests = "*"
-types-psycopg2 = "*"
-
-[requires]
-# we need at least 3.7, but pipenv doesn't allow to say this directly
-python_version = "3"
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,652 +0,0 @@
-{
-    "_meta": {
-        "hash": {
-            "sha256": "c309cb963a7b07ae3d30e9cbf08b495f77bdecc0e5356fc89d133c4fbcb65b2b"
-        },
-        "pipfile-spec": 6,
-        "requires": {
-            "python_version": "3"
-        },
-        "sources": [
-            {
-                "name": "pypi",
-                "url": "https://pypi.python.org/simple",
-                "verify_ssl": true
-            }
-        ]
-    },
-    "default": {
-        "asyncpg": {
-            "hashes": [
-                "sha256:129d501f3d30616afd51eb8d3142ef51ba05374256bd5834cec3ef4956a9b317",
-                "sha256:29ef6ae0a617fc13cc2ac5dc8e9b367bb83cba220614b437af9b67766f4b6b20",
-                "sha256:41704c561d354bef01353835a7846e5606faabbeb846214dfcf666cf53319f18",
-                "sha256:556b0e92e2b75dc028b3c4bc9bd5162ddf0053b856437cf1f04c97f9c6837d03",
-                "sha256:8ff5073d4b654e34bd5eaadc01dc4d68b8a9609084d835acd364cd934190a08d",
-                "sha256:a458fc69051fbb67d995fdda46d75a012b5d6200f91e17d23d4751482640ed4c",
-                "sha256:a7095890c96ba36f9f668eb552bb020dddb44f8e73e932f8573efc613ee83843",
-                "sha256:a738f4807c853623d3f93f0fea11f61be6b0e5ca16ea8aeb42c2c7ee742aa853",
-                "sha256:c4fc0205fe4ddd5aeb3dfdc0f7bafd43411181e1f5650189608e5971cceacff1",
-                "sha256:dd2fa063c3344823487d9ddccb40802f02622ddf8bf8a6cc53885ee7a2c1c0c6",
-                "sha256:ddffcb85227bf39cd1bedd4603e0082b243cf3b14ced64dce506a15b05232b83",
-                "sha256:e36c6806883786b19551bb70a4882561f31135dc8105a59662e0376cf5b2cbc5",
-                "sha256:eed43abc6ccf1dc02e0d0efc06ce46a411362f3358847c6b0ec9a43426f91ece"
-            ],
-            "index": "pypi",
-            "version": "==0.24.0"
-        },
-        "attrs": {
-            "hashes": [
-                "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1",
-                "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==21.2.0"
-        },
-        "cached-property": {
-            "hashes": [
-                "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130",
-                "sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"
-            ],
-            "index": "pypi",
-            "version": "==1.5.2"
-        },
-        "certifi": {
-            "hashes": [
-                "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
-                "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
-            ],
-            "version": "==2021.10.8"
-        },
-        "cffi": {
-            "hashes": [
-                "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
-                "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
-                "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
-                "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
-                "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
-                "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
-                "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
-                "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
-                "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
-                "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
-                "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
-                "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
-                "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
-                "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
-                "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
-                "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
-                "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
-                "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
-                "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
-                "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
-                "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
-                "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
-                "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
-                "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
-                "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
-                "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
-                "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
-                "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
-                "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
-                "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
-                "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
-                "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
-                "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
-                "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
-                "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
-                "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
-                "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
-                "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
-                "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
-                "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
-                "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
-                "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
-                "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
-                "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
-                "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
-                "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
-                "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
-                "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
-                "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
-                "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
-            ],
-            "version": "==1.15.0"
-        },
-        "charset-normalizer": {
-            "hashes": [
-                "sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
-                "sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
-            ],
-            "markers": "python_version >= '3'",
-            "version": "==2.0.7"
-        },
-        "cryptography": {
-            "hashes": [
-                "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6",
-                "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6",
-                "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c",
-                "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999",
-                "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e",
-                "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992",
-                "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d",
-                "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588",
-                "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa",
-                "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d",
-                "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd",
-                "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d",
-                "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953",
-                "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2",
-                "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8",
-                "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6",
-                "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9",
-                "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6",
-                "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad",
-                "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76"
-            ],
-            "version": "==35.0.0"
-        },
-        "execnet": {
-            "hashes": [
-                "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5",
-                "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==1.9.0"
-        },
-        "idna": {
-            "hashes": [
-                "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
-                "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
-            ],
-            "markers": "python_version >= '3'",
-            "version": "==3.3"
-        },
-        "importlib-metadata": {
-            "hashes": [
-                "sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15",
-                "sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1"
-            ],
-            "markers": "python_version < '3.8'",
-            "version": "==4.8.1"
-        },
-        "iniconfig": {
-            "hashes": [
-                "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
-                "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
-            ],
-            "version": "==1.1.1"
-        },
-        "jinja2": {
-            "hashes": [
-                "sha256:827a0e32839ab1600d4eb1c4c33ec5a8edfbc5cb42dafa13b81f182f97784b45",
-                "sha256:8569982d3f0889eed11dd620c706d39b60c36d6d25843961f33f77fb6bc6b20c"
-            ],
-            "index": "pypi",
-            "version": "==3.0.2"
-        },
-        "markupsafe": {
-            "hashes": [
-                "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298",
-                "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64",
-                "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b",
-                "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194",
-                "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567",
-                "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff",
-                "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724",
-                "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74",
-                "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646",
-                "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35",
-                "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6",
-                "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a",
-                "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6",
-                "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad",
-                "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26",
-                "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38",
-                "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac",
-                "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7",
-                "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6",
-                "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047",
-                "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75",
-                "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f",
-                "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b",
-                "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135",
-                "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8",
-                "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a",
-                "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a",
-                "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1",
-                "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9",
-                "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864",
-                "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914",
-                "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee",
-                "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f",
-                "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18",
-                "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8",
-                "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2",
-                "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d",
-                "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b",
-                "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b",
-                "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86",
-                "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6",
-                "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f",
-                "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb",
-                "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833",
-                "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28",
-                "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e",
-                "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415",
-                "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902",
-                "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f",
-                "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d",
-                "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9",
-                "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d",
-                "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145",
-                "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066",
-                "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c",
-                "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1",
-                "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a",
-                "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207",
-                "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f",
-                "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53",
-                "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd",
-                "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134",
-                "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85",
-                "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9",
-                "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5",
-                "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94",
-                "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509",
-                "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51",
-                "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==2.0.1"
-        },
-        "packaging": {
-            "hashes": [
-                "sha256:096d689d78ca690e4cd8a89568ba06d07ca097e3306a4381635073ca91479966",
-                "sha256:14317396d1e8cdb122989b916fa2c7e9ca8e2be9e8060a6eff75b6b7b4d8a7e0"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==21.2"
-        },
-        "pluggy": {
-            "hashes": [
-                "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
-                "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==1.0.0"
-        },
-        "psycopg2-binary": {
-            "hashes": [
-                "sha256:0b7dae87f0b729922e06f85f667de7bf16455d411971b2043bbd9577af9d1975",
-                "sha256:0f2e04bd2a2ab54fa44ee67fe2d002bb90cee1c0f1cc0ebc3148af7b02034cbd",
-                "sha256:123c3fb684e9abfc47218d3784c7b4c47c8587951ea4dd5bc38b6636ac57f616",
-                "sha256:1473c0215b0613dd938db54a653f68251a45a78b05f6fc21af4326f40e8360a2",
-                "sha256:14db1752acdd2187d99cb2ca0a1a6dfe57fc65c3281e0f20e597aac8d2a5bd90",
-                "sha256:1e3a362790edc0a365385b1ac4cc0acc429a0c0d662d829a50b6ce743ae61b5a",
-                "sha256:1e85b74cbbb3056e3656f1cc4781294df03383127a8114cbc6531e8b8367bf1e",
-                "sha256:20f1ab44d8c352074e2d7ca67dc00843067788791be373e67a0911998787ce7d",
-                "sha256:24b0b6688b9f31a911f2361fe818492650795c9e5d3a1bc647acbd7440142a4f",
-                "sha256:2f62c207d1740b0bde5c4e949f857b044818f734a3d57f1d0d0edc65050532ed",
-                "sha256:3242b9619de955ab44581a03a64bdd7d5e470cc4183e8fcadd85ab9d3756ce7a",
-                "sha256:35c4310f8febe41f442d3c65066ca93cccefd75013df3d8c736c5b93ec288140",
-                "sha256:4235f9d5ddcab0b8dbd723dca56ea2922b485ea00e1dafacf33b0c7e840b3d32",
-                "sha256:542875f62bc56e91c6eac05a0deadeae20e1730be4c6334d8f04c944fcd99759",
-                "sha256:5ced67f1e34e1a450cdb48eb53ca73b60aa0af21c46b9b35ac3e581cf9f00e31",
-                "sha256:661509f51531ec125e52357a489ea3806640d0ca37d9dada461ffc69ee1e7b6e",
-                "sha256:7360647ea04db2e7dff1648d1da825c8cf68dc5fbd80b8fb5b3ee9f068dcd21a",
-                "sha256:736b8797b58febabb85494142c627bd182b50d2a7ec65322983e71065ad3034c",
-                "sha256:8c13d72ed6af7fd2c8acbd95661cf9477f94e381fce0792c04981a8283b52917",
-                "sha256:988b47ac70d204aed01589ed342303da7c4d84b56c2f4c4b8b00deda123372bf",
-                "sha256:995fc41ebda5a7a663a254a1dcac52638c3e847f48307b5416ee373da15075d7",
-                "sha256:a36c7eb6152ba5467fb264d73844877be8b0847874d4822b7cf2d3c0cb8cdcb0",
-                "sha256:aed4a9a7e3221b3e252c39d0bf794c438dc5453bc2963e8befe9d4cd324dff72",
-                "sha256:aef9aee84ec78af51107181d02fe8773b100b01c5dfde351184ad9223eab3698",
-                "sha256:b0221ca5a9837e040ebf61f48899926b5783668b7807419e4adae8175a31f773",
-                "sha256:b4d7679a08fea64573c969f6994a2631908bb2c0e69a7235648642f3d2e39a68",
-                "sha256:c250a7ec489b652c892e4f0a5d122cc14c3780f9f643e1a326754aedf82d9a76",
-                "sha256:ca86db5b561b894f9e5f115d6a159fff2a2570a652e07889d8a383b5fae66eb4",
-                "sha256:cfc523edecddaef56f6740d7de1ce24a2fdf94fd5e704091856a201872e37f9f",
-                "sha256:d92272c7c16e105788efe2cfa5d680f07e34e0c29b03c1908f8636f55d5f915a",
-                "sha256:da113b70f6ec40e7d81b43d1b139b9db6a05727ab8be1ee559f3a69854a69d34",
-                "sha256:f6fac64a38f6768e7bc7b035b9e10d8a538a9fadce06b983fb3e6fa55ac5f5ce",
-                "sha256:f8559617b1fcf59a9aedba2c9838b5b6aa211ffedecabca412b92a1ff75aac1a",
-                "sha256:fbb42a541b1093385a2d8c7eec94d26d30437d0e77c1d25dae1dcc46741a385e"
-            ],
-            "index": "pypi",
-            "version": "==2.9.1"
-        },
-        "py": {
-            "hashes": [
-                "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
-                "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==1.10.0"
-        },
-        "pycparser": {
-            "hashes": [
-                "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
-                "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.20"
-        },
-        "pyjwt": {
-            "extras": [
-                "crypto"
-            ],
-            "hashes": [
-                "sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41",
-                "sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"
-            ],
-            "index": "pypi",
-            "version": "==2.3.0"
-        },
-        "pyparsing": {
-            "hashes": [
-                "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
-                "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
-            ],
-            "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.4.7"
-        },
-        "pytest": {
-            "hashes": [
-                "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89",
-                "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"
-            ],
-            "index": "pypi",
-            "version": "==6.2.5"
-        },
-        "pytest-forked": {
-            "hashes": [
-                "sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca",
-                "sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==1.3.0"
-        },
-        "pytest-xdist": {
-            "hashes": [
-                "sha256:7b61ebb46997a0820a263553179d6d1e25a8c50d8a8620cd1aa1e20e3be99168",
-                "sha256:89b330316f7fc475f999c81b577c2b926c9569f3d397ae432c0c2e2496d61ff9"
-            ],
-            "index": "pypi",
-            "version": "==2.4.0"
-        },
-        "requests": {
-            "hashes": [
-                "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
-                "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
-            ],
-            "index": "pypi",
-            "version": "==2.26.0"
-        },
-        "toml": {
-            "hashes": [
-                "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
-                "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
-            ],
-            "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==0.10.2"
-        },
-        "typing-extensions": {
-            "hashes": [
-                "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
-                "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7",
-                "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"
-            ],
-            "index": "pypi",
-            "version": "==3.10.0.2"
-        },
-        "urllib3": {
-            "hashes": [
-                "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
-                "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
-            "version": "==1.26.7"
-        },
-        "zipp": {
-            "hashes": [
-                "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832",
-                "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==3.6.0"
-        }
-    },
-    "develop": {
-        "backports.entry-points-selectable": {
-            "hashes": [
-                "sha256:988468260ec1c196dab6ae1149260e2f5472c9110334e5d51adcb77867361f6a",
-                "sha256:a6d9a871cde5e15b4c4a53e3d43ba890cc6861ec1332c9c2428c92f977192acc"
-            ],
-            "markers": "python_version >= '2.7'",
-            "version": "==1.1.0"
-        },
-        "certifi": {
-            "hashes": [
-                "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
-                "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
-            ],
-            "version": "==2021.10.8"
-        },
-        "distlib": {
-            "hashes": [
-                "sha256:c8b54e8454e5bf6237cc84c20e8264c3e991e824ef27e8f1e81049867d861e31",
-                "sha256:d982d0751ff6eaaab5e2ec8e691d949ee80eddf01a62eaa96ddb11531fe16b05"
-            ],
-            "version": "==0.3.3"
-        },
-        "filelock": {
-            "hashes": [
-                "sha256:7afc856f74fa7006a289fd10fa840e1eebd8bbff6bffb69c26c54a0512ea8cf8",
-                "sha256:bb2a1c717df74c48a2d00ed625e5a66f8572a3a30baacb7657add1d7bac4097b"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==3.3.2"
-        },
-        "flake8": {
-            "hashes": [
-                "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d",
-                "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"
-            ],
-            "index": "pypi",
-            "version": "==4.0.1"
-        },
-        "importlib-metadata": {
-            "hashes": [
-                "sha256:b618b6d2d5ffa2f16add5697cf57a46c76a56229b0ed1c438322e4e95645bd15",
-                "sha256:f284b3e11256ad1e5d03ab86bb2ccd6f5339688ff17a4d797a0fe7df326f23b1"
-            ],
-            "markers": "python_version < '3.8'",
-            "version": "==4.8.1"
-        },
-        "mccabe": {
-            "hashes": [
-                "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
-                "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
-            ],
-            "version": "==0.6.1"
-        },
-        "mypy": {
-            "hashes": [
-                "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9",
-                "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a",
-                "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9",
-                "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e",
-                "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2",
-                "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212",
-                "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b",
-                "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885",
-                "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150",
-                "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703",
-                "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072",
-                "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457",
-                "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e",
-                "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0",
-                "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb",
-                "sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97",
-                "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8",
-                "sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811",
-                "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6",
-                "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de",
-                "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504",
-                "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921",
-                "sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d"
-            ],
-            "index": "pypi",
-            "version": "==0.910"
-        },
-        "mypy-extensions": {
-            "hashes": [
-                "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
-                "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
-            ],
-            "version": "==0.4.3"
-        },
-        "pipenv": {
-            "hashes": [
-                "sha256:05958fadcd70b2de6a27542fcd2bd72dd5c59c6d35307fdac3e06361fb06e30e",
-                "sha256:d180f5be4775c552fd5e69ae18a9d6099d9dafb462efe54f11c72cb5f4d5e977"
-            ],
-            "index": "pypi",
-            "version": "==2021.5.29"
-        },
-        "platformdirs": {
-            "hashes": [
-                "sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2",
-                "sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==2.4.0"
-        },
-        "pycodestyle": {
-            "hashes": [
-                "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20",
-                "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==2.8.0"
-        },
-        "pyflakes": {
-            "hashes": [
-                "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c",
-                "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.4.0"
-        },
-        "six": {
-            "hashes": [
-                "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
-                "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==1.16.0"
-        },
-        "toml": {
-            "hashes": [
-                "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
-                "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
-            ],
-            "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==0.10.2"
-        },
-        "typed-ast": {
-            "hashes": [
-                "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace",
-                "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff",
-                "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266",
-                "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528",
-                "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6",
-                "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808",
-                "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4",
-                "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363",
-                "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341",
-                "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04",
-                "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41",
-                "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e",
-                "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3",
-                "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899",
-                "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805",
-                "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c",
-                "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c",
-                "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39",
-                "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a",
-                "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3",
-                "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7",
-                "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f",
-                "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075",
-                "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0",
-                "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40",
-                "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428",
-                "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927",
-                "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3",
-                "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
-                "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
-            ],
-            "markers": "python_version < '3.8'",
-            "version": "==1.4.3"
-        },
-        "types-psycopg2": {
-            "hashes": [
-                "sha256:77ed80f2668582654623e04fb3d741ecce93effcc39c929d7e02f4a917a538ce",
-                "sha256:98a6e0e9580cd7eb4bd4d20f7c7063d154b2589a2b90c0ce4e3ca6085cde77c6"
-            ],
-            "index": "pypi",
-            "version": "==2.9.1"
-        },
-        "types-requests": {
-            "hashes": [
-                "sha256:b279284e51f668e38ee12d9665e4d789089f532dc2a0be4a1508ca0efd98ba9e",
-                "sha256:ba1d108d512e294b6080c37f6ae7cb2a2abf527560e2b671d1786c1fc46b541a"
-            ],
-            "index": "pypi",
-            "version": "==2.25.11"
-        },
-        "typing-extensions": {
-            "hashes": [
-                "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
-                "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7",
-                "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"
-            ],
-            "index": "pypi",
-            "version": "==3.10.0.2"
-        },
-        "virtualenv": {
-            "hashes": [
-                "sha256:4b02e52a624336eece99c96e3ab7111f469c24ba226a53ec474e8e787b365814",
-                "sha256:576d05b46eace16a9c348085f7d0dc8ef28713a2cabaa1cf0aea41e8f12c9218"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==20.10.0"
-        },
-        "virtualenv-clone": {
-            "hashes": [
-                "sha256:418ee935c36152f8f153c79824bb93eaf6f0f7984bae31d3f48f350b9183501a",
-                "sha256:44d5263bceed0bac3e1424d64f798095233b64def1c5689afa43dc3223caf5b0"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==0.5.7"
-        },
-        "yapf": {
-            "hashes": [
-                "sha256:408fb9a2b254c302f49db83c59f9aa0b4b0fd0ec25be3a5c51181327922ff63d",
-                "sha256:e3a234ba8455fe201eaa649cdac872d590089a18b661e39bbac7020978dd9c2e"
-            ],
-            "index": "pypi",
-            "version": "==0.31.0"
-        },
-        "zipp": {
-            "hashes": [
-                "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832",
-                "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==3.6.0"
-        }
-    }
-}
--- a/README.md
+++ b/README.md
@@ -28,12 +28,12 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
 libssl-dev clang pkg-config libpq-dev
 ```

-[Rust] 1.55 or later is also required.
+[Rust] 1.56.1 or later is also required.

 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.7 or higher), and install python3 packages using `pipenv install` in the project directory.
+Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.

 2. Build zenith and patched postgres
 ```sh
@@ -128,8 +128,7 @@ INSERT 0 1
 ```sh
 git clone --recursive https://github.com/zenithdb/zenith.git
 make # builds also postgres and installs it to ./tmp_install
-cd test_runner
-pipenv run pytest
+./scripts/pytest
 ```

 ## Documentation
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -1,17 +1,14 @@
 [package]
 name = "compute_tools"
 version = "0.1.0"
-authors = ["Alexey Kondratov <kondratov.aleksey@gmail.com>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2021"

 [dependencies]
 libc = "0.2"
 anyhow = "1.0"
 chrono = "0.4"
-clap = "2.33"
-env_logger = "0.8"
+clap = "3.0"
+env_logger = "0.9"
 hyper = { version = "0.14", features = ["full"] }
 log = { version = "0.4", features = ["std", "serde"] }
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
--- a/compute_tools/src/bin/zenith_ctl.rs
+++ b/compute_tools/src/bin/zenith_ctl.rs
@@ -34,7 +34,7 @@ use std::sync::{Arc, RwLock};

 use anyhow::{Context, Result};
 use chrono::Utc;
-use libc::{prctl, PR_SET_PDEATHSIG, SIGINT};
+use clap::Arg;
 use log::info;
 use postgres::{Client, NoTls};

@@ -155,20 +155,6 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
 }

 fn main() -> Result<()> {
-    // During configuration we are starting Postgres as a child process. If we
-    // fail we do not want to leave it running. PR_SET_PDEATHSIG sets the signal
-    // that will be sent to the child process when the parent dies. NB: this is
-    // cleared for the child of a fork(). SIGINT means fast shutdown for Postgres.
-    // This does not matter much for Docker, where `zenith_ctl` is an entrypoint,
-    // so the whole container will exit if it exits. But could be useful when
-    // `zenith_ctl` is used in e.g. systemd.
-    // XXX: this appears to just don't work. When `main` exits, the child process
-    // `postgres` is re-assigned to a new parent (`/lib/systemd/systemd --user`
-    // in my case).
-    unsafe {
-        prctl(PR_SET_PDEATHSIG, SIGINT);
-    }
-
    // TODO: re-use `zenith_utils::logging` later
    init_logger(DEFAULT_LOG_LEVEL)?;

@@ -177,34 +163,34 @@ fn main() -> Result<()> {
    let matches = clap::App::new("zenith_ctl")
        .version(version.unwrap_or("unknown"))
        .arg(
-            clap::Arg::with_name("connstr")
-                .short("C")
+            Arg::new("connstr")
+                .short('C')
                .long("connstr")
                .value_name("DATABASE_URL")
                .required(true),
        )
        .arg(
-            clap::Arg::with_name("pgdata")
-                .short("D")
+            Arg::new("pgdata")
+                .short('D')
                .long("pgdata")
                .value_name("DATADIR")
                .required(true),
        )
        .arg(
-            clap::Arg::with_name("pgbin")
-                .short("b")
+            Arg::new("pgbin")
+                .short('b')
                .long("pgbin")
                .value_name("POSTGRES_PATH"),
        )
        .arg(
-            clap::Arg::with_name("spec")
-                .short("s")
+            Arg::new("spec")
+                .short('s')
                .long("spec")
                .value_name("SPEC_JSON"),
        )
        .arg(
-            clap::Arg::with_name("spec-path")
-                .short("S")
+            Arg::new("spec-path")
+                .short('S')
                .long("spec-path")
                .value_name("SPEC_PATH"),
        )
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -5,7 +5,7 @@ use std::process::Command;
 use std::str::FromStr;
 use std::{fs, thread, time};

-use anyhow::{anyhow, Result};
+use anyhow::{bail, Result};
 use postgres::{Client, Transaction};
 use serde::Deserialize;

@@ -226,7 +226,7 @@ pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
        // but postgres starts listening almost immediately, even if it is not really
        // ready to accept connections).
        if slept >= POSTGRES_WAIT_TIMEOUT {
-            return Err(anyhow!("timed out while waiting for Postgres to start"));
+            bail!("timed out while waiting for Postgres to start");
        }

        if pid_path.exists() {
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -1,14 +1,11 @@
 [package]
 name = "control_plane"
 version = "0.1.0"
-authors = ["Stas Kelvich <stas@zenith.tech>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2021"

 [dependencies]
 tar = "0.4.33"
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
 serde = { version = "1.0", features = ["derive"] }
 toml = "0.5"
 lazy_static = "1.4"
--- a/control_plane/safekeepers.conf
+++ b/control_plane/safekeepers.conf
@@ -1,7 +1,7 @@
 # Page server and three safekeepers.
 [pageserver]
-listen_pg_addr = 'localhost:64000'
-listen_http_addr = 'localhost:9898'
+listen_pg_addr = '127.0.0.1:64000'
+listen_http_addr = '127.0.0.1:9898'
 auth_type = 'Trust'

 [[safekeepers]]
--- a/control_plane/simple.conf
+++ b/control_plane/simple.conf
@@ -1,8 +1,8 @@
 # Minimal zenith environment with one safekeeper. This is equivalent to the built-in
 # defaults that you get with no --config
 [pageserver]
-listen_pg_addr = 'localhost:64000'
-listen_http_addr = 'localhost:9898'
+listen_pg_addr = '127.0.0.1:64000'
+listen_http_addr = '127.0.0.1:9898'
 auth_type = 'Trust'

 [[safekeepers]]
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -82,15 +82,11 @@ impl ComputeControlPlane {
        let mut strings = s.split('@');
        let name = strings.next().unwrap();

-        let lsn: Option<Lsn>;
-        if let Some(lsnstr) = strings.next() {
-            lsn = Some(
-                Lsn::from_str(lsnstr)
-                    .with_context(|| "invalid LSN in point-in-time specification")?,
-            );
-        } else {
-            lsn = None
-        }
+        let lsn = strings
+            .next()
+            .map(Lsn::from_str)
+            .transpose()
+            .context("invalid LSN in point-in-time specification")?;

        // Resolve the timeline ID, given the human-readable branch name
        let timeline_id = self
@@ -253,16 +249,16 @@ impl PostgresNode {
        let mut client = self
            .pageserver
            .page_server_psql_client()
-            .with_context(|| "connecting to page server failed")?;
+            .context("connecting to page server failed")?;

        let copyreader = client
            .copy_out(sql.as_str())
-            .with_context(|| "page server 'basebackup' command failed")?;
+            .context("page server 'basebackup' command failed")?;

        // Read the archive directly from the `CopyOutReader`
        tar::Archive::new(copyreader)
            .unpack(&self.pgdata())
-            .with_context(|| "extracting base backup failed")?;
+            .context("extracting base backup failed")?;

        Ok(())
    }
@@ -443,7 +439,7 @@ impl PostgresNode {
        if let Some(token) = auth_token {
            cmd.env("ZENITH_AUTH_TOKEN", token);
        }
-        let pg_ctl = cmd.status().with_context(|| "pg_ctl failed")?;
+        let pg_ctl = cmd.status().context("pg_ctl failed")?;

        if !pg_ctl.success() {
            anyhow::bail!("pg_ctl failed");
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -251,7 +251,7 @@ impl LocalEnv {
                .arg("2048")
                .stdout(Stdio::null())
                .output()
-                .with_context(|| "failed to generate auth private key")?;
+                .context("failed to generate auth private key")?;
            if !keygen_output.status.success() {
                bail!(
                    "openssl failed: '{}'",
@@ -270,7 +270,7 @@ impl LocalEnv {
                .args(&["-out", public_key_path.to_str().unwrap()])
                .stdout(Stdio::null())
                .output()
-                .with_context(|| "failed to generate auth private key")?;
+                .context("failed to generate auth private key")?;
            if !keygen_output.status.success() {
                bail!(
                    "openssl failed: '{}'",
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -4,7 +4,7 @@
 /// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
 /// enough to extract a few settings we need in Zenith, assuming you don't do
 /// funny stuff like include-directives or funny escaping.
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{bail, Context, Result};
 use lazy_static::lazy_static;
 use regex::Regex;
 use std::collections::HashMap;
@@ -78,7 +78,7 @@ impl PostgresConf {
        <T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
    {
        self.get(field_name)
-            .ok_or_else(|| anyhow!("could not find '{}' option {}", field_name, context))?
+            .with_context(|| format!("could not find '{}' option {}", field_name, context))?
            .parse::<T>()
            .with_context(|| format!("could not parse '{}' option {}", field_name, context))
    }
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -85,7 +85,7 @@ impl SafekeeperNode {
            pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
            env: env.clone(),
            http_client: Client::new(),
-            http_base_url: format!("http://localhost:{}/v1", conf.http_port),
+            http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
            pageserver,
        }
    }
@@ -93,7 +93,7 @@ impl SafekeeperNode {
    /// Construct libpq connection string for connecting to this safekeeper.
    fn safekeeper_connection_config(port: u16) -> Config {
        // TODO safekeeper authentication not implemented yet
-        format!("postgresql://no_user@localhost:{}/no_db", port)
+        format!("postgresql://no_user@127.0.0.1:{}/no_db", port)
            .parse()
            .unwrap()
    }
@@ -114,8 +114,8 @@ impl SafekeeperNode {
        );
        io::stdout().flush().unwrap();

-        let listen_pg = format!("localhost:{}", self.conf.pg_port);
-        let listen_http = format!("localhost:{}", self.conf.http_port);
+        let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
+        let listen_http = format!("127.0.0.1:{}", self.conf.http_port);

        let mut cmd = Command::new(self.env.safekeeper_bin()?);
        fill_rust_env_vars(
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -2,6 +2,16 @@

 ### Authentication

+### Backpressure
+
+Backpressure is used to limit the lag between pageserver and compute node or WAL service.
+
+If compute node or WAL service run far ahead of Page Server,
+the time of serving page requests increases. This may lead to timeout errors.
+
+To tune backpressure limits use `max_replication_write_lag`, `max_replication_flush_lag` and `max_replication_apply_lag` settings.
+When the lag between the current LSN (`pg_current_wal_flush_lsn()` at the compute node) and the minimal write/flush/apply position of the replica exceeds the limit,
+backends performing writes are blocked until the replica has caught up.
 ### Base image (page image)

 ### Basebackup
@@ -76,7 +86,37 @@ The layer map tracks what layers exist for all the relishes in a timeline.
 Zenith repository implementation that keeps data in layers.
 ### LSN

+The Log Sequence Number (LSN) is a unique identifier of a WAL record in the write-ahead log.
+The insert position is a byte offset into the logs, increasing monotonically with each new record.
+Internally, an LSN is a 64-bit integer, representing a byte position in the write-ahead log stream.
+It is printed as two hexadecimal numbers of up to 8 digits each, separated by a slash.
+Check also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)
+Values can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.

+In PostgreSQL and Zenith, LSNs are used to describe certain points in WAL handling.
+
+PostgreSQL LSNs and functions to monitor them:
+* `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.
+* `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
+* `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
+* `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
+* `pg_last_wal_replay_lsn()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
+Source: [PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html).
+
+Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
+* `CommitLSN`: position in WAL confirmed by a quorum of safekeepers.
+* `RestartLSN`: position in WAL confirmed by all safekeepers.
+* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
+* `VCL`: the largest LSN for which we can guarantee availability of all prior records.
+
+Zenith pageserver LSNs:
+* `last_record_lsn` - the end of last processed WAL record.
+* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
+* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
+TODO: use this name consistently in remote storage code. Now `disk_consistent_lsn` is used and meaning depends on the context.
+* `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created)
+
+TODO: add table that describes mapping between PostgreSQL (compute), safekeeper and pageserver LSNs.
 ### Page (block)

 The basic structure used to store relation data. All pages are of the same size.
--- a/docs/pageserver-tenant-migration.md
+++ b/docs/pageserver-tenant-migration.md
@@ -0,0 +1,22 @@
+## Pageserver tenant migration
+
+### Overview
+
+This feature allows migrating a timeline from one pageserver to another by utilizing the remote storage capability.
+
+### Migration process
+
+Pageserver implements two new http handlers: timeline attach and timeline detach.
+Timeline migration is performed in the following way:
+1. Timeline attach is called on a target pageserver. This asks pageserver to download latest checkpoint uploaded to s3.
+2. For now it is necessary to manually initialize the replication stream via a callmemaybe call so that the target pageserver initializes replication from the safekeeper (it is desirable to avoid this and initialize replication directly in the attach handler, but this requires some refactoring, probably [#997](https://github.com/zenithdb/zenith/issues/997)/[#1049](https://github.com/zenithdb/zenith/issues/1049)).
+3. Replication state can be tracked via timeline detail pageserver call.
+4. The compute node should be restarted with the new pageserver connection string. The issue with multiple compute nodes for one timeline is handled at the safekeeper consensus level, so it is not a problem here. Currently, responsibility for rescheduling the compute with the updated config lies with an external coordinator (the console).
+5. Timeline is detached from old pageserver. On disk data is removed.
+
+
+### Implementation details
+
+Now safekeeper needs to track which pageserver it is replicating to. This introduces complications into replication code:
+* We need to distinguish different pageservers (now this is done by connection string which is imperfect and is covered here: https://github.com/zenithdb/zenith/issues/1105). Callmemaybe subscription management also needs to track that (this is already implemented).
+* We need to track which pageserver is the primary. This is needed to avoid reconnections to non-primary pageservers, because we shouldn't reconnect to them when they decide to stop their walreceiver. I.e., this can happen when there is load on the compute and we are trying to detach a timeline from the old pageserver. In this case callmemaybe will try to reconnect to it because the replication termination condition is not met (a pageserver with an active compute could never catch up to the latest LSN, so there is always some WAL tail).
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -87,31 +87,29 @@ so manual installation of dependencies is not recommended.
 A single virtual environment with all dependencies is described in the single `Pipfile`.

 ### Prerequisites
- Install Python 3.7 (the minimal supported version)
-    - Later version (e.g. 3.8) is ok if you don't write Python code
-    - You can install Python 3.7 separately, e.g.:
+- Install Python 3.7 (the minimal supported version) or greater.
+    - Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
+    - If you have some trouble with another version you can resolve it by installing Python 3.7 separately, via pyenv or via system package manager, e.g.:
      ```bash
      # In Ubuntu
      sudo add-apt-repository ppa:deadsnakes/ppa
      sudo apt update
      sudo apt install python3.7
      ```
- Install `pipenv`
-    - Exact version of `pipenv` is not important, you can use Debian/Ubuntu package `pipenv`.
- Install dependencies via either
-  * `pipenv --python 3.7 install --dev` if you will write Python code, or
-  * `pipenv install` if you only want to run Python scripts and don't have Python 3.7.
+- Install `poetry`
+    - Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
+- Install dependencies via `./scripts/pysync`. Note that CI uses Python 3.7, so if you have a different version some linting tools can yield different results locally vs. in CI.

-Run `pipenv shell` to activate the virtual environment.
-Alternatively, use `pipenv run` to run a single command in the venv, e.g. `pipenv run pytest`.
+Run `poetry shell` to activate the virtual environment.
+Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`.

 ### Obligatory checks
 We force code formatting via `yapf` and type hints via `mypy`.
 Run the following commands in the repository's root (next to `setup.cfg`):

 ```bash
-pipenv run yapf -ri .  # All code is reformatted
-pipenv run mypy .  # Ensure there are no typing errors
+poetry run yapf -ri .  # All code is reformatted
+poetry run mypy .  # Ensure there are no typing errors
 ```

 **WARNING**: do not run `mypy` from a directory other than the root of the repository.
@@ -123,17 +121,6 @@ Also consider:
 * Adding more type hints to your code to avoid `Any`.

 ### Changing dependencies
-You have to update `Pipfile.lock` if you have changed `Pipfile`:
+To add a new package or change an existing one you can use `poetry add` or `poetry update`, or edit `pyproject.toml` manually. Do not forget to run `poetry lock` in the latter case.

-```bash
-pipenv --python 3.7 install --dev  # Re-create venv for Python 3.7 and install recent pipenv inside
-pipenv run pipenv --version  # Should be at least 2021.5.29
-pipenv run pipenv lock  # Regenerate Pipfile.lock
-```
-
-As the minimal supported version is Python 3.7 and we use it in CI,
-you have to use a Python 3.7 environment when updating `Pipfile.lock`.
-Otherwise some back-compatibility packages will be missing.
-
-It is also important to run recent `pipenv`.
-Older versions remove markers from `Pipfile.lock`.
+More details are available in poetry's [documentation](https://python-poetry.org/docs/).
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -1,8 +1,7 @@
 [package]
 name = "pageserver"
 version = "0.1.0"
-authors = ["Stas Kelvich <stas@zenith.tech>"]
-edition = "2018"
+edition = "2021"

 [dependencies]
 bookfile = { git = "https://github.com/zenithdb/bookfile.git", branch="generic-readext" }
@@ -15,15 +14,14 @@ futures = "0.3.13"
 hyper = "0.14"
 lazy_static = "1.4.0"
 log = "0.4.14"
-clap = "2.33.0"
+clap = "3.0"
 daemonize = "0.4.1"
 tokio = { version = "1.11", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
-postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
+postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
 tokio-stream = "0.1.8"
-routerify = "2"
 anyhow = { version = "1.0", features = ["backtrace"] }
 crc32c = "0.6.0"
 thiserror = "1.0"
@@ -32,7 +30,7 @@ tar = "0.4.33"
 humantime = "2.1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
-toml_edit = { version = "0.12", features = ["easy"] }
+toml_edit = { version = "0.13", features = ["easy"] }
 scopeguard = "1.1.0"
 async-trait = "0.1"
 const_format = "0.2.21"
@@ -42,7 +40,6 @@ signal-hook = "0.3.10"
 url = "2"
 nix = "0.23"
 once_cell = "1.8.0"
-parking_lot = "0.11.2"
 crossbeam-utils = "0.8.5"

 rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
--- a/pageserver/src/bin/dump_layerfile.rs
+++ b/pageserver/src/bin/dump_layerfile.rs
@@ -13,7 +13,7 @@ fn main() -> Result<()> {
        .about("Dump contents of one layer file, for debugging")
        .version(GIT_VERSION)
        .arg(
-            Arg::with_name("path")
+            Arg::new("path")
                .help("Path to file to dump")
                .required(true)
                .index(1),
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -27,27 +27,27 @@ fn main() -> Result<()> {
        .about("Materializes WAL stream to pages and serves them to the postgres")
        .version(GIT_VERSION)
        .arg(
-            Arg::with_name("daemonize")
-                .short("d")
+            Arg::new("daemonize")
+                .short('d')
                .long("daemonize")
                .takes_value(false)
                .help("Run in the background"),
        )
        .arg(
-            Arg::with_name("init")
+            Arg::new("init")
                .long("init")
                .takes_value(false)
                .help("Initialize pageserver repo"),
        )
        .arg(
-            Arg::with_name("workdir")
-                .short("D")
+            Arg::new("workdir")
+                .short('D')
                .long("workdir")
                .takes_value(true)
                .help("Working directory for the pageserver"),
        )
        .arg(
-            Arg::with_name("create-tenant")
+            Arg::new("create-tenant")
                .long("create-tenant")
                .takes_value(true)
                .help("Create tenant during init")
@@ -55,11 +55,11 @@ fn main() -> Result<()> {
        )
        // See `settings.md` for more details on the extra configuration patameters pageserver can process
        .arg(
-            Arg::with_name("config-override")
-                .short("c")
+            Arg::new("config-override")
+                .short('c')
                .takes_value(true)
                .number_of_values(1)
-                .multiple(true)
+                .multiple_occurrences(true)
                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
                Any option has to be a valid toml document, example: `-c \"foo='hey'\"` `-c \"foo={value=1}\"`"),
        )
--- a/pageserver/src/bin/pageserver_zst.rs
+++ b/pageserver/src/bin/pageserver_zst.rs
@@ -0,0 +1,334 @@
+//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
+//! See [`compression`] for more details about the archives.
+
+use std::{collections::BTreeSet, path::Path};
+
+use anyhow::{bail, ensure, Context};
+use clap::{App, Arg};
+use pageserver::{
+    layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
+    remote_storage::compression,
+};
+use tokio::{fs, io};
+use zenith_utils::GIT_VERSION;
+
+const LIST_SUBCOMMAND: &str = "list";
+const ARCHIVE_ARG_NAME: &str = "archive";
+
+const EXTRACT_SUBCOMMAND: &str = "extract";
+const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
+
+const CREATE_SUBCOMMAND: &str = "create";
+const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
+
+#[tokio::main(flavor = "current_thread")]
+async fn main() -> anyhow::Result<()> {
+    let arg_matches = App::new("pageserver zst blob [un]compressor utility")
+        .version(GIT_VERSION)
+        .subcommands(vec![
+            App::new(LIST_SUBCOMMAND)
+                .about("List the archive contents")
+                .arg(
+                    Arg::new(ARCHIVE_ARG_NAME)
+                        .required(true)
+                        .takes_value(true)
+                        .help("An archive to list the contents of"),
+                ),
+            App::new(EXTRACT_SUBCOMMAND)
+                .about("Extracts the archive into the directory")
+                .arg(
+                    Arg::new(ARCHIVE_ARG_NAME)
+                        .required(true)
+                        .takes_value(true)
+                        .help("An archive to extract"),
+                )
+                .arg(
+                    Arg::new(TARGET_DIRECTORY_ARG_NAME)
+                        .required(false)
+                        .takes_value(true)
+                        .help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
+                ),
+            App::new(CREATE_SUBCOMMAND)
+                .about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
+                .arg(
+                    Arg::new(SOURCE_DIRECTORY_ARG_NAME)
+                        .required(true)
+                        .takes_value(true)
+                        .help("A directory to use for creating the archive"),
+                )
+                .arg(
+                    Arg::new(TARGET_DIRECTORY_ARG_NAME)
+                        .required(false)
+                        .takes_value(true)
+                        .help("A directory to create the archive in. Optional, will use the current directory if not specified"),
+                ),
+        ])
+        .get_matches();
+
+    let subcommand_name = match arg_matches.subcommand_name() {
+        Some(name) => name,
+        None => bail!("No subcommand specified"),
+    };
+
+    let subcommand_matches = match arg_matches.subcommand_matches(subcommand_name) {
+        Some(matches) => matches,
+        None => bail!(
+            "No subcommand arguments were recognized for subcommand '{}'",
+            subcommand_name
+        ),
+    };
+
+    let target_dir = Path::new(
+        subcommand_matches
+            .value_of(TARGET_DIRECTORY_ARG_NAME)
+            .unwrap_or("./"),
+    );
+
+    match subcommand_name {
+        LIST_SUBCOMMAND => {
+            let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
+                Some(archive) => Path::new(archive),
+                None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
+            };
+            list_archive(archive).await
+        }
+        EXTRACT_SUBCOMMAND => {
+            let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
+                Some(archive) => Path::new(archive),
+                None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
+            };
+            extract_archive(archive, target_dir).await
+        }
+        CREATE_SUBCOMMAND => {
+            let source_dir = match subcommand_matches.value_of(SOURCE_DIRECTORY_ARG_NAME) {
+                Some(source) => Path::new(source),
+                None => bail!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME),
+            };
+            create_archive(source_dir, target_dir).await
+        }
+        unknown => bail!("Unknown subcommand {}", unknown),
+    }
+}
+
+async fn list_archive(archive: &Path) -> anyhow::Result<()> {
+    let archive = archive.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the archive path '{}'",
+            archive.display()
+        )
+    })?;
+    ensure!(
+        archive.is_file(),
+        "Path '{}' is not an archive file",
+        archive.display()
+    );
+    println!("Listing an archive at path '{}'", archive.display());
+    let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
+        Some(name) => name,
+        None => bail!(
+            "Failed to get the archive name from the path '{}'",
+            archive.display()
+        ),
+    };
+
+    let archive_bytes = fs::read(&archive)
+        .await
+        .context("Failed to read the archive bytes")?;
+
+    let header = compression::read_archive_header(archive_name, &mut archive_bytes.as_slice())
+        .await
+        .context("Failed to read the archive header")?;
+
+    let empty_path = Path::new("");
+    println!("-------------------------------");
+
+    let longest_path_in_archive = header
+        .files
+        .iter()
+        .filter_map(|file| Some(file.subpath.as_path(empty_path).to_str()?.len()))
+        .max()
+        .unwrap_or_default()
+        .max(METADATA_FILE_NAME.len());
+
+    for regular_file in &header.files {
+        println!(
+            "File: {:width$} uncompressed size: {} bytes",
+            regular_file.subpath.as_path(empty_path).display(),
+            regular_file.size,
+            width = longest_path_in_archive,
+        )
+    }
+    println!(
+        "File: {:width$} uncompressed size: {} bytes",
+        METADATA_FILE_NAME,
+        header.metadata_file_size,
+        width = longest_path_in_archive,
+    );
+    println!("-------------------------------");
+
+    Ok(())
+}
+
+async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
+    let archive = archive.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the archive path '{}'",
+            archive.display()
+        )
+    })?;
+    ensure!(
+        archive.is_file(),
+        "Path '{}' is not an archive file",
+        archive.display()
+    );
+    let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
+        Some(name) => name,
+        None => bail!(
+            "Failed to get the archive name from the path '{}'",
+            archive.display()
+        ),
+    };
+
+    if !target_dir.exists() {
+        fs::create_dir_all(target_dir).await.with_context(|| {
+            format!(
+                "Failed to create the target dir at path '{}'",
+                target_dir.display()
+            )
+        })?;
+    }
+    let target_dir = target_dir.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the target dir path '{}'",
+            target_dir.display()
+        )
+    })?;
+    ensure!(
+        target_dir.is_dir(),
+        "Path '{}' is not a directory",
+        target_dir.display()
+    );
+    let mut dir_contents = fs::read_dir(&target_dir)
+        .await
+        .context("Failed to list the target directory contents")?;
+    let dir_entry = dir_contents
+        .next_entry()
+        .await
+        .context("Failed to list the target directory contents")?;
+    ensure!(
+        dir_entry.is_none(),
+        "Target directory '{}' is not empty",
+        target_dir.display()
+    );
+
+    println!(
+        "Extracting an archive at path '{}' into directory '{}'",
+        archive.display(),
+        target_dir.display()
+    );
+
+    let mut archive_file = fs::File::open(&archive).await.with_context(|| {
+        format!(
+            "Failed to get the archive name from the path '{}'",
+            archive.display()
+        )
+    })?;
+    let header = compression::read_archive_header(archive_name, &mut archive_file)
+        .await
+        .context("Failed to read the archive header")?;
+    compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
+        .await
+        .context("Failed to extract the archive")
+}
+
+async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
+    let source_dir = source_dir.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the source dir path '{}'",
+            source_dir.display()
+        )
+    })?;
+    ensure!(
+        source_dir.is_dir(),
+        "Path '{}' is not a directory",
+        source_dir.display()
+    );
+
+    if !target_dir.exists() {
+        fs::create_dir_all(target_dir).await.with_context(|| {
+            format!(
+                "Failed to create the target dir at path '{}'",
+                target_dir.display()
+            )
+        })?;
+    }
+    let target_dir = target_dir.canonicalize().with_context(|| {
+        format!(
+            "Failed to get the absolute path for the target dir path '{}'",
+            target_dir.display()
+        )
+    })?;
+    ensure!(
+        target_dir.is_dir(),
+        "Path '{}' is not a directory",
+        target_dir.display()
+    );
+
+    println!(
+        "Compressing directory '{}' and creating resulting archive in directory '{}'",
+        source_dir.display(),
+        target_dir.display()
+    );
+
+    let mut metadata_file_contents = None;
+    let mut files_co_archive = Vec::new();
+
+    let mut source_dir_contents = fs::read_dir(&source_dir)
+        .await
+        .context("Failed to read the source directory contents")?;
+
+    while let Some(source_dir_entry) = source_dir_contents
+        .next_entry()
+        .await
+        .context("Failed to read a source dir entry")?
+    {
+        let entry_path = source_dir_entry.path();
+        if entry_path.is_file() {
+            if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
+                let metadata_bytes = fs::read(entry_path)
+                    .await
+                    .context("Failed to read metata file bytes in the source dir")?;
+                metadata_file_contents = Some(
+                    TimelineMetadata::from_bytes(&metadata_bytes)
+                        .context("Failed to parse metata file contents in the source dir")?,
+                );
+            } else {
+                files_co_archive.push(entry_path);
+            }
+        }
+    }
+
+    let metadata = match metadata_file_contents {
+        Some(metadata) => metadata,
+        None => bail!(
+            "No metadata file found in the source dir '{}', cannot create the archive",
+            source_dir.display()
+        ),
+    };
+
+    let _ = compression::archive_files_as_stream(
+        &source_dir,
+        files_co_archive.iter(),
+        &metadata,
+        move |mut archive_streamer, archive_name| async move {
+            let archive_target = target_dir.join(&archive_name);
+            let mut archive_file = fs::File::create(&archive_target).await?;
+            io::copy(&mut archive_streamer, &mut archive_file).await?;
+            Ok(archive_target)
+        },
+    )
+    .await
+    .context("Failed to create an archive")?;
+
+    Ok(())
+}
--- a/pageserver/src/bin/update_metadata.rs
+++ b/pageserver/src/bin/update_metadata.rs
@@ -14,20 +14,20 @@ fn main() -> Result<()> {
        .about("Dump or update metadata file")
        .version(GIT_VERSION)
        .arg(
-            Arg::with_name("path")
+            Arg::new("path")
                .help("Path to metadata file")
                .required(true),
        )
        .arg(
-            Arg::with_name("disk_lsn")
-                .short("d")
+            Arg::new("disk_lsn")
+                .short('d')
                .long("disk_lsn")
                .takes_value(true)
                .help("Replace disk constistent lsn"),
        )
        .arg(
-            Arg::with_name("prev_lsn")
-                .short("p")
+            Arg::new("prev_lsn")
+                .short('p')
                .long("prev_lsn")
                .takes_value(true)
                .help("Previous record LSN"),
--- a/pageserver/src/branches.rs
+++ b/pageserver/src/branches.rs
@@ -4,7 +4,7 @@
 // TODO: move all paths construction to conf impl
 //

-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{bail, Context, Result};
 use postgres_ffi::ControlFileData;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -118,7 +118,7 @@ pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str
    if let Some(tenantid) = create_tenant {
        let tenantid = ZTenantId::from_str(tenantid)?;
        println!("initializing tenantid {}", tenantid);
-        create_repo(conf, tenantid, dummy_redo_mgr).with_context(|| "failed to create repo")?;
+        create_repo(conf, tenantid, dummy_redo_mgr).context("failed to create repo")?;
    }
    crashsafe_dir::create_dir_all(conf.tenants_path())?;

@@ -197,7 +197,7 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
        .env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
        .stdout(Stdio::null())
        .output()
-        .with_context(|| "failed to execute initdb")?;
+        .context("failed to execute initdb")?;
    if !initdb_output.status.success() {
        anyhow::bail!(
            "initdb failed: '{}'",
@@ -308,7 +308,7 @@ pub(crate) fn create_branch(
    let timeline = repo
        .get_timeline(startpoint.timelineid)?
        .local_timeline()
-        .ok_or_else(|| anyhow!("Cannot branch off the timeline that's not present locally"))?;
+        .context("Cannot branch off the timeline that's not present locally")?;
    if startpoint.lsn == Lsn(0) {
        // Find end of WAL on the old timeline
        let end_of_wal = timeline.get_last_record_lsn();
@@ -324,12 +324,13 @@ pub(crate) fn create_branch(
        timeline.wait_lsn(startpoint.lsn)?;
    }
    startpoint.lsn = startpoint.lsn.align();
-    if timeline.get_start_lsn() > startpoint.lsn {
+    if timeline.get_ancestor_lsn() > startpoint.lsn {
+        // can we safely just branch from the ancestor instead?
        anyhow::bail!(
-            "invalid startpoint {} for the branch {}: less than timeline start {}",
+            "invalid startpoint {} for the branch {}: less than timeline ancestor lsn {:?}",
            startpoint.lsn,
            branchname,
-            timeline.get_start_lsn()
+            timeline.get_ancestor_lsn()
        );
    }

@@ -383,14 +384,11 @@ fn parse_point_in_time(
    let mut strings = s.split('@');
    let name = strings.next().unwrap();

-    let lsn: Option<Lsn>;
-    if let Some(lsnstr) = strings.next() {
-        lsn = Some(
-            Lsn::from_str(lsnstr).with_context(|| "invalid LSN in point-in-time specification")?,
-        );
-    } else {
-        lsn = None
-    }
+    let lsn = strings
+        .next()
+        .map(Lsn::from_str)
+        .transpose()
+        .context("invalid LSN in point-in-time specification")?;

    // Check if it's a tag
    if lsn.is_none() {
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -4,7 +4,7 @@
 //! file, or on the command line.
 //! See also `settings.md` for better description on every parameter.

-use anyhow::{anyhow, bail, ensure, Context, Result};
+use anyhow::{bail, ensure, Context, Result};
 use toml_edit;
 use toml_edit::{Document, Item};
 use zenith_utils::postgres_backend::AuthType;
@@ -144,6 +144,13 @@ pub struct S3Config {
    pub access_key_id: Option<String>,
    /// "Password" to use when connecting to bucket.
    pub secret_access_key: Option<String>,
+    /// A base URL to send S3 requests to.
+    /// By default, the endpoint is derived from a region name, assuming it's
+    /// an AWS S3 region name, erroring on wrong region name.
+    /// Endpoint provides a way to support other S3 flavors and their regions.
+    ///
+    /// Example: `http://127.0.0.1:5000`
+    pub endpoint: Option<String>,
 }

 impl std::fmt::Debug for S3Config {
@@ -306,9 +313,7 @@ impl PageServerConf {
                })
                .ok()
                .and_then(NonZeroUsize::new)
-                .ok_or_else(|| {
-                    anyhow!("'max_concurrent_sync' must be a non-zero positive integer")
-                })?
+                .context("'max_concurrent_sync' must be a non-zero positive integer")?
        } else {
            NonZeroUsize::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC).unwrap()
        };
@@ -321,7 +326,7 @@ impl PageServerConf {
                })
                .ok()
                .and_then(NonZeroU32::new)
-                .ok_or_else(|| anyhow!("'max_sync_errors' must be a non-zero positive integer"))?
+                .context("'max_sync_errors' must be a non-zero positive integer")?
        } else {
            NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS).unwrap()
        };
@@ -351,6 +356,10 @@ impl PageServerConf {
                    .get("prefix_in_bucket")
                    .map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
                    .transpose()?,
+                endpoint: toml
+                    .get("endpoint")
+                    .map(|endpoint| parse_toml_string("endpoint", endpoint))
+                    .transpose()?,
            }),
            (Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
                parse_toml_string("local_path", local_path)?,
@@ -396,7 +405,7 @@ impl PageServerConf {
 fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
    let s = item
        .as_str()
-        .ok_or_else(|| anyhow!("configure option {} is not a string", name))?;
+        .with_context(|| format!("configure option {} is not a string", name))?;
    Ok(s.to_string())
 }

@@ -405,7 +414,7 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
    // for our use, though.
    let i: i64 = item
        .as_integer()
-        .ok_or_else(|| anyhow!("configure option {} is not an integer", name))?;
+        .with_context(|| format!("configure option {} is not an integer", name))?;
    if i < 0 {
        bail!("configure option {} cannot be negative", name);
    }
@@ -415,7 +424,7 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
 fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
    let s = item
        .as_str()
-        .ok_or_else(|| anyhow!("configure option {} is not a string", name))?;
+        .with_context(|| format!("configure option {} is not a string", name))?;

    Ok(humantime::parse_duration(s)?)
 }
@@ -423,7 +432,7 @@ fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
 fn parse_toml_auth_type(name: &str, item: &Item) -> Result<AuthType> {
    let v = item
        .as_str()
-        .ok_or_else(|| anyhow!("configure option {} is not a string", name))?;
+        .with_context(|| format!("configure option {} is not a string", name))?;
    AuthType::from_str(v)
 }

@@ -599,6 +608,7 @@ pg_distrib_dir='{}'
        let prefix_in_bucket = "test_prefix".to_string();
        let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
        let secret_access_key = "SOMEsEcReTsd292v".to_string();
+        let endpoint = "http://localhost:5000".to_string();
        let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
        let max_sync_errors = NonZeroU32::new(222).unwrap();

@@ -611,12 +621,13 @@ bucket_name = '{}'
 bucket_region = '{}'
 prefix_in_bucket = '{}'
 access_key_id = '{}'
-secret_access_key = '{}'"#,
-                max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key
+secret_access_key = '{}'
+endpoint = '{}'"#,
+                max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
            ),
            format!(
-                "remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}'}}",
-                max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key
+                "remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}', endpoint='{}'}}",
+                max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
            ),
        ];

@@ -650,7 +661,8 @@ pg_distrib_dir='{}'
                        bucket_region: bucket_region.clone(),
                        access_key_id: Some(access_key_id.clone()),
                        secret_access_key: Some(secret_access_key.clone()),
-                        prefix_in_bucket: Some(prefix_in_bucket.clone())
+                        prefix_in_bucket: Some(prefix_in_bucket.clone()),
+                        endpoint: Some(endpoint.clone())
                    }),
                },
                "Remote storage config should correctly parse the S3 config"
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -4,7 +4,6 @@ use anyhow::{Context, Result};
 use hyper::header;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
-use routerify::{ext::RequestExt, RouterBuilder};
 use serde::Serialize;
 use tracing::*;
 use zenith_utils::auth::JwtAuth;
@@ -19,12 +18,14 @@ use zenith_utils::http::{
    request::get_request_param,
    request::parse_request_param,
 };
+use zenith_utils::http::{RequestExt, RouterBuilder};
 use zenith_utils::lsn::Lsn;
 use zenith_utils::zid::{opt_display_serde, ZTimelineId};

 use super::models::BranchCreateRequest;
 use super::models::TenantCreateRequest;
 use crate::branches::BranchInfo;
+use crate::repository::RepositoryTimeline;
 use crate::repository::TimelineSyncState;
 use crate::{branches, config::PageServerConf, tenant_mgr, ZTenantId};

@@ -201,7 +202,6 @@ enum TimelineInfo {
        ancestor_timeline_id: Option<ZTimelineId>,
        last_record_lsn: Lsn,
        prev_record_lsn: Lsn,
-        start_lsn: Lsn,
        disk_consistent_lsn: Lsn,
        timeline_state: Option<TimelineSyncState>,
    },
@@ -236,7 +236,6 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
                disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
                last_record_lsn: timeline.get_last_record_lsn(),
                prev_record_lsn: timeline.get_prev_record_lsn(),
-                start_lsn: timeline.get_start_lsn(),
                timeline_state: repo.get_timeline_state(timeline_id),
            },
        })
@@ -247,6 +246,58 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
    Ok(json_response(StatusCode::OK, response_data)?)
 }

+async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
+    check_permission(&request, Some(tenant_id))?;
+
+    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
+
+    tokio::task::spawn_blocking(move || {
+        let _enter =
+            info_span!("timeline_attach_handler", tenant = %tenant_id, timeline = %timeline_id)
+                .entered();
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        match repo.get_timeline(timeline_id)? {
+            RepositoryTimeline::Local(_) => {
+                anyhow::bail!("Timeline with id {} is already local", timeline_id)
+            }
+            RepositoryTimeline::Remote {
+                id: _,
+                disk_consistent_lsn: _,
+            } => {
+                // FIXME (rodionov) get timeline already schedules timeline for download, and duplicate tasks can cause errors
+                //  first should be fixed in https://github.com/zenithdb/zenith/issues/997
+                // TODO (rodionov) change timeline state to awaits download (encapsulate it somewhere in the repo)
+                // TODO (rodionov) can we safely request replication on the timeline before sync is completed? (can be implemented on top of the #997)
+                Ok(())
+            }
+        }
+    })
+    .await
+    .map_err(ApiError::from_err)??;
+
+    Ok(json_response(StatusCode::ACCEPTED, ())?)
+}
+
+async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
+    check_permission(&request, Some(tenant_id))?;
+
+    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
+
+    tokio::task::spawn_blocking(move || {
+        let _enter =
+            info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
+                .entered();
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        repo.detach_timeline(timeline_id)
+    })
+    .await
+    .map_err(ApiError::from_err)??;
+
+    Ok(json_response(StatusCode::OK, ())?)
+}
+
 async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    // check for management permission
    check_permission(&request, None)?;
@@ -267,13 +318,13 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo

    let request_data: TenantCreateRequest = json_request(&mut request).await?;

-    let response_data = tokio::task::spawn_blocking(move || {
+    tokio::task::spawn_blocking(move || {
        let _enter = info_span!("tenant_create", tenant = %request_data.tenant_id).entered();
        tenant_mgr::create_repository_for_tenant(get_config(&request), request_data.tenant_id)
    })
    .await
    .map_err(ApiError::from_err)??;
-    Ok(json_response(StatusCode::CREATED, response_data)?)
+    Ok(json_response(StatusCode::CREATED, ())?)
 }

 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -308,6 +359,14 @@ pub fn make_router(
            "/v1/timeline/:tenant_id/:timeline_id",
            timeline_detail_handler,
        )
+        .post(
+            "/v1/timeline/:tenant_id/:timeline_id/attach",
+            timeline_attach_handler,
+        )
+        .post(
+            "/v1/timeline/:tenant_id/:timeline_id/detach",
+            timeline_detach_handler,
+        )
        .get("/v1/branch/:tenant_id", branch_list_handler)
        .get("/v1/branch/:tenant_id/:branch_name", branch_detail_handler)
        .post("/v1/branch", branch_create_handler)
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -7,7 +7,7 @@ use std::fs::File;
 use std::io::{Read, Seek, SeekFrom};
 use std::path::{Path, PathBuf};

-use anyhow::{anyhow, bail, ensure, Result};
+use anyhow::{bail, ensure, Context, Result};
 use bytes::Bytes;
 use tracing::*;

@@ -126,7 +126,7 @@ pub fn import_timeline_from_postgres_datadir(
    writer.advance_last_record_lsn(lsn);

    // We expect the Postgres server to be shut down cleanly.
-    let pg_control = pg_control.ok_or_else(|| anyhow!("pg_control file not found"))?;
+    let pg_control = pg_control.context("pg_control file not found")?;
    ensure!(
        pg_control.state == DBState_DB_SHUTDOWNED,
        "Postgres cluster was not shut down cleanly"
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -11,7 +11,7 @@
 //! parent timeline, and the last LSN that has been written to disk.
 //!

-use anyhow::{anyhow, bail, ensure, Context, Result};
+use anyhow::{bail, ensure, Context, Result};
 use bookfile::Book;
 use bytes::Bytes;
 use lazy_static::lazy_static;
@@ -28,7 +28,7 @@ use std::io::Write;
 use std::ops::{Bound::Included, Deref};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::{self, AtomicBool, AtomicUsize};
-use std::sync::{Arc, Mutex, MutexGuard};
+use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard};
 use std::time::{Duration, Instant};

 use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME};
@@ -71,7 +71,6 @@ mod storage_layer;
 use delta_layer::DeltaLayer;
 use ephemeral_file::is_ephemeral_file;
 use filename::{DeltaFileName, ImageFileName};
-use global_layer_map::{LayerId, GLOBAL_LAYER_MAP};
 use image_layer::ImageLayer;
 use inmemory_layer::InMemoryLayer;
 use layer_map::LayerMap;
@@ -167,7 +166,7 @@ impl Repository for LayeredRepository {
        // Create the timeline directory, and write initial metadata to file.
        crashsafe_dir::create_dir_all(self.conf.timeline_path(&timelineid, &self.tenantid))?;

-        let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), Lsn(0), initdb_lsn);
+        let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), initdb_lsn, initdb_lsn);
        Self::save_metadata(self.conf, timelineid, self.tenantid, &metadata, true)?;

        let timeline = LayeredTimeline::new(
@@ -201,9 +200,10 @@ impl Repository for LayeredRepository {
                bail!("Cannot branch off the timeline {} that's not local", src)
            }
        };
+        let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();

        src_timeline
-            .check_lsn_is_in_scope(start_lsn)
+            .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
            .context("invalid branch start lsn")?;

        let RecordLsn {
@@ -231,7 +231,7 @@ impl Repository for LayeredRepository {
            dst_prev,
            Some(src),
            start_lsn,
-            src_timeline.latest_gc_cutoff_lsn.load(),
+            *src_timeline.latest_gc_cutoff_lsn.read().unwrap(),
            src_timeline.initdb_lsn,
        );
        crashsafe_dir::create_dir_all(self.conf.timeline_path(&dst, &self.tenantid))?;
@@ -285,7 +285,46 @@ impl Repository for LayeredRepository {
        Ok(())
    }

-    // TODO this method currently does not do anything to prevent (or react to) state updates between a sync task schedule and a sync task end (that causes this update).
+    // Detaches the timeline from the repository.
+    fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()> {
+        let mut timelines = self.timelines.lock().unwrap();
+        match timelines.entry(timeline_id) {
+            Entry::Vacant(_) => {
+                bail!("cannot detach non existing timeline");
+            }
+            Entry::Occupied(mut entry) => {
+                let timeline_entry = entry.get_mut();
+
+                let timeline = match timeline_entry {
+                    LayeredTimelineEntry::Remote { .. } => {
+                        bail!("cannot detach remote timeline {}", timeline_id);
+                    }
+                    LayeredTimelineEntry::Local(timeline) => timeline,
+                };
+
+                // TODO (rodionov) keep local state in timeline itself (refactoring related to https://github.com/zenithdb/zenith/issues/997 and #1104)
+
+                // FIXME this is local disk consistent lsn, need to keep the latest successfully uploaded checkpoint lsn in timeline (metadata?)
+                //  https://github.com/zenithdb/zenith/issues/1104
+                let remote_disk_consistent_lsn = timeline.disk_consistent_lsn.load();
+                // reference to timeline is dropped here
+                entry.insert(LayeredTimelineEntry::Remote {
+                    id: timeline_id,
+                    disk_consistent_lsn: remote_disk_consistent_lsn,
+                });
+            }
+        };
+        // Release the lock to shutdown and remove the files without holding it
+        drop(timelines);
+        // shutdown the timeline (this shuts down the walreceiver)
+        thread_mgr::shutdown_threads(None, Some(self.tenantid), Some(timeline_id));
+
+        // remove timeline files (maybe avoid this for ease of debugging if something goes wrong)
+        fs::remove_dir_all(self.conf.timeline_path(&timeline_id, &self.tenantid))?;
+        Ok(())
+    }
+
+    // TODO this method currently does not do anything to prevent (or react to) state updates between a sync task schedule and a sync task end (that causes this update).
    // Sync task is enqueued and can error and be rescheduled, so some significant time may pass between the events.
    //
    /// Reacts on the timeline sync state change, changing pageserver's memory state for this timeline (unload or load of the timeline files).
@@ -294,6 +333,10 @@ impl Repository for LayeredRepository {
        timeline_id: ZTimelineId,
        new_state: TimelineSyncState,
    ) -> Result<()> {
+        debug!(
+            "set_timeline_state: timeline_id: {}, new_state: {:?}",
+            timeline_id, new_state
+        );
        let mut timelines_accessor = self.timelines.lock().unwrap();

        match new_state {
@@ -314,6 +357,7 @@ impl Repository for LayeredRepository {
                },
            ),
        };
+        // NOTE we do not delete local data in case timeline became cloud only, this is performed in detach_timeline
        drop(timelines_accessor);

        Ok(())
@@ -567,7 +611,7 @@ impl LayeredRepository {
            }
        }

-        //Now collect info about branchpoints
+        // Now collect info about branchpoints
        let mut all_branchpoints: BTreeSet<(ZTimelineId, Lsn)> = BTreeSet::new();
        for &timelineid in &timelineids {
            let timeline = match self.get_or_init_timeline(timelineid, &mut timelines)? {
@@ -651,7 +695,6 @@ impl LayeredRepository {
                    timeline.checkpoint(CheckpointConfig::Forced)?;
                    info!("timeline {} checkpoint_before_gc done", timelineid);
                }
-
                let result = timeline.gc_timeline(branchpoints, cutoff)?;

                totals += result;
@@ -740,7 +783,7 @@ pub struct LayeredTimeline {
    checkpoint_cs: Mutex<()>,

    // Needed to ensure that we can't create a branch at a point that was already garbage collected
-    latest_gc_cutoff_lsn: AtomicLsn,
+    latest_gc_cutoff_lsn: RwLock<Lsn>,

    // It may change across major versions so for simplicity
    // keep it after running initdb for a timeline.
@@ -784,6 +827,10 @@ impl Timeline for LayeredTimeline {
        Ok(())
    }

+    fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard<Lsn> {
+        self.latest_gc_cutoff_lsn.read().unwrap()
+    }
+
    /// Look up given page version.
    fn get_page_at_lsn(&self, rel: RelishTag, rel_blknum: BlockNumber, lsn: Lsn) -> Result<Bytes> {
        if !rel.is_blocky() && rel_blknum != 0 {
@@ -794,14 +841,6 @@ impl Timeline for LayeredTimeline {
            );
        }
        debug_assert!(lsn <= self.get_last_record_lsn());
-        let latest_gc_cutoff_lsn = self.latest_gc_cutoff_lsn.load();
-        // error instead of assert to simplify testing
-        ensure!(
-            lsn >= latest_gc_cutoff_lsn,
-            "tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
-            lsn, latest_gc_cutoff_lsn
-        );
-
        let (seg, seg_blknum) = SegmentTag::from_blknum(rel, rel_blknum);

        if let Some((layer, lsn)) = self.get_layer_for_read(seg, lsn)? {
@@ -972,21 +1011,16 @@ impl Timeline for LayeredTimeline {
    ///
    /// Validate lsn against initdb_lsn and latest_gc_cutoff_lsn.
    ///
-    fn check_lsn_is_in_scope(&self, lsn: Lsn) -> Result<()> {
-        let initdb_lsn = self.initdb_lsn;
+    fn check_lsn_is_in_scope(
+        &self,
+        lsn: Lsn,
+        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
+    ) -> Result<()> {
        ensure!(
-            lsn >= initdb_lsn,
-            "LSN {} is earlier than initdb lsn {}",
-            lsn,
-            initdb_lsn,
-        );
-
-        let latest_gc_cutoff_lsn = self.latest_gc_cutoff_lsn.load();
-        ensure!(
-            lsn >= latest_gc_cutoff_lsn,
+            lsn >= **latest_gc_cutoff_lsn,
            "LSN {} is earlier than latest GC horizon {} (we might've already garbage collected needed data)",
            lsn,
-            latest_gc_cutoff_lsn,
+            **latest_gc_cutoff_lsn,
        );
        Ok(())
    }
@@ -1003,14 +1037,6 @@ impl Timeline for LayeredTimeline {
        self.last_record_lsn.load()
    }

-    fn get_start_lsn(&self) -> Lsn {
-        self.ancestor_timeline
-            .as_ref()
-            .and_then(|ancestor_entry| ancestor_entry.local_or_schedule_download(self.tenantid))
-            .map(Timeline::get_start_lsn)
-            .unwrap_or(self.ancestor_lsn)
-    }
-
    fn get_current_logical_size(&self) -> usize {
        self.current_logical_size.load(atomic::Ordering::Acquire) as usize
    }
@@ -1100,7 +1126,7 @@ impl LayeredTimeline {
            write_lock: Mutex::new(()),
            checkpoint_cs: Mutex::new(()),

-            latest_gc_cutoff_lsn: AtomicLsn::from(metadata.latest_gc_cutoff_lsn()),
+            latest_gc_cutoff_lsn: RwLock::new(metadata.latest_gc_cutoff_lsn()),
            initdb_lsn: metadata.initdb_lsn(),
        }
    }
@@ -1126,8 +1152,8 @@ impl LayeredTimeline {
                // create an ImageLayer struct for each image file.
                if imgfilename.lsn > disk_consistent_lsn {
                    warn!(
-                        "found future image layer {} on timeline {}",
-                        imgfilename, self.timelineid
+                        "found future image layer {} on timeline {} disk_consistent_lsn is {}",
+                        imgfilename, self.timelineid, disk_consistent_lsn
                    );

                    rename_to_backup(direntry.path())?;
@@ -1150,8 +1176,8 @@ impl LayeredTimeline {
                // before crash.
                if deltafilename.end_lsn > disk_consistent_lsn + 1 {
                    warn!(
-                        "found future delta layer {} on timeline {}",
-                        deltafilename, self.timelineid
+                        "found future delta layer {} on timeline {} disk_consistent_lsn is {}",
+                        deltafilename, self.timelineid, disk_consistent_lsn
                    );

                    rename_to_backup(direntry.path())?;
@@ -1347,7 +1373,7 @@ impl LayeredTimeline {
                    self.tenantid,
                    seg,
                    lsn,
-                    lsn,
+                    last_record_lsn,
                )?;
            } else {
                return Ok(open_layer);
@@ -1390,7 +1416,7 @@ impl LayeredTimeline {
                self.timelineid,
                self.tenantid,
                start_lsn,
-                lsn,
+                last_record_lsn,
            )?;
        } else {
            // New relation.
@@ -1401,8 +1427,14 @@ impl LayeredTimeline {
                lsn
            );

-            layer =
-                InMemoryLayer::create(self.conf, self.timelineid, self.tenantid, seg, lsn, lsn)?;
+            layer = InMemoryLayer::create(
+                self.conf,
+                self.timelineid,
+                self.tenantid,
+                seg,
+                lsn,
+                last_record_lsn,
+            )?;
        }

        let layer_rc: Arc<InMemoryLayer> = Arc::new(layer);
@@ -1419,7 +1451,7 @@ impl LayeredTimeline {
        // Prevent concurrent checkpoints
        let _checkpoint_cs = self.checkpoint_cs.lock().unwrap();

-        let mut write_guard = self.write_lock.lock().unwrap();
+        let write_guard = self.write_lock.lock().unwrap();
        let mut layers = self.layers.lock().unwrap();

        // Bump the generation number in the layer map, so that we can distinguish
@@ -1445,11 +1477,17 @@ impl LayeredTimeline {
        let mut disk_consistent_lsn = last_record_lsn;

        let mut layer_paths = Vec::new();
+        let mut freeze_end_lsn = Lsn(0);
+        let mut evicted_layers = Vec::new();
+
+        //
+        // Determine which layers we need to evict and calculate max(latest_lsn)
+        // among those layers.
+        //
        while let Some((oldest_layer_id, oldest_layer, oldest_generation)) =
            layers.peek_oldest_open()
        {
-            let oldest_pending_lsn = oldest_layer.get_oldest_pending_lsn();
-
+            let oldest_lsn = oldest_layer.get_oldest_lsn();
            // Does this layer need freezing?
            //
            // Write out all in-memory layers that contain WAL older than CHECKPOINT_DISTANCE.
@@ -1458,28 +1496,60 @@ impl LayeredTimeline {
            // when we started. We don't want to process layers inserted after we started, to
            // avoid getting into an infinite loop trying to process again entries that we
            // inserted ourselves.
-            let distance = last_record_lsn.widening_sub(oldest_pending_lsn);
-            if distance < 0
+            //
+            // Once we have decided to write out at least one layer, we must also write out
+            // any other layers that contain WAL older than the end LSN of the layers we have
+            // already decided to write out. In other words, we must write out all layers
+            // whose [oldest_lsn, latest_lsn) range overlaps with any of the other layers
+            // that we are writing out. Otherwise, when we advance 'disk_consistent_lsn', it's
+            // ambiguous whether those layers are already durable on disk or not. For example,
+            // imagine that there are two layers in memory that contain page versions in the
+            // following LSN ranges:
+            //
+            // A: 100-150
+            // B: 110-200
+            //
+            // If we flush layer A, we must also flush layer B, because they overlap. If we
+            // flushed only A, and advanced 'disk_consistent_lsn' to 150, we would break the
+            // rule that all WAL older than 'disk_consistent_lsn' are durable on disk, because
+            // B contains some WAL older than 150. On the other hand, if we flushed out A and
+            // advanced 'disk_consistent_lsn' only up to 110, after crash and restart we would
+            // delete the first layer because its end LSN is larger than 110. If we changed
+            // the deletion logic to not delete it, then we would start streaming at 110, and
+            // process again the WAL records in the range 110-150 that are already in layer A,
+            // and the WAL processing code does not cope with that. We solve that dilemma by
+            // insisting that if we write out the first layer, we also write out the second
+            // layer, and advance disk_consistent_lsn all the way up to 200.
+            //
+            let distance = last_record_lsn.widening_sub(oldest_lsn);
+            if (distance < 0
                || distance < checkpoint_distance.into()
-                || oldest_generation == current_generation
+                || oldest_generation == current_generation)
+                && oldest_lsn >= freeze_end_lsn
+            // otherwise this layer intersects with an evicted layer and so also needs to be evicted
            {
                info!(
                    "the oldest layer is now {} which is {} bytes behind last_record_lsn",
                    oldest_layer.filename().display(),
                    distance
                );
-                disk_consistent_lsn = oldest_pending_lsn;
+                disk_consistent_lsn = oldest_lsn;
                break;
            }
+            let latest_lsn = oldest_layer.get_latest_lsn();
+            if latest_lsn > freeze_end_lsn {
+                freeze_end_lsn = latest_lsn; // calculate max of latest_lsn of the layers we're about to evict
+            }
+            layers.remove_open(oldest_layer_id);
+            evicted_layers.push((oldest_layer_id, oldest_layer));
+        }

-            drop(layers);
-            drop(write_guard);
-
-            let mut this_layer_paths = self.evict_layer(oldest_layer_id, reconstruct_pages)?;
-            layer_paths.append(&mut this_layer_paths);
-
-            write_guard = self.write_lock.lock().unwrap();
-            layers = self.layers.lock().unwrap();
+        // Freeze evicted layers
+        for (_evicted_layer_id, evicted_layer) in evicted_layers.iter() {
+            // Mark the layer as no longer accepting writes and record the end_lsn.
+            // This happens in-place, no new layers are created now.
+            evicted_layer.freeze(freeze_end_lsn);
+            layers.insert_historic(evicted_layer.clone());
        }

        // Call unload() on all frozen layers, to release memory.
@@ -1492,6 +1562,14 @@ impl LayeredTimeline {
        drop(layers);
        drop(write_guard);

+        // Create delta/image layers for evicted layers
+        for (_evicted_layer_id, evicted_layer) in evicted_layers.iter() {
+            let mut this_layer_paths =
+                self.evict_layer(evicted_layer.clone(), reconstruct_pages)?;
+            layer_paths.append(&mut this_layer_paths);
+        }
+
+        // Sync layers
        if !layer_paths.is_empty() {
            // We must fsync the timeline dir to ensure the directory entries for
            // new layer files are durable
@@ -1532,7 +1610,7 @@ impl LayeredTimeline {
                ondisk_prev_record_lsn,
                ancestor_timelineid,
                self.ancestor_lsn,
-                self.latest_gc_cutoff_lsn.load(),
+                *self.latest_gc_cutoff_lsn.read().unwrap(),
                self.initdb_lsn,
            );

@@ -1559,52 +1637,29 @@ impl LayeredTimeline {
        Ok(())
    }

-    fn evict_layer(&self, layer_id: LayerId, reconstruct_pages: bool) -> Result<Vec<PathBuf>> {
-        // Mark the layer as no longer accepting writes and record the end_lsn.
-        // This happens in-place, no new layers are created now.
-        // We call `get_last_record_lsn` again, which may be different from the
-        // original load, as we may have released the write lock since then.
-
-        let mut write_guard = self.write_lock.lock().unwrap();
-        let mut layers = self.layers.lock().unwrap();
+    fn evict_layer(
+        &self,
+        layer: Arc<InMemoryLayer>,
+        reconstruct_pages: bool,
+    ) -> Result<Vec<PathBuf>> {
+        let new_historics = layer.write_to_disk(self, reconstruct_pages)?;

        let mut layer_paths = Vec::new();
+        let _write_guard = self.write_lock.lock().unwrap();
+        let mut layers = self.layers.lock().unwrap();

-        let global_layer_map = GLOBAL_LAYER_MAP.read().unwrap();
-        if let Some(oldest_layer) = global_layer_map.get(&layer_id) {
-            drop(global_layer_map);
-            oldest_layer.freeze(self.get_last_record_lsn());
+        // Finally, replace the frozen in-memory layer with the new on-disk layers
+        layers.remove_historic(layer);

-            // The layer is no longer open, update the layer map to reflect this.
-            // We will replace it with on-disk historics below.
-            layers.remove_open(layer_id);
-            layers.insert_historic(oldest_layer.clone());
-
-            // Write the now-frozen layer to disk. That could take a while, so release the lock while do it
-            drop(layers);
-            drop(write_guard);
-
-            let new_historics = oldest_layer.write_to_disk(self, reconstruct_pages)?;
-
-            write_guard = self.write_lock.lock().unwrap();
-            layers = self.layers.lock().unwrap();
-
-            // Finally, replace the frozen in-memory layer with the new on-disk layers
-            layers.remove_historic(oldest_layer);
-
-            // Add the historics to the LayerMap
-            for delta_layer in new_historics.delta_layers {
-                layer_paths.push(delta_layer.path());
-                layers.insert_historic(Arc::new(delta_layer));
-            }
-            for image_layer in new_historics.image_layers {
-                layer_paths.push(image_layer.path());
-                layers.insert_historic(Arc::new(image_layer));
-            }
+        // Add the historics to the LayerMap
+        for delta_layer in new_historics.delta_layers {
+            layer_paths.push(delta_layer.path());
+            layers.insert_historic(Arc::new(delta_layer));
+        }
+        for image_layer in new_historics.image_layers {
+            layer_paths.push(image_layer.path());
+            layers.insert_historic(Arc::new(image_layer));
        }
-        drop(layers);
-        drop(write_guard);
-
        Ok(layer_paths)
    }

@@ -1633,12 +1688,14 @@ impl LayeredTimeline {
    pub fn gc_timeline(&self, retain_lsns: Vec<Lsn>, cutoff: Lsn) -> Result<GcResult> {
        let now = Instant::now();
        let mut result: GcResult = Default::default();
+        let disk_consistent_lsn = self.get_disk_consistent_lsn();
+        let _checkpoint_cs = self.checkpoint_cs.lock().unwrap();

        let _enter = info_span!("garbage collection", timeline = %self.timelineid, tenant = %self.tenantid, cutoff = %cutoff).entered();

        // We need to ensure that no one branches at a point before latest_gc_cutoff_lsn.
        // See branch_timeline() for details.
-        self.latest_gc_cutoff_lsn.store(cutoff);
+        *self.latest_gc_cutoff_lsn.write().unwrap() = cutoff;

        info!("GC starting");

@@ -1718,7 +1775,12 @@ impl LayeredTimeline {
            }

            // 3. Is there a later on-disk layer for this relation?
-            if !l.is_dropped() && !layers.newer_image_layer_exists(l.get_seg_tag(), l.get_end_lsn())
+            if !l.is_dropped()
+                && !layers.newer_image_layer_exists(
+                    l.get_seg_tag(),
+                    l.get_end_lsn(),
+                    disk_consistent_lsn,
+                )
            {
                info!(
                    "keeping {} {}-{} because it is the latest layer",
@@ -2171,11 +2233,10 @@ impl<'a> TimelineWriter for LayeredTimelineWriter<'a> {
        let oldsize = self
            .tl
            .get_relish_size(rel, self.tl.get_last_record_lsn())?
-            .ok_or_else(|| {
-                anyhow!(
+            .with_context(|| {
+                format!(
                    "attempted to truncate non-existent relish {} at {}",
-                    rel,
-                    lsn
+                    rel, lsn
                )
            })?;

@@ -2298,8 +2359,5 @@ fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
        }
    }

-    Err(anyhow!(
-        "couldn't find an unused backup number for {:?}",
-        path
-    ))
+    bail!("couldn't find an unused backup number for {:?}", path)
 }
--- a/pageserver/src/layered_repository/delta_layer.rs
+++ b/pageserver/src/layered_repository/delta_layer.rs
@@ -169,7 +169,7 @@ impl DeltaLayerInner {
        if let Some((_entry_lsn, entry)) = slice.last() {
            Ok(*entry)
        } else {
-            Err(anyhow::anyhow!("could not find seg size in delta layer"))
+            bail!("could not find seg size in delta layer")
        }
    }
 }
--- a/pageserver/src/layered_repository/ephemeral_file.rs
+++ b/pageserver/src/layered_repository/ephemeral_file.rs
@@ -175,7 +175,10 @@ impl Write for EphemeralFile {
    }

    fn flush(&mut self) -> Result<(), std::io::Error> {
-        todo!()
+        // we don't need to flush data:
+        // * we either write input bytes or not, not keeping any intermediate data buffered
+        // * rust unix file `flush` impl does not flush things either, returning `Ok(())`
+        Ok(())
    }
 }

--- a/pageserver/src/layered_repository/image_layer.rs
+++ b/pageserver/src/layered_repository/image_layer.rs
@@ -173,7 +173,14 @@ impl Layer for ImageLayer {
                    .as_ref()
                    .unwrap()
                    .chapter_reader(BLOCKY_IMAGES_CHAPTER)?;
-                chapter.read_exact_at(&mut buf, offset)?;
+
+                chapter.read_exact_at(&mut buf, offset).with_context(|| {
+                    format!(
+                        "failed to read page from data file {} at offset {}",
+                        self.filename().display(),
+                        offset
+                    )
+                })?;

                buf
            }
--- a/pageserver/src/layered_repository/inmemory_layer.rs
+++ b/pageserver/src/layered_repository/inmemory_layer.rs
@@ -39,8 +39,20 @@ pub struct InMemoryLayer {
    ///
    start_lsn: Lsn,

-    /// LSN of the oldest page version stored in this layer
-    oldest_pending_lsn: Lsn,
+    ///
+    /// LSN of the oldest page version stored in this layer.
+    ///
+    /// This is different from 'start_lsn' in that we enforce that the 'start_lsn'
+    /// of a layer always matches the 'end_lsn' of its predecessor, even if there
+    /// are no page versions until a later LSN. That way you can detect any
+    /// missing layer files more easily. 'oldest_lsn' is the first page version
+    /// actually stored in this layer. In the range between 'start_lsn' and
+    /// 'oldest_lsn', there are no changes to the segment.
+    /// 'oldest_lsn' is used to adjust 'disk_consistent_lsn' and that is why it should
+    /// point to the beginning of a WAL record. This is the other difference from 'start_lsn',
+    /// which points to the end of a WAL record. This is why 'oldest_lsn' can be smaller than 'start_lsn'.
+    ///
+    oldest_lsn: Lsn,

    /// The above fields never change. The parts that do change are in 'inner',
    /// and protected by mutex.
@@ -73,6 +85,14 @@ pub struct InMemoryLayerInner {
    /// a non-blocky rel, 'seg_sizes' is not used and is always empty.
    ///
    seg_sizes: VecMap<Lsn, SegmentBlk>,
+
+    ///
+    /// LSN of the newest page version stored in this layer.
+    ///
+    /// The difference between 'end_lsn' and 'latest_lsn' is the same as between
+    /// 'start_lsn' and 'oldest_lsn'. See comments in 'oldest_lsn'.
+    ///
+    latest_lsn: Lsn,
 }

 impl InMemoryLayerInner {
@@ -319,8 +339,13 @@ pub struct LayersOnDisk {

 impl InMemoryLayer {
    /// Return the oldest page version that's stored in this layer
-    pub fn get_oldest_pending_lsn(&self) -> Lsn {
-        self.oldest_pending_lsn
+    pub fn get_oldest_lsn(&self) -> Lsn {
+        self.oldest_lsn
+    }
+
+    pub fn get_latest_lsn(&self) -> Lsn {
+        let inner = self.inner.read().unwrap();
+        inner.latest_lsn
    }

    ///
@@ -332,7 +357,7 @@ impl InMemoryLayer {
        tenantid: ZTenantId,
        seg: SegmentTag,
        start_lsn: Lsn,
-        oldest_pending_lsn: Lsn,
+        oldest_lsn: Lsn,
    ) -> Result<InMemoryLayer> {
        trace!(
            "initializing new empty InMemoryLayer for writing {} on timeline {} at {}",
@@ -355,13 +380,14 @@ impl InMemoryLayer {
            tenantid,
            seg,
            start_lsn,
-            oldest_pending_lsn,
+            oldest_lsn,
            incremental: false,
            inner: RwLock::new(InMemoryLayerInner {
                end_lsn: None,
                dropped: false,
                page_versions: PageVersions::new(file),
                seg_sizes,
+                latest_lsn: oldest_lsn,
            }),
        })
    }
@@ -398,6 +424,8 @@ impl InMemoryLayer {
        let mut inner = self.inner.write().unwrap();

        inner.assert_writeable();
+        assert!(lsn >= inner.latest_lsn);
+        inner.latest_lsn = lsn;

        let old = inner.page_versions.append_or_update_last(blknum, lsn, pv)?;

@@ -509,12 +537,11 @@ impl InMemoryLayer {
        timelineid: ZTimelineId,
        tenantid: ZTenantId,
        start_lsn: Lsn,
-        oldest_pending_lsn: Lsn,
+        oldest_lsn: Lsn,
    ) -> Result<InMemoryLayer> {
        let seg = src.get_seg_tag();

-        assert!(oldest_pending_lsn.is_aligned());
-        assert!(oldest_pending_lsn >= start_lsn);
+        assert!(oldest_lsn.is_aligned());

        trace!(
            "initializing new InMemoryLayer for writing {} on timeline {} at {}",
@@ -538,13 +565,14 @@ impl InMemoryLayer {
            tenantid,
            seg,
            start_lsn,
-            oldest_pending_lsn,
+            oldest_lsn,
            incremental: true,
            inner: RwLock::new(InMemoryLayerInner {
                end_lsn: None,
                dropped: false,
                page_versions: PageVersions::new(file),
                seg_sizes,
+                latest_lsn: oldest_lsn,
            }),
        })
    }
--- a/pageserver/src/layered_repository/layer_map.rs
+++ b/pageserver/src/layered_repository/layer_map.rs
@@ -40,7 +40,7 @@ pub struct LayerMap {
    /// All the layers keyed by segment tag
    segs: HashMap<SegmentTag, SegEntry>,

-    /// All in-memory layers, ordered by 'oldest_pending_lsn' and generation
+    /// All in-memory layers, ordered by 'oldest_lsn' and generation
    /// of each layer. This allows easy access to the in-memory layer that
    /// contains the oldest WAL record.
    open_layers: BinaryHeap<OpenLayerEntry>,
@@ -83,16 +83,16 @@ impl LayerMap {

        let layer_id = segentry.update_open(Arc::clone(&layer));

-        let oldest_pending_lsn = layer.get_oldest_pending_lsn();
+        let oldest_lsn = layer.get_oldest_lsn();

-        // After a crash and restart, 'oldest_pending_lsn' of the oldest in-memory
+        // After a crash and restart, 'oldest_lsn' of the oldest in-memory
        // layer becomes the WAL streaming starting point, so it better not point
        // in the middle of a WAL record.
-        assert!(oldest_pending_lsn.is_aligned());
+        assert!(oldest_lsn.is_aligned());

        // Also add it to the binary heap
        let open_layer_entry = OpenLayerEntry {
-            oldest_pending_lsn: layer.get_oldest_pending_lsn(),
+            oldest_lsn: layer.get_oldest_lsn(),
            layer_id,
            generation: self.current_generation,
        };
@@ -191,9 +191,15 @@ impl LayerMap {
    ///
    /// This is used for garbage collection, to determine if an old layer can
    /// be deleted.
-    pub fn newer_image_layer_exists(&self, seg: SegmentTag, lsn: Lsn) -> bool {
+    /// We ignore layers newer than disk_consistent_lsn because they will be removed at restart.
+    pub fn newer_image_layer_exists(
+        &self,
+        seg: SegmentTag,
+        lsn: Lsn,
+        disk_consistent_lsn: Lsn,
+    ) -> bool {
        if let Some(segentry) = self.segs.get(&seg) {
-            segentry.newer_image_layer_exists(lsn)
+            segentry.newer_image_layer_exists(lsn, disk_consistent_lsn)
        } else {
            false
        }
@@ -311,13 +317,18 @@ impl SegEntry {
        self.historic.search(lsn)
    }

-    pub fn newer_image_layer_exists(&self, lsn: Lsn) -> bool {
+    pub fn newer_image_layer_exists(&self, lsn: Lsn, disk_consistent_lsn: Lsn) -> bool {
        // We only check on-disk layers, because
        // in-memory layers are not durable

+        // The end-LSN is exclusive, while disk_consistent_lsn is
+        // inclusive. For example, if disk_consistent_lsn is 100, it is
+        // OK for a delta layer to have end LSN 101, but if the end LSN
+        // is 102, then it might not have been fully flushed to disk
+        // before crash.
        self.historic
            .iter_newer(lsn)
-            .any(|layer| !layer.is_incremental())
+            .any(|layer| !layer.is_incremental() && layer.get_end_lsn() <= disk_consistent_lsn + 1)
    }

    // Set new open layer for a SegEntry.
@@ -341,23 +352,23 @@ impl SegEntry {
 }

 /// Entry held in LayerMap::open_layers, with boilerplate comparison routines
-/// to implement a min-heap ordered by 'oldest_pending_lsn' and 'generation'
+/// to implement a min-heap ordered by 'oldest_lsn' and 'generation'
 ///
 /// The generation number associated with each entry can be used to distinguish
 /// recently-added entries (i.e after last call to increment_generation()) from older
-/// entries with the same 'oldest_pending_lsn'.
+/// entries with the same 'oldest_lsn'.
 struct OpenLayerEntry {
-    oldest_pending_lsn: Lsn, // copy of layer.get_oldest_pending_lsn()
+    oldest_lsn: Lsn, // copy of layer.get_oldest_lsn()
    generation: u64,
    layer_id: LayerId,
 }
 impl Ord for OpenLayerEntry {
    fn cmp(&self, other: &Self) -> Ordering {
        // BinaryHeap is a max-heap, and we want a min-heap. Reverse the ordering here
-        // to get that. Entries with identical oldest_pending_lsn are ordered by generation
+        // to get that. Entries with identical oldest_lsn are ordered by generation
        other
-            .oldest_pending_lsn
-            .cmp(&self.oldest_pending_lsn)
+            .oldest_lsn
+            .cmp(&self.oldest_lsn)
            .then_with(|| other.generation.cmp(&self.generation))
    }
 }
@@ -426,7 +437,7 @@ mod tests {
        conf: &'static PageServerConf,
        segno: u32,
        start_lsn: Lsn,
-        oldest_pending_lsn: Lsn,
+        oldest_lsn: Lsn,
    ) -> Arc<InMemoryLayer> {
        Arc::new(
            InMemoryLayer::create(
@@ -438,7 +449,7 @@ mod tests {
                    segno,
                },
                start_lsn,
-                oldest_pending_lsn,
+                oldest_lsn,
            )
            .unwrap(),
        )
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -10,7 +10,7 @@
 //     *callmemaybe <zenith timelineid> $url* -- ask pageserver to start walreceiver on $url
 //

-use anyhow::{anyhow, bail, ensure, Context, Result};
+use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use lazy_static::lazy_static;
 use regex::Regex;
@@ -18,7 +18,7 @@ use std::io;
 use std::net::TcpListener;
 use std::str;
 use std::str::FromStr;
-use std::sync::Arc;
+use std::sync::{Arc, RwLockReadGuard};
 use tracing::*;
 use zenith_metrics::{register_histogram_vec, HistogramVec};
 use zenith_utils::auth::{self, JwtAuth};
@@ -398,7 +398,12 @@ impl PageServerHandler {
    /// In either case, if the page server hasn't received the WAL up to the
    /// requested LSN yet, we will wait for it to arrive. The return value is
    /// the LSN that should be used to look up the page versions.
-    fn wait_or_get_last_lsn(timeline: &dyn Timeline, lsn: Lsn, latest: bool) -> Result<Lsn> {
+    fn wait_or_get_last_lsn(
+        timeline: &dyn Timeline,
+        mut lsn: Lsn,
+        latest: bool,
+        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
+    ) -> Result<Lsn> {
        if latest {
            // Latest page version was requested. If LSN is given, it is a hint
            // to the page server that there have been no modifications to the
@@ -419,22 +424,26 @@ impl PageServerHandler {
            // walsender completes the authentication and starts streaming the
            // WAL.
            if lsn <= last_record_lsn {
-                Ok(last_record_lsn)
+                lsn = last_record_lsn;
            } else {
                timeline.wait_lsn(lsn)?;
                // Since we waited for 'lsn' to arrive, that is now the last
                // record LSN. (Or close enough for our purposes; the
                // last-record LSN can advance immediately after we return
                // anyway)
-                Ok(lsn)
            }
        } else {
            if lsn == Lsn(0) {
                bail!("invalid LSN(0) in request");
            }
            timeline.wait_lsn(lsn)?;
-            Ok(lsn)
        }
+        ensure!(
+            lsn >= **latest_gc_cutoff_lsn,
+            "tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
+            lsn, **latest_gc_cutoff_lsn
+        );
+        Ok(lsn)
    }

    fn handle_get_rel_exists_request(
@@ -445,7 +454,8 @@ impl PageServerHandler {
        let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();

        let tag = RelishTag::Relation(req.rel);
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;

        let exists = timeline.get_rel_exists(tag, lsn)?;

@@ -461,7 +471,8 @@ impl PageServerHandler {
    ) -> Result<PagestreamBeMessage> {
        let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
        let tag = RelishTag::Relation(req.rel);
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;

        let n_blocks = timeline.get_relish_size(tag, lsn)?;

@@ -482,8 +493,16 @@ impl PageServerHandler {
        let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
            .entered();
        let tag = RelishTag::Relation(req.rel);
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
-
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
+        /*
+        // Add a 1s delay to some requests. The delay causes the requests to
+        // hit the race condition from github issue #1047 more easily.
+        use rand::Rng;
+        if rand::thread_rng().gen::<u8>() < 5 {
+            std::thread::sleep(std::time::Duration::from_millis(1000));
+        }
+        */
        let page = timeline.get_page_at_lsn(tag, req.blkno, lsn)?;

        Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
@@ -504,9 +523,10 @@ impl PageServerHandler {
        // check that the timeline exists
        let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)
            .context("Cannot handle basebackup request for a remote timeline")?;
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        if let Some(lsn) = lsn {
            timeline
-                .check_lsn_is_in_scope(lsn)
+                .check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)
                .context("invalid basebackup lsn")?;
        }

@@ -624,7 +644,7 @@ impl postgres_backend::Handler for PageServerHandler {
            let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) ([[:xdigit:]]+) (.*)$").unwrap();
            let caps = re
                .captures(query_string)
-                .ok_or_else(|| anyhow!("invalid callmemaybe: '{}'", query_string))?;
+                .with_context(|| format!("invalid callmemaybe: '{}'", query_string))?;

            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
            let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
@@ -643,18 +663,18 @@ impl postgres_backend::Handler for PageServerHandler {

            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.starts_with("branch_create ") {
-            let err = || anyhow!("invalid branch_create: '{}'", query_string);
+            let err = || format!("invalid branch_create: '{}'", query_string);

            // branch_create <tenantid> <branchname> <startpoint>
            // TODO lazy static
            // TODO: escaping, to allow branch names with spaces
            let re = Regex::new(r"^branch_create ([[:xdigit:]]+) (\S+) ([^\r\n\s;]+)[\r\n\s;]*;?$")
                .unwrap();
-            let caps = re.captures(query_string).ok_or_else(err)?;
+            let caps = re.captures(query_string).with_context(err)?;

            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
-            let branchname = caps.get(2).ok_or_else(err)?.as_str().to_owned();
-            let startpoint_str = caps.get(3).ok_or_else(err)?.as_str().to_owned();
+            let branchname = caps.get(2).with_context(err)?.as_str().to_owned();
+            let startpoint_str = caps.get(3).with_context(err)?.as_str().to_owned();

            self.check_permission(Some(tenantid))?;

@@ -673,7 +693,7 @@ impl postgres_backend::Handler for PageServerHandler {
            let re = Regex::new(r"^branch_list ([[:xdigit:]]+)$").unwrap();
            let caps = re
                .captures(query_string)
-                .ok_or_else(|| anyhow!("invalid branch_list: '{}'", query_string))?;
+                .with_context(|| format!("invalid branch_list: '{}'", query_string))?;

            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;

@@ -693,11 +713,11 @@ impl postgres_backend::Handler for PageServerHandler {
                .write_message_noflush(&BeMessage::DataRow(&[Some(&tenants_buf)]))?
                .write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.starts_with("tenant_create") {
-            let err = || anyhow!("invalid tenant_create: '{}'", query_string);
+            let err = || format!("invalid tenant_create: '{}'", query_string);

            // tenant_create <tenantid>
            let re = Regex::new(r"^tenant_create ([[:xdigit:]]+)$").unwrap();
-            let caps = re.captures(query_string).ok_or_else(err)?;
+            let caps = re.captures(query_string).with_context(err)?;

            self.check_permission(None)?;

@@ -728,7 +748,7 @@ impl postgres_backend::Handler for PageServerHandler {

            let caps = re
                .captures(query_string)
-                .ok_or_else(|| anyhow!("invalid do_gc: '{}'", query_string))?;
+                .with_context(|| format!("invalid do_gc: '{}'", query_string))?;

            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
            let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
@@ -812,7 +832,7 @@ impl postgres_backend::Handler for PageServerHandler {

            let caps = re
                .captures(query_string)
-                .ok_or_else(|| anyhow!("invalid checkpoint command: '{}'", query_string))?;
+                .with_context(|| format!("invalid checkpoint command: '{}'", query_string))?;

            let tenantid = ZTenantId::from_str(caps.get(1).unwrap().as_str())?;
            let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
--- a/pageserver/src/remote_storage.rs
+++ b/pageserver/src/remote_storage.rs
@@ -94,7 +94,7 @@ use std::{
 use anyhow::{bail, Context};
 use tokio::io;
 use tracing::{error, info};
-use zenith_utils::zid::{ZTenantId, ZTimelineId};
+use zenith_utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

 pub use self::storage_sync::{schedule_timeline_checkpoint_upload, schedule_timeline_download};
 use self::{local_fs::LocalFs, rust_s3::S3};
@@ -104,16 +104,7 @@ use crate::{
    repository::TimelineSyncState,
 };

-/// Any timeline has its own id and its own tenant it belongs to,
-/// the sync processes group timelines by both for simplicity.
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
-pub struct TimelineSyncId(ZTenantId, ZTimelineId);
-
-impl std::fmt::Display for TimelineSyncId {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "(tenant: {}, timeline: {})", self.0, self.1)
-    }
-}
+pub use storage_sync::compression;

 /// A structure to combine all synchronization data to share with pageserver after a successful sync loop initialization.
 /// Successful initialization includes a case when sync loop is not started, in which case the startup data is returned still,
@@ -138,20 +129,27 @@ pub fn start_local_timeline_sync(

    match &config.remote_storage_config {
        Some(storage_config) => match &storage_config.storage {
-            RemoteStorageKind::LocalFs(root) => storage_sync::spawn_storage_sync_thread(
-                config,
-                local_timeline_files,
-                LocalFs::new(root.clone(), &config.workdir)?,
-                storage_config.max_concurrent_sync,
-                storage_config.max_sync_errors,
-            ),
-            RemoteStorageKind::AwsS3(s3_config) => storage_sync::spawn_storage_sync_thread(
-                config,
-                local_timeline_files,
-                S3::new(s3_config, &config.workdir)?,
-                storage_config.max_concurrent_sync,
-                storage_config.max_sync_errors,
-            ),
+            RemoteStorageKind::LocalFs(root) => {
+                info!("Using fs root '{}' as a remote storage", root.display());
+                storage_sync::spawn_storage_sync_thread(
+                    config,
+                    local_timeline_files,
+                    LocalFs::new(root.clone(), &config.workdir)?,
+                    storage_config.max_concurrent_sync,
+                    storage_config.max_sync_errors,
+                )
+            },
+            RemoteStorageKind::AwsS3(s3_config) => {
+                info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
+                    s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
+                storage_sync::spawn_storage_sync_thread(
+                    config,
+                    local_timeline_files,
+                    S3::new(s3_config, &config.workdir)?,
+                    storage_config.max_concurrent_sync,
+                    storage_config.max_sync_errors,
+                )
+            },
        }
        .context("Failed to spawn the storage sync thread"),
        None => {
@@ -160,7 +158,7 @@ pub fn start_local_timeline_sync(
                ZTenantId,
                HashMap<ZTimelineId, TimelineSyncState>,
            > = HashMap::new();
-            for (TimelineSyncId(tenant_id, timeline_id), (timeline_metadata, _)) in
+            for (ZTenantTimelineId{tenant_id, timeline_id}, (timeline_metadata, _)) in
                local_timeline_files
            {
                initial_timeline_states
@@ -180,7 +178,7 @@ pub fn start_local_timeline_sync(

 fn local_tenant_timeline_files(
    config: &'static PageServerConf,
-) -> anyhow::Result<HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>> {
+) -> anyhow::Result<HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>> {
    let mut local_tenant_timeline_files = HashMap::new();
    let tenants_dir = config.tenants_path();
    for tenants_dir_entry in fs::read_dir(&tenants_dir)
@@ -215,8 +213,9 @@ fn local_tenant_timeline_files(
 fn collect_timelines_for_tenant(
    config: &'static PageServerConf,
    tenant_path: &Path,
-) -> anyhow::Result<HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>> {
-    let mut timelines: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)> = HashMap::new();
+) -> anyhow::Result<HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>> {
+    let mut timelines: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)> =
+        HashMap::new();
    let tenant_id = tenant_path
        .file_name()
        .and_then(ffi::OsStr::to_str)
@@ -237,7 +236,10 @@ fn collect_timelines_for_tenant(
                match collect_timeline_files(&timeline_path) {
                    Ok((timeline_id, metadata, timeline_files)) => {
                        timelines.insert(
-                            TimelineSyncId(tenant_id, timeline_id),
+                            ZTenantTimelineId {
+                                tenant_id,
+                                timeline_id,
+                            },
                            (metadata, timeline_files),
                        );
                    }
--- a/pageserver/src/remote_storage/README.md
+++ b/pageserver/src/remote_storage/README.md
@@ -70,8 +70,3 @@ on the timeline download, missing remote branch files are downlaoded.

 A branch is a per-tenant entity, yet a current implementaion requires synchronizing a timeline first to get the branch files locally.
 Currently, there's no other way to know about the remote branch files, neither the file contents is verified and updated.
-
-* no IT tests
-
-Automated S3 testing is lacking currently, due to no convenient way to enable backups during the tests.
-After it's fixed, benchmark runs should also be carried out to find bottlenecks.
--- a/pageserver/src/remote_storage/local_fs.rs
+++ b/pageserver/src/remote_storage/local_fs.rs
@@ -73,7 +73,7 @@ impl RemoteStorage for LocalFs {
    }

    async fn list(&self) -> anyhow::Result<Vec<Self::StoragePath>> {
-        Ok(get_all_files(&self.root).await?.into_iter().collect())
+        get_all_files(&self.root).await
    }

    async fn upload(
--- a/pageserver/src/remote_storage/rust_s3.rs
+++ b/pageserver/src/remote_storage/rust_s3.rs
@@ -9,6 +9,7 @@ use std::path::{Path, PathBuf};
 use anyhow::Context;
 use s3::{bucket::Bucket, creds::Credentials, region::Region};
 use tokio::io::{self, AsyncWriteExt};
+use tracing::debug;

 use crate::{
    config::S3Config,
@@ -58,10 +59,21 @@ pub struct S3 {
 impl S3 {
    /// Creates the storage, errors if incorrect AWS S3 configuration provided.
    pub fn new(aws_config: &S3Config, pageserver_workdir: &'static Path) -> anyhow::Result<Self> {
-        let region = aws_config
-            .bucket_region
-            .parse::<Region>()
-            .context("Failed to parse the s3 region from config")?;
+        debug!(
+            "Creating s3 remote storage around bucket {}",
+            aws_config.bucket_name
+        );
+        let region = match aws_config.endpoint.clone() {
+            Some(endpoint) => Region::Custom {
+                endpoint,
+                region: aws_config.bucket_region.clone(),
+            },
+            None => aws_config
+                .bucket_region
+                .parse::<Region>()
+                .context("Failed to parse the s3 region from config")?,
+        };
+
        let credentials = Credentials::new(
            aws_config.access_key_id.as_deref(),
            aws_config.secret_access_key.as_deref(),
--- a/pageserver/src/remote_storage/storage_sync.rs
+++ b/pageserver/src/remote_storage/storage_sync.rs
@@ -70,7 +70,8 @@
 //!
 //! When pageserver signals shutdown, current sync task gets finished and the loop exists.

-mod compression;
+/// Expose the module for a binary CLI tool that deals with the corresponding blobs.
+pub mod compression;
 mod download;
 pub mod index;
 mod upload;
@@ -105,7 +106,7 @@ use self::{
    },
    upload::upload_timeline_checkpoint,
 };
-use super::{RemoteStorage, SyncStartupData, TimelineSyncId};
+use super::{RemoteStorage, SyncStartupData, ZTenantTimelineId};
 use crate::{
    config::PageServerConf, layered_repository::metadata::TimelineMetadata,
    remote_storage::storage_sync::compression::read_archive_header, repository::TimelineSyncState,
@@ -242,13 +243,13 @@ mod sync_queue {
 /// Limited by the number of retries, after certain threshold the failing task gets evicted and the timeline disabled.
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
 pub struct SyncTask {
-    sync_id: TimelineSyncId,
+    sync_id: ZTenantTimelineId,
    retries: u32,
    kind: SyncKind,
 }

 impl SyncTask {
-    fn new(sync_id: TimelineSyncId, retries: u32, kind: SyncKind) -> Self {
+    fn new(sync_id: ZTenantTimelineId, retries: u32, kind: SyncKind) -> Self {
        Self {
            sync_id,
            retries,
@@ -307,7 +308,10 @@ pub fn schedule_timeline_checkpoint_upload(
    }

    if !sync_queue::push(SyncTask::new(
-        TimelineSyncId(tenant_id, timeline_id),
+        ZTenantTimelineId {
+            tenant_id,
+            timeline_id,
+        },
        0,
        SyncKind::Upload(NewCheckpoint { layers, metadata }),
    )) {
@@ -333,8 +337,15 @@ pub fn schedule_timeline_checkpoint_upload(
 ///
 /// Ensure that the loop is started otherwise the task is never processed.
 pub fn schedule_timeline_download(tenant_id: ZTenantId, timeline_id: ZTimelineId) {
+    debug!(
+        "Scheduling timeline download for tenant {}, timeline {}",
+        tenant_id, timeline_id
+    );
    sync_queue::push(SyncTask::new(
-        TimelineSyncId(tenant_id, timeline_id),
+        ZTenantTimelineId {
+            tenant_id,
+            timeline_id,
+        },
        0,
        SyncKind::Download(TimelineDownload {
            files_to_skip: Arc::new(BTreeSet::new()),
@@ -350,7 +361,7 @@ pub(super) fn spawn_storage_sync_thread<
    S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
 >(
    conf: &'static PageServerConf,
-    local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
+    local_timeline_files: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>,
    storage: S,
    max_concurrent_sync: NonZeroUsize,
    max_sync_errors: NonZeroU32,
@@ -506,7 +517,7 @@ async fn loop_step<
                Err(e) => {
                    error!(
                        "Failed to process storage sync task for tenant {}, timeline {}: {:?}",
-                        sync_id.0, sync_id.1, e
+                        sync_id.tenant_id, sync_id.timeline_id, e
                    );
                    None
                }
@@ -520,7 +531,10 @@ async fn loop_step<
    while let Some((sync_id, state_update)) = task_batch.next().await {
        debug!("Finished storage sync task for sync id {}", sync_id);
        if let Some(state_update) = state_update {
-            let TimelineSyncId(tenant_id, timeline_id) = sync_id;
+            let ZTenantTimelineId {
+                tenant_id,
+                timeline_id,
+            } = sync_id;
            new_timeline_states
                .entry(tenant_id)
                .or_default()
@@ -614,7 +628,7 @@ async fn process_task<

 fn schedule_first_sync_tasks(
    index: &RemoteTimelineIndex,
-    local_timeline_files: HashMap<TimelineSyncId, (TimelineMetadata, Vec<PathBuf>)>,
+    local_timeline_files: HashMap<ZTenantTimelineId, (TimelineMetadata, Vec<PathBuf>)>,
 ) -> HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>> {
    let mut initial_timeline_statuses: HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncState>> =
        HashMap::new();
@@ -625,7 +639,10 @@ fn schedule_first_sync_tasks(
    for (sync_id, (local_metadata, local_files)) in local_timeline_files {
        let local_disk_consistent_lsn = local_metadata.disk_consistent_lsn();

-        let TimelineSyncId(tenant_id, timeline_id) = sync_id;
+        let ZTenantTimelineId {
+            tenant_id,
+            timeline_id,
+        } = sync_id;
        match index.timeline_entry(&sync_id) {
            Some(index_entry) => {
                let timeline_status = compare_local_and_remote_timeline(
@@ -668,10 +685,10 @@ fn schedule_first_sync_tasks(
        }
    }

-    let unprocessed_remote_ids = |remote_id: &TimelineSyncId| {
+    let unprocessed_remote_ids = |remote_id: &ZTenantTimelineId| {
        initial_timeline_statuses
-            .get(&remote_id.0)
-            .and_then(|timelines| timelines.get(&remote_id.1))
+            .get(&remote_id.tenant_id)
+            .and_then(|timelines| timelines.get(&remote_id.timeline_id))
            .is_none()
    };
    for unprocessed_remote_id in index
@@ -679,7 +696,10 @@ fn schedule_first_sync_tasks(
        .filter(unprocessed_remote_ids)
        .collect::<Vec<_>>()
    {
-        let TimelineSyncId(cloud_only_tenant_id, cloud_only_timeline_id) = unprocessed_remote_id;
+        let ZTenantTimelineId {
+            tenant_id: cloud_only_tenant_id,
+            timeline_id: cloud_only_timeline_id,
+        } = unprocessed_remote_id;
        match index
            .timeline_entry(&unprocessed_remote_id)
            .and_then(TimelineIndexEntry::disk_consistent_lsn)
@@ -708,7 +728,7 @@ fn schedule_first_sync_tasks(

 fn compare_local_and_remote_timeline(
    new_sync_tasks: &mut VecDeque<SyncTask>,
-    sync_id: TimelineSyncId,
+    sync_id: ZTenantTimelineId,
    local_metadata: TimelineMetadata,
    local_files: Vec<PathBuf>,
    remote_entry: &TimelineIndexEntry,
@@ -765,7 +785,7 @@ async fn update_index_description<
 >(
    (storage, index): &(S, RwLock<RemoteTimelineIndex>),
    timeline_dir: &Path,
-    id: TimelineSyncId,
+    id: ZTenantTimelineId,
 ) -> anyhow::Result<RemoteTimeline> {
    let mut index_write = index.write().await;
    let full_index = match index_write.timeline_entry(&id) {
@@ -788,7 +808,7 @@ async fn update_index_description<
                        Ok((archive_id, header_size, header)) => full_index.update_archive_contents(archive_id.0, header, header_size),
                        Err((e, archive_id)) => bail!(
                            "Failed to download archive header for tenant {}, timeline {}, archive for Lsn {}: {}",
-                            id.0, id.1, archive_id.0,
+                            id.tenant_id, id.timeline_id, archive_id.0,
                            e
                        ),
                    }
@@ -866,7 +886,7 @@ mod test_utils {
        timeline_id: ZTimelineId,
        new_upload: NewCheckpoint,
    ) {
-        let sync_id = TimelineSyncId(harness.tenant_id, timeline_id);
+        let sync_id = ZTenantTimelineId::new(harness.tenant_id, timeline_id);
        upload_timeline_checkpoint(
            harness.conf,
            Arc::clone(&remote_assets),
@@ -922,7 +942,7 @@ mod test_utils {

    pub async fn expect_timeline(
        index: &RwLock<RemoteTimelineIndex>,
-        sync_id: TimelineSyncId,
+        sync_id: ZTenantTimelineId,
    ) -> RemoteTimeline {
        if let Some(TimelineIndexEntry::Full(remote_timeline)) =
            index.read().await.timeline_entry(&sync_id)
@@ -957,18 +977,18 @@ mod test_utils {
        let mut expected_timeline_entries = BTreeMap::new();
        for sync_id in actual_sync_ids {
            actual_branches.insert(
-                sync_id.1,
+                sync_id.tenant_id,
                index_read
-                    .branch_files(sync_id.0)
+                    .branch_files(sync_id.tenant_id)
                    .into_iter()
                    .flat_map(|branch_paths| branch_paths.iter())
                    .cloned()
                    .collect::<BTreeSet<_>>(),
            );
            expected_branches.insert(
-                sync_id.1,
+                sync_id.tenant_id,
                expected_index_with_descriptions
-                    .branch_files(sync_id.0)
+                    .branch_files(sync_id.tenant_id)
                    .into_iter()
                    .flat_map(|branch_paths| branch_paths.iter())
                    .cloned()
--- a/pageserver/src/remote_storage/storage_sync/compression.rs
+++ b/pageserver/src/remote_storage/storage_sync/compression.rs
@@ -34,7 +34,7 @@ use std::{
    sync::Arc,
 };

-use anyhow::{anyhow, bail, ensure, Context};
+use anyhow::{bail, ensure, Context};
 use async_compression::tokio::bufread::{ZstdDecoder, ZstdEncoder};
 use serde::{Deserialize, Serialize};
 use tokio::{
@@ -211,16 +211,18 @@ pub async fn read_archive_header<A: io::AsyncRead + Send + Sync + Unpin>(
 pub fn parse_archive_name(archive_path: &Path) -> anyhow::Result<(Lsn, u64)> {
    let archive_name = archive_path
        .file_name()
-        .ok_or_else(|| anyhow!("Archive '{}' has no file name", archive_path.display()))?
+        .with_context(|| format!("Archive '{}' has no file name", archive_path.display()))?
        .to_string_lossy();
    let (lsn_str, header_size_str) =
-        archive_name.rsplit_once(ARCHIVE_EXTENSION).ok_or_else(|| {
-            anyhow!(
-                "Archive '{}' has incorrect extension, expected to contain '{}'",
-                archive_path.display(),
-                ARCHIVE_EXTENSION
-            )
-        })?;
+        archive_name
+            .rsplit_once(ARCHIVE_EXTENSION)
+            .with_context(|| {
+                format!(
+                    "Archive '{}' has incorrect extension, expected to contain '{}'",
+                    archive_path.display(),
+                    ARCHIVE_EXTENSION
+                )
+            })?;
    let disk_consistent_lsn = Lsn::from_hex(lsn_str).with_context(|| {
        format!(
            "Archive '{}' has an invalid disk consistent lsn in its extension",
@@ -246,7 +248,7 @@ fn archive_name(disk_consistent_lsn: Lsn, header_size: u64) -> String {
    archive_name
 }

-async fn uncompress_with_header(
+pub async fn uncompress_with_header(
    files_to_skip: &BTreeSet<PathBuf>,
    destination_dir: &Path,
    header: ArchiveHeader,
@@ -374,7 +376,7 @@ async fn write_archive_contents(
    }
    let metadata_bytes_written = io::copy(&mut metadata_bytes.as_slice(), &mut archive_input)
        .await
-        .with_context(|| "Failed to add metadata into the archive")?;
+        .context("Failed to add metadata into the archive")?;
    ensure!(
        header.metadata_file_size == metadata_bytes_written,
        "Metadata file was written to the archive incompletely",
--- a/pageserver/src/remote_storage/storage_sync/download.rs
+++ b/pageserver/src/remote_storage/storage_sync/download.rs
@@ -3,7 +3,7 @@

 use std::{borrow::Cow, collections::BTreeSet, path::PathBuf, sync::Arc};

-use anyhow::{anyhow, ensure, Context};
+use anyhow::{ensure, Context};
 use futures::{stream::FuturesUnordered, StreamExt};
 use tokio::{fs, sync::RwLock};
 use tracing::{debug, error, trace, warn};
@@ -17,7 +17,7 @@ use crate::{
            compression, index::TimelineIndexEntry, sync_queue, tenant_branch_files,
            update_index_description, SyncKind, SyncTask,
        },
-        RemoteStorage, TimelineSyncId,
+        RemoteStorage, ZTenantTimelineId,
    },
 };

@@ -52,13 +52,16 @@ pub(super) async fn download_timeline<
 >(
    conf: &'static PageServerConf,
    remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
-    sync_id: TimelineSyncId,
+    sync_id: ZTenantTimelineId,
    mut download: TimelineDownload,
    retries: u32,
 ) -> DownloadedTimeline {
    debug!("Downloading layers for sync id {}", sync_id);

-    let TimelineSyncId(tenant_id, timeline_id) = sync_id;
+    let ZTenantTimelineId {
+        tenant_id,
+        timeline_id,
+    } = sync_id;
    let index_read = remote_assets.1.read().await;
    let remote_timeline = match index_read.timeline_entry(&sync_id) {
        None => {
@@ -110,7 +113,8 @@ pub(super) async fn download_timeline<
        }
    };

-    if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.0).await {
+    if let Err(e) = download_missing_branches(conf, remote_assets.as_ref(), sync_id.tenant_id).await
+    {
        error!(
            "Failed to download missing branches for sync id {}: {:?}",
            sync_id, e
@@ -180,7 +184,10 @@ async fn try_download_archive<
    S: RemoteStorage<StoragePath = P> + Send + Sync + 'static,
 >(
    conf: &'static PageServerConf,
-    TimelineSyncId(tenant_id, timeline_id): TimelineSyncId,
+    ZTenantTimelineId {
+        tenant_id,
+        timeline_id,
+    }: ZTenantTimelineId,
    remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
    remote_timeline: &RemoteTimeline,
    archive_id: ArchiveId,
@@ -189,7 +196,7 @@ async fn try_download_archive<
    debug!("Downloading archive {:?}", archive_id);
    let archive_to_download = remote_timeline
        .archive_data(archive_id)
-        .ok_or_else(|| anyhow!("Archive {:?} not found in remote storage", archive_id))?;
+        .with_context(|| format!("Archive {:?} not found in remote storage", archive_id))?;
    let (archive_header, header_size) = remote_timeline
        .restore_header(archive_id)
        .context("Failed to restore header when downloading an archive")?;
@@ -343,7 +350,7 @@ mod tests {
    #[tokio::test]
    async fn test_download_timeline() -> anyhow::Result<()> {
        let repo_harness = RepoHarness::create("test_download_timeline")?;
-        let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
+        let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
        let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
        let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
            repo_harness.conf,
--- a/pageserver/src/remote_storage/storage_sync/index.rs
+++ b/pageserver/src/remote_storage/storage_sync/index.rs
@@ -9,7 +9,7 @@ use std::{
    path::{Path, PathBuf},
 };

-use anyhow::{anyhow, bail, ensure, Context};
+use anyhow::{bail, ensure, Context};
 use serde::{Deserialize, Serialize};
 use tracing::debug;
 use zenith_utils::{
@@ -22,7 +22,7 @@ use crate::{
    layered_repository::TIMELINES_SEGMENT_NAME,
    remote_storage::{
        storage_sync::compression::{parse_archive_name, FileEntry},
-        TimelineSyncId,
+        ZTenantTimelineId,
    },
 };

@@ -53,7 +53,7 @@ impl RelativePath {
 #[derive(Debug, Clone)]
 pub struct RemoteTimelineIndex {
    branch_files: HashMap<ZTenantId, HashSet<RelativePath>>,
-    timeline_files: HashMap<TimelineSyncId, TimelineIndexEntry>,
+    timeline_files: HashMap<ZTenantTimelineId, TimelineIndexEntry>,
 }

 impl RemoteTimelineIndex {
@@ -80,19 +80,22 @@ impl RemoteTimelineIndex {
        index
    }

-    pub fn timeline_entry(&self, id: &TimelineSyncId) -> Option<&TimelineIndexEntry> {
+    pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&TimelineIndexEntry> {
        self.timeline_files.get(id)
    }

-    pub fn timeline_entry_mut(&mut self, id: &TimelineSyncId) -> Option<&mut TimelineIndexEntry> {
+    pub fn timeline_entry_mut(
+        &mut self,
+        id: &ZTenantTimelineId,
+    ) -> Option<&mut TimelineIndexEntry> {
        self.timeline_files.get_mut(id)
    }

-    pub fn add_timeline_entry(&mut self, id: TimelineSyncId, entry: TimelineIndexEntry) {
+    pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: TimelineIndexEntry) {
        self.timeline_files.insert(id, entry);
    }

-    pub fn all_sync_ids(&self) -> impl Iterator<Item = TimelineSyncId> + '_ {
+    pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
        self.timeline_files.keys().copied()
    }

@@ -214,7 +217,7 @@ impl RemoteTimeline {
        let archive = self
            .checkpoint_archives
            .get(&archive_id)
-            .ok_or_else(|| anyhow!("Archive {:?} not found", archive_id))?;
+            .with_context(|| format!("Archive {:?} not found", archive_id))?;

        let mut header_files = Vec::with_capacity(archive.files.len());
        for (expected_archive_position, archive_file) in archive.files.iter().enumerate() {
@@ -226,11 +229,10 @@ impl RemoteTimeline {
                archive_id,
            );

-            let timeline_file = self.timeline_files.get(archive_file).ok_or_else(|| {
-                anyhow!(
+            let timeline_file = self.timeline_files.get(archive_file).with_context(|| {
+                format!(
                    "File with id {:?} not found for archive {:?}",
-                    archive_file,
-                    archive_id
+                    archive_file, archive_id
                )
            })?;
            header_files.push(timeline_file.clone());
@@ -299,7 +301,7 @@ fn try_parse_index_entry(
        })?
        .iter()
        .next()
-        .ok_or_else(|| anyhow!("Found no tenant id in path '{}'", path.display()))?
+        .with_context(|| format!("Found no tenant id in path '{}'", path.display()))?
        .to_string_lossy()
        .parse::<ZTenantId>()
        .with_context(|| format!("Failed to parse tenant id from path '{}'", path.display()))?;
@@ -321,8 +323,8 @@ fn try_parse_index_entry(
            let mut segments = timelines_subpath.iter();
            let timeline_id = segments
                .next()
-                .ok_or_else(|| {
-                    anyhow!(
+                .with_context(|| {
+                    format!(
                        "{} directory of tenant {} (path '{}') is not an index entry",
                        TIMELINES_SEGMENT_NAME,
                        tenant_id,
@@ -345,11 +347,14 @@ fn try_parse_index_entry(

            let archive_name = path
                .file_name()
-                .ok_or_else(|| anyhow!("Archive '{}' has no file name", path.display()))?
+                .with_context(|| format!("Archive '{}' has no file name", path.display()))?
                .to_string_lossy()
                .to_string();

-            let sync_id = TimelineSyncId(tenant_id, timeline_id);
+            let sync_id = ZTenantTimelineId {
+                tenant_id,
+                timeline_id,
+            };
            let timeline_index_entry = index
                .timeline_files
                .entry(sync_id)
--- a/pageserver/src/remote_storage/storage_sync/upload.rs
+++ b/pageserver/src/remote_storage/storage_sync/upload.rs
@@ -17,7 +17,7 @@ use crate::{
            index::{RemoteTimeline, TimelineIndexEntry},
            sync_queue, tenant_branch_files, update_index_description, SyncKind, SyncTask,
        },
-        RemoteStorage, TimelineSyncId,
+        RemoteStorage, ZTenantTimelineId,
    },
 };

@@ -36,12 +36,13 @@ pub(super) async fn upload_timeline_checkpoint<
 >(
    config: &'static PageServerConf,
    remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
-    sync_id: TimelineSyncId,
+    sync_id: ZTenantTimelineId,
    new_checkpoint: NewCheckpoint,
    retries: u32,
 ) -> Option<bool> {
    debug!("Uploading checkpoint for sync id {}", sync_id);
-    if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.0).await {
+    if let Err(e) = upload_missing_branches(config, remote_assets.as_ref(), sync_id.tenant_id).await
+    {
        error!(
            "Failed to upload missing branches for sync id {}: {:?}",
            sync_id, e
@@ -57,7 +58,10 @@ pub(super) async fn upload_timeline_checkpoint<

    let index = &remote_assets.1;

-    let TimelineSyncId(tenant_id, timeline_id) = sync_id;
+    let ZTenantTimelineId {
+        tenant_id,
+        timeline_id,
+    } = sync_id;
    let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);

    let index_read = index.read().await;
@@ -151,11 +155,14 @@ async fn try_upload_checkpoint<
 >(
    config: &'static PageServerConf,
    remote_assets: Arc<(S, RwLock<RemoteTimelineIndex>)>,
-    sync_id: TimelineSyncId,
+    sync_id: ZTenantTimelineId,
    new_checkpoint: &NewCheckpoint,
    files_to_skip: BTreeSet<PathBuf>,
 ) -> anyhow::Result<(ArchiveHeader, u64)> {
-    let TimelineSyncId(tenant_id, timeline_id) = sync_id;
+    let ZTenantTimelineId {
+        tenant_id,
+        timeline_id,
+    } = sync_id;
    let timeline_dir = config.timeline_path(&timeline_id, &tenant_id);

    let files_to_upload = new_checkpoint
@@ -288,7 +295,7 @@ mod tests {
    #[tokio::test]
    async fn reupload_timeline() -> anyhow::Result<()> {
        let repo_harness = RepoHarness::create("reupload_timeline")?;
-        let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
+        let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
        let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
        let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
            repo_harness.conf,
@@ -484,7 +491,7 @@ mod tests {
    #[tokio::test]
    async fn reupload_timeline_rejected() -> anyhow::Result<()> {
        let repo_harness = RepoHarness::create("reupload_timeline_rejected")?;
-        let sync_id = TimelineSyncId(repo_harness.tenant_id, TIMELINE_ID);
+        let sync_id = ZTenantTimelineId::new(repo_harness.tenant_id, TIMELINE_ID);
        let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
        let index = RwLock::new(RemoteTimelineIndex::try_parse_descriptions_from_paths(
            repo_harness.conf,
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -7,7 +7,7 @@ use postgres_ffi::{MultiXactId, MultiXactOffset, TransactionId};
 use serde::{Deserialize, Serialize};
 use std::collections::HashSet;
 use std::ops::{AddAssign, Deref};
-use std::sync::Arc;
+use std::sync::{Arc, RwLockReadGuard};
 use std::time::Duration;
 use zenith_utils::lsn::{Lsn, RecordLsn};
 use zenith_utils::zid::ZTimelineId;
@@ -19,6 +19,8 @@ pub type BlockNumber = u32;
 /// A repository corresponds to one .zenith directory. One repository holds multiple
 /// timelines, forked off from the same initial call to 'initdb'.
 pub trait Repository: Send + Sync {
+    fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
+
    /// Updates timeline based on the new sync state, received from the remote storage synchronization.
    /// See [`crate::remote_storage`] for more details about the synchronization.
    fn set_timeline_state(
@@ -182,6 +184,9 @@ pub trait Timeline: Send + Sync {
    ///
    fn wait_lsn(&self, lsn: Lsn) -> Result<()>;

+    /// Lock and get timeline's GC cutoff
+    fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard<Lsn>;
+
    /// Look up given page version.
    fn get_page_at_lsn(&self, tag: RelishTag, blknum: BlockNumber, lsn: Lsn) -> Result<Bytes>;

@@ -215,10 +220,12 @@ pub trait Timeline: Send + Sync {

    /// Atomically get both last and prev.
    fn get_last_record_rlsn(&self) -> RecordLsn;
+
    /// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
    fn get_last_record_lsn(&self) -> Lsn;
+
    fn get_prev_record_lsn(&self) -> Lsn;
-    fn get_start_lsn(&self) -> Lsn;
+
    fn get_disk_consistent_lsn(&self) -> Lsn;

    /// Mutate the timeline with a [`TimelineWriter`].
@@ -233,7 +240,11 @@ pub trait Timeline: Send + Sync {

    ///
    /// Check that it is valid to request operations with that lsn.
-    fn check_lsn_is_in_scope(&self, lsn: Lsn) -> Result<()>;
+    fn check_lsn_is_in_scope(
+        &self,
+        lsn: Lsn,
+        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
+    ) -> Result<()>;

    /// Retrieve current logical size of the timeline
    ///
@@ -242,7 +253,7 @@ pub trait Timeline: Send + Sync {
    fn get_current_logical_size(&self) -> usize;

    /// Does the same as get_current_logical_size but counted on demand.
-    /// Used in tests to ensure thet incremental and non incremental variants match.
+    /// Used in tests to ensure that incremental and non incremental variants match.
    fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize>;

    /// An escape hatch to allow "casting" a generic Timeline to LayeredTimeline.
@@ -295,8 +306,12 @@ pub enum ZenithWalRecord {
    /// Native PostgreSQL WAL record
    Postgres { will_init: bool, rec: Bytes },

-    /// Set bits in heap visibility map. (heap blkno, flag bits to clear)
-    ClearVisibilityMapFlags { heap_blkno: u32, flags: u8 },
+    /// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)
+    ClearVisibilityMapFlags {
+        new_heap_blkno: Option<u32>,
+        old_heap_blkno: Option<u32>,
+        flags: u8,
+    },
    /// Mark transaction IDs as committed on a CLOG page
    ClogSetCommitted { xids: Vec<TransactionId> },
    /// Mark transaction IDs as aborted on a CLOG page
@@ -985,7 +1000,7 @@ mod tests {
                    .source()
                    .unwrap()
                    .to_string()
-                    .contains("is earlier than initdb lsn"));
+                    .contains("is earlier than latest GC horizon"));
            }
        }

@@ -1002,12 +1017,11 @@ mod tests {
        make_some_layers(&tline, Lsn(0x20))?;

        repo.gc_iteration(Some(TIMELINE_ID), 0x10, false)?;
-
+        let latest_gc_cutoff_lsn = tline.get_latest_gc_cutoff_lsn();
+        assert!(*latest_gc_cutoff_lsn > Lsn(0x25));
        match tline.get_page_at_lsn(TESTREL_A, 0, Lsn(0x25)) {
            Ok(_) => panic!("request for page should have failed"),
-            Err(err) => assert!(err
-                .to_string()
-                .contains("tried to request a page version that was garbage collected")),
+            Err(err) => assert!(err.to_string().contains("not found at")),
        }
        Ok(())
    }
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -9,7 +9,7 @@ use crate::thread_mgr;
 use crate::thread_mgr::ThreadKind;
 use crate::walredo::PostgresRedoManager;
 use crate::CheckpointConfig;
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{bail, Context, Result};
 use lazy_static::lazy_static;
 use log::*;
 use serde::{Deserialize, Serialize};
@@ -73,6 +73,7 @@ pub fn set_timeline_states(
    let mut m = access_tenants();
    for (tenant_id, timeline_states) in timeline_states {
        let tenant = m.entry(tenant_id).or_insert_with(|| {
+            // TODO (rodionov) reuse one of the initialisation routines
            // Set up a WAL redo manager, for applying WAL records.
            let walredo_mgr = PostgresRedoManager::new(conf, tenant_id);

@@ -208,7 +209,7 @@ pub fn activate_tenant(conf: &'static PageServerConf, tenantid: ZTenantId) -> Re
    let mut m = access_tenants();
    let tenant = m
        .get_mut(&tenantid)
-        .ok_or_else(|| anyhow!("Tenant not found for id {}", tenantid))?;
+        .with_context(|| format!("Tenant not found for id {}", tenantid))?;

    info!("activating tenant {}", tenantid);

@@ -251,7 +252,7 @@ pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result<Arc<dyn Reposito
    let m = access_tenants();
    let tenant = m
        .get(&tenantid)
-        .ok_or_else(|| anyhow!("Tenant not found for tenant {}", tenantid))?;
+        .with_context(|| format!("Tenant not found for tenant {}", tenantid))?;

    Ok(Arc::clone(&tenant.repo))
 }
@@ -263,7 +264,7 @@ pub fn get_timeline_for_tenant(
    get_repository_for_tenant(tenantid)?
        .get_timeline(timelineid)?
        .local_timeline()
-        .ok_or_else(|| anyhow!("cannot fetch timeline {}", timelineid))
+        .with_context(|| format!("cannot fetch timeline {}", timelineid))
 }

 #[derive(Serialize, Deserialize, Clone)]
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -332,8 +332,11 @@ impl VirtualFile {
        // TODO: We could downgrade the locks to read mode before calling
        // 'func', to allow a little bit more concurrency, but the standard
        // library RwLock doesn't allow downgrading without releasing the lock,
-        // and that doesn't seem worth the trouble. (parking_lot RwLock would
-        // allow it)
+        // and that doesn't seem worth the trouble.
+        //
+        // XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
+        // may deadlock on subsequent read calls.
+        // Simply replacing all `RwLock` in project causes deadlocks, so use it sparingly.
        let result = STORAGE_IO_TIME
            .with_label_values(&[op, &self.tenantid, &self.timelineid])
            .observe_closure_duration(|| func(&file));
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -37,6 +37,7 @@ use postgres_ffi::xlog_utils::*;
 use postgres_ffi::TransactionId;
 use postgres_ffi::{pg_constants, CheckPoint};
 use zenith_utils::lsn::Lsn;
+use zenith_utils::pg_checksum_page::pg_checksum_page;

 static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);

@@ -329,6 +330,9 @@ impl WalIngest {
            }
            image[0..4].copy_from_slice(&((lsn.0 >> 32) as u32).to_le_bytes());
            image[4..8].copy_from_slice(&(lsn.0 as u32).to_le_bytes());
+            image[8..10].copy_from_slice(&[0u8; 2]);
+            let checksum = pg_checksum_page(&image, blk.blkno);
+            image[8..10].copy_from_slice(&checksum.to_le_bytes());
            assert_eq!(image.len(), pg_constants::BLCKSZ as usize);
            timeline.put_page_image(tag, blk.blkno, lsn, image.freeze())?;
        } else {
@@ -349,49 +353,25 @@ impl WalIngest {
        decoded: &mut DecodedWALRecord,
    ) -> Result<()> {
        // Handle VM bit updates that are implicitly part of heap records.
+
+        // First, look at the record to determine which VM bits need
+        // to be cleared. If either of these variables is set, we
+        // need to clear the corresponding bits in the visibility map.
+        let mut new_heap_blkno: Option<u32> = None;
+        let mut old_heap_blkno: Option<u32> = None;
        if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
            let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
            if info == pg_constants::XLOG_HEAP_INSERT {
                let xlrec = XlHeapInsert::decode(buf);
                assert_eq!(0, buf.remaining());
-                if (xlrec.flags
-                    & (pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED
-                        | pg_constants::XLH_INSERT_ALL_FROZEN_SET))
-                    != 0
-                {
-                    timeline.put_wal_record(
-                        lsn,
-                        RelishTag::Relation(RelTag {
-                            forknum: pg_constants::VISIBILITYMAP_FORKNUM,
-                            spcnode: decoded.blocks[0].rnode_spcnode,
-                            dbnode: decoded.blocks[0].rnode_dbnode,
-                            relnode: decoded.blocks[0].rnode_relnode,
-                        }),
-                        decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
-                        ZenithWalRecord::ClearVisibilityMapFlags {
-                            heap_blkno: decoded.blocks[0].blkno,
-                            flags: pg_constants::VISIBILITYMAP_VALID_BITS,
-                        },
-                    )?;
+                if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
+                    new_heap_blkno = Some(decoded.blocks[0].blkno);
                }
            } else if info == pg_constants::XLOG_HEAP_DELETE {
                let xlrec = XlHeapDelete::decode(buf);
                assert_eq!(0, buf.remaining());
                if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
-                    timeline.put_wal_record(
-                        lsn,
-                        RelishTag::Relation(RelTag {
-                            forknum: pg_constants::VISIBILITYMAP_FORKNUM,
-                            spcnode: decoded.blocks[0].rnode_spcnode,
-                            dbnode: decoded.blocks[0].rnode_dbnode,
-                            relnode: decoded.blocks[0].rnode_relnode,
-                        }),
-                        decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
-                        ZenithWalRecord::ClearVisibilityMapFlags {
-                            heap_blkno: decoded.blocks[0].blkno,
-                            flags: pg_constants::VISIBILITYMAP_VALID_BITS,
-                        },
-                    )?;
+                    new_heap_blkno = Some(decoded.blocks[0].blkno);
                }
            } else if info == pg_constants::XLOG_HEAP_UPDATE
                || info == pg_constants::XLOG_HEAP_HOT_UPDATE
@@ -400,39 +380,15 @@ impl WalIngest {
                // the size of tuple data is inferred from the size of the record.
                // we can't validate the remaining number of bytes without parsing
                // the tuple data.
-                if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
-                    timeline.put_wal_record(
-                        lsn,
-                        RelishTag::Relation(RelTag {
-                            forknum: pg_constants::VISIBILITYMAP_FORKNUM,
-                            spcnode: decoded.blocks[0].rnode_spcnode,
-                            dbnode: decoded.blocks[0].rnode_dbnode,
-                            relnode: decoded.blocks[0].rnode_relnode,
-                        }),
-                        decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
-                        ZenithWalRecord::ClearVisibilityMapFlags {
-                            heap_blkno: decoded.blocks[0].blkno,
-                            flags: pg_constants::VISIBILITYMAP_VALID_BITS,
-                        },
-                    )?;
+                if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
+                    old_heap_blkno = Some(decoded.blocks[0].blkno);
                }
-                if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0
-                    && decoded.blocks.len() > 1
-                {
-                    timeline.put_wal_record(
-                        lsn,
-                        RelishTag::Relation(RelTag {
-                            forknum: pg_constants::VISIBILITYMAP_FORKNUM,
-                            spcnode: decoded.blocks[1].rnode_spcnode,
-                            dbnode: decoded.blocks[1].rnode_dbnode,
-                            relnode: decoded.blocks[1].rnode_relnode,
-                        }),
-                        decoded.blocks[1].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
-                        ZenithWalRecord::ClearVisibilityMapFlags {
-                            heap_blkno: decoded.blocks[1].blkno,
-                            flags: pg_constants::VISIBILITYMAP_VALID_BITS,
-                        },
-                    )?;
+                if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
+                    // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
+                    // non-HOT update where the new tuple goes to different page than
+                    // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
+                    // set.
+                    new_heap_blkno = Some(decoded.blocks[1].blkno);
                }
            }
        } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
@@ -448,23 +404,60 @@ impl WalIngest {
                };
                assert_eq!(offset_array_len, buf.remaining());

-                // FIXME: why also ALL_FROZEN_SET?
-                if (xlrec.flags
-                    & (pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED
-                        | pg_constants::XLH_INSERT_ALL_FROZEN_SET))
-                    != 0
-                {
+                if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
+                    new_heap_blkno = Some(decoded.blocks[0].blkno);
+                }
+            }
+        }
+        // FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?
+
+        // Clear the VM bits if required.
+        if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
+            let vm_relish = RelishTag::Relation(RelTag {
+                forknum: pg_constants::VISIBILITYMAP_FORKNUM,
+                spcnode: decoded.blocks[0].rnode_spcnode,
+                dbnode: decoded.blocks[0].rnode_dbnode,
+                relnode: decoded.blocks[0].rnode_relnode,
+            });
+
+            let new_vm_blk = new_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
+            let old_vm_blk = old_heap_blkno.map(pg_constants::HEAPBLK_TO_MAPBLOCK);
+            if new_vm_blk == old_vm_blk {
+                // An UPDATE record that needs to clear the bits for both old and the
+                // new page, both of which reside on the same VM page.
+                timeline.put_wal_record(
+                    lsn,
+                    vm_relish,
+                    new_vm_blk.unwrap(),
+                    ZenithWalRecord::ClearVisibilityMapFlags {
+                        new_heap_blkno,
+                        old_heap_blkno,
+                        flags: pg_constants::VISIBILITYMAP_VALID_BITS,
+                    },
+                )?;
+            } else {
+                // Clear VM bits for one heap page, or for two pages that reside on
+                // different VM pages.
+                if let Some(new_vm_blk) = new_vm_blk {
                    timeline.put_wal_record(
                        lsn,
-                        RelishTag::Relation(RelTag {
-                            forknum: pg_constants::VISIBILITYMAP_FORKNUM,
-                            spcnode: decoded.blocks[0].rnode_spcnode,
-                            dbnode: decoded.blocks[0].rnode_dbnode,
-                            relnode: decoded.blocks[0].rnode_relnode,
-                        }),
-                        decoded.blocks[0].blkno / pg_constants::HEAPBLOCKS_PER_PAGE as u32,
+                        vm_relish,
+                        new_vm_blk,
                        ZenithWalRecord::ClearVisibilityMapFlags {
-                            heap_blkno: decoded.blocks[0].blkno,
+                            new_heap_blkno,
+                            old_heap_blkno: None,
+                            flags: pg_constants::VISIBILITYMAP_VALID_BITS,
+                        },
+                    )?;
+                }
+                if let Some(old_vm_blk) = old_vm_blk {
+                    timeline.put_wal_record(
+                        lsn,
+                        vm_relish,
+                        old_vm_blk,
+                        ZenithWalRecord::ClearVisibilityMapFlags {
+                            new_heap_blkno: None,
+                            old_heap_blkno,
                            flags: pg_constants::VISIBILITYMAP_VALID_BITS,
                        },
                    )?;
@@ -472,8 +465,6 @@ impl WalIngest {
            }
        }

-        // FIXME: What about XLOG_HEAP_LOCK and XLOG_HEAP2_LOCK_UPDATED?
-
        Ok(())
    }

--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -11,14 +11,15 @@ use crate::thread_mgr;
 use crate::thread_mgr::ThreadKind;
 use crate::walingest::WalIngest;
 use anyhow::{bail, Context, Error, Result};
+use bytes::BytesMut;
 use lazy_static::lazy_static;
-use parking_lot::Mutex;
 use postgres_ffi::waldecoder::*;
 use postgres_protocol::message::backend::ReplicationMessage;
 use postgres_types::PgLsn;
 use std::cell::Cell;
 use std::collections::HashMap;
 use std::str::FromStr;
+use std::sync::Mutex;
 use std::thread_local;
 use std::time::SystemTime;
 use tokio::pin;
@@ -27,9 +28,9 @@ use tokio_postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
 use tokio_stream::StreamExt;
 use tracing::*;
 use zenith_utils::lsn::Lsn;
+use zenith_utils::pq_proto::ZenithFeedback;
 use zenith_utils::zid::ZTenantId;
 use zenith_utils::zid::ZTimelineId;
-
 //
 // We keep one WAL Receiver active per timeline.
 //
@@ -50,7 +51,7 @@ thread_local! {
 }

 fn drop_wal_receiver(tenantid: ZTenantId, timelineid: ZTimelineId) {
-    let mut receivers = WAL_RECEIVERS.lock();
+    let mut receivers = WAL_RECEIVERS.lock().unwrap();
    receivers.remove(&(tenantid, timelineid));
 }

@@ -61,10 +62,11 @@ pub fn launch_wal_receiver(
    timelineid: ZTimelineId,
    wal_producer_connstr: &str,
 ) -> Result<()> {
-    let mut receivers = WAL_RECEIVERS.lock();
+    let mut receivers = WAL_RECEIVERS.lock().unwrap();

    match receivers.get_mut(&(tenantid, timelineid)) {
        Some(receiver) => {
+            info!("wal receiver already running, updating connection string");
            receiver.wal_producer_connstr = wal_producer_connstr.into();
        }
        None => {
@@ -93,7 +95,7 @@ pub fn launch_wal_receiver(

 // Look up current WAL producer connection string in the hash table
 fn get_wal_producer_connstr(tenantid: ZTenantId, timelineid: ZTimelineId) -> String {
-    let receivers = WAL_RECEIVERS.lock();
+    let receivers = WAL_RECEIVERS.lock().unwrap();

    receivers
        .get(&(tenantid, timelineid))
@@ -158,7 +160,7 @@ fn walreceiver_main(
    // This is from tokio-postgres docs, but it is a bit weird in our case because we extensively use block_on
    runtime.spawn(async move {
        if let Err(e) = connection.await {
-            eprintln!("connection error: {}", e);
+            error!("connection error: {}", e);
        }
    });

@@ -286,7 +288,6 @@ fn walreceiver_main(
        };

        if let Some(last_lsn) = status_update {
-            let last_lsn = PgLsn::from(u64::from(last_lsn));
            let timeline_synced_disk_consistent_lsn =
                tenant_mgr::get_repository_for_tenant(tenantid)?
                    .get_timeline_state(timelineid)
@@ -294,18 +295,32 @@ fn walreceiver_main(
                    .unwrap_or(Lsn(0));

            // The last LSN we processed. It is not guaranteed to survive pageserver crash.
-            let write_lsn = last_lsn;
+            let write_lsn = u64::from(last_lsn);
            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
-            let flush_lsn = PgLsn::from(u64::from(timeline.get_disk_consistent_lsn()));
+            let flush_lsn = u64::from(timeline.get_disk_consistent_lsn());
            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
-            let apply_lsn = PgLsn::from(u64::from(timeline_synced_disk_consistent_lsn));
+            let apply_lsn = u64::from(timeline_synced_disk_consistent_lsn);
            let ts = SystemTime::now();
-            const NO_REPLY: u8 = 0;
+
+            // Send zenith feedback message.
+            // Regular standby_status_update fields are put into this message.
+            let zenith_status_update = ZenithFeedback {
+                current_timeline_size: timeline.get_current_logical_size() as u64,
+                ps_writelsn: write_lsn,
+                ps_flushlsn: flush_lsn,
+                ps_applylsn: apply_lsn,
+                ps_replytime: ts,
+            };
+
+            debug!("zenith_status_update {:?}", zenith_status_update);
+
+            let mut data = BytesMut::new();
+            zenith_status_update.serialize(&mut data)?;
            runtime.block_on(
                physical_stream
                    .as_mut()
-                    .standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY),
+                    .zenith_status_update(data.len() as u64, &data),
            )?;
        }
    }
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -363,25 +363,44 @@ impl PostgresRedoManager {
                will_init: _,
                rec: _,
            } => panic!("tried to pass postgres wal record to zenith WAL redo"),
-            ZenithWalRecord::ClearVisibilityMapFlags { heap_blkno, flags } => {
-                // Calculate the VM block and offset that corresponds to the heap block.
-                let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(*heap_blkno);
-                let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(*heap_blkno);
-                let map_offset = pg_constants::HEAPBLK_TO_OFFSET(*heap_blkno);
-
-                // Check that we're modifying the correct VM block.
+            ZenithWalRecord::ClearVisibilityMapFlags {
+                new_heap_blkno,
+                old_heap_blkno,
+                flags,
+            } => {
+                // sanity check that this is modifying the correct relish
                assert!(
                    check_forknum(&rel, pg_constants::VISIBILITYMAP_FORKNUM),
                    "ClearVisibilityMapFlags record on unexpected rel {:?}",
                    rel
                );
-                assert!(map_block == blknum);
+                if let Some(heap_blkno) = *new_heap_blkno {
+                    // Calculate the VM block and offset that corresponds to the heap block.
+                    let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
+                    let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
+                    let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);

-                // equivalent to PageGetContents(page)
-                let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
+                    // Check that we're modifying the correct VM block.
+                    assert!(map_block == blknum);

-                let mask: u8 = flags << map_offset;
-                map[map_byte as usize] &= !mask;
+                    // equivalent to PageGetContents(page)
+                    let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
+
+                    map[map_byte as usize] &= !(flags << map_offset);
+                }
+
+                // Repeat for 'old_heap_blkno', if any
+                if let Some(heap_blkno) = *old_heap_blkno {
+                    let map_block = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blkno);
+                    let map_byte = pg_constants::HEAPBLK_TO_MAPBYTE(heap_blkno);
+                    let map_offset = pg_constants::HEAPBLK_TO_OFFSET(heap_blkno);
+
+                    assert!(map_block == blknum);
+
+                    let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
+
+                    map[map_byte as usize] &= !(flags << map_offset);
+                }
            }
            // Non-relational WAL records are handled here, with custom code that has the
            // same effects as the corresponding Postgres WAL redo function.
--- a/poetry.lock
+++ b/poetry.lock
--- a/postgres_ffi/Cargo.toml
+++ b/postgres_ffi/Cargo.toml
@@ -1,10 +1,7 @@
 [package]
 name = "postgres_ffi"
 version = "0.1.0"
-authors = ["Heikki Linnakangas <heikki@zenith.tech>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2021"

 [dependencies]
 chrono = "0.4.19"
--- a/postgres_ffi/src/xlog_utils.rs
+++ b/postgres_ffi/src/xlog_utils.rs
@@ -51,6 +51,13 @@ pub type TimeLineID = u32;
 pub type TimestampTz = i64;
 pub type XLogSegNo = u64;

+/// Interval between checkpoints of the metadata file. The metadata file must be stored to
+/// enforce the invariant that checkpoint.nextXid is larger than any XID in WAL.
+/// But flushing the checkpoint file for each transaction seems to be too expensive,
+/// so XID_CHECKPOINT_INTERVAL is used to forward-align nextXid and thus perform a
+/// metadata checkpoint only once per XID_CHECKPOINT_INTERVAL transactions.
+/// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
+/// in order to let the CLOG_TRUNCATE mechanism correctly extend CLOG.
 const XID_CHECKPOINT_INTERVAL: u32 = 1024;

 #[allow(non_snake_case)]
@@ -400,9 +407,13 @@ impl CheckPoint {
    ///
    /// Returns 'true' if the XID was updated.
    pub fn update_next_xid(&mut self, xid: u32) -> bool {
-        let xid = xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
+        // nextXid should be greater than any XID in WAL, so increment the provided XID and check for wraparound.
+        let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
+        // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
+        // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
+        new_xid =
+            new_xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
        let full_xid = self.nextXid.value;
-        let new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
        let old_xid = full_xid as u32;
        if new_xid.wrapping_sub(old_xid) as i32 > 0 {
            let mut epoch = full_xid >> 32;
@@ -520,4 +531,34 @@ mod tests {
        println!("wal_end={}, tli={}", wal_end, tli);
        assert_eq!(wal_end, waldump_wal_end);
    }
+
+    /// Check the math in update_next_xid
+    ///
+    /// NOTE: These checks are sensitive to the value of XID_CHECKPOINT_INTERVAL,
+    /// currently 1024.
+    #[test]
+    pub fn test_update_next_xid() {
+        let checkpoint_buf = [0u8; std::mem::size_of::<CheckPoint>()];
+        let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();
+
+        checkpoint.nextXid = FullTransactionId { value: 10 };
+        assert_eq!(checkpoint.nextXid.value, 10);
+
+        // The input XID gets rounded up to the next XID_CHECKPOINT_INTERVAL
+        // boundary
+        checkpoint.update_next_xid(100);
+        assert_eq!(checkpoint.nextXid.value, 1024);
+
+        // No change
+        checkpoint.update_next_xid(500);
+        assert_eq!(checkpoint.nextXid.value, 1024);
+        checkpoint.update_next_xid(1023);
+        assert_eq!(checkpoint.nextXid.value, 1024);
+
+        // The function returns the *next* XID, given the highest XID seen so
+        // far. So when we pass 1024, the nextXid gets bumped up to the next
+        // XID_CHECKPOINT_INTERVAL boundary.
+        checkpoint.update_next_xid(1024);
+        assert_eq!(checkpoint.nextXid.value, 2048);
+    }
 }
--- a/pre-commit.py
+++ b/pre-commit.py
@@ -38,7 +38,7 @@ def rustfmt(fix_inplace: bool = False, no_color: bool = False) -> str:


 def yapf(fix_inplace: bool) -> str:
-    cmd = "pipenv run yapf --recursive"
+    cmd = "poetry run yapf --recursive"
    if fix_inplace:
        cmd += " --in-place"
    else:
@@ -47,7 +47,7 @@ def yapf(fix_inplace: bool) -> str:


 def mypy() -> str:
-    return "pipenv run mypy"
+    return "poetry run mypy"


 def get_commit_files() -> List[str]:
@@ -72,7 +72,7 @@ def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color:
            print("Please inspect the output below and run make fmt to fix automatically.")
        if suffix == ".py":
            print("If the output is empty, ensure that you've installed Python tooling by\n"
-                  "running 'pipenv install --dev' in the current directory (no root needed)")
+                  "running './scripts/pysync' in the current directory (no root needed)")
        print()
        print(res.stdout.decode())
        exit(1)
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -1,10 +1,7 @@
 [package]
 name = "proxy"
 version = "0.1.0"
-authors = ["Stas Kelvich <stas.kelvich@gmail.com>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2021"

 [dependencies]
 anyhow = "1.0"
@@ -14,13 +11,11 @@ md5 = "0.7.0"
 rand = "0.8.3"
 hex = "0.4.3"
 hyper = "0.14"
-routerify = "2"
-parking_lot = "0.11.2"
 serde = "1"
 serde_json = "1"
 tokio = { version = "1.11", features = ["macros"] }
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-clap = "2.33.0"
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+clap = "3.0"
 rustls = "0.19.1"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

--- a/proxy/src/cplane_api.rs
+++ b/proxy/src/cplane_api.rs
@@ -28,7 +28,7 @@ impl DatabaseInfo {
            .to_socket_addrs()
            .with_context(|| format!("cannot resolve {} to SocketAddr", host_port))?
            .next()
-            .ok_or_else(|| anyhow!("cannot resolve at least one SocketAddr"))
+            .context("cannot resolve at least one SocketAddr")
    }
 }

--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -1,5 +1,5 @@
 use hyper::{Body, Request, Response, StatusCode};
-use routerify::RouterBuilder;
+use zenith_utils::http::RouterBuilder;

 use zenith_utils::http::endpoint;
 use zenith_utils::http::error::ApiError;
--- a/proxy/src/main.rs
+++ b/proxy/src/main.rs
@@ -24,55 +24,55 @@ fn main() -> anyhow::Result<()> {
    let arg_matches = App::new("Zenith proxy/router")
        .version(GIT_VERSION)
        .arg(
-            Arg::with_name("proxy")
-                .short("p")
+            Arg::new("proxy")
+                .short('p')
                .long("proxy")
                .takes_value(true)
                .help("listen for incoming client connections on ip:port")
                .default_value("127.0.0.1:4432"),
        )
        .arg(
-            Arg::with_name("mgmt")
-                .short("m")
+            Arg::new("mgmt")
+                .short('m')
                .long("mgmt")
                .takes_value(true)
                .help("listen for management callback connection on ip:port")
                .default_value("127.0.0.1:7000"),
        )
        .arg(
-            Arg::with_name("http")
-                .short("h")
+            Arg::new("http")
+                .short('h')
                .long("http")
                .takes_value(true)
                .help("listen for incoming http connections (metrics, etc) on ip:port")
                .default_value("127.0.0.1:7001"),
        )
        .arg(
-            Arg::with_name("uri")
-                .short("u")
+            Arg::new("uri")
+                .short('u')
                .long("uri")
                .takes_value(true)
                .help("redirect unauthenticated users to given uri")
                .default_value("http://localhost:3000/psql_session/"),
        )
        .arg(
-            Arg::with_name("auth-endpoint")
-                .short("a")
+            Arg::new("auth-endpoint")
+                .short('a')
                .long("auth-endpoint")
                .takes_value(true)
-                .help("redirect unauthenticated users to given uri")
+                .help("API endpoint for authenticating users")
                .default_value("http://localhost:3000/authenticate_proxy_request/"),
        )
        .arg(
-            Arg::with_name("ssl-key")
-                .short("k")
+            Arg::new("ssl-key")
+                .short('k')
                .long("ssl-key")
                .takes_value(true)
                .help("path to SSL key for client postgres connections"),
        )
        .arg(
-            Arg::with_name("ssl-cert")
-                .short("c")
+            Arg::new("ssl-cert")
+                .short('c')
                .long("ssl-cert")
                .takes_value(true)
                .help("path to SSL cert for client postgres connections"),
--- a/proxy/src/mgmt.rs
+++ b/proxy/src/mgmt.rs
@@ -111,7 +111,7 @@ fn try_process_query(
                .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        }
        Err(e) => {
-            pgb.write_message(&BeMessage::ErrorResponse(e.to_string()))?;
+            pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
        }
    }

--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -1,13 +1,13 @@
 use crate::cplane_api::{CPlaneApi, DatabaseInfo};
 use crate::ProxyState;
-use anyhow::{anyhow, bail};
+use anyhow::{anyhow, bail, Context};
 use lazy_static::lazy_static;
-use parking_lot::Mutex;
 use rand::prelude::StdRng;
 use rand::{Rng, SeedableRng};
 use std::cell::Cell;
 use std::collections::HashMap;
 use std::net::{SocketAddr, TcpStream};
+use std::sync::Mutex;
 use std::{io, thread};
 use tokio_postgres::NoTls;
 use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
@@ -89,7 +89,7 @@ pub fn thread_main(
                NUM_CONNECTIONS_CLOSED_COUNTER.inc();
                THREAD_CANCEL_KEY_DATA.with(|cell| {
                    if let Some(cancel_key_data) = cell.get() {
-                        CANCEL_MAP.lock().remove(&cancel_key_data);
+                        CANCEL_MAP.lock().unwrap().remove(&cancel_key_data);
                    };
                });
            })?;
@@ -152,7 +152,7 @@ impl ProxyConnection {
            Ok(None) => return Ok(None),
            Err(e) => {
                // Report the error to the client
-                self.pgb.write_message(&Be::ErrorResponse(e.to_string()))?;
+                self.pgb.write_message(&Be::ErrorResponse(&e.to_string()))?;
                bail!("failed to handle client: {:?}", e);
            }
        };
@@ -214,13 +214,13 @@ impl ProxyConnection {
                    let mut get_param = |key| {
                        params
                            .remove(key)
-                            .ok_or_else(|| anyhow!("{} is missing in startup packet", key))
+                            .with_context(|| format!("{} is missing in startup packet", key))
                    };

                    return Ok(Some((get_param("user")?, get_param("database")?)));
                }
                FeStartupPacket::CancelRequest(cancel_key_data) => {
-                    if let Some(cancel_closure) = CANCEL_MAP.lock().get(&cancel_key_data) {
+                    if let Some(cancel_closure) = CANCEL_MAP.lock().unwrap().get(&cancel_key_data) {
                        let runtime = tokio::runtime::Builder::new_current_thread()
                            .enable_all()
                            .build()
@@ -333,7 +333,10 @@ async fn connect_to_db(
        socket_addr,
        cancel_token: client.cancel_token(),
    };
-    CANCEL_MAP.lock().insert(cancel_key_data, cancel_closure);
+    CANCEL_MAP
+        .lock()
+        .unwrap()
+        .insert(cancel_key_data, cancel_closure);
    THREAD_CANCEL_KEY_DATA.with(|cell| {
        let prev_value = cell.replace(Some(cancel_key_data));
        assert!(
@@ -360,8 +363,8 @@ fn proxy(
                // `std::io::copy` is guaranteed to exit if we return an error,
                // so we can afford to lose `res` in case `flush` fails
                let res = self.0.write(buf);
-                if res.is_ok() {
-                    NUM_BYTES_PROXIED_COUNTER.inc_by(buf.len() as u64);
+                if let Ok(count) = res {
+                    NUM_BYTES_PROXIED_COUNTER.inc_by(count as u64);
                    self.flush()?;
                }
                res
--- a/proxy/src/waiters.rs
+++ b/proxy/src/waiters.rs
@@ -1,4 +1,4 @@
-use anyhow::{anyhow, Context};
+use anyhow::Context;
 use std::collections::HashMap;
 use std::sync::{mpsc, Mutex};

@@ -34,7 +34,7 @@ impl<T> Waiters<T> {
            .lock()
            .unwrap()
            .remove(key)
-            .ok_or_else(|| anyhow!("key {} not found", key))?;
+            .with_context(|| format!("key {} not found", key))?;
        tx.send(value).context("channel hangup")
    }
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,32 @@
+[tool.poetry]
+name = "zenith"
+version = "0.1.0"
+description = ""
+authors = []
+
+[tool.poetry.dependencies]
+python = "^3.7"
+pytest = "^6.2.5"
+psycopg2-binary = "^2.9.1"
+typing-extensions = "^3.10.0"
+PyJWT = {version = "^2.1.0", extras = ["crypto"]}
+requests = "^2.26.0"
+pytest-xdist = "^2.3.0"
+asyncpg = "^0.24.0"
+aiopg = "^1.3.1"
+cached-property = "^1.5.2"
+Jinja2 = "^3.0.2"
+types-requests = "^2.27.7"
+types-psycopg2 = "^2.9.6"
+boto3 = "^1.20.40"
+boto3-stubs = "^1.20.40"
+moto = {version = "^3.0.0", extras = ["server"]}
+
+[tool.poetry.dev-dependencies]
+yapf = "==0.31.0"
+flake8 = "^3.9.2"
+mypy = "==0.910"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,6 +3,8 @@ addopts =
    -m 'not remote_cluster'
 markers =
    remote_cluster
+testpaths =
+    test_runner
 minversion = 6.0
 log_format = %(asctime)s.%(msecs)-3d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
 log_date_format = %Y-%m-%d %H:%M:%S
--- a/scripts/coverage
+++ b/scripts/coverage
@@ -14,17 +14,30 @@ from dataclasses import dataclass
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from textwrap import dedent
-from typing import Any, Iterable, List, Optional
+from typing import Any, Dict, Iterator, Iterable, List, Optional

 import argparse
+import hashlib
 import json
 import os
 import shutil
+import socket
 import subprocess
 import sys


-def intersperse(sep: Any, iterable: Iterable[Any]):
+def file_mtime_or_zero(path: Path) -> int:
+    try:
+        return path.stat().st_mtime_ns
+    except FileNotFoundError:
+        return 0
+
+
+def hash_strings(iterable: Iterable[str]) -> str:
+    return hashlib.sha1(''.join(iterable).encode('utf-8')).hexdigest()
+
+
+def intersperse(sep: Any, iterable: Iterable[Any]) -> Iterator[Any]:
    fst = True
    for item in iterable:
        if not fst:
@@ -33,18 +46,18 @@ def intersperse(sep: Any, iterable: Iterable[Any]):
        yield item


-def find_demangler(demangler=None):
+def find_demangler(demangler: Optional[Path] = None) -> Path:
    known_tools = ['c++filt', 'rustfilt', 'llvm-cxxfilt']

    if demangler:
        # Explicit argument has precedence over `known_tools`
        demanglers = [demangler]
    else:
-        demanglers = known_tools
+        demanglers = [Path(x) for x in known_tools]

-    for demangler in demanglers:
-        if shutil.which(demangler):
-            return demangler
+    for exe in demanglers:
+        if shutil.which(exe):
+            return exe

    raise Exception(' '.join([
        'Failed to find symbol demangler.',
@@ -54,13 +67,13 @@ def find_demangler(demangler=None):


 class Cargo:
-    def __init__(self, cwd: Path):
+    def __init__(self, cwd: Path) -> None:
        self.cwd = cwd
        self.target_dir = Path(os.environ.get('CARGO_TARGET_DIR', cwd / 'target')).resolve()
-        self._rustlib_dir = None
+        self._rustlib_dir: Optional[Path] = None

    @property
-    def rustlib_dir(self):
+    def rustlib_dir(self) -> Path:
        if not self._rustlib_dir:
            cmd = [
                'cargo',
@@ -131,44 +144,26 @@ class LLVM:

        return name

-    def profdata(self, input_dir: Path, output_profdata: Path):
-        profraws = [f for f in input_dir.iterdir() if f.suffix == '.profraw']
-        if not profraws:
-            raise Exception(f'No profraw files found at {input_dir}')
-
-        with open(input_dir / 'profraw.list', 'w') as input_files:
-            profraw_mtime = 0
-            for profraw in profraws:
-                profraw_mtime = max(profraw_mtime, profraw.stat().st_mtime_ns)
-                print(profraw, file=input_files)
-            input_files.flush()
-
-            try:
-                profdata_mtime = output_profdata.stat().st_mtime_ns
-            except FileNotFoundError:
-                profdata_mtime = 0
-
-            # An obvious make-ish optimization
-            if profraw_mtime >= profdata_mtime:
-                subprocess.check_call([
-                    self.resolve_tool('llvm-profdata'),
-                    'merge',
-                    '-sparse',
-                    f'-input-files={input_files.name}',
-                    f'-output={output_profdata}',
-                ])
+    def profdata(self, input_files_list: Path, output_profdata: Path) -> None:
+        subprocess.check_call([
+            self.resolve_tool('llvm-profdata'),
+            'merge',
+            '-sparse',
+            f'-input-files={input_files_list}',
+            f'-output={output_profdata}',
+        ])

    def _cov(self,
-             *extras,
+             *args,
             subcommand: str,
             profdata: Path,
             objects: List[str],
             sources: List[str],
-             demangler: Optional[str] = None) -> None:
+             demangler: Optional[Path] = None) -> None:

        cwd = self.cargo.cwd
        objects = list(intersperse('-object', objects))
-        extras = list(extras)
+        extras = list(args)

        # For some reason `rustc` produces relative paths to src files,
        # so we force it to cut the $PWD prefix.
@@ -194,7 +189,7 @@ class LLVM:
        self._cov(subcommand='report', **kwargs)

    def cov_export(self, *, kind: str, **kwargs) -> None:
-        extras = [f'-format={kind}']
+        extras = (f'-format={kind}', )
        self._cov(subcommand='export', *extras, **kwargs)

    def cov_show(self, *, kind: str, output_dir: Optional[Path] = None, **kwargs) -> None:
@@ -206,42 +201,93 @@ class LLVM:


@dataclass
-class Report(ABC):
+class ProfDir:
+    cwd: Path
+    llvm: LLVM
+
+    def __post_init__(self) -> None:
+        self.cwd.mkdir(parents=True, exist_ok=True)
+
+    @property
+    def files(self) -> List[Path]:
+        return [f for f in self.cwd.iterdir() if f.suffix in ('.profraw', '.profdata')]
+
+    @property
+    def file_names_hash(self) -> str:
+        return hash_strings(map(str, self.files))
+
+    def merge(self, output_profdata: Path) -> bool:
+        files = self.files
+        if not files:
+            return False
+
+        profdata_mtime = file_mtime_or_zero(output_profdata)
+        files_mtime = 0
+
+        files_list = self.cwd / 'files.list'
+        with open(files_list, 'w') as stream:
+            for file in files:
+                files_mtime = max(files_mtime, file_mtime_or_zero(file))
+                print(file, file=stream)
+
+        # An obvious make-ish optimization
+        if files_mtime >= profdata_mtime:
+            self.llvm.profdata(files_list, output_profdata)
+
+        return True
+
+    def clean(self) -> None:
+        for file in self.cwd.iterdir():
+            os.remove(file)
+
+    def __truediv__(self, other):
+        return self.cwd / other
+
+    def __str__(self):
+        return str(self.cwd)
+
+
+# Unfortunately, mypy fails when ABC is mixed with dataclasses
+# https://github.com/python/mypy/issues/5374#issuecomment-568335302
+@dataclass
+class ReportData:
    """ Common properties of a coverage report """

    llvm: LLVM
-    demangler: str
+    demangler: Path
    profdata: Path
    objects: List[str]
    sources: List[str]

-    def _common_kwargs(self):
+
+class Report(ABC, ReportData):
+    def _common_kwargs(self) -> Dict[str, Any]:
        return dict(profdata=self.profdata,
                    objects=self.objects,
                    sources=self.sources,
                    demangler=self.demangler)

    @abstractmethod
-    def generate(self):
+    def generate(self) -> None:
        pass

-    def open(self):
+    def open(self) -> None:
        # Do nothing by default
        pass


 class SummaryReport(Report):
-    def generate(self):
+    def generate(self) -> None:
        self.llvm.cov_report(**self._common_kwargs())


 class TextReport(Report):
-    def generate(self):
+    def generate(self) -> None:
        self.llvm.cov_show(kind='text', **self._common_kwargs())


 class LcovReport(Report):
-    def generate(self):
+    def generate(self) -> None:
        self.llvm.cov_export(kind='lcov', **self._common_kwargs())


@@ -249,11 +295,11 @@ class LcovReport(Report):
 class HtmlReport(Report):
    output_dir: Path

-    def generate(self):
+    def generate(self) -> None:
        self.llvm.cov_show(kind='html', output_dir=self.output_dir, **self._common_kwargs())
        print(f'HTML report is located at `{self.output_dir}`')

-    def open(self):
+    def open(self) -> None:
        tool = dict(linux='xdg-open', darwin='open').get(sys.platform)
        if not tool:
            raise Exception(f'Unknown platform {sys.platform}')
@@ -266,9 +312,9 @@ class HtmlReport(Report):
@dataclass
 class GithubPagesReport(HtmlReport):
    output_dir: Path
-    commit_url: str
+    commit_url: str = 'https://local/deadbeef'

-    def generate(self):
+    def generate(self) -> None:
        def index_path(path):
            return path / 'index.html'

@@ -322,9 +368,9 @@ class GithubPagesReport(HtmlReport):


 class State:
-    def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]):
+    def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]) -> None:
        # Use hostname by default
-        profraw_prefix = profraw_prefix or '%h'
+        self.profraw_prefix = profraw_prefix or socket.gethostname()

        self.cwd = cwd
        self.cargo = Cargo(self.cwd)
@@ -334,16 +380,18 @@ class State:
        self.report_dir = self.top_dir / 'report'

        # Directory for raw coverage data emitted by executables
-        self.profraw_dir = self.top_dir / 'profraw'
-        self.profraw_dir.mkdir(parents=True, exist_ok=True)
+        self.profraw_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profraw')
+
+        # Directory for processed coverage data
+        self.profdata_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profdata')

        # Aggregated coverage data
-        self.profdata_file = self.top_dir / 'coverage.profdata'
+        self.final_profdata = self.top_dir / 'coverage.profdata'

        # Dump all coverage data files into a dedicated directory.
        # Each filename is parameterized by PID & executable's signature.
        os.environ['LLVM_PROFILE_FILE'] = str(self.profraw_dir /
-                                              f'cov-{profraw_prefix}-%p-%m.profraw')
+                                              f'{self.profraw_prefix}-%p-%m.profraw')

        os.environ['RUSTFLAGS'] = ' '.join([
            os.environ.get('RUSTFLAGS', ''),
@@ -367,13 +415,41 @@ class State:
        # see: https://github.com/rust-lang/rust/pull/90132
        os.environ['RUSTC_BOOTSTRAP'] = '1'

-    def do_run(self, args):
+    def _merge_profraw(self) -> bool:
+        """
+        Merge the raw profiles into a single profdata file, then wipe the profraw dir.
+        The target file is named `<profraw_prefix>-<file_names_hash>.profdata`.
+        Returns whatever `self.profraw_dir.merge` returned.
+        """
+        target_name = f'{self.profraw_prefix}-{self.profdata_dir.file_names_hash}.profdata'
+        target = self.profdata_dir / target_name
+        print(f'* Merging profraw files (into {target.name})')
+        merged = self.profraw_dir.merge(target)
+
+        # The raw files are of no use once merged
+        self.profraw_dir.clean()
+
+        return merged
+
+    def _merge_profdata(self) -> bool:
+        """
+        Merge pending profraw files first, then merge the profdata dir into
+        `self.final_profdata`. Returns the result of that second merge.
+        """
+        self._merge_profraw()
+        final = self.final_profdata
+        print(f'* Merging profdata files (into {final.name})')
+        return self.profdata_dir.merge(final)
+
+    def do_run(self, args) -> None:
        subprocess.check_call([*args.command, *args.args])

-    def do_report(self, args):
+    def do_merge(self, args) -> None:
+        """ Handler of the `merge` subcommand: run the merge selected by `args.kind` """
+        merge_by_kind = dict(profraw=self._merge_profraw, profdata=self._merge_profdata)
+        merge = merge_by_kind[args.kind]
+        merge()
+
+    def do_report(self, args) -> None:
        if args.all and args.sources:
            raise Exception('--all should not be used with sources')

+        if args.format == 'github' and not args.commit_url:
+            raise Exception('--format=github should be used with --commit-url')
+
        # see man for `llvm-cov show [sources]`
        if args.all:
            sources = []
@@ -382,8 +458,8 @@ class State:
        else:
            sources = args.sources

-        print('* Merging profraw files')
-        self.llvm.profdata(self.profraw_dir, self.profdata_file)
+        if not self._merge_profdata():
+            raise Exception(f'No coverage data files found at {self.top_dir}')

        objects = []
        if args.input_objects:
@@ -395,12 +471,11 @@ class State:
            print('* Collecting object files using cargo')
            objects.extend(self.cargo.binaries(args.profile))

-        params = dict(llvm=self.llvm,
-                      demangler=find_demangler(args.demangler),
-                      profdata=self.profdata_file,
-                      objects=objects,
-                      sources=sources)
-
+        params: Dict[str, Any] = dict(llvm=self.llvm,
+                                      demangler=find_demangler(args.demangler),
+                                      profdata=self.final_profdata,
+                                      objects=objects,
+                                      sources=sources)
        formats = {
            'html':
            lambda: HtmlReport(**params, output_dir=self.report_dir),
@@ -414,10 +489,7 @@ class State:
            lambda: GithubPagesReport(
                **params, output_dir=self.report_dir, commit_url=args.commit_url),
        }
-
-        report = formats.get(args.format)()
-        if not report:
-            raise Exception('Format `{args.format}` is not supported')
+        report = formats[args.format]()

        print(f'* Rendering coverage report ({args.format})')
        report.generate()
@@ -426,7 +498,7 @@ class State:
            print('* Opening the report')
            report.open()

-    def do_clean(self, args):
+    def do_clean(self, args: Any) -> None:
        # Wipe everything if no filters have been provided
        if not (args.report or args.prof):
            shutil.rmtree(self.top_dir, ignore_errors=True)
@@ -434,10 +506,12 @@ class State:
            if args.report:
                shutil.rmtree(self.report_dir, ignore_errors=True)
            if args.prof:
-                self.profdata_file.unlink(missing_ok=True)
+                self.profraw_dir.clean()
+                self.profdata_dir.clean()
+                self.final_profdata.unlink(missing_ok=True)


-def main():
+def main() -> None:
    app = sys.argv[0]
    example = f"""
 prerequisites:
@@ -446,7 +520,7 @@ prerequisites:

 self-contained example:
    {app} run make
-    {app} run pipenv run pytest test_runner
+    {app} run poetry run pytest test_runner
    {app} run cargo test
    {app} report --open
    """
@@ -463,6 +537,12 @@ self-contained example:
    p_run.add_argument('command', nargs=1)
    p_run.add_argument('args', nargs=argparse.REMAINDER)

+    p_merge = commands.add_parser('merge', help='save disk space by merging cov files')
+    p_merge.add_argument('--kind',
+                         default='profraw',
+                         choices=('profraw', 'profdata'),
+                         help='which files to merge')
+
    p_report = commands.add_parser('report', help='generate a coverage report')
    p_report.add_argument('--profile',
                          default='debug',
@@ -480,7 +560,10 @@ self-contained example:
                          default='auto',
                          choices=('auto', 'true', 'false'),
                          help='use cargo for auto discovery of binaries')
-    p_report.add_argument('--commit-url', type=str, help='required for --format=github')
+    p_report.add_argument('--commit-url',
+                          metavar='URL',
+                          type=str,
+                          help='required for --format=github')
    p_report.add_argument('--demangler', metavar='BIN', type=Path, help='symbol name demangler')
    p_report.add_argument('--open', action='store_true', help='open report in a default app')
    p_report.add_argument('--all', action='store_true', help='show everything, e.g. deps')
@@ -493,15 +576,16 @@ self-contained example:
    args = parser.parse_args()
    state = State(cwd=Path.cwd(), top_dir=args.dir, profraw_prefix=args.profraw_prefix)

-    commands = {
+    handlers = {
        'run': state.do_run,
+        'merge': state.do_merge,
        'report': state.do_report,
        'clean': state.do_clean,
    }

-    action = commands.get(args.subparser_name)
-    if action:
-        action(args)
+    handler = handlers.get(args.subparser_name)
+    if handler:
+        handler(args)
    else:
        parser.print_help()

--- a/scripts/generate_and_push_perf_report.sh
+++ b/scripts/generate_and_push_perf_report.sh
@@ -1,27 +1,24 @@
 #!/bin/bash

 # this is a shortcut script to avoid duplication in CI
-
 set -eux -o pipefail

 SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

-git clone https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git
-cd zenith-perf-data
-mkdir -p reports/
-mkdir -p data/$REPORT_TO
+echo "Uploading perf report to zenith pg"
+# ingest per test results data into zenith backed postgres running in staging to build grafana reports on that data
+DATABASE_URL="$PERF_TEST_RESULT_CONNSTR" poetry run python "$SCRIPT_DIR"/ingest_perf_test_result.py --ingest "$REPORT_FROM"

-cp $REPORT_FROM/* data/$REPORT_TO
+# Activate poetry's venv. Needed because git upload does not run in a project dir (it uses tmp to store the repository)
+# so the problem occurs because poetry cannot find pyproject.toml in temp dir created by git upload
+# shellcheck source=/dev/null
+. "$(poetry env info --path)"/bin/activate

-echo "Generating report"
-pipenv run python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html 
-echo "Uploading perf result"
-git add data reports
-git \
-    -c "user.name=vipvap" \
-    -c "user.email=vipvap@zenith.tech" \
-    commit \
-    --author="vipvap <vipvap@zenith.tech>" \
-    -m "add performance test result for $GITHUB_SHA zenith revision"
-
-git push https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-perf-data.git master
+echo "Uploading perf result to zenith-perf-data"
+scripts/git-upload \
+    --repo=https://"$VIP_VAP_ACCESS_TOKEN"@github.com/zenithdb/zenith-perf-data.git \
+    --message="add performance test result for $GITHUB_SHA zenith revision" \
+    --branch=master \
+    copy "$REPORT_FROM" "data/$REPORT_TO" `# COPY FROM TO_RELATIVE`\
+    --merge \
+    --run-cmd "python $SCRIPT_DIR/generate_perf_report_page.py --input-dir data/$REPORT_TO --out reports/$REPORT_TO.html"
--- a/scripts/git-upload
+++ b/scripts/git-upload
@@ -1,7 +1,9 @@
 #!/usr/bin/env python3

 from contextlib import contextmanager
+import shlex
 from tempfile import TemporaryDirectory
+from distutils.dir_util import copy_tree
 from pathlib import Path

 import argparse
@@ -9,6 +11,8 @@ import os
 import shutil
 import subprocess
 import sys
+import textwrap
+from typing import Optional


 def absolute_path(path):
@@ -38,13 +42,21 @@ def run(cmd, *args, **kwargs):


 class GitRepo:
-    def __init__(self, url):
+    def __init__(self, url, branch: Optional[str] = None):
        self.url = url
        self.cwd = TemporaryDirectory()
+        self.branch = branch

-        subprocess.check_call([
+        args = [
            'git',
            'clone',
+            '--single-branch',
+        ]
+        if self.branch:
+            args.extend(['--branch', self.branch])
+
+        subprocess.check_call([
+            *args,
            str(url),
            self.cwd.name,
        ])
@@ -100,23 +112,44 @@ def do_copy(args):
        raise FileExistsError(f"File exists: '{dst}'")

    if src.is_dir():
-        shutil.rmtree(dst, ignore_errors=True)
-        shutil.copytree(src, dst)
+        if not args.merge:
+            shutil.rmtree(dst, ignore_errors=True)
+        # distutils is deprecated, but this is a temporary workaround before python version bump
+        # here we need dir_exists_ok=True from shutil.copytree which is available in python 3.8+
+        copy_tree(str(src), str(dst))
    else:
        shutil.copy(src, dst)

+    if args.run_cmd:
+        run(shlex.split(args.run_cmd))
+

 def main():
    parser = argparse.ArgumentParser(description='Git upload tool')
    parser.add_argument('--repo', type=str, metavar='URL', required=True, help='git repo url')
    parser.add_argument('--message', type=str, metavar='TEXT', help='commit message')
+    parser.add_argument('--branch', type=str, metavar='TEXT', help='target git repo branch')

    commands = parser.add_subparsers(title='commands', dest='subparser_name')

-    p_copy = commands.add_parser('copy', help='copy file into the repo')
+    p_copy = commands.add_parser(
+        'copy',
+        help='copy file into the repo',
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
    p_copy.add_argument('src', type=absolute_path, help='source path')
    p_copy.add_argument('dst', type=relative_path, help='relative dest path')
    p_copy.add_argument('--forbid-overwrite', action='store_true', help='do not allow overwrites')
+    p_copy.add_argument(
+        '--merge',
+        action='store_true',
+        help='when copying a directory do not delete existing data, but add new files')
+    p_copy.add_argument('--run-cmd',
+                        help=textwrap.dedent('''\
+                run arbitrary cmd on top of copied files,
+                example usage is static content generation
+                based on current repository state\
+            '''))

    args = parser.parse_args()

@@ -127,7 +160,7 @@ def main():
    action = commands.get(args.subparser_name)
    if action:
        message = args.message or 'update'
-        GitRepo(args.repo).update(message, lambda: action(args))
+        GitRepo(args.repo, args.branch).update(message, lambda: action(args))
    else:
        parser.print_usage()

--- a/scripts/ingest_perf_test_result.py
+++ b/scripts/ingest_perf_test_result.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+import argparse
+from contextlib import contextmanager
+import json
+import os
+import psycopg2
+import psycopg2.extras
+from pathlib import Path
+from datetime import datetime
+
+CREATE_TABLE = """
+CREATE TABLE IF NOT EXISTS perf_test_results (
+    id SERIAL PRIMARY KEY,
+    suit TEXT,
+    revision CHAR(40),
+    platform TEXT,
+    metric_name TEXT,
+    metric_value NUMERIC,
+    metric_unit VARCHAR(10),
+    metric_report_type TEXT,
+    recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+)
+"""
+
+
+def err(msg):
+    """
+    Report a fatal error and terminate the script.
+
+    Prints `error: <msg>` to stderr and raises SystemExit with status 1,
+    so this function never returns.
+    """
+    # Local import: this module does not import `sys` at the top level.
+    import sys
+
+    # Diagnostics go to stderr so they do not mix with the regular output.
+    print(f'error: {msg}', file=sys.stderr)
+    # The bare `exit()` helper is installed by the `site` module and is not
+    # guaranteed to exist (e.g. under `python -S`); `sys.exit` always is.
+    sys.exit(1)
+
+
+@contextmanager
+def get_connection_cursor():
+    """
+    Context manager yielding a cursor on a new connection to the database
+    named by the DATABASE_URL environment variable.
+
+    The script is terminated via `err` when DATABASE_URL is unset or empty.
+    On exit the transaction is committed (or rolled back if the body raised)
+    and the connection is closed.
+    """
+    connstr = os.getenv('DATABASE_URL')
+    if not connstr:
+        err('DATABASE_URL environment variable is not set')
+    conn = psycopg2.connect(connstr)
+    try:
+        # `with conn` only delimits a transaction: it commits or rolls back,
+        # but leaves the connection open, hence the explicit close below.
+        with conn:
+            with conn.cursor() as cur:
+                yield cur
+    finally:
+        conn.close()
+
+
+def create_table(cur):
+    """Run the CREATE_TABLE statement (`CREATE TABLE IF NOT EXISTS ...`) on the given cursor."""
+    cur.execute(CREATE_TABLE)
+
+
+def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int) -> int:
+    """
+    Load one perf test result file and insert its metrics into `perf_test_results`.
+
+    Each entry of a suit's `data` list becomes one row. In addition, the suit's
+    `total_duration` is stored as an extra metric named `total_duration`
+    (unit `s`, report type `lower_is_better`).
+
+    cursor: psycopg2 cursor used for the bulk insert
+    data_dile: path to a JSON file with `revision`, `platform` and `result` keys
+    recorded_at_timestamp: POSIX timestamp stored with every inserted row
+
+    Returns the number of rows passed to the insert.
+    Raises KeyError if the file lacks one of the expected keys.
+    """
+    # Local import: only `datetime` itself is imported at module level.
+    from datetime import timezone
+
+    run_data = json.loads(data_dile.read_text())
+    revision = run_data['revision']
+    platform = run_data['platform']
+
+    # The target column is TIMESTAMP WITH TIME ZONE. A naive datetime would be
+    # interpreted in the session's time zone, so attach UTC explicitly.
+    # The value is the same for every row, so compute it once.
+    recorded_at = datetime.fromtimestamp(recorded_at_timestamp, tz=timezone.utc)
+
+    args_list = []
+    for suit_result in run_data['result']:
+        suit = suit_result['suit']
+
+        # Treat the total duration as one more metric, without modifying the
+        # parsed input in place.
+        metrics = [
+            *suit_result['data'],
+            {
+                'name': 'total_duration',
+                'value': suit_result['total_duration'],
+                'unit': 's',
+                'report': 'lower_is_better',
+            },
+        ]
+
+        for metric in metrics:
+            args_list.append({
+                'suit': suit,
+                'revision': revision,
+                'platform': platform,
+                'metric_name': metric['name'],
+                'metric_value': metric['value'],
+                'metric_unit': metric['unit'],
+                'metric_report_type': metric['report'],
+                'recorded_at_timestamp': recorded_at,
+            })
+
+    psycopg2.extras.execute_values(
+        cursor,
+        """
+        INSERT INTO perf_test_results (
+            suit,
+            revision,
+            platform,
+            metric_name,
+            metric_value,
+            metric_unit,
+            metric_report_type,
+            recorded_at_timestamp
+        ) VALUES %s
+        """,
+        args_list,
+        template="""(
+            %(suit)s,
+            %(revision)s,
+            %(platform)s,
+            %(metric_name)s,
+            %(metric_value)s,
+            %(metric_unit)s,
+            %(metric_report_type)s,
+            %(recorded_at_timestamp)s
+        )""",
+    )
+    return len(args_list)
+
+
+def main():
+    """
+    Command line entry point.
+
+    `--initdb` creates the results table. `--ingest` uploads one result file,
+    or every file of a directory in ascending timestamp order. The part of a
+    file name before the first `_` must be an integer; it is passed on as the
+    recording timestamp. Both options are optional and can be combined.
+    """
+    parser = argparse.ArgumentParser(description='Perf test result uploader. \
+            Database connection string should be provided via DATABASE_URL environment variable', )
+    parser.add_argument(
+        '--ingest',
+        type=Path,
+        help='Path to perf test result file, or directory with perf test result files')
+    parser.add_argument('--initdb', action='store_true', help='Initialize database')
+
+    def timestamp_of(path: Path) -> int:
+        # File names look like `<timestamp>_<rest>`
+        return int(path.name.split('_')[0])
+
+    args = parser.parse_args()
+    with get_connection_cursor() as cur:
+        if args.initdb:
+            create_table(cur)
+
+        # `--ingest` may be omitted (e.g. a bare `--initdb` run), so it has to
+        # be checked for None before the path is touched.
+        if args.ingest is None:
+            return
+
+        if not args.ingest.exists():
+            err(f'ingest path {args.ingest} does not exist')
+
+        if args.ingest.is_dir():
+            items = sorted(args.ingest.iterdir(), key=timestamp_of)
+        else:
+            items = [args.ingest]
+
+        for item in items:
+            ingested = ingest_perf_test_result(cur, item, timestamp_of(item))
+            print(f'Ingested {ingested} metric values from {item}')
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/pysync
+++ b/scripts/pysync
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# This is a helper script for setting up/updating our python environment.
+# It is intended to be a primary endpoint for all the people who want to
+# just setup test environment without going into details of python package management
+
+poetry install --no-root # this installs dev dependencies by default
--- a/scripts/pytest
+++ b/scripts/pytest
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+# This is a helper script to run pytest without going too much
+# into python dependency management details
+
+# It may be desirable to create more sophisticated pytest launcher
+# with commonly used options to simplify launching from e.g CI
+
+poetry run pytest "${@:1}"
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -22,23 +22,24 @@ runtime. Currently, there are only two batches:

 ### Running the tests

-Because pytest will search all subdirectories for tests, it's easiest to
-run the tests from within the `test_runner` directory.
+There is a wrapper script to invoke pytest: `./scripts/pytest`.
+It accepts all the arguments that are accepted by pytest.
+Depending on your installation options pytest might be invoked directly.

 Test state (postgres data, pageserver state, and log files) will
 be stored under a directory `test_output`.

 You can run all the tests with:

-`pipenv run pytest`
+`./scripts/pytest`

 If you want to run all the tests in a particular file:

-`pipenv run pytest test_pgbench.py`
+`./scripts/pytest test_pgbench.py`

 If you want to run all tests that have the string "bench" in their names:

-`pipenv run pytest -k bench`
+`./scripts/pytest -k bench`

 Useful environment variables:

@@ -48,16 +49,17 @@ Useful environment variables:
 should go.
 `TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
 `ZENITH_PAGESERVER_OVERRIDES`: add a `;`-separated set of configs that will be passed as
 `--pageserver-config-override=${value}` parameter values when zenith cli is invoked
+`FORCE_MOCK_S3`: inits every test's pageserver with a mock S3 used as a remote storage.
 `RUST_LOG`: logging configuration to pass into Zenith CLI

 Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
-`pytest -s --log-cli-level=INFO ...`
+`./scripts/pytest -s --log-cli-level=INFO ...`
 (Note many tests capture subprocess outputs separately, so this may not
 show much.)

 Exit after the first test failure:
-`pytest -x ...`
+`./scripts/pytest -x ...`
 (there are many more pytest options; run `pytest -h` to see them.)

 ### Writing a test
--- a/test_runner/batch_others/test_branch_behind.py
+++ b/test_runner/batch_others/test_branch_behind.py
@@ -109,6 +109,10 @@ def test_branch_behind(zenith_env_builder: ZenithEnvBuilder):

    # branch at pre-initdb lsn
    with pytest.raises(Exception, match="invalid branch start lsn"):
+        env.zenith_cli(["branch", "test_branch_preinitdb", "main@0/42"])
+
+    # branch at pre-ancestor lsn
+    with pytest.raises(Exception, match="less than timeline ancestor lsn"):
        env.zenith_cli(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])

    # check that we cannot create branch based on garbage collected data
--- a/test_runner/batch_others/test_gc_aggressive.py
+++ b/test_runner/batch_others/test_gc_aggressive.py
@@ -0,0 +1,84 @@
+from contextlib import closing
+
+import asyncio
+import asyncpg
+import random
+
+from fixtures.zenith_fixtures import ZenithEnv, Postgres, Safekeeper
+from fixtures.log_helper import log
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+# Test configuration
+#
+# Create a table with {num_rows} rows, and perform {updates_to_perform} random
+# UPDATEs on it, using {num_connections} separate connections.
+num_connections = 10
+num_rows = 100000
+updates_to_perform = 10000
+
+updates_performed = 0
+
+
+# Run random UPDATEs on test table.
+#
+# Each worker opens its own connection and keeps incrementing the counter of a
+# random row until the shared `updates_performed` budget is used up.
+async def update_table(pg: Postgres):
+    global updates_performed
+    pg_conn = await pg.connect_async()
+    try:
+        # There is no `await` between the check and the increment, so the
+        # concurrently running workers cannot overshoot the budget.
+        while updates_performed < updates_to_perform:
+            updates_performed += 1
+            row_id = random.randrange(1, num_rows)
+            await pg_conn.fetchrow(f'UPDATE foo SET counter = counter + 1 WHERE id = {row_id}')
+    finally:
+        # Do not leave the connection dangling once the worker is done
+        await pg_conn.close()
+
+
+# Perform aggressive GC with 0 horizon
+#
+# Keeps sending `do_gc` to the pageserver for as long as the update workers
+# have not used up their budget yet.
+async def gc(env: ZenithEnv, timeline: str):
+    psconn = await env.pageserver.connect_async()
+    try:
+        while updates_performed < updates_to_perform:
+            await psconn.execute(f"do_gc {env.initial_tenant} {timeline} 0")
+    finally:
+        # Do not leave the pageserver connection dangling
+        await psconn.close()
+
+
+# Run {num_connections} UPDATE workers and one GC worker concurrently,
+# and wait until all of them are done
+async def update_and_gc(env: ZenithEnv, pg: Postgres, timeline: str):
+    updaters = [asyncio.create_task(update_table(pg)) for _ in range(num_connections)]
+    collector = asyncio.create_task(gc(env, timeline))
+
+    await asyncio.gather(*updaters, collector)
+
+
+#
+# Aggressively force GC, while running queries.
+#
+# (repro for https://github.com/zenithdb/zenith/issues/1047)
+#
+def test_gc_aggressive(zenith_simple_env: ZenithEnv):
+    global updates_performed
+    # The counter is module-level state: reset it, so that running the test
+    # again in the same process starts with a full budget of updates.
+    updates_performed = 0
+
+    env = zenith_simple_env
+    # Create a branch for us
+    env.zenith_cli(["branch", "test_gc_aggressive", "empty"])
+
+    pg = env.postgres.create_start('test_gc_aggressive')
+    log.info('postgres is running on test_gc_aggressive branch')
+
+    with closing(pg.connect()) as conn:
+        cur = conn.cursor()
+
+        cur.execute("SHOW zenith.zenith_timeline")
+        timeline = cur.fetchone()[0]
+
+        # Create table, and insert {num_rows} rows with a zeroed counter
+        cur.execute('CREATE TABLE foo (id int, counter int, t text)')
+        cur.execute(f'''
+        INSERT INTO foo
+            SELECT g, 0, 'long string to consume some space' || g
+            FROM generate_series(1, {num_rows}) g
+    ''')
+        cur.execute('CREATE INDEX ON foo(id)')
+
+        asyncio.run(update_and_gc(env, pg, timeline))
+
+        # No row may be lost, and every performed UPDATE must be visible
+        cur.execute('SELECT COUNT(*), SUM(counter) FROM foo')
+        assert cur.fetchone() == (num_rows, updates_to_perform)
--- a/test_runner/batch_others/test_next_xid.py
+++ b/test_runner/batch_others/test_next_xid.py
@@ -0,0 +1,61 @@
+import pytest
+import random
+import time
+
+from fixtures.zenith_fixtures import ZenithEnvBuilder
+from fixtures.log_helper import log
+
+pytest_plugins = ("fixtures.zenith_fixtures")
+
+
+# Test restarting page server repeatedly.
+#
+# In every iteration the compute node is stopped, the pageserver is killed
+# (immediate stop) and started again, the compute node is started, and one
+# row is inserted. At the end the table must contain one row per iteration.
+def test_next_xid(zenith_env_builder: ZenithEnvBuilder):
+    # One safekeeper is enough for this test.
+    zenith_env_builder.num_safekeepers = 1
+    env = zenith_env_builder.init()
+
+    pg = env.postgres.create_start('main')
+
+    conn = pg.connect()
+    try:
+        cur = conn.cursor()
+        cur.execute('CREATE TABLE t(x integer)')
+    finally:
+        # Do not keep a client connection open across the restarts below.
+        # NOTE(review): like the INSERT below, this relies on the statement
+        # being committed without an explicit commit() - confirm connect()
+        # returns an autocommit connection.
+        conn.close()
+
+    iterations = 32
+    for i in range(1, iterations + 1):
+        print(f'iteration {i} / {iterations}')
+
+        # Kill and restart the pageserver.
+        pg.stop()
+        env.pageserver.stop(immediate=True)
+        env.pageserver.start()
+        pg.start()
+
+        retry_sleep = 0.5
+        max_retries = 200
+        # One initial attempt plus up to max_retries retries
+        for failures in range(max_retries + 1):
+            try:
+                conn = pg.connect()
+                try:
+                    cur = conn.cursor()
+                    cur.execute(f"INSERT INTO t values({i})")
+                finally:
+                    # Close the connection on failure as well, so failed
+                    # attempts do not pile up open connections
+                    conn.close()
+
+            except Exception as error:
+                # It's normal that it takes some time for the pageserver to
+                # restart, and for the connection to fail until it does. It
+                # should eventually recover, so retry until it succeeds.
+                print(f'failed: {error}')
+                if failures == max_retries:
+                    raise
+                print(f'retry {failures + 1} / {max_retries}')
+                time.sleep(retry_sleep)
+            else:
+                break
+
+    conn = pg.connect()
+    try:
+        cur = conn.cursor()
+        cur.execute("SELECT count(*) FROM t")
+        assert cur.fetchone() == (iterations, )
+    finally:
+        conn.close()
--- a/test_runner/batch_others/test_pageserver_api.py
+++ b/test_runner/batch_others/test_pageserver_api.py
@@ -100,7 +100,7 @@ def check_client(client: ZenithPageserverHttpClient, initial_tenant: str):
    timelines = client.timeline_list(tenant_id)
    assert len(timelines) > 0
    for timeline_id_str in timelines:
-        timeline_details = client.timeline_details(tenant_id.hex, timeline_id_str)
+        timeline_details = client.timeline_detail(tenant_id, UUID(timeline_id_str))
        assert timeline_details['type'] == 'Local'
        assert timeline_details['tenant_id'] == tenant_id.hex
        assert timeline_details['timeline_id'] == timeline_id_str
--- a/test_runner/batch_others/test_remote_storage.py
+++ b/test_runner/batch_others/test_remote_storage.py
@@ -1,11 +1,13 @@
 # It's possible to run any regular test with the local fs remote storage via
-# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/zenith_zzz/'}" pipenv ......
+# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/zenith_zzz/'}" poetry ......

-import tempfile, time, shutil, os
+import time, shutil, os
 from contextlib import closing
 from pathlib import Path
-from fixtures.zenith_fixtures import ZenithEnvBuilder, LocalFsStorage, check_restored_datadir_content
+from uuid import UUID
+from fixtures.zenith_fixtures import ZenithEnvBuilder
 from fixtures.log_helper import log
+import pytest

 pytest_plugins = ("fixtures.zenith_fixtures")

@@ -25,10 +27,18 @@ pytest_plugins = ("fixtures.zenith_fixtures")
 #   * timeline status is polled until it's downloaded
 #   * queries the specific data, ensuring that it matches the one stored before
 #
-def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder):
+# The tests are done for all types of remote storage pageserver supports.
+@pytest.mark.skip(reason="will be fixed with https://github.com/zenithdb/zenith/issues/1193")
+@pytest.mark.parametrize('storage_type', ['local_fs', 'mock_s3'])
+def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder, storage_type: str):
    zenith_env_builder.rust_log_override = 'debug'
    zenith_env_builder.num_safekeepers = 1
-    zenith_env_builder.enable_local_fs_remote_storage()
+    if storage_type == 'local_fs':
+        zenith_env_builder.enable_local_fs_remote_storage()
+    elif storage_type == 'mock_s3':
+        zenith_env_builder.enable_s3_mock_remote_storage('test_remote_storage_backup_and_restore')
+    else:
+        raise RuntimeError(f'Unknown storage type: {storage_type}')

    data_id = 1
    data_secret = 'very secret secret'
@@ -65,11 +75,16 @@ def test_remote_storage_backup_and_restore(zenith_env_builder: ZenithEnvBuilder)
    ##### Second start, restore the data and ensure it's the same
    env.pageserver.start()

-    log.info("waiting for timeline redownload")
    client = env.pageserver.http_client()
+    client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
+    # FIXME cannot handle duplicate download requests (which might be caused by repeated timeline detail calls)
+    #   subject to fix in https://github.com/zenithdb/zenith/issues/997
+    time.sleep(5)
+
+    log.info("waiting for timeline redownload")
    attempts = 0
    while True:
-        timeline_details = client.timeline_details(tenant_id, timeline_id)
+        timeline_details = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
        assert timeline_details['timeline_id'] == timeline_id
        assert timeline_details['tenant_id'] == tenant_id
        if timeline_details['type'] == 'Local':
--- a/test_runner/batch_others/test_tenant_relocation.py
+++ b/test_runner/batch_others/test_tenant_relocation.py
@@ -0,0 +1,267 @@
+from contextlib import closing, contextmanager
+import os
+import pathlib
+import subprocess
+import threading
+from uuid import UUID
+from fixtures.log_helper import log
+import time
+import signal
+import pytest
+
+from fixtures.zenith_fixtures import PgProtocol, PortDistributor, Postgres, ZenithEnvBuilder, ZenithPageserverHttpClient, zenith_binpath, pg_distrib_dir
+
+
+def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
+    """Assert that `b` differs from `a` by less than `margin_ratio`, relative to `a`.
+
+    `a` is the reference value: the absolute difference is divided by the
+    magnitude of `a`. The assertion message carries all three inputs.
+    """
+    if a == 0:
+        # A zero reference has no meaningful relative margin (dividing by it
+        # used to raise ZeroDivisionError); only an equal value is accepted.
+        assert b == 0, (a, b, margin_ratio)
+        return
+    # Divide by |a|: with a negative reference the plain quotient was negative
+    # and therefore passed for any margin.
+    assert abs(a - b) / abs(a) < margin_ratio, (a, b, margin_ratio)
+
+
+@contextmanager
+def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
+                          pageserver_bin: pathlib.Path,
+                          remote_storage_mock_path: pathlib.Path,
+                          pg_port: int,
+                          http_port: int):
+    """
+    Initialize and run a second pageserver for the duration of the `with` block.
+
+    cannot use ZenithPageserver yet because it depends on zenith cli
+    which currently lacks support for multiple pageservers
+
+    The pageserver binary is first run with `--init` against
+    `new_pageserver_dir` (listen addresses, postgres distribution dir and the
+    local-path remote storage are passed as `-c` overrides) and then run again
+    with `--daemonize`. When the context exits, the pid is read from
+    `pageserver.pid` in the workdir and the process is sent SIGQUIT; its
+    termination is not awaited here.
+
+    Raises subprocess.CalledProcessError if either invocation exits non-zero.
+    """
+    # each `-c key=value` override is passed as a single argv element
+    cmd = [
+        str(pageserver_bin),
+        '--init',
+        '--workdir',
+        str(new_pageserver_dir),
+        f"-c listen_pg_addr='localhost:{pg_port}'",
+        f"-c listen_http_addr='localhost:{http_port}'",
+        f"-c pg_distrib_dir='{pg_distrib_dir}'",
+        f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}",
+    ]
+
+    subprocess.check_output(cmd, text=True)
+
+    # actually run new pageserver
+    cmd = [
+        str(pageserver_bin),
+        '--workdir',
+        str(new_pageserver_dir),
+        '--daemonize',
+    ]
+    log.info("starting new pageserver %s", cmd)
+    out = subprocess.check_output(cmd, text=True)
+    log.info("started new pageserver %s", out)
+    try:
+        yield
+    finally:
+        # runs on both normal exit and test failure inside the `with` block
+        log.info("stopping new pageserver")
+        pid = int((new_pageserver_dir / 'pageserver.pid').read_text())
+        os.kill(pid, signal.SIGQUIT)
+
+
+def wait_for(number_of_iterations: int, interval: int, func):
+    """Call `func` until it stops raising and return its result.
+
+    At most `number_of_iterations` attempts are made, sleeping `interval`
+    seconds after every failed one. If no attempt succeeds, an Exception is
+    raised, chained to the last failure seen.
+    """
+    failure = None
+    attempt = 0
+    while attempt < number_of_iterations:
+        attempt += 1
+        try:
+            return func()
+        except Exception as exc:
+            log.info("waiting for %s iteration %s failed", func, attempt)
+            failure = exc
+            time.sleep(interval)
+    raise Exception("timed out while waiting for %s" % func) from failure
+
+
+@contextmanager
+def pg_cur(pg):
+    """Yield a cursor on a fresh connection obtained from `pg.connect()`.
+
+    The cursor context is left first, then the connection is closed.
+    """
+    with closing(pg.connect()) as connection, connection.cursor() as cursor:
+        yield cursor
+
+
+def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Event):
+    """Insert rows into the `load` table, one per connection, until `stop_event` is set.
+
+    Meant to run in a background thread. A failed insert clears
+    `load_ok_event`; the first successful insert after a failure triggers a
+    row-count check and, if it passes, sets `load_ok_event` again. The event
+    is never set if no insert has failed.
+    """
+    log.info("load started")
+
+    inserted_ctr = 0
+    failed = False
+    while not stop_event.is_set():
+        try:
+            with pg_cur(pg) as cur:
+                cur.execute("INSERT INTO load VALUES ('some payload')")
+                inserted_ctr += 1
+        except Exception:
+            # Exception instead of a bare except, so that SystemExit and
+            # KeyboardInterrupt are not swallowed by the retry loop
+            if not failed:
+                log.info("load failed")
+            failed = True
+            load_ok_event.clear()
+        else:
+            if failed:
+                with pg_cur(pg) as cur:
+                    # if we recovered after failure verify that we have correct number of rows
+                    log.info("recovering at %s", inserted_ctr)
+                    cur.execute("SELECT count(*) FROM load")
+                    # it seems that sometimes transaction gets committed before we can acknowledge
+                    # the result, so sometimes selected value is larger by one than we expect
+                    assert cur.fetchone()[0] - inserted_ctr <= 1
+                    log.info("successfully recovered %s", inserted_ctr)
+                    failed = False
+                    load_ok_event.set()
+    log.info('load thread stopped')
+
+
+def assert_local(pageserver_http_client: ZenithPageserverHttpClient, tenant: str, timeline: str):
+    """Fetch the timeline detail and assert that its 'type' is "Local".
+
+    Returns the fetched detail; on failure the detail is the assertion message.
+    """
+    tenant_uuid, timeline_uuid = UUID(tenant), UUID(timeline)
+    detail = pageserver_http_client.timeline_detail(tenant_uuid, timeline_uuid)
+    assert detail.get('type') == "Local", detail
+    return detail
+
+
+@pytest.mark.skip(reason="will be fixed with https://github.com/zenithdb/zenith/issues/1193")
+@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
+def test_tenant_relocation(zenith_env_builder: ZenithEnvBuilder,
+                           port_distributor: PortDistributor,
+                           with_load: str):
+    """Move a tenant from the original pageserver to a freshly started second one.
+
+    Steps: create a tenant with some data, optionally start a background
+    insert load, bootstrap a second pageserver on the same local-fs remote
+    storage, attach the timeline there, repoint the compute node at it, detach
+    the timeline from the old pageserver and verify that reads and writes
+    still work.
+    """
+    zenith_env_builder.num_safekeepers = 1
+    zenith_env_builder.enable_local_fs_remote_storage()
+
+    env = zenith_env_builder.init()
+
+    # create folder for remote storage mock
+    remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
+
+    tenant = env.create_tenant("74ee8b079a0e437eb0afea7d26a07209")
+    log.info("tenant to relocate %s", tenant)
+
+    env.zenith_cli(["branch", "test_tenant_relocation", "main", f"--tenantid={tenant}"])
+
+    tenant_pg = env.postgres.create_start(
+        "test_tenant_relocation",
+        "main",  # branch name, None means same as node name
+        tenant_id=tenant,
+    )
+
+    # insert some data
+    with closing(tenant_pg.connect()) as conn:
+        with conn.cursor() as cur:
+            # save timeline for later gc call
+            cur.execute("SHOW zenith.zenith_timeline")
+            timeline = cur.fetchone()[0]
+            log.info("timeline to relocate %s", timeline)
+
+            # we rely upon autocommit after each statement
+            # as waiting for acceptors happens there
+            cur.execute("CREATE TABLE t(key int primary key, value text)")
+            cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
+            cur.execute("SELECT sum(key) FROM t")
+            assert cur.fetchone() == (500500, )
+
+    if with_load == 'with_load':
+        # create load table
+        with pg_cur(tenant_pg) as cur:
+            cur.execute("CREATE TABLE load(value text)")
+
+        load_stop_event = threading.Event()
+        load_ok_event = threading.Event()
+        load_thread = threading.Thread(target=load,
+                                       args=(tenant_pg, load_stop_event, load_ok_event))
+        load_thread.start()
+
+    # run checkpoint manually to be sure that data landed in remote storage
+    with closing(env.pageserver.connect()) as psconn:
+        with psconn.cursor() as pscur:
+            pscur.execute(f"do_gc {tenant} {timeline}")
+
+    # ensure upload is completed
+    pageserver_http_client = env.pageserver.http_client()
+    timeline_detail = pageserver_http_client.timeline_detail(UUID(tenant), UUID(timeline))
+    assert timeline_detail['disk_consistent_lsn'] == timeline_detail['timeline_state']['Ready']
+
+    log.info("inititalizing new pageserver")
+    # bootstrap second pageserver
+    new_pageserver_dir = env.repo_dir / 'new_pageserver'
+    new_pageserver_dir.mkdir()
+
+    new_pageserver_pg_port = port_distributor.get_port()
+    new_pageserver_http_port = port_distributor.get_port()
+    log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
+    pageserver_bin = pathlib.Path(zenith_binpath) / 'pageserver'
+
+    new_pageserver_http_client = ZenithPageserverHttpClient(port=new_pageserver_http_port,
+                                                            auth_token=None)
+
+    with new_pageserver_helper(new_pageserver_dir,
+                               pageserver_bin,
+                               remote_storage_mock_path,
+                               new_pageserver_pg_port,
+                               new_pageserver_http_port):
+
+        # call to attach timeline to new pageserver
+        new_pageserver_http_client.timeline_attach(UUID(tenant), UUID(timeline))
+        # FIXME cannot handle duplicate download requests, subject to fix in https://github.com/zenithdb/zenith/issues/997
+        time.sleep(5)
+        # new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one
+        # because there were no new writes since checkpoint
+        new_timeline_detail = wait_for(
+            number_of_iterations=5,
+            interval=1,
+            func=lambda: assert_local(new_pageserver_http_client, tenant, timeline))
+        assert new_timeline_detail['timeline_state'].get('Ready'), new_timeline_detail
+        # when load is active these checks can break because lsns are not static
+        # so lets check with some margin
+        if with_load == 'without_load':
+            # TODO revisit this once https://github.com/zenithdb/zenith/issues/1049 is fixed
+            assert_abs_margin_ratio(new_timeline_detail['disk_consistent_lsn'],
+                                    timeline_detail['disk_consistent_lsn'],
+                                    0.01)
+            assert_abs_margin_ratio(new_timeline_detail['timeline_state']['Ready'],
+                                    timeline_detail['timeline_state']['Ready'],
+                                    0.01)
+
+        # callmemaybe to start replication from safekeeper to the new pageserver
+        # when there is no load there is a clean checkpoint and no wal delta
+        # needs to be streamed to the new pageserver
+        # TODO (rodionov) use attach to start replication
+        with pg_cur(PgProtocol(host='localhost', port=new_pageserver_pg_port)) as cur:
+            # "callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={}'"
+            safekeeper_connstring = f"host=localhost port={env.safekeepers[0].port.pg} options='-c ztimelineid={timeline} ztenantid={tenant} pageserver_connstr=postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
+            cur.execute("callmemaybe {} {} {}".format(tenant, timeline, safekeeper_connstring))
+
+        tenant_pg.stop()
+
+        # rewrite zenith cli config to use new pageserver for basebackup to start new compute
+        cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
+        cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
+        cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
+        (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
+
+        tenant_pg_config_file_path = pathlib.Path(tenant_pg.config_file_path())
+        # append through a context manager so the setting is flushed and the
+        # handle is closed before postgres is started and reads the file
+        with tenant_pg_config_file_path.open('a') as tenant_pg_config_file:
+            tenant_pg_config_file.write(
+                f"\nzenith.page_server_connstring = 'postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
+            )
+
+        tenant_pg.start()
+
+        # detach tenant from old pageserver before we check
+        # that all the data is there to be sure that old pageserver
+        # is no longer involved, and if it is, we will see the errors
+        pageserver_http_client.timeline_detach(UUID(tenant), UUID(timeline))
+
+        with pg_cur(tenant_pg) as cur:
+            # check that data is still there
+            cur.execute("SELECT sum(key) FROM t")
+            assert cur.fetchone() == (500500, )
+            # check that we can write new data
+            cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
+            cur.execute("SELECT sum(key) FROM t")
+            assert cur.fetchone() == (2001000, )
+
+        if with_load == 'with_load':
+            assert load_ok_event.wait(1)
+            log.info('stopping load thread')
+            load_stop_event.set()
+            load_thread.join()
+            log.info('load thread stopped')
+
+        # bring old pageserver back for clean shutdown via zenith cli
+        # new pageserver will be shut down by the context manager
+        cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
+        cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"
+        cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'"
+        (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
--- a/test_runner/batch_others/test_timeline_size.py
+++ b/test_runner/batch_others/test_timeline_size.py
@@ -1,8 +1,10 @@
 from contextlib import closing
 from uuid import UUID
 import psycopg2.extras
-from fixtures.zenith_fixtures import ZenithEnv
+import psycopg2.errors
+from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder, Postgres
 from fixtures.log_helper import log
+import time


 def test_timeline_size(zenith_simple_env: ZenithEnv):
@@ -35,3 +37,96 @@ def test_timeline_size(zenith_simple_env: ZenithEnv):

            res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size")
            assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
+
+
+def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60):
+    """Poll `pgmain` until the write lag of its WAL sender drops to 0.
+
+    Each iteration opens a new connection and reads the first row of
+    pg_stat_get_wal_senders(); `write_lag` is the distance between
+    pg_current_wal_lsn() and that row's write_lsn. While no row or no
+    write_lsn is reported yet, polling simply continues.
+
+    polling_interval: seconds to sleep after every poll.
+    timeout: seconds after which RuntimeError is raised.
+    """
+    started_at = time.time()
+
+    write_lag = 1
+    while write_lag > 0:
+        elapsed = time.time() - started_at
+        if elapsed > timeout:
+            raise RuntimeError("timed out waiting for pageserver to reach pg_current_wal_lsn()")
+
+        with closing(pgmain.connect()) as conn:
+            with conn.cursor() as cur:
+                cur.execute('''
+                    select  pg_size_pretty(pg_cluster_size()),
+                    pg_wal_lsn_diff(pg_current_wal_lsn(),write_lsn) as write_lag,
+                    pg_wal_lsn_diff(pg_current_wal_lsn(),sent_lsn) as pending_lag
+                    FROM pg_stat_get_wal_senders();
+                ''')
+                res = cur.fetchone()
+                if res is None or res[1] is None:
+                    # fetchone() yields None when there is no WAL sender row, and
+                    # write_lag is NULL when write_lsn is; neither means "caught
+                    # up", so keep polling until the timeout instead of failing
+                    # with a TypeError
+                    log.info("write_lag is not reported yet")
+                    write_lag = 1
+                else:
+                    log.info(
+                        f"pg_cluster_size = {res[0]}, write_lag = {res[1]}, pending_lag = {res[2]}")
+                    write_lag = res[1]
+
+        time.sleep(polling_interval)
+
+
+def test_timeline_size_quota(zenith_env_builder: ZenithEnvBuilder):
+    """Check that `zenith.max_cluster_size` rejects writes once the limit is hit.
+
+    With a 30MB limit the bulk inserts are expected to fail with DiskFull;
+    after the table is dropped a smaller insert must succeed again.
+    """
+    zenith_env_builder.num_safekeepers = 1
+    env = zenith_env_builder.init()
+    env.zenith_cli(["branch", "test_timeline_size_quota", "main"])
+
+    # sanity check of the size accounting before any compute node is started
+    client = env.pageserver.http_client()
+    res = client.branch_detail(UUID(env.initial_tenant), "test_timeline_size_quota")
+    assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
+
+    pgmain = env.postgres.create_start(
+        "test_timeline_size_quota",
+        # Set small limit for the test
+        config_lines=['zenith.max_cluster_size=30MB'],
+    )
+    log.info("postgres is running on 'test_timeline_size_quota' branch")
+
+    with closing(pgmain.connect()) as conn:
+        with conn.cursor() as cur:
+            cur.execute("CREATE EXTENSION zenith")  # TODO move it to zenith_fixtures?
+
+            cur.execute("CREATE TABLE foo (t text)")
+
+            wait_for_pageserver_catchup(pgmain)
+
+            # Insert many rows. This query must fail because of space limit
+            try:
+                cur.execute('''
+                    INSERT INTO foo
+                        SELECT 'long string to consume some space' || g
+                        FROM generate_series(1, 100000) g
+                ''')
+
+                wait_for_pageserver_catchup(pgmain)
+
+                cur.execute('''
+                    INSERT INTO foo
+                        SELECT 'long string to consume some space' || g
+                        FROM generate_series(1, 500000) g
+                ''')
+
+                # If we get here, the timeline size limit failed
+                log.error("Query unexpectedly succeeded")
+                # AssertionError is not caught by the DiskFull handler below,
+                # so it fails the test
+                assert False
+
+            except psycopg2.errors.DiskFull as err:
+                log.info(f"Query expectedly failed with: {err}")
+
+            # drop table to free space
+            cur.execute('DROP TABLE foo')
+
+            wait_for_pageserver_catchup(pgmain)
+
+            # create it again and insert some rows. This query must succeed
+            cur.execute("CREATE TABLE foo (t text)")
+            cur.execute('''
+                INSERT INTO foo
+                    SELECT 'long string to consume some space' || g
+                    FROM generate_series(1, 10000) g
+            ''')
+
+            wait_for_pageserver_catchup(pgmain)
+
+            # logged for information only, no assertion on the final size
+            cur.execute("SELECT * from pg_size_pretty(pg_cluster_size())")
+            pg_cluster_size = cur.fetchone()
+            log.info(f"pg_cluster_size = {pg_cluster_size}")
--- a/test_runner/batch_others/test_twophase.py
+++ b/test_runner/batch_others/test_twophase.py
@@ -80,8 +80,8 @@ def test_twophase(zenith_simple_env: ZenithEnv):
    cur2.execute("ROLLBACK PREPARED 'insert_two'")

    cur2.execute('SELECT * FROM foo')
-    assert cur2.fetchall() == [('one', ), ('three', )]
+    assert cur2.fetchall() == [('one', ), ('three', )]  # type: ignore[comparison-overlap]

    # Only one committed insert is visible on the original branch
    cur.execute('SELECT * FROM foo')
-    assert cur.fetchall() == [('three', )]
+    assert cur.fetchall() == [('three', )]  # type: ignore[comparison-overlap]
--- a/test_runner/batch_others/test_wal_acceptor.py
+++ b/test_runner/batch_others/test_wal_acceptor.py
@@ -12,7 +12,7 @@ from contextlib import closing
 from dataclasses import dataclass, field
 from multiprocessing import Process, Value
 from pathlib import Path
-from fixtures.zenith_fixtures import PgBin, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
+from fixtures.zenith_fixtures import PgBin, Postgres, Safekeeper, ZenithEnv, ZenithEnvBuilder, PortDistributor, SafekeeperPort, zenith_binpath, PgProtocol
 from fixtures.utils import lsn_to_hex, mkdir_if_needed
 from fixtures.log_helper import log
 from typing import List, Optional, Any
@@ -325,7 +325,7 @@ class ProposerPostgres(PgProtocol):
                 tenant_id: str,
                 listen_addr: str,
                 port: int):
-        super().__init__(host=listen_addr, port=port)
+        super().__init__(host=listen_addr, port=port, username='zenith_admin')

        self.pgdata_dir: str = pgdata_dir
        self.pg_bin: PgBin = pg_bin
@@ -603,3 +603,92 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
        env.postgres.safe_psql("insert into t select generate_series(1, 100)")
        res = env.postgres.safe_psql("select sum(i) from t")[0][0]
        assert res == 5050
+
+
+def test_replace_safekeeper(zenith_env_builder: ZenithEnvBuilder):
+    """Replace one of three safekeepers with a fourth and keep writing throughout.
+
+    Starts four safekeepers but configures compute with sk1-sk3 only, restarts
+    them in turn, stops sk1, recreates compute with sk2-sk4 and finally stops
+    sk2, running the same write payload after every step.
+    """
+    def safekeepers_guc(env: ZenithEnv, sk_names: List[str]) -> str:
+        # comma-separated 'localhost:<pg port>' list for the named safekeepers
+        return ','.join(
+            [f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.name in sk_names])
+
+    def execute_payload(pg: Postgres):
+        # insert a batch of rows and check the sum of keys grew by the expected amount
+        with closing(pg.connect()) as conn:
+            with conn.cursor() as cur:
+                # we rely upon autocommit after each statement
+                # as waiting for acceptors happens there
+                cur.execute('CREATE TABLE IF NOT EXISTS t(key int, value text)')
+                cur.execute("INSERT INTO t VALUES (0, 'something')")
+                cur.execute('SELECT SUM(key) FROM t')
+                sum_before = cur.fetchone()[0]
+
+                cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
+                cur.execute('SELECT SUM(key) FROM t')
+                sum_after = cur.fetchone()[0]
+                # 5000050000 == 1 + 2 + ... + 100000
+                assert sum_after == sum_before + 5000050000
+
+    def show_statuses(safekeepers: List[Safekeeper], tenant_id: str, timeline_id: str):
+        # best-effort logging: a failing status request is logged, not raised
+        for sk in safekeepers:
+            http_cli = sk.http_client()
+            try:
+                status = http_cli.timeline_status(tenant_id, timeline_id)
+                log.info(f"Safekeeper {sk.name} status: {status}")
+            except Exception as e:
+                log.info(f"Safekeeper {sk.name} status error: {e}")
+
+    zenith_env_builder.num_safekeepers = 4
+    env = zenith_env_builder.init()
+    env.zenith_cli(["branch", "test_replace_safekeeper", "main"])
+
+    log.info("Use only first 3 safekeepers")
+    env.safekeepers[3].stop()
+    active_safekeepers = ['sk1', 'sk2', 'sk3']
+    pg = env.postgres.create('test_replace_safekeeper')
+    pg.adjust_for_wal_acceptors(safekeepers_guc(env, active_safekeepers))
+    pg.start()
+
+    # learn zenith timeline from compute
+    tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
+    timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]
+
+    execute_payload(pg)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Restart all safekeepers to flush everything")
+    # one at a time first, writing while each of them is down
+    env.safekeepers[0].stop(immediate=True)
+    execute_payload(pg)
+    env.safekeepers[0].start()
+    env.safekeepers[1].stop(immediate=True)
+    execute_payload(pg)
+    env.safekeepers[1].start()
+    env.safekeepers[2].stop(immediate=True)
+    execute_payload(pg)
+    env.safekeepers[2].start()
+
+    # then all three at once
+    env.safekeepers[0].stop(immediate=True)
+    env.safekeepers[1].stop(immediate=True)
+    env.safekeepers[2].stop(immediate=True)
+    env.safekeepers[0].start()
+    env.safekeepers[1].start()
+    env.safekeepers[2].start()
+
+    execute_payload(pg)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Stop sk1 (simulate failure) and use only quorum of sk2 and sk3")
+    env.safekeepers[0].stop(immediate=True)
+    execute_payload(pg)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Recreate postgres to replace failed sk1 with new sk4")
+    pg.stop_and_destroy().create('test_replace_safekeeper')
+    active_safekeepers = ['sk2', 'sk3', 'sk4']
+    env.safekeepers[3].start()
+    pg.adjust_for_wal_acceptors(safekeepers_guc(env, active_safekeepers))
+    pg.start()
+
+    execute_payload(pg)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Stop sk2 to require quorum of sk3 and sk4 for normal work")
+    env.safekeepers[1].stop(immediate=True)
+    execute_payload(pg)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
--- a/test_runner/batch_others/test_zenith_cli.py
+++ b/test_runner/batch_others/test_zenith_cli.py
@@ -1,8 +1,9 @@
 import json
 import uuid
+import requests

 from psycopg2.extensions import cursor as PgCursor
-from fixtures.zenith_fixtures import ZenithEnv
+from fixtures.zenith_fixtures import ZenithEnv, ZenithEnvBuilder
 from typing import cast

 pytest_plugins = ("fixtures.zenith_fixtures")
@@ -105,3 +106,20 @@ def test_cli_tenant_list(zenith_simple_env: ZenithEnv):
    assert env.initial_tenant in tenants
    assert tenant1 in tenants
    assert tenant2 in tenants
+
+
+def test_cli_ipv4_listeners(zenith_env_builder: ZenithEnvBuilder):
+    """Check that the safekeeper HTTP endpoint answers on the IPv4 loopback address.
+
+    The equivalent pageserver check is kept below but disabled, see the FIXME.
+    """
+    # Start with single sk
+    zenith_env_builder.num_safekeepers = 1
+    env = zenith_env_builder.init()
+
+    # Connect to sk port on v4 loopback
+    res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
+    assert res.ok
+
+    # FIXME Test setup is using localhost:xx in ps config.
+    # Perhaps consider switching test suite to v4 loopback.
+
+    # Connect to ps port on v4 loopback
+    # res = requests.get(f'http://127.0.0.1:{env.pageserver.service_port.http}/v1/status')
+    # assert res.ok
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -0,0 +1,188 @@
+import pytest
+from contextlib import contextmanager
+from abc import ABC, abstractmethod
+
+from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, ZenithEnv
+from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
+
+# Type-related stuff
+from typing import Iterator
+
+
+class PgCompare(ABC):
+    """Common interface of all postgres implementations, useful for benchmarks.
+
+    This class is a helper class for the zenith_with_baseline fixture. See its documentation
+    for more details.
+    """
+    @property
+    @abstractmethod
+    def pg(self) -> PgProtocol:
+        """The PgProtocol endpoint of the postgres instance under test."""
+        pass
+
+    @property
+    @abstractmethod
+    def pg_bin(self) -> PgBin:
+        """The PgBin helper that belongs to this implementation."""
+        pass
+
+    @abstractmethod
+    def flush(self) -> None:
+        """Flush pending state in an implementation-specific way."""
+        pass
+
+    @abstractmethod
+    def report_peak_memory_use(self) -> None:
+        """Record peak memory use as a benchmark metric, if the implementation can measure it."""
+        pass
+
+    @abstractmethod
+    def report_size(self) -> None:
+        """Record the on-disk size of the data as one or more benchmark metrics."""
+        pass
+
+    @contextmanager
+    @abstractmethod
+    def record_pageserver_writes(self, out_name):
+        """Context manager around a measured section; what is recorded under
+        `out_name` is up to the implementation."""
+        pass
+
+    @contextmanager
+    @abstractmethod
+    def record_duration(self, out_name):
+        """Context manager around a measured section whose duration is recorded
+        under `out_name`."""
+        pass
+
+
+class ZenithCompare(PgCompare):
+    """PgCompare interface for the zenith stack."""
+    def __init__(self,
+                 zenbenchmark: ZenithBenchmarker,
+                 zenith_simple_env: ZenithEnv,
+                 pg_bin: PgBin,
+                 branch_name):
+        """Create branch `branch_name` from "empty", start postgres on it and
+        open a pageserver connection that is reused by flush()."""
+        self.env = zenith_simple_env
+        self.zenbenchmark = zenbenchmark
+        self._pg_bin = pg_bin
+
+        # We only use one branch and one timeline
+        self.branch = branch_name
+        self.env.zenith_cli(["branch", self.branch, "empty"])
+        self._pg = self.env.postgres.create_start(self.branch)
+        self.timeline = self.pg.safe_psql("SHOW zenith.zenith_timeline")[0][0]
+
+        # Long-lived cursor, useful for flushing
+        # NOTE(review): nothing in this class closes the connection or the cursor
+        self.psconn = self.env.pageserver.connect()
+        self.pscur = self.psconn.cursor()
+
+    @property
+    def pg(self):
+        return self._pg
+
+    @property
+    def pg_bin(self):
+        return self._pg_bin
+
+    def flush(self):
+        """Run `do_gc` with a trailing 0 for the initial tenant and this
+        timeline on the pageserver connection."""
+        self.pscur.execute(f"do_gc {self.env.initial_tenant} {self.timeline} 0")
+
+    def report_peak_memory_use(self) -> None:
+        """Record the pageserver's peak memory as the 'peak_mem' metric."""
+        # presumably get_peak_mem() returns KB, hence the / 1024 to MB -- TODO confirm
+        self.zenbenchmark.record("peak_mem",
+                                 self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024,
+                                 'MB',
+                                 report=MetricReport.LOWER_IS_BETTER)
+
+    def report_size(self) -> None:
+        """Record the timeline size as the 'size' metric."""
+        # presumably get_timeline_size() returns bytes -- TODO confirm
+        timeline_size = self.zenbenchmark.get_timeline_size(self.env.repo_dir,
+                                                            self.env.initial_tenant,
+                                                            self.timeline)
+        self.zenbenchmark.record('size',
+                                 timeline_size / (1024 * 1024),
+                                 'MB',
+                                 report=MetricReport.LOWER_IS_BETTER)
+
+    def record_pageserver_writes(self, out_name):
+        """Delegate to the benchmarker, passing this environment's pageserver."""
+        return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name)
+
+    def record_duration(self, out_name):
+        """Delegate to the benchmarker's record_duration."""
+        return self.zenbenchmark.record_duration(out_name)
+
+
+class VanillaCompare(PgCompare):
+    """PgCompare interface for vanilla postgres."""
+    def __init__(self, zenbenchmark, vanilla_pg: VanillaPostgres):
+        """Configure `vanilla_pg` with shared_buffers=1MB, start it and open a
+        connection that is reused by flush()."""
+        self._pg = vanilla_pg
+        self.zenbenchmark = zenbenchmark
+        vanilla_pg.configure(['shared_buffers=1MB'])
+        vanilla_pg.start()
+
+        # Long-lived cursor, useful for flushing
+        # NOTE(review): nothing in this class closes the connection or the cursor
+        self.conn = self.pg.connect()
+        self.cur = self.conn.cursor()
+
+    @property
+    def pg(self):
+        return self._pg
+
+    @property
+    def pg_bin(self):
+        return self._pg.pg_bin
+
+    def flush(self):
+        """Issue a postgres `checkpoint` on the long-lived connection."""
+        self.cur.execute("checkpoint")
+
+    def report_peak_memory_use(self) -> None:
+        """Not implemented yet: records nothing."""
+        pass  # TODO find something
+
+    def report_size(self) -> None:
+        """Record the sizes of the 'base' and 'pg_wal' subdirectories as the
+        'data_size' and 'wal_size' metrics."""
+        # presumably get_subdir_size() returns bytes -- TODO confirm
+        data_size = self.pg.get_subdir_size('base')
+        self.zenbenchmark.record('data_size',
+                                 data_size / (1024 * 1024),
+                                 'MB',
+                                 report=MetricReport.LOWER_IS_BETTER)
+        wal_size = self.pg.get_subdir_size('pg_wal')
+        self.zenbenchmark.record('wal_size',
+                                 wal_size / (1024 * 1024),
+                                 'MB',
+                                 report=MetricReport.LOWER_IS_BETTER)
+
+    @contextmanager
+    def record_pageserver_writes(self, out_name):
+        """No pageserver is involved here, so the section runs without recording anything."""
+        yield  # Do nothing
+
+    def record_duration(self, out_name):
+        """Delegate to the benchmarker's record_duration."""
+        return self.zenbenchmark.record_duration(out_name)
+
+
+@pytest.fixture(scope='function')
+def zenith_compare(request, zenbenchmark, pg_bin, zenith_simple_env) -> ZenithCompare:
+    """Per-test ZenithCompare whose branch is named after the requesting test node."""
+    return ZenithCompare(zenbenchmark, zenith_simple_env, pg_bin, request.node.name)
+
+
+@pytest.fixture(scope='function')
+def vanilla_compare(zenbenchmark, vanilla_pg) -> VanillaCompare:
+    """Per-test VanillaCompare built on the `vanilla_pg` fixture."""
+    compare = VanillaCompare(zenbenchmark, vanilla_pg)
+    return compare
+
+
+@pytest.fixture(params=["vanilla_compare", "zenith_compare"], ids=["vanilla", "zenith"])
+def zenith_with_baseline(request) -> PgCompare:
+    """Parameterized fixture that helps compare zenith against vanilla postgres.
+
+    A test that uses this fixture turns into a parameterized test that runs against:
+    1. A vanilla postgres instance
+    2. A simple zenith env (see zenith_simple_env)
+    3. Possibly other postgres protocol implementations.
+
+    The main goal of this fixture is to make it easier for people to read and write
+    performance tests. Easy test writing leads to more tests.
+
+    Perfect encapsulation of the postgres implementations is **not** a goal because
+    it's impossible. Operational and configuration differences in the different
+    implementations sometimes matter, and the writer of the test should be mindful
+    of that.
+
+    If a test requires some one-off special implementation-specific logic, use of
+    isinstance(zenith_with_baseline, ZenithCompare) is encouraged. Though if that
+    implementation-specific logic is widely useful across multiple tests, it might
+    make sense to add methods to the PgCompare class.
+    """
+    # request.param is the name of the fixture to resolve for this run
+    resolved = request.getfixturevalue(request.param)
+    if not isinstance(resolved, PgCompare):
+        raise AssertionError(f"test error: fixture {request.param} is not PgCompare")
+    return resolved
--- a/test_runner/fixtures/log_helper.py
+++ b/test_runner/fixtures/log_helper.py
@@ -3,11 +3,11 @@ import logging.config
 """
 This file configures logging to use in python tests.
 Logs are automatically captured and shown in their
-own section after all tests are executed. 
+own section after all tests are executed.

 To see logs for all (even successful) tests, run
 pytest with the following command:
- `pipenv run pytest -n8 -rA`
+- `poetry run pytest -n8 -rA`

 Other log config can be set in pytest.ini file.
 You can add `log_cli = true` to it to watch
@@ -34,7 +34,7 @@ LOGGING = {

 def getLogger(name='root') -> logging.Logger:
    """Method to get logger for tests.
-    
+
    Should be used to get correctly initialized logger. """
    return logging.getLogger(name)

--- a/test_runner/fixtures/zenith_fixtures.py
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -4,6 +4,7 @@ from dataclasses import dataclass, field
 from cached_property import cached_property
 import asyncpg
 import os
+import boto3
 import pathlib
 import uuid
 import warnings
@@ -183,6 +184,16 @@ def worker_base_port(worker_seq_no: int):
    return BASE_PORT + worker_seq_no * WORKER_PORT_NUM


+def get_dir_size(path: str) -> int:
+    """Return the combined size, in bytes, of all files found under `path`."""
+    return sum(
+        os.path.getsize(os.path.join(dirpath, filename))
+        for dirpath, _subdirs, filenames in os.walk(path)
+        for filename in filenames)
+
+
 def can_bind(host: str, port: int) -> bool:
    """
    Check whether a host:port is available to bind for listening
@@ -229,7 +240,7 @@ class PgProtocol:
    def __init__(self, host: str, port: int, username: Optional[str] = None):
        self.host = host
        self.port = port
-        self.username = username or "zenith_admin"
+        self.username = username

    def connstr(self,
                *,
@@ -241,10 +252,15 @@ class PgProtocol:
        """

        username = username or self.username
-        res = f'host={self.host} port={self.port} user={username} dbname={dbname}'
-        if not password:
-            return res
-        return f'{res} password={password}'
+        res = f'host={self.host} port={self.port} dbname={dbname}'
+
+        if username:
+            res = f'{res} user={username}'
+
+        if password:
+            res = f'{res} password={password}'
+
+        return res

    # autocommit=True here by default because that's what we need most of the time
    def connect(self,
@@ -330,6 +346,62 @@ class AuthKeys:
        return token


+class MockS3Server:
+    """
+    Starts a mock S3 server for testing on the given port, errors if the server fails to start or exits prematurely.
+    Relies on `poetry` and the `moto` server being installed, since that is how the tests are run.
+
+    Also provides methods to get the connection properties and a method to kill the underlying server.
+    """
+    def __init__(
+        self,
+        port: int,
+    ):
+        self.port = port
+
+        # Launch the server directly rather than through `shell=True`: `kill()` below signals only
+        # the direct child process, so an intermediate shell would be killed while the server
+        # it spawned could be left running after the tests.
+        try:
+            self.subprocess = subprocess.Popen(
+                ['poetry', 'run', 'moto_server', 's3', f'-p{port}'])
+        except OSError as e:
+            log.error(f"expected mock s3 server to start but it failed with exception: {e}")
+            raise RuntimeError("failed to start s3 mock server") from e
+
+        error = None
+        try:
+            return_code = self.subprocess.poll()
+            if return_code is not None:
+                error = f"expected mock s3 server to run but it exited with code {return_code}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
+        except Exception as e:
+            error = f"expected mock s3 server to start but it failed with exception: {e}. stdout: '{self.subprocess.stdout}', stderr: '{self.subprocess.stderr}'"
+        if error is not None:
+            log.error(error)
+            self.subprocess.kill()
+            raise RuntimeError("failed to start s3 mock server")
+
+    def endpoint(self) -> str:
+        """Return the HTTP URL (on 127.0.0.1) of the port the server was started with."""
+        return f"http://127.0.0.1:{self.port}"
+
+    def region(self) -> str:
+        """Return the region name clients should use with the mock server."""
+        return 'us-east-1'
+
+    def access_key(self) -> str:
+        """Return the access key clients should use with the mock server."""
+        return 'test'
+
+    def secret_key(self) -> str:
+        """Return the secret key clients should use with the mock server."""
+        return 'test'
+
+    def kill(self):
+        """Kill the mock server process."""
+        self.subprocess.kill()
+
+
 class ZenithEnvBuilder:
    """
    Builder object to create a Zenith runtime environment
@@ -354,17 +412,57 @@ class ZenithEnvBuilder:
        self.pageserver_auth_enabled = pageserver_auth_enabled
        self.env: Optional[ZenithEnv] = None

+        self.s3_mock_server: Optional[MockS3Server] = None
+
+        if os.getenv('FORCE_MOCK_S3') is not None:
+            bucket_name = f'{repo_dir.name}_bucket'
+            log.warning(f'Unconditionally initializing mock S3 server for bucket {bucket_name}')
+            self.enable_s3_mock_remote_storage(bucket_name)
+
    def init(self) -> ZenithEnv:
        # Cannot create more than one environment from one builder
        assert self.env is None, "environment already initialized"
        self.env = ZenithEnv(self)
        return self.env

-    def enable_local_fs_remote_storage(self):
-        assert self.pageserver_remote_storage is None, "remote storage is enabled already"
+    def enable_local_fs_remote_storage(self, force_enable=True):
+        """
+        Sets up the pageserver to use the local fs at the `local_fs_remote_storage` path inside `repo_dir`.
+        Fails an assertion if some remote storage is configured already, unless `force_enable` is `True`
+        (the default), in which case the existing configuration is replaced.
+        """
+        assert force_enable or self.pageserver_remote_storage is None, "remote storage is enabled already"
        self.pageserver_remote_storage = LocalFsStorage(
            Path(self.repo_dir / 'local_fs_remote_storage'))

+    def enable_s3_mock_remote_storage(self, bucket_name: str, force_enable=True):
+        """
+        Sets up the pageserver to use the S3 mock server and creates the bucket `bucket_name` on it.
+        Starts up the mock server on a port from the port distributor, if it is not running yet.
+        Fails an assertion if some remote storage is configured already, unless `force_enable` is `True`.
+        """
+        assert force_enable or self.pageserver_remote_storage is None, "remote storage is enabled already"
+        if not self.s3_mock_server:
+            self.s3_mock_server = MockS3Server(self.port_distributor.get_port())
+
+        mock_endpoint = self.s3_mock_server.endpoint()
+        mock_region = self.s3_mock_server.region()
+        mock_access_key = self.s3_mock_server.access_key()
+        mock_secret_key = self.s3_mock_server.secret_key()
+        # Create the bucket up front, using the same connection properties the pageserver gets.
+        boto3.client(
+            's3',
+            endpoint_url=mock_endpoint,
+            region_name=mock_region,
+            aws_access_key_id=mock_access_key,
+            aws_secret_access_key=mock_secret_key,
+        ).create_bucket(Bucket=bucket_name)
+        self.pageserver_remote_storage = S3Storage(bucket=bucket_name,
+                                                   endpoint=mock_endpoint,
+                                                   region=mock_region,
+                                                   access_key=mock_access_key,
+                                                   secret_key=mock_secret_key)
+
    def __enter__(self):
        return self

@@ -377,6 +475,8 @@ class ZenithEnvBuilder:
            for sk in self.env.safekeepers:
                sk.stop(immediate=True)
            self.env.pageserver.stop(immediate=True)
+            if self.s3_mock_server:
+                self.s3_mock_server.kill()


 class ZenithEnv:
@@ -415,6 +515,7 @@ class ZenithEnv:
        self.repo_dir = config.repo_dir
        self.rust_log_override = config.rust_log_override
        self.port_distributor = config.port_distributor
+        self.s3_mock_server = config.s3_mock_server

        self.postgres = PostgresFactory(self)

@@ -600,6 +701,8 @@ def zenith_simple_env(_shared_simple_env: ZenithEnv) -> Iterator[ZenithEnv]:
    yield _shared_simple_env

    _shared_simple_env.postgres.stop_all()
+    if _shared_simple_env.s3_mock_server:
+        _shared_simple_env.s3_mock_server.kill()


@pytest.fixture(scope='function')
@@ -637,6 +740,16 @@ class ZenithPageserverHttpClient(requests.Session):
    def check_status(self):
        self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()

+    def timeline_attach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
+        res = self.post(
+            f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}/attach", )
+        res.raise_for_status()
+
+    def timeline_detach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
+        res = self.post(
+            f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}/detach", )
+        res.raise_for_status()
+
    def branch_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
        res = self.get(f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}")
        res.raise_for_status()
@@ -689,8 +802,9 @@ class ZenithPageserverHttpClient(requests.Session):
        assert isinstance(res_json, list)
        return res_json

-    def timeline_details(self, tenant_id: str, timeline_id: str) -> Dict[Any, Any]:
-        res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id}/{timeline_id}")
+    def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
+        res = self.get(
+            f"http://localhost:{self.port}/v1/timeline/{tenant_id.hex}/{timeline_id.hex}")
        res.raise_for_status()
        res_json = res.json()
        assert isinstance(res_json, dict)
@@ -717,8 +831,9 @@ class LocalFsStorage:
 class S3Storage:
    bucket: str
    region: str
-    access_key: str
-    secret_key: str
+    access_key: Optional[str]
+    secret_key: Optional[str]
+    endpoint: Optional[str]


 RemoteStorage = Union[LocalFsStorage, S3Storage]
@@ -735,7 +850,7 @@ class ZenithPageserver(PgProtocol):
                 port: PageserverPort,
                 remote_storage: Optional[RemoteStorage] = None,
                 enable_auth=False):
-        super().__init__(host='localhost', port=port.pg)
+        super().__init__(host='localhost', port=port.pg, username='zenith_admin')
        self.env = env
        self.running = False
        self.service_port = port  # do not shadow PgProtocol.port which is just int
@@ -791,8 +906,14 @@ def append_pageserver_param_overrides(params_to_update: List[str],
            pageserver_storage_override = f"local_path='{pageserver_remote_storage.root}'"
        elif isinstance(pageserver_remote_storage, S3Storage):
            pageserver_storage_override = f"bucket_name='{pageserver_remote_storage.bucket}',\
-                bucket_region='{pageserver_remote_storage.region}',access_key_id='{pageserver_remote_storage.access_key}',\
-                secret_access_key='{pageserver_remote_storage.secret_key}'"
+                bucket_region='{pageserver_remote_storage.region}'"
+
+            if pageserver_remote_storage.access_key is not None:
+                pageserver_storage_override += f",access_key_id='{pageserver_remote_storage.access_key}'"
+            if pageserver_remote_storage.secret_key is not None:
+                pageserver_storage_override += f",secret_access_key='{pageserver_remote_storage.secret_key}'"
+            if pageserver_remote_storage.endpoint is not None:
+                pageserver_storage_override += f",endpoint='{pageserver_remote_storage.endpoint}'"

        else:
            raise Exception(f'Unknown storage configuration {pageserver_remote_storage}')
@@ -867,10 +988,73 @@ def pg_bin(test_output_dir: str) -> PgBin:
    return PgBin(test_output_dir)


+class VanillaPostgres(PgProtocol):
+    """
+    A plain Postgres cluster driven through `initdb` and `pg_ctl`.
+
+    The data directory is initialized on construction. Call `configure()` before `start()` to
+    adjust postgresql.conf. Usable as a context manager that stops the server on exit if it runs.
+    """
+    def __init__(self, pgdatadir: str, pg_bin: PgBin, port: int):
+        super().__init__(host='localhost', port=port)
+        self.pgdatadir = pgdatadir
+        self.pg_bin = pg_bin
+        self.running = False
+        self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
+        # Make the server listen on the port the connection strings are built with,
+        # instead of relying on `port` happening to match the server's default.
+        self.configure([f"port = {port}"])
+
+    def configure(self, options: List[str]) -> None:
+        """
+        Append lines into postgresql.conf file.
+
+        Every option ends up on its own line: a newline is added to each option that lacks one.
+        """
+        assert not self.running
+        with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
+            conf_file.writelines(opt if opt.endswith('\n') else f'{opt}\n' for opt in options)
+
+    def start(self) -> None:
+        """Start the server with `pg_ctl start`; it must not be marked as running."""
+        assert not self.running
+        self.running = True
+        self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'start'])
+
+    def stop(self) -> None:
+        """Stop the server with `pg_ctl stop`; it must be marked as running."""
+        assert self.running
+        self.running = False
+        self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'stop'])
+
+    def get_subdir_size(self, subdir) -> int:
+        """Return size of pgdatadir subdirectory in bytes."""
+        return get_dir_size(os.path.join(self.pgdatadir, subdir))
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        # Only stop the server if it was started and has not been stopped yet.
+        if self.running:
+            self.stop()
+
+
+@pytest.fixture(scope='function')
+def vanilla_pg(test_output_dir: str) -> Iterator[VanillaPostgres]:
+    """Yield an initialized, not yet started, VanillaPostgres; stop it on teardown if running."""
+    pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla")
+    pg_bin = PgBin(test_output_dir)
+    # NOTE(review): the port is hard-coded, so this presumably clashes with any other server
+    # on 5432 and with parallel test workers -- consider allocating a free port instead.
+    with VanillaPostgres(pgdatadir, pg_bin, 5432) as vanilla_pg:
+        yield vanilla_pg
+
+
 class Postgres(PgProtocol):
    """ An object representing a running postgres daemon. """
    def __init__(self, env: ZenithEnv, tenant_id: str, port: int):
-        super().__init__(host='localhost', port=port)
+        super().__init__(host='localhost', port=port, username='zenith_admin')

        self.env = env
        self.running = False
@@ -908,8 +1073,6 @@ class Postgres(PgProtocol):
        path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
        self.pgdata_dir = os.path.join(self.env.repo_dir, path)

-        if self.env.safekeepers:
-            self.adjust_for_wal_acceptors(self.env.get_safekeeper_connstrs())
        if config_lines is None:
            config_lines = []
        self.config(config_lines)
@@ -966,7 +1129,9 @@ class Postgres(PgProtocol):
                # walproposer uses different application_name
                if ("synchronous_standby_names" in cfg_line or
                        # don't ask pageserver to fetch WAL from compute
-                        "callmemaybe_connstring" in cfg_line):
+                        "callmemaybe_connstring" in cfg_line or
+                        # don't repeat wal_acceptors multiple times
+                        "wal_acceptors" in cfg_line):
                    continue
                f.write(cfg_line)
            f.write("synchronous_standby_names = 'walproposer'\n")
--- a/test_runner/performance/test_bulk_insert.py
+++ b/test_runner/performance/test_bulk_insert.py
@@ -2,8 +2,13 @@ from contextlib import closing
 from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


 #
@@ -16,47 +21,19 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # 3. Disk space used
 # 4. Peak memory usage
 #
-def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_bulk_insert", "empty"])
-
-    pg = env.postgres.create_start('test_bulk_insert')
-    log.info("postgres is running on 'test_bulk_insert' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
+def test_bulk_insert(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline

-    # Get the timeline ID of our branch. We need it for the 'do_gc' command
+    # Open a connection to the Postgres instance under test
-    with closing(pg.connect()) as conn:
+    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
-            cur.execute("SHOW zenith.zenith_timeline")
-            timeline = cur.fetchone()[0]
-
            cur.execute("create table huge (i int, j int);")

            # Run INSERT, recording the time and I/O it takes
-            with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
-                with zenbenchmark.record_duration('insert'):
+            with env.record_pageserver_writes('pageserver_writes'):
+                with env.record_duration('insert'):
                    cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
+                    env.flush()

-                    # Flush the layers from memory to disk. This is included in the reported
-                    # time and I/O
-                    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
-
-            # Record peak memory usage
-            zenbenchmark.record("peak_mem",
-                                zenbenchmark.get_peak_mem(env.pageserver) / 1024,
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
-
-            # Report disk space used by the repository
-            timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
-                                                           env.initial_tenant,
-                                                           timeline)
-            zenbenchmark.record('size',
-                                timeline_size / (1024 * 1024),
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
+            env.report_peak_memory_use()
+            env.report_size()
--- a/test_runner/performance/test_copy.py
+++ b/test_runner/performance/test_copy.py
@@ -2,10 +2,15 @@ from contextlib import closing
 from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
 from io import BufferedReader, RawIOBase
 from itertools import repeat

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


 class CopyTestData(RawIOBase):
@@ -42,77 +47,41 @@ def copy_test_data(rows: int):
 #
 # COPY performance tests.
 #
-def test_copy(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_copy", "empty"])
-
-    pg = env.postgres.create_start('test_copy')
-    log.info("postgres is running on 'test_copy' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
+def test_copy(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline

-    # Get the timeline ID of our branch. We need it for the pageserver 'checkpoint' command
+    # Open a connection to the Postgres instance under test
-    with closing(pg.connect()) as conn:
+    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
-            cur.execute("SHOW zenith.zenith_timeline")
-            timeline = cur.fetchone()[0]
-
            cur.execute("create table copytest (i int, t text);")

            # Load data with COPY, recording the time and I/O it takes.
            #
            # Since there's no data in the table previously, this extends it.
-            with zenbenchmark.record_pageserver_writes(env.pageserver,
-                                                       'copy_extend_pageserver_writes'):
-                with zenbenchmark.record_duration('copy_extend'):
+            with env.record_pageserver_writes('copy_extend_pageserver_writes'):
+                with env.record_duration('copy_extend'):
                    cur.copy_from(copy_test_data(1000000), 'copytest')
-                    # Flush the layers from memory to disk. This is included in the reported
-                    # time and I/O
-                    pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
+                    env.flush()

            # Delete most rows, and VACUUM to make the space available for reuse.
-            with zenbenchmark.record_pageserver_writes(env.pageserver, 'delete_pageserver_writes'):
-                with zenbenchmark.record_duration('delete'):
+            with env.record_pageserver_writes('delete_pageserver_writes'):
+                with env.record_duration('delete'):
                    cur.execute("delete from copytest where i % 100 <> 0;")
-                    # Flush the layers from memory to disk. This is included in the reported
-                    # time and I/O
-                    pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
+                    env.flush()

-            with zenbenchmark.record_pageserver_writes(env.pageserver, 'vacuum_pageserver_writes'):
-                with zenbenchmark.record_duration('vacuum'):
+            with env.record_pageserver_writes('vacuum_pageserver_writes'):
+                with env.record_duration('vacuum'):
                    cur.execute("vacuum copytest")
-                    # Flush the layers from memory to disk. This is included in the reported
-                    # time and I/O
-                    pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
+                    env.flush()

            # Load data into the table again. This time, this will use the space free'd
            # by the VACUUM.
            #
            # This will also clear all the VM bits.
-            with zenbenchmark.record_pageserver_writes(env.pageserver,
-                                                       'copy_reuse_pageserver_writes'):
-                with zenbenchmark.record_duration('copy_reuse'):
+            with env.record_pageserver_writes('copy_reuse_pageserver_writes'):
+                with env.record_duration('copy_reuse'):
                    cur.copy_from(copy_test_data(1000000), 'copytest')
+                    env.flush()

-                    # Flush the layers from memory to disk. This is included in the reported
-                    # time and I/O
-                    pscur.execute(f"checkpoint {env.initial_tenant} {timeline}")
-
-            # Record peak memory usage
-            zenbenchmark.record("peak_mem",
-                                zenbenchmark.get_peak_mem(env.pageserver) / 1024,
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
-
-            # Report disk space used by the repository
-            timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
-                                                           env.initial_tenant,
-                                                           timeline)
-            zenbenchmark.record('size',
-                                timeline_size / (1024 * 1024),
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
+            env.report_peak_memory_use()
+            env.report_size()
--- a/test_runner/performance/test_gist_build.py
+++ b/test_runner/performance/test_gist_build.py
@@ -2,9 +2,14 @@ import os
 from contextlib import closing
 from fixtures.benchmark_fixture import MetricReport
 from fixtures.zenith_fixtures import ZenithEnv
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
 from fixtures.log_helper import log

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


 #
@@ -12,24 +17,11 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # As of this writing, we're duplicate those giant WAL records for each page,
 # which makes the delta layer about 32x larger than it needs to be.
 #
-def test_gist_buffering_build(zenith_simple_env: ZenithEnv, zenbenchmark):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_gist_buffering_build", "empty"])
+def test_gist_buffering_build(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline

-    pg = env.postgres.create_start('test_gist_buffering_build')
-    log.info("postgres is running on 'test_gist_buffering_build' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
-
-    # Get the timeline ID of our branch. We need it for the 'do_gc' command
-    with closing(pg.connect()) as conn:
+    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
-            cur.execute("SHOW zenith.zenith_timeline")
-            timeline = cur.fetchone()[0]

            # Create test table.
            cur.execute("create table gist_point_tbl(id int4, p point)")
@@ -38,27 +30,12 @@ def test_gist_buffering_build(zenith_simple_env: ZenithEnv, zenbenchmark):
            )

            # Build the index.
-            with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
-                with zenbenchmark.record_duration('build'):
+            with env.record_pageserver_writes('pageserver_writes'):
+                with env.record_duration('build'):
                    cur.execute(
                        "create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
                    )
+                    env.flush()

-                    # Flush the layers from memory to disk. This is included in the reported
-                    # time and I/O
-                    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 1000000")
-
-            # Record peak memory usage
-            zenbenchmark.record("peak_mem",
-                                zenbenchmark.get_peak_mem(env.pageserver) / 1024,
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
-
-            # Report disk space used by the repository
-            timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
-                                                           env.initial_tenant,
-                                                           timeline)
-            zenbenchmark.record('size',
-                                timeline_size / (1024 * 1024),
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
+            env.report_peak_memory_use()
+            env.report_size()
--- a/test_runner/performance/test_parallel_copy_to.py
+++ b/test_runner/performance/test_parallel_copy_to.py
@@ -1,11 +1,16 @@
 from io import BytesIO
 import asyncio
 import asyncpg
-from fixtures.zenith_fixtures import ZenithEnv, Postgres
+from fixtures.zenith_fixtures import ZenithEnv, Postgres, PgProtocol
 from fixtures.log_helper import log
 from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


 async def repeat_bytes(buf, repetitions: int):
@@ -13,7 +18,7 @@ async def repeat_bytes(buf, repetitions: int):
        yield buf


-async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str):
+async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: str):
    buf = BytesIO()
    for i in range(1000):
        buf.write(
@@ -26,7 +31,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
    await pg_conn.copy_to_table(table_name, source=copy_input)


-async def parallel_load_different_tables(pg: Postgres, n_parallel: int):
+async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int):
    workers = []
    for worker_id in range(n_parallel):
        worker = copy_test_data_to_table(pg, worker_id, f'copytest_{worker_id}')
@@ -37,54 +42,25 @@ async def parallel_load_different_tables(pg: Postgres, n_parallel: int):


 # Load 5 different tables in parallel with COPY TO
-def test_parallel_copy_different_tables(zenith_simple_env: ZenithEnv,
-                                        zenbenchmark: ZenithBenchmarker,
-                                        n_parallel=5):
+def test_parallel_copy_different_tables(zenith_with_baseline: PgCompare, n_parallel=5):

-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_parallel_copy_different_tables", "empty"])
-
-    pg = env.postgres.create_start('test_parallel_copy_different_tables')
-    log.info("postgres is running on 'test_parallel_copy_different_tables' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
-
-    # Get the timeline ID of our branch. We need it for the 'do_gc' command
-    conn = pg.connect()
+    env = zenith_with_baseline
+    conn = env.pg.connect()
    cur = conn.cursor()
-    cur.execute("SHOW zenith.zenith_timeline")
-    timeline = cur.fetchone()[0]

    for worker_id in range(n_parallel):
        cur.execute(f'CREATE TABLE copytest_{worker_id} (i int, t text)')

-    with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
-        with zenbenchmark.record_duration('load'):
-            asyncio.run(parallel_load_different_tables(pg, n_parallel))
+    with env.record_pageserver_writes('pageserver_writes'):
+        with env.record_duration('load'):
+            asyncio.run(parallel_load_different_tables(env.pg, n_parallel))
+            env.flush()

-            # Flush the layers from memory to disk. This is included in the reported
-            # time and I/O
-            pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
-
-    # Record peak memory usage
-    zenbenchmark.record("peak_mem",
-                        zenbenchmark.get_peak_mem(env.pageserver) / 1024,
-                        'MB',
-                        report=MetricReport.LOWER_IS_BETTER)
-
-    # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
-    zenbenchmark.record('size',
-                        timeline_size / (1024 * 1024),
-                        'MB',
-                        report=MetricReport.LOWER_IS_BETTER)
+    env.report_peak_memory_use()
+    env.report_size()


-async def parallel_load_same_table(pg: Postgres, n_parallel: int):
+async def parallel_load_same_table(pg: PgProtocol, n_parallel: int):
    workers = []
    for worker_id in range(n_parallel):
        worker = copy_test_data_to_table(pg, worker_id, f'copytest')
@@ -95,46 +71,17 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):


 # Load data into one table with COPY TO from 5 parallel connections
-def test_parallel_copy_same_table(zenith_simple_env: ZenithEnv,
-                                  zenbenchmark: ZenithBenchmarker,
-                                  n_parallel=5):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_parallel_copy_same_table", "empty"])
-
-    pg = env.postgres.create_start('test_parallel_copy_same_table')
-    log.info("postgres is running on 'test_parallel_copy_same_table' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
-
-    # Get the timeline ID of our branch. We need it for the 'do_gc' command
-    conn = pg.connect()
+def test_parallel_copy_same_table(zenith_with_baseline: PgCompare, n_parallel=5):
+    env = zenith_with_baseline
+    conn = env.pg.connect()
    cur = conn.cursor()
-    cur.execute("SHOW zenith.zenith_timeline")
-    timeline = cur.fetchone()[0]

    cur.execute(f'CREATE TABLE copytest (i int, t text)')

-    with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
-        with zenbenchmark.record_duration('load'):
-            asyncio.run(parallel_load_same_table(pg, n_parallel))
+    with env.record_pageserver_writes('pageserver_writes'):
+        with env.record_duration('load'):
+            asyncio.run(parallel_load_same_table(env.pg, n_parallel))
+            env.flush()

-            # Flush the layers from memory to disk. This is included in the reported
-            # time and I/O
-            pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
-
-    # Record peak memory usage
-    zenbenchmark.record("peak_mem",
-                        zenbenchmark.get_peak_mem(env.pageserver) / 1024,
-                        'MB',
-                        report=MetricReport.LOWER_IS_BETTER)
-
-    # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
-    zenbenchmark.record('size',
-                        timeline_size / (1024 * 1024),
-                        'MB',
-                        report=MetricReport.LOWER_IS_BETTER)
+    env.report_peak_memory_use()
+    env.report_size()
--- a/test_runner/performance/test_perf_pgbench.py
+++ b/test_runner/performance/test_perf_pgbench.py
@@ -1,10 +1,15 @@
 from contextlib import closing
-from fixtures.zenith_fixtures import PgBin, ZenithEnv
+from fixtures.zenith_fixtures import PgBin, VanillaPostgres, ZenithEnv
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare

 from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
 from fixtures.log_helper import log

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


 #
@@ -16,47 +21,16 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
 # 2. Time to run 5000 pgbench transactions
 # 3. Disk space used
 #
-def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin: PgBin, zenbenchmark: ZenithBenchmarker):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_pgbench_perf", "empty"])
+def test_pgbench(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline

-    pg = env.postgres.create_start('test_pgbench_perf')
-    log.info("postgres is running on 'test_pgbench_perf' branch")
+    with env.record_pageserver_writes('pageserver_writes'):
+        with env.record_duration('init'):
+            env.pg_bin.run_capture(['pgbench', '-s5', '-i', env.pg.connstr()])
+            env.flush()

-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
+    with env.record_duration('5000_xacts'):
+        env.pg_bin.run_capture(['pgbench', '-c1', '-t5000', env.pg.connstr()])
+    env.flush()

-    # Get the timeline ID of our branch. We need it for the 'do_gc' command
-    with closing(pg.connect()) as conn:
-        with conn.cursor() as cur:
-            cur.execute("SHOW zenith.zenith_timeline")
-            timeline = cur.fetchone()[0]
-
-    connstr = pg.connstr()
-
-    # Initialize pgbench database, recording the time and I/O it takes
-    with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
-        with zenbenchmark.record_duration('init'):
-            pg_bin.run_capture(['pgbench', '-s5', '-i', connstr])
-
-            # Flush the layers from memory to disk. This is included in the reported
-            # time and I/O
-            pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
-
-    # Run pgbench for 5000 transactions
-    with zenbenchmark.record_duration('5000_xacts'):
-        pg_bin.run_capture(['pgbench', '-c1', '-t5000', connstr])
-
-    # Flush the layers to disk again. This is *not' included in the reported time,
-    # though.
-    pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
-
-    # Report disk space used by the repository
-    timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline)
-    zenbenchmark.record('size',
-                        timeline_size / (1024 * 1024),
-                        'MB',
-                        report=MetricReport.LOWER_IS_BETTER)
+    env.report_size()
--- a/test_runner/performance/test_small_seqscans.py
+++ b/test_runner/performance/test_small_seqscans.py
@@ -7,24 +7,19 @@ from contextlib import closing
 from fixtures.zenith_fixtures import ZenithEnv
 from fixtures.log_helper import log
 from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
+from fixtures.compare_fixtures import PgCompare

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


-def test_small_seqscans(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_small_seqscans", "empty"])
+def test_small_seqscans(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline

-    pg = env.postgres.create_start('test_small_seqscans')
-    log.info("postgres is running on 'test_small_seqscans' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
-
-    with closing(pg.connect()) as conn:
+    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute('create table t (i integer);')
            cur.execute('insert into t values (generate_series(1,100000));')
@@ -38,6 +33,6 @@ def test_small_seqscans(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchm
            log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
            assert int(row[0]) < int(row[1])

-            with zenbenchmark.record_duration('run'):
+            with env.record_duration('run'):
                for i in range(1000):
                    cur.execute('select count(*) from t;')
--- a/test_runner/performance/test_write_amplification.py
+++ b/test_runner/performance/test_write_amplification.py
@@ -14,32 +14,23 @@ import os
 from contextlib import closing
 from fixtures.benchmark_fixture import MetricReport
 from fixtures.zenith_fixtures import ZenithEnv
+from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
 from fixtures.log_helper import log

-pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
+pytest_plugins = (
+    "fixtures.zenith_fixtures",
+    "fixtures.benchmark_fixture",
+    "fixtures.compare_fixtures",
+)


-def test_write_amplification(zenith_simple_env: ZenithEnv, zenbenchmark):
-    env = zenith_simple_env
-    # Create a branch for us
-    env.zenith_cli(["branch", "test_write_amplification", "empty"])
+def test_write_amplification(zenith_with_baseline: PgCompare):
+    env = zenith_with_baseline

-    pg = env.postgres.create_start('test_write_amplification')
-    log.info("postgres is running on 'test_write_amplification' branch")
-
-    # Open a connection directly to the page server that we'll use to force
-    # flushing the layers to disk
-    psconn = env.pageserver.connect()
-    pscur = psconn.cursor()
-
-    with closing(pg.connect()) as conn:
+    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
-            # Get the timeline ID of our branch. We need it for the 'do_gc' command
-            cur.execute("SHOW zenith.zenith_timeline")
-            timeline = cur.fetchone()[0]
-
-            with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'):
-                with zenbenchmark.record_duration('run'):
+            with env.record_pageserver_writes('pageserver_writes'):
+                with env.record_duration('run'):

                    # NOTE: Because each iteration updates every table already created,
                    # the runtime and write amplification is O(n^2), where n is the
@@ -71,13 +62,6 @@ def test_write_amplification(zenith_simple_env: ZenithEnv, zenbenchmark):
                        # slower, adding some delays in this loop.  But forcing
                        # the checkpointing and GC makes the test go faster,
                        # with the same total I/O effect.
-                        pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0")
+                        env.flush()

-            # Report disk space used by the repository
-            timeline_size = zenbenchmark.get_timeline_size(env.repo_dir,
-                                                           env.initial_tenant,
-                                                           timeline)
-            zenbenchmark.record('size',
-                                timeline_size / (1024 * 1024),
-                                'MB',
-                                report=MetricReport.LOWER_IS_BETTER)
+            env.report_size()
--- a/vendor/postgres
+++ b/vendor/postgres
--- a/walkeeper/Cargo.toml
+++ b/walkeeper/Cargo.toml
@@ -1,27 +1,23 @@
 [package]
 name = "walkeeper"
 version = "0.1.0"
-authors = ["Stas Kelvich <stas@zenith.tech>"]
-edition = "2018"
-
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+edition = "2021"

 [dependencies]
 regex = "1.4.5"
 bytes = "1.0.1"
 byteorder = "1.4.3"
 hyper = "0.14"
-routerify = "2"
 fs2 = "0.4.3"
 lazy_static = "1.4.0"
 serde_json = "1"
 tracing = "0.1.27"
-clap = "2.33.0"
+clap = "3.0"
 daemonize = "0.4.1"
 rust-s3 = { version = "0.28", default-features = false, features = ["no-verify-ssl", "tokio-rustls-tls"] }
-tokio = "1.11"
-postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
+tokio = { version = "1.11", features = ["macros"] }
+postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
+postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
 anyhow = "1.0"
 crc32c = "0.6.0"
 humantime = "2.1.0"
@@ -30,7 +26,7 @@ signal-hook = "0.3.10"
 serde = { version = "1.0", features = ["derive"] }
 hex = "0.4.3"
 const_format = "0.2.21"
-tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
+tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }

 postgres_ffi = { path = "../postgres_ffi" }
 workspace_hack = { path = "../workspace_hack" }
--- a/walkeeper/README
+++ b/walkeeper/README
@@ -99,10 +99,7 @@ A: If the compute node has evicted a page, changes to it have been WAL-logged
 Q: How long may Page Server wait for?
 A: Not too long, hopefully. If a page is evicted, it probably was not used for
   a while, so the WAL service have had enough time to push changes to the Page
-   Server. There may be issues if there is no backpressure and compute node with
-   WAL service run ahead of Page Server, though.
-   There is no backpressure right now, so you may even see some spurious
-   timeouts in tests.
+   Server. To limit the lag, tune backpressure using `max_replication_*_lag` settings.

 Q: How do WAL safekeepers communicate with each other?
 A: They may only send each other messages via the compute node, they never
--- a/walkeeper/src/bin/safekeeper.rs
+++ b/walkeeper/src/bin/safekeeper.rs
@@ -32,22 +32,22 @@ fn main() -> Result<()> {
        .about("Store WAL stream to local file system and push it to WAL receivers")
        .version(GIT_VERSION)
        .arg(
-            Arg::with_name("datadir")
-                .short("D")
+            Arg::new("datadir")
+                .short('D')
                .long("dir")
                .takes_value(true)
                .help("Path to the safekeeper data directory"),
        )
        .arg(
-            Arg::with_name("listen-pg")
-                .short("l")
+            Arg::new("listen-pg")
+                .short('l')
                .long("listen-pg")
                .alias("listen") // for compatibility
                .takes_value(true)
                .help(formatcp!("listen for incoming WAL data connections on ip:port (default: {DEFAULT_PG_LISTEN_ADDR})")),
        )
        .arg(
-            Arg::with_name("listen-http")
+            Arg::new("listen-http")
                .long("listen-http")
                .takes_value(true)
                .help(formatcp!("http endpoint address for metrics on ip:port (default: {DEFAULT_HTTP_LISTEN_ADDR})")),
@@ -56,39 +56,39 @@ fn main() -> Result<()> {
        // However because this argument is in use by console's e2e tests lets keep it for now and remove separately.
        // So currently it is a noop.
        .arg(
-            Arg::with_name("pageserver")
-                .short("p")
+            Arg::new("pageserver")
+                .short('p')
                .long("pageserver")
                .takes_value(true),
        )
        .arg(
-            Arg::with_name("ttl")
+            Arg::new("ttl")
                .long("ttl")
                .takes_value(true)
                .help("interval for keeping WAL at safekeeper node, after which them will be uploaded to S3 and removed locally"),
        )
        .arg(
-            Arg::with_name("recall")
+            Arg::new("recall")
                .long("recall")
                .takes_value(true)
                .help("Period for requestion pageserver to call for replication"),
        )
        .arg(
-            Arg::with_name("daemonize")
-                .short("d")
+            Arg::new("daemonize")
+                .short('d')
                .long("daemonize")
                .takes_value(false)
                .help("Run in the background"),
        )
        .arg(
-            Arg::with_name("no-sync")
-                .short("n")
+            Arg::new("no-sync")
+                .short('n')
                .long("no-sync")
                .takes_value(false)
                .help("Do not wait for changes to be written safely to disk"),
        )
        .arg(
-            Arg::with_name("dump-control-file")
+            Arg::new("dump-control-file")
                .long("dump-control-file")
                .takes_value(true)
                .help("Dump control file at path specifed by this argument and exit"),
@@ -143,7 +143,7 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {

    // Prevent running multiple safekeepers on the same directory
    let lock_file_path = conf.workdir.join(LOCK_FILE_NAME);
-    let lock_file = File::create(&lock_file_path).with_context(|| "failed to open lockfile")?;
+    let lock_file = File::create(&lock_file_path).context("failed to open lockfile")?;
    lock_file.try_lock_exclusive().with_context(|| {
        format!(
            "control file {} is locked by some other process",
--- a/walkeeper/src/callmemaybe.rs
+++ b/walkeeper/src/callmemaybe.rs
@@ -6,8 +6,8 @@
 //!  from the call list.
 //!
 use crate::SafeKeeperConf;
-use anyhow::anyhow;
-use anyhow::Result;
+use anyhow::{Context, Result};
+use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::sync::Mutex;
 use std::time::{Duration, Instant};
@@ -33,7 +33,7 @@ async fn request_callback(

    tokio::spawn(async move {
        if let Err(e) = connection.await {
-            eprintln!("connection error: {}", e);
+            error!("connection error: {}", e);
        }
    });

@@ -42,9 +42,12 @@ async fn request_callback(
    let me_conf: postgres::config::Config = me_connstr.parse().unwrap();
    let (host, port) = connection_host_port(&me_conf);

+    // The pageserver connstr is needed to distinguish between different pageservers.
+    // It is required to correctly manage callmemaybe subscriptions when more than one pageserver is involved.
+    // TODO: it would be better to use some sort of unique id instead of the connection string, see https://github.com/zenithdb/zenith/issues/1105
    let callme = format!(
-        "callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={}'",
-        tenantid, timelineid, host, port, timelineid, tenantid
+        "callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={} pageserver_connstr={}'",
+        tenantid, timelineid, host, port, timelineid, tenantid, pageserver_connstr,
    );

    let _ = client.simple_query(&callme).await?;
@@ -61,18 +64,36 @@ pub fn thread_main(conf: SafeKeeperConf, rx: UnboundedReceiver<CallmeEvent>) ->
    runtime.block_on(main_loop(conf, rx))
 }

+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+pub struct SubscriptionStateKey {
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+    pageserver_connstr: String,
+}
+
+impl SubscriptionStateKey {
+    pub fn new(tenant_id: ZTenantId, timeline_id: ZTimelineId, pageserver_connstr: String) -> Self {
+        Self {
+            tenant_id,
+            timeline_id,
+            pageserver_connstr,
+        }
+    }
+}
+
 /// Messages to the callmemaybe thread
 #[derive(Debug)]
 pub enum CallmeEvent {
    // add new subscription to the list
-    Subscribe(ZTenantId, ZTimelineId, String),
+    Subscribe(SubscriptionStateKey),
    // remove the subscription from the list
-    Unsubscribe(ZTenantId, ZTimelineId),
+    Unsubscribe(SubscriptionStateKey),
    // don't serve this subscription, but keep it in the list
-    Pause(ZTenantId, ZTimelineId),
+    Pause(SubscriptionStateKey),
    // resume this subscription, if it exists,
    // but don't create a new one if it is gone
-    Resume(ZTenantId, ZTimelineId),
+    Resume(SubscriptionStateKey),
+    // TODO how do we delete from subscriptions?
 }

 #[derive(Debug)]
@@ -118,6 +139,7 @@ impl SubscriptionState {

            let timelineid = self.timelineid;
            let tenantid = self.tenantid;
+            let pageserver_connstr = self.pageserver_connstr.clone();
            tokio::spawn(async move {
                if let Err(err) = handle.await {
                    if err.is_cancelled() {
@@ -125,8 +147,8 @@ impl SubscriptionState {
                            timelineid, tenantid);
                    } else {
                        error!(
-                            "callback task for timelineid={} tenantid={} failed: {}",
-                            timelineid, tenantid, err
+                            "callback task for timelineid={} tenantid={} pageserver_connstr={} failed: {}",
+                            timelineid, tenantid, pageserver_connstr, err
                        );
                    }
                }
@@ -138,7 +160,7 @@ impl SubscriptionState {
        // Ignore call request if this subscription is paused
        if self.paused {
            debug!(
-                "ignore call request for paused subscription
+                "ignore call request for paused subscription \
                tenantid: {}, timelineid: {}",
                self.tenantid, self.timelineid
            );
@@ -148,7 +170,7 @@ impl SubscriptionState {
        // Check if it too early to recall
        if self.handle.is_some() && self.last_call_time.elapsed() < recall_period {
            debug!(
-                "too early to recall. self.last_call_time.elapsed: {:?}, recall_period: {:?}
+                "too early to recall. self.last_call_time.elapsed: {:?}, recall_period: {:?} \
                tenantid: {}, timelineid: {}",
                self.last_call_time, recall_period, self.tenantid, self.timelineid
            );
@@ -176,8 +198,7 @@ impl SubscriptionState {
        // Update last_call_time
        self.last_call_time = Instant::now();
        info!(
-            "new call spawned. time {:?}
-            tenantid: {}, timelineid: {}",
+            "new call spawned. last call time {:?} tenantid: {}, timelineid: {}",
            self.last_call_time, self.tenantid, self.timelineid
        );
    }
@@ -190,7 +211,7 @@ impl Drop for SubscriptionState {
 }

 pub async fn main_loop(conf: SafeKeeperConf, mut rx: UnboundedReceiver<CallmeEvent>) -> Result<()> {
-    let subscriptions: Mutex<HashMap<(ZTenantId, ZTimelineId), SubscriptionState>> =
+    let subscriptions: Mutex<HashMap<SubscriptionStateKey, SubscriptionState>> =
        Mutex::new(HashMap::new());

    let mut ticker = tokio::time::interval(conf.recall_period);
@@ -198,54 +219,82 @@ pub async fn main_loop(conf: SafeKeeperConf, mut rx: UnboundedReceiver<CallmeEve
        tokio::select! {
            request = rx.recv() =>
            {
-                match request.ok_or_else(|| anyhow!("done"))?
+                match request.context("done")?
                {
-                    CallmeEvent::Subscribe(tenantid, timelineid, pageserver_connstr) =>
+                    CallmeEvent::Subscribe(key) =>
                    {
+                        let _enter = info_span!("callmemaybe: subscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
                        let mut subscriptions = subscriptions.lock().unwrap();
-                        if let Some(sub) = subscriptions.get(&(tenantid, timelineid))
-                        {
-                            info!("callmemaybe. subscription already exists {:?}", sub);
+                        // XXX: this clone is ugly. Is there a way to use the Borrow trait trick with the entry API?
+                        // Once we switch to a node id instead of the connection string, the key will be Copy and there will be no need to clone.
+                        match subscriptions.entry(key.clone()) {
+                            Entry::Occupied(_) => {
+                                // Do nothing if the subscription already exists.
+                                // If it is paused, a replication connection is already established.
+                                // If it is not paused, it will be polled with the other subscriptions when the timeout expires.
+                                // This can occur when the replication channel is established before the subscription is added.
+                                info!(
+                                    "subscription already exists",
+                                );
+                            }
+                            Entry::Vacant(entry) => {
+                                let subscription = entry.insert(SubscriptionState::new(
+                                    key.tenant_id,
+                                    key.timeline_id,
+                                    key.pageserver_connstr,
+                                ));
+                                subscription.call(conf.recall_period, conf.listen_pg_addr.clone());
+                            }
                        }
-                        if let Some(mut sub) = subscriptions.insert((tenantid, timelineid),
-                            SubscriptionState::new(tenantid, timelineid, pageserver_connstr))
-                        {
-                            sub.call(conf.recall_period, conf.listen_pg_addr.clone());
+                    },
+                    CallmeEvent::Unsubscribe(key) => {
+                        let _enter = debug_span!("callmemaybe: unsubscribe", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
+                        debug!("unsubscribe");
+                        let mut subscriptions = subscriptions.lock().unwrap();
+                        subscriptions.remove(&key);
+
+                    },
+                    CallmeEvent::Pause(key) => {
+                        let _enter = debug_span!("callmemaybe: pause", timelineid = %key.timeline_id, tenantid = %key.tenant_id, pageserver_connstr=%key.pageserver_connstr.clone()).entered();
+                        let mut subscriptions = subscriptions.lock().unwrap();
+                        // If a pause is received when no corresponding subscription exists, it means that someone started
+                        // replication without using callmemaybe, so we create a subscription and pause it.
+                        // In the tenant relocation scenario, the subscribe call is executed after the pause, when compute is restarted.
+                        // In that case there is no need to create a new subscription or unpause the existing one.
+                        match subscriptions.entry(key.clone()) {
+                            Entry::Occupied(mut sub) => {
+                                debug!("pause existing");
+                                sub.get_mut().pause();
+                            }
+                            Entry::Vacant(entry) => {
+                                debug!("create paused");
+                                let subscription = entry.insert(SubscriptionState::new(
+                                    key.tenant_id,
+                                    key.timeline_id,
+                                    key.pageserver_connstr,
+                                ));
+                                subscription.pause();
+                            }
                        }
-                        info!("callmemaybe. thread_main. subscribe callback request for timelineid={} tenantid={}",
-                        timelineid, tenantid);
                    },
-                    CallmeEvent::Unsubscribe(tenantid, timelineid) => {
+                    CallmeEvent::Resume(key) => {
+                        debug!(
+                            "callmemaybe. thread_main. resume callback request for timelineid={} tenantid={} pageserver_connstr={}",
+                            key.timeline_id, key.tenant_id, key.pageserver_connstr,
+                        );
                        let mut subscriptions = subscriptions.lock().unwrap();
-                        subscriptions.remove(&(tenantid, timelineid));
-                        info!("callmemaybe. thread_main. unsubscribe callback. request for timelineid={} tenantid={}",
-                        timelineid, tenantid);
-                    },
-                    CallmeEvent::Pause(tenantid, timelineid) => {
-                        let mut subscriptions = subscriptions.lock().unwrap();
-                        if let Some(sub) = subscriptions.get_mut(&(tenantid, timelineid))
-                        {
-                            sub.pause();
-                        };
-                        info!("callmemaybe. thread_main. pause callback request for timelineid={} tenantid={}",
-                        timelineid, tenantid);
-                    },
-                    CallmeEvent::Resume(tenantid, timelineid) => {
-                        let mut subscriptions = subscriptions.lock().unwrap();
-                        if let Some(sub) = subscriptions.get_mut(&(tenantid, timelineid))
+                        if let Some(sub) = subscriptions.get_mut(&key)
                        {
                            sub.resume();
                        };
-
-                        info!("callmemaybe. thread_main. resume callback request for timelineid={} tenantid={}",
-                        timelineid, tenantid);
                    },
                }
            },
            _ = ticker.tick() => {
+                let _enter = debug_span!("callmemaybe: tick").entered();
                let mut subscriptions = subscriptions.lock().unwrap();

-                for (&(_tenantid, _timelineid), state) in subscriptions.iter_mut() {
+                for (_, state) in subscriptions.iter_mut() {
                    state.call(conf.recall_period, conf.listen_pg_addr.clone());
                }
             },
--- a/walkeeper/src/handler.rs
+++ b/walkeeper/src/handler.rs
@@ -6,7 +6,7 @@ use crate::receive_wal::ReceiveWalConn;
 use crate::send_wal::ReplicationConn;
 use crate::timeline::{Timeline, TimelineTools};
 use crate::SafeKeeperConf;
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{bail, Context, Result};

 use postgres_ffi::xlog_utils::PG_TLI;
 use regex::Regex;
@@ -16,7 +16,7 @@ use zenith_utils::lsn::Lsn;
 use zenith_utils::postgres_backend;
 use zenith_utils::postgres_backend::PostgresBackend;
 use zenith_utils::pq_proto::{BeMessage, FeStartupPacket, RowDescriptor, INT4_OID, TEXT_OID};
-use zenith_utils::zid::{ZTenantId, ZTimelineId};
+use zenith_utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

 use crate::callmemaybe::CallmeEvent;
 use crate::timeline::CreateControlFile;
@@ -30,6 +30,7 @@ pub struct SafekeeperPostgresHandler {
    pub ztenantid: Option<ZTenantId>,
    pub ztimelineid: Option<ZTimelineId>,
    pub timeline: Option<Arc<Timeline>>,
+    pageserver_connstr: Option<String>,
    //sender to communicate with callmemaybe thread
    pub tx: UnboundedSender<CallmeEvent>,
 }
@@ -56,16 +57,15 @@ fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
        let start_lsn = caps
            .next()
            .map(|cap| cap[1].parse::<Lsn>())
-            .ok_or_else(|| anyhow!("failed to parse start LSN from START_REPLICATION command"))??;
+            .context("failed to parse start LSN from START_REPLICATION command")??;
        Ok(SafekeeperPostgresCommand::StartReplication { start_lsn })
    } else if cmd.starts_with("IDENTIFY_SYSTEM") {
        Ok(SafekeeperPostgresCommand::IdentifySystem)
    } else if cmd.starts_with("JSON_CTRL") {
-        let cmd = cmd
-            .strip_prefix("JSON_CTRL")
-            .ok_or_else(|| anyhow!("invalid prefix"))?;
-        let parsed_cmd: AppendLogicalMessage = serde_json::from_str(cmd)?;
-        Ok(SafekeeperPostgresCommand::JSONCtrl { cmd: parsed_cmd })
+        let cmd = cmd.strip_prefix("JSON_CTRL").context("invalid prefix")?;
+        Ok(SafekeeperPostgresCommand::JSONCtrl {
+            cmd: serde_json::from_str(cmd)?,
+        })
    } else {
        bail!("unsupported command {}", cmd);
    }
@@ -89,6 +89,8 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
                self.appname = Some(app_name.clone());
            }

+            self.pageserver_connstr = params.get("pageserver_connstr").cloned();
+
            Ok(())
        } else {
            bail!("Walkeeper received unexpected initial message: {:?}", sm);
@@ -104,12 +106,8 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
            | SafekeeperPostgresCommand::StartReplication { .. }
            | SafekeeperPostgresCommand::IdentifySystem
            | SafekeeperPostgresCommand::JSONCtrl { .. } => {
-                let tenantid = self
-                    .ztenantid
-                    .ok_or_else(|| anyhow!("tenantid is required"))?;
-                let timelineid = self
-                    .ztimelineid
-                    .ok_or_else(|| anyhow!("timelineid is required"))?;
+                let tenantid = self.ztenantid.context("tenantid is required")?;
+                let timelineid = self.ztimelineid.context("timelineid is required")?;
                if self.timeline.is_none() {
                    // START_WAL_PUSH is the only command that initializes the timeline in production.
                    // There is also JSON_CTRL command, which should initialize the timeline for testing.
@@ -118,8 +116,11 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
                        | SafekeeperPostgresCommand::JSONCtrl { .. } => CreateControlFile::True,
                        _ => CreateControlFile::False,
                    };
-                    self.timeline
-                        .set(&self.conf, tenantid, timelineid, create_control_file)?;
+                    self.timeline.set(
+                        &self.conf,
+                        ZTenantTimelineId::new(tenantid, timelineid),
+                        create_control_file,
+                    )?;
                }
            }
        }
@@ -128,12 +129,12 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
            SafekeeperPostgresCommand::StartWalPush { pageserver_connstr } => {
                ReceiveWalConn::new(pgb, pageserver_connstr)
                    .run(self)
-                    .with_context(|| "failed to run ReceiveWalConn")?;
+                    .context("failed to run ReceiveWalConn")?;
            }
            SafekeeperPostgresCommand::StartReplication { start_lsn } => {
                ReplicationConn::new(pgb)
-                    .run(self, pgb, start_lsn)
-                    .with_context(|| "failed to run ReplicationConn")?;
+                    .run(self, pgb, start_lsn, self.pageserver_connstr.clone())
+                    .context("failed to run ReplicationConn")?;
            }
            SafekeeperPostgresCommand::IdentifySystem => {
                self.handle_identify_system(pgb)?;
@@ -154,6 +155,7 @@ impl SafekeeperPostgresHandler {
            ztenantid: None,
            ztimelineid: None,
            timeline: None,
+            pageserver_connstr: None,
            tx,
        }
    }
--- a/walkeeper/src/http/routes.rs
+++ b/walkeeper/src/http/routes.rs
@@ -1,11 +1,11 @@
 use hyper::{Body, Request, Response, StatusCode};
-use routerify::ext::RequestExt;
-use routerify::RouterBuilder;
 use serde::Serialize;
 use serde::Serializer;
 use std::fmt::Display;
 use std::sync::Arc;
+use zenith_utils::http::{RequestExt, RouterBuilder};
 use zenith_utils::lsn::Lsn;
+use zenith_utils::zid::ZTenantTimelineId;

 use crate::safekeeper::Term;
 use crate::safekeeper::TermHistory;
@@ -65,16 +65,13 @@ struct TimelineStatus {

 /// Report info about timeline.
 async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
-    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
+    let zttid = ZTenantTimelineId::new(
+        parse_request_param(&request, "tenant_id")?,
+        parse_request_param(&request, "timeline_id")?,
+    );

-    let tli = GlobalTimelines::get(
-        get_conf(&request),
-        tenant_id,
-        timeline_id,
-        CreateControlFile::False,
-    )
-    .map_err(ApiError::from_err)?;
+    let tli = GlobalTimelines::get(get_conf(&request), zttid, CreateControlFile::False)
+        .map_err(ApiError::from_err)?;
    let sk_state = tli.get_info();
    let flush_lsn = tli.get_end_of_wal();

@@ -85,8 +82,8 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
    };

    let status = TimelineStatus {
-        tenant_id,
-        timeline_id,
+        tenant_id: zttid.tenant_id,
+        timeline_id: zttid.timeline_id,
        acceptor_state: acc_state,
        commit_lsn: sk_state.commit_lsn,
        truncate_lsn: sk_state.truncate_lsn,
--- a/walkeeper/src/lib.rs
+++ b/walkeeper/src/lib.rs
@@ -2,7 +2,7 @@
 use std::path::PathBuf;
 use std::time::Duration;

-use zenith_utils::zid::ZTimelineId;
+use zenith_utils::zid::ZTenantTimelineId;

 pub mod callmemaybe;
 pub mod handler;
@@ -47,8 +47,10 @@ pub struct SafeKeeperConf {
 }

 impl SafeKeeperConf {
-    pub fn timeline_dir(&self, timelineid: &ZTimelineId) -> PathBuf {
-        self.workdir.join(timelineid.to_string())
+    pub fn timeline_dir(&self, zttid: &ZTenantTimelineId) -> PathBuf {
+        self.workdir
+            .join(zttid.tenant_id.to_string())
+            .join(zttid.timeline_id.to_string())
    }
 }

--- a/walkeeper/src/receive_wal.rs
+++ b/walkeeper/src/receive_wal.rs
@@ -5,6 +5,7 @@
 use anyhow::{bail, Context, Result};
 use bytes::Bytes;
 use bytes::BytesMut;
+use tokio::sync::mpsc::UnboundedSender;
 use tracing::*;

 use crate::timeline::Timeline;
@@ -18,10 +19,8 @@ use crate::handler::SafekeeperPostgresHandler;
 use crate::timeline::TimelineTools;
 use zenith_utils::postgres_backend::PostgresBackend;
 use zenith_utils::pq_proto::{BeMessage, FeMessage};
-use zenith_utils::zid::{ZTenantId, ZTimelineId};

 use crate::callmemaybe::CallmeEvent;
-use tokio::sync::mpsc::UnboundedSender;

 pub struct ReceiveWalConn<'pg> {
    /// Postgres connection
@@ -82,54 +81,23 @@ impl<'pg> ReceiveWalConn<'pg> {
        let mut msg = self
            .read_msg()
            .context("failed to receive proposer greeting")?;
-        let tenant_id: ZTenantId;
        match msg {
            ProposerAcceptorMessage::Greeting(ref greeting) => {
                info!(
                    "start handshake with wal proposer {} sysid {} timeline {}",
                    self.peer_addr, greeting.system_id, greeting.tli,
                );
-                tenant_id = greeting.tenant_id;
            }
            _ => bail!("unexpected message {:?} instead of greeting", msg),
        }

-        // Incoming WAL stream resumed, so reset information about the timeline pause.
-        spg.timeline.get().continue_streaming();
-
-        // if requested, ask pageserver to fetch wal from us
-        // as long as this wal_stream is alive, callmemaybe thread
-        // will send requests to pageserver
-        let _guard = match self.pageserver_connstr {
-            Some(ref pageserver_connstr) => {
-                // Need to establish replication channel with page server.
-                // Add far as replication in postgres is initiated by receiver
-                // we should use callmemaybe mechanism.
-                let timelineid = spg.timeline.get().timelineid;
-                let tx_clone = spg.tx.clone();
-                let pageserver_connstr = pageserver_connstr.to_owned();
-                spg.tx
-                    .send(CallmeEvent::Subscribe(
-                        tenant_id,
-                        timelineid,
-                        pageserver_connstr,
-                    ))
-                    .unwrap_or_else(|e| {
-                        error!(
-                            "failed to send Subscribe request to callmemaybe thread {}",
-                            e
-                        );
-                    });
-
-                // create a guard to unsubscribe callback, when this wal_stream will exit
-                Some(SendWalHandlerGuard {
-                    _tx: tx_clone,
-                    _tenant_id: tenant_id,
-                    _timelineid: timelineid,
-                    timeline: Arc::clone(spg.timeline.get()),
-                })
-            }
-            None => None,
+        // Register the connection and defer unregister.
+        spg.timeline
+            .get()
+            .on_compute_connect(self.pageserver_connstr.as_ref(), &spg.tx)?;
+        let _guard = ComputeConnectionGuard {
+            timeline: Arc::clone(spg.timeline.get()),
+            callmemaybe_tx: spg.tx.clone(),
        };

        loop {
@@ -137,7 +105,7 @@ impl<'pg> ReceiveWalConn<'pg> {
                .timeline
                .get()
                .process_msg(&msg)
-                .with_context(|| "failed to process ProposerAcceptorMessage")?;
+                .context("failed to process ProposerAcceptorMessage")?;
            if let Some(reply) = reply {
                self.write_msg(&reply)?;
            }
@@ -146,23 +114,15 @@ impl<'pg> ReceiveWalConn<'pg> {
    }
 }

-struct SendWalHandlerGuard {
-    _tx: UnboundedSender<CallmeEvent>,
-    _tenant_id: ZTenantId,
-    _timelineid: ZTimelineId,
+struct ComputeConnectionGuard {
    timeline: Arc<Timeline>,
+    callmemaybe_tx: UnboundedSender<CallmeEvent>,
 }

-impl Drop for SendWalHandlerGuard {
+impl Drop for ComputeConnectionGuard {
    fn drop(&mut self) {
-        self.timeline.stop_streaming();
-        // self.tx
-        //     .send(CallmeEvent::Unsubscribe(self.tenant_id, self.timelineid))
-        //     .unwrap_or_else(|e| {
-        //         error!(
-        //             "failed to send Unsubscribe request to callmemaybe thread {}",
-        //             e
-        //         );
-        //     });
+        self.timeline
+            .on_compute_disconnect(&self.callmemaybe_tx)
+            .unwrap();
    }
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Konstantin Knizhnik	a7f04ace3d	Make clippy happy	2022-02-16 13:26:22 +03:00
Konstantin Knizhnik	dc3f3d0ace	Calculate postgres checksum for FPI stored in page server	2022-02-16 13:16:35 +03:00
bojanserafimov	afb3342e46	Add vanilla pg baseline tests (#1275 )	2022-02-15 13:44:22 -05:00
Kirill Bulatov	5563ff123f	Reuse tenant-timeline id struct from utils	2022-02-15 17:45:23 +02:00
Dhammika Pathirana	0a557b2fa9	Add cli v4 loopback listener ports test Signed-off-by: Dhammika Pathirana <dhammika@gmail.com> Add a test for #1247	2022-02-15 17:01:22 +02:00
Heikki Linnakangas	9632c352ab	Avoid having multiple records for the same page and LSN. If a heap UPDATE record modified two pages, and both pages needed to have their VM bits cleared, and the VM bits were located on the same VM page, we would emit two ZenithWalRecord::ClearVisibilityMapFlags records for the same VM page. That produced warnings like this in the pageserver log: Page version Wal(ClearVisibilityMapFlags { heap_blkno: 18, flags: 3 }) of rel 1663/13949/2619_vm blk 0 at 2A/346046A0 already exists To fix, change ClearVisibilityMapFlags so that it can update the bits for both pages as one operation. This was already covered by several python tests, so no need to add a new one. Fixes #1125. Co-authored-by: Konstantin Knizhnik <knizhnik@zenith.tech>	2022-02-15 14:26:16 +02:00
Arseny Sher	328e3b4189	bump vendor/postgres to fix compiler warnings	2022-02-15 06:51:16 +03:00
Arseny Sher	47f6a1f9a8	Add -Werror to CI builds.	2022-02-15 06:51:16 +03:00
Dmitry Rodionov	a4829712f4	merge directories in git-upload instead of removing existing files for perf test result uploads	2022-02-15 03:47:06 +03:00
Arseny Sher	d4d26f619d	bump vendor/postgres to fix compilation warning	2022-02-14 21:00:11 +03:00
Arseny Sher	36481f3374	bump vendor/postgres to init pgxactoff in walproposer ref #1244	2022-02-14 15:57:38 +03:00
Dhammika Pathirana	d951dd8977	Fix cli start (#1260 ) Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>	2022-02-10 18:36:02 -05:00
bojanserafimov	ea13838be7	Add pgbench baseline test (#1204 ) Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>	2022-02-10 15:33:36 -05:00
Dmitry Rodionov	b51f23cdf0	pass perf test cluster connstr to circle ci jobs	2022-02-10 17:49:54 +03:00
Kirill Bulatov	3cfcdb92ed	Fix tokio features in zenith utils to enable its standalone compilation	2022-02-10 08:33:22 -05:00
Kirill Bulatov	d7af965982	Do not leak decoding_key in JwtAuth's Debug representation	2022-02-10 08:33:22 -05:00
Kirill Bulatov	7c1c7702d2	Code review fixes	2022-02-10 08:33:22 -05:00
Kirill Bulatov	6eef401602	Move routerify behind zenith_utils	2022-02-10 08:33:22 -05:00
Kirill Bulatov	c5b5905ed3	Remove parking_lot dependency from workspace	2022-02-10 08:33:22 -05:00
Kirill Bulatov	76b74349cb	Bump pageserver dependencies	2022-02-10 08:33:22 -05:00
Dmitry Rodionov	b08e340f60	point perf results back from testing to master	2022-02-10 14:18:34 +03:00
Dmitry Rodionov	a25fa29bc9	modify git-upload for generate_and_push_perf_report.sh needs	2022-02-10 13:12:19 +03:00
Dmitry Rodionov	ccf3c8cc30	store performance test results in our staging cluster to be able to visualize them in grafana	2022-02-10 13:12:19 +03:00
Heikki Linnakangas	c45ee13b4e	Bump vendor/postgres, to fix memory leak. See https://github.com/zenithdb/postgres/pull/129	2022-02-10 11:29:38 +02:00
anastasia	f1e7db9d0d	Bump vendor/postgres rebased to 14.2	2022-02-10 11:19:10 +03:00
Heikki Linnakangas	fa8a6c0e94	Reduce logging of walkeeper normal operations. It was printing a lot of stuff to the log with INFO level, for routine things like receiving or sending messages. Reduce the noise. The amount of logging was excessive, and it was also consuming a fair amount of CPU (about 20% of safekeeper's CPU usage in a little test I ran).	2022-02-10 08:34:30 +02:00
Dhammika Pathirana	1e8ca497e0	Fix safekeeper loopback addr (#1247 ) Signed-off-by: Dhammika Pathirana <dhammika@gmail.com>	2022-02-10 09:23:53 +03:00
Heikki Linnakangas	a504cc87ab	Bump vendor/postgres for "Make getpage requests interruptible" See https://github.com/zenithdb/zenith/issues/1224	2022-02-09 16:13:46 +02:00
Heikki Linnakangas	5268bbc840	Bump vendor/postgres for fixes to cluster size limit. See https://github.com/zenithdb/postgres/pull/126	2022-02-09 15:52:21 +02:00
Arseny Sher	e1d770939b	Bump vendor/postgres to fix recent CI failure. See zenithdb/postgres#127	2022-02-09 08:50:45 -05:00
Egor Suvorov	2866a9e82e	Fix safekeeper LSN metrics (#1216 ) * Always initialize flush_lsn/commit_lsn metrics on a specific timeline, no more `n/a` * Update flush_lsn metrics missing from `cba4da3f4d` * Ensure that flush_lsn found on load is >= than both commit_lsn and truncate_lsn * Add some debug logging	2022-02-07 20:05:16 +03:00
Kirill Bulatov	b67cddb303	Implement EphemeralFile flush in a least dangerous way	2022-02-05 22:02:59 -05:00
anastasia	cb1d84d980	Make test_timeline_size_quota more deterministic	2022-02-06 02:16:36 +03:00
anastasia	642797b69e	Implement cluster size quota for zenith compute node. Use GUC zenith.max_cluster_size to set the limit. If limit is reached, extend requests will throw out-of-space error. When current size is too close to the limit - throw a warning. Add new test: test_timeline_size_quota.	2022-02-06 02:16:36 +03:00
Kirill Bulatov	3ed156a5b6	Add a CLI tool to manipulate remote storage blob files	2022-02-05 15:48:08 -05:00
Heikki Linnakangas	2d93b129a0	Avoid eprintln() in pageserver and walkeeper. Use log::error!() instead. I spotted a few of these "connection error" lines in the logs, without timestamps and the other stuff we print for all other log messages.	2022-02-05 17:59:31 +02:00
Arseny Sher	32c7859659	bump vendor/postgres	2022-02-05 01:27:31 +03:00
Arseny Sher	729ac38ea8	Centralize suspending/resuming timeline activity on safekeepers. Timeline is active whenever there is at least 1 connection from compute or pageserver is not caught up. Currently 'active' means callmemaybes are being sent. Fixes race: now suspend condition checking and callmemaybe unsubscribe happen under the same lock.	2022-02-03 02:34:10 +03:00
Andrey Taranik	d69b0539ba	proxy chart staging values update for labels (#1202 )	2022-02-01 13:31:05 +03:00
Dmitry Ivanov	ec78babad2	Use `mold` instead of default linker	2022-01-28 20:40:50 +03:00
Dmitry Ivanov	9350dfb215	[CI] Merge *.profraw files prior to uploading workspace Hopefully, this will make CI pipeline a bit faster.	2022-01-28 19:56:28 +03:00
Dmitry Ivanov	8ac8be5206	[scripts/coverage] Implement `merge` command This will drastically decrease the size of CI workspace uploads.	2022-01-28 19:56:28 +03:00
Dmitry Ivanov	c2927353a5	Enable async deserialization of FeMessage Now it's possible to call Fe{Startup,}Message in both sync and async contexts, which is good for proxy. Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>	2022-01-28 19:40:37 +03:00
Kirill Bulatov	33251a9d8f	Disable failing remote storage tests for now	2022-01-28 18:35:46 +03:00
Konstantin Knizhnik	c045ae7a9b	Fix random range for keys in test_gc_aggressive.py (#1199 )	2022-01-28 16:29:55 +03:00
Dmitry Rodionov	602ccb7d5f	distinguish failures for pre-initdb lsn and pre-ancestor lsn branching in test_branch_behind	2022-01-28 12:31:15 +03:00
Dmitry Rodionov	5df21e1058	remove Timeline::start_lsn in favor of ancestor_lsn	2022-01-28 12:31:15 +03:00
Konstantin Knizhnik	08135910a5	Fix checkpoint.nextXid update (#1166 ) * Fix checkpoint.nextXid update * Add test for checkpoint.nextXid * Fix indentation of test_next_xid.py * Fix mypy error in test_next_xid.py * Tidy up the test case. * Add a unit test Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>	2022-01-27 18:21:51 +03:00
Konstantin Knizhnik	f58a22d07e	Freeze layers at the same end LSN (#1182 ) * Freeze vectors at the same end LSN * Fix calculation of last LSN for inmem layer * Do not advance disk_consistent_lsn if no open layer was evicted * Fix calculation of freeze_end_lsn * Let start_lsn be larger than oldest_pending_lsn * Rename 'oldest_pending_lsn' and 'last_lsn', add comments. * Fix future_layerfiles test * Update comments concerning oldest_lsn * Update comments concerning oldest_lsn Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>	2022-01-27 18:21:00 +03:00
Arthur Petukhovsky	cedde559b8	Add test for replacement of the failed safekeeper (#1179 ) * Add test to replace failed safekeeper * Restart safekeepers in test_replace_safekeeper * Update vendor/postgres	2022-01-27 17:26:55 +03:00
Arthur Petukhovsky	49d1d1ddf9	Don't call adjust_for_wal_acceptors after pg create (#1178 ) Now zenith_cli handles wal_acceptors config internally, and if we will append wal_acceptors to postgresql.conf in python tests, then it will contain duplicate wal_acceptors config.	2022-01-27 17:23:14 +03:00
Arseny Sher	86045ac36c	Prefix per-cluster directory with ztenant_id in safekeeper. Currently ztimelineids are unique, but all APIs accept the pair, so let's keep it everywhere for uniformity. Carry around ZTTId containing both ZTenantId and ZTimelineId for simplicity. (existing clusters on staging ought to be preprocessed for that)	2022-01-27 17:22:07 +03:00
Konstantin Knizhnik	79f0e44a20	Gc cutoff rwlock (#1139 ) * Reproduce github issue #1047. * Use RwLock to protect gc_cutoff_lsn * Reduce number of updates in test_gc_aggressive * Change test_prohibit_get_page_at_lsn_for_garbage_collected_pages test * Change test_prohibit_get_page_at_lsn_for_garbage_collected_pages * Lock latest_gc_cutoff_lsn in all operations accessing storage to prevent race conditions with GC * Remove random sleep between wait_for_lsn and get_page_at_lsn * Initialize latest_gc_cutoff with initdb_lsn and remove separate check that lsn >= initdb_lsn * Update test_prohibit_branch_creation_on_pre_initdb_lsn test Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>	2022-01-27 14:41:16 +03:00
anastasia	c44695f34b	bump vendor/postgres	2022-01-27 11:20:45 +03:00
anastasia	5abe2129c6	Extend replication protocol with ZenithFeedback message to pass current_timeline_size to compute node Put standby_status_update fields into ZenithFeedback and send them as one message. Pass values sizes together with keys in ZenithFeedback message.	2022-01-27 11:20:45 +03:00
Dmitry Rodionov	63dd7bce7e	bandaid to avoid concurrent timeline downloading until proper refactoring/fix	2022-01-26 19:54:09 +03:00
Dmitry Rodionov	f3c73f5797	cache python deps in circle ci	2022-01-26 13:01:12 +03:00
Dmitry Rodionov	e6f2d70517	use 2021 rust edition	2022-01-25 18:48:49 +03:00
Andrey Taranik	be6d1cc360	Use zimg as builders (#1165 ) * try use own builder images * add postgres headers before build zenith * checkout submodule before zenith build * circleci cleanup	2022-01-25 00:58:37 +03:00
Dmitry Ivanov	703716228e	Use `&str` instead of `String` in `BeMessage::ErrorResponse` There's no need in allocating string literals in the heap.	2022-01-24 18:49:05 +03:00
Dmitry Rodionov	458bc0c838	walkeeper: use named type as a key in callmemaybe subscriptions hashmap	2022-01-24 17:20:15 +03:00
Dmitry Rodionov	39591ef627	reduce flakiness	2022-01-24 17:20:15 +03:00
Dmitry Rodionov	37c440c5d3	Introduce first version of tenant migration between pageservers This patch includes attach/detach http endpoints in pageservers. Some changes in callmemaybe handling inside safekeeper and an integration test to check migration with and without load. There are still some rough edges that will be addressed in follow up patches	2022-01-24 17:20:15 +03:00
anastasia	81e94d1897	Add LSN and Backpressure descriptions to glossary.md	2022-01-24 12:52:30 +03:00
Konstantin Knizhnik	7bc1274a03	Fix comparison with disk_consistent_lsn in newer_image_layer_exists (#1167 )	2022-01-24 12:19:18 +03:00
Dmitry Rodionov	5f5a11525c	Switch our python package management solution to poetry. Mainly because it has better support for installing the packages from different python versions. It also has better dependency resolver than Pipenv. And supports modern standard for python dependency management. This includes usage of pyproject.toml for project specific configuration instead of per tool conf files. See following links for details: https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/ https://www.python.org/dev/peps/pep-0518/	2022-01-24 11:33:47 +03:00
Konstantin Knizhnik	e209764877	Do not delete layers beyond cutoff LSN (#1128 ) * Do not delete layers beyond cutoff LSN * Update pageserver/src/layered_repository/layer_map.rs Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi> Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>	2022-01-24 10:42:40 +03:00
Kirill Bulatov	65290b2e96	Ensure every submodule compiles on its own	2022-01-21 17:34:15 +03:00
Dmitry Ivanov	127df96635	[proxy] Make `NUM_BYTES_PROXIED_COUNTER` more precise	2022-01-21 17:31:19 +03:00
Kirill Bulatov	924d8d489a	Allow enabling S3 mock in all existing tests with an env var	2022-01-20 18:42:47 +02:00
Dmitry Rodionov	026eb64a83	Use python lib to mock s3	2022-01-20 18:42:47 +02:00
Kirill Bulatov	45124856b1	Better S3 remote storage logging	2022-01-20 18:42:47 +02:00
Kirill Bulatov	38c6f6ce16	Allow specifying custom endpoint in s3	2022-01-20 18:42:47 +02:00
Heikki Linnakangas	caa62eff2a	Fix description of proxy --auth-endpoint option.	2022-01-20 14:50:27 +03:00
Dmitry Ivanov	d3542c34f1	Refactoring: use anyhow::Context's methods where possible	2022-01-19 16:33:48 +03:00
Kirill Bulatov	7fb62fc849	Fix macos compilation	2022-01-18 23:01:04 +02:00