Compare commits


1 Commit

Author: Bojan Serafimov | SHA1: 44feda0061 | Message: Lock in sorted order | Date: 2022-06-27 15:32:05 -04:00
104 changed files with 957 additions and 3098 deletions

View File

@@ -6,7 +6,5 @@ timeout = 30
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp neither worked for me
transfer_method = piped
scp_if_ssh = True
pipelining = True

View File

@@ -1,7 +1,3 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use pre 8.5 option name to cope with old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023

View File

@@ -12,7 +12,6 @@ pageservers
safekeepers
[storage:vars]
env_name = neon-stress
console_mgmt_base_url = http://neon-stress-console.local
bucket_name = neon-storage-ireland
bucket_region = eu-west-1

View File

@@ -12,7 +12,6 @@ pageservers
safekeepers
[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2

View File

@@ -13,7 +13,6 @@ pageservers
safekeepers
[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1

View File

@@ -6,7 +6,7 @@ After=network.target auditd.service
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -100,8 +100,10 @@ jobs:
name: Rust build << parameters.build_type >>
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
@@ -110,7 +112,7 @@ jobs:
export RUSTC_WRAPPER=cachepot
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
cachepot -s
- save_cache:
@@ -126,24 +128,32 @@ jobs:
name: cargo test
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
cargo test $CARGO_FLAGS
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
# Install the rust binaries, for use by test jobs
- run:
name: Install rust binaries
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
cargo metadata --format-version=1 --no-deps |
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
cargo test --message-format=json --no-run |
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
@@ -156,15 +166,34 @@ jobs:
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/zenith/bin/$bin
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/zenith/test_bin/$(basename $bin)
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
fi
# Install the postgres binaries, for use by test jobs
- run:
name: Install postgres binaries
command: |
cp -a tmp_install /tmp/zenith/pg_install
# Save rust binaries for other jobs in the workflow
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save the rust binaries and coverage data for other jobs in this workflow.
- persist_to_workspace:
root: /tmp/zenith
paths:
@@ -257,7 +286,7 @@ jobs:
# no_output_timeout, specified here.
no_output_timeout: 10m
environment:
- NEON_BIN: /tmp/zenith/bin
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
@@ -285,6 +314,12 @@ jobs:
export GITHUB_SHA=$CIRCLE_SHA1
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
@@ -295,7 +330,7 @@ jobs:
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
./scripts/pytest \
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
@@ -324,12 +359,67 @@ jobs:
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
# Save data (if any)
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save coverage data (if any)
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
coverage-report:
executor: neon-xlarge-executor
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
- run:
name: Build coverage report
command: |
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/coverage \
--dir=/tmp/zenith/coverage report \
--input-objects=/tmp/zenith/etc/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
- run:
name: Upload coverage report
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/git-upload \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"success\",
\"context\": \"zenith-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
# Build neondatabase/neon:latest image and push it to Docker hub
docker-image:
docker:
@@ -598,6 +688,50 @@ jobs:
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\"
}
}"
workflows:
build_and_test:
jobs:
@@ -640,6 +774,12 @@ workflows:
save_perf_report: true
requires:
- build-neon-release
- coverage-report:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
requires:
# TODO: consider adding more
- other-tests-debug
- docker-image:
# Context gives an ability to login
context: Docker Hub
@@ -740,3 +880,14 @@ workflows:
- release
requires:
- docker-image-release
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "neondatabase/cloud"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-neon-release
# - pg_regress-tests-release
# - other-tests-release

View File

@@ -2,29 +2,25 @@ name: 'Run python test'
description: 'Runs a Neon python test set, performing all the required preparations before'
inputs:
# Select the type of Rust build. Must be "release" or "debug".
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
required: true
rust_toolchain:
description: 'Rust toolchain version to fetch the caches'
required: true
# This parameter is required, to prevent the mistake of running all tests in one job.
test_selection:
description: 'A python test suite to run'
required: true
# Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
extra_params:
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
required: false
default: ''
needs_postgres_source:
description: 'Set to true if the test suite requires postgres source checked out'
required: false
default: 'false'
run_in_parallel:
description: 'Whether to run tests in parallel'
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report'
required: false
default: 'false'
@@ -64,7 +60,7 @@ runs:
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
ZENITH_BIN: /tmp/neon/bin
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
@@ -92,7 +88,7 @@ runs:
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
fi
@@ -121,20 +117,3 @@ runs:
scripts/generate_and_push_perf_report.sh
fi
fi
- name: Delete all data but logs
shell: bash -ex {0}
if: always()
run: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
du -sh /tmp/test_output/*
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
path: /tmp/test_output/

View File

@@ -1,17 +0,0 @@
name: 'Merge and upload coverage data'
description: 'Compresses and uploads the coverage data as an artifact'
runs:
using: "composite"
steps:
- name: Merge coverage data
shell: bash -ex {0}
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Upload coverage data
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: coverage-data-artifact
path: /tmp/coverage/

View File

@@ -1,28 +1,13 @@
name: Test
on:
push:
branches:
- main
pull_request:
name: build_and_test
on: [ push ]
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
build-postgres:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -49,7 +34,7 @@ jobs:
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
run: COPT='-Werror' mold -run make postgres -j$(nproc)
# actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
- name: Prepare postgres artifact
@@ -67,7 +52,6 @@ jobs:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-postgres ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -101,39 +85,44 @@ jobs:
~/.cargo/registry/
~/.cargo/git/
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Run cargo build
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
export CACHEPOT_BUCKET=zenith-rust-cachepot
export RUSTC_WRAPPER=cachepot
export AWS_ACCESS_KEY_ID="${{ secrets.AWS_ACCESS_KEY_ID }}"
export AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_SECRET_ACCESS_KEY }}"
export HOME=/home/runner
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
cachepot -s
- name: Run cargo test
run: |
export HOME=/home/runner
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
- name: Install rust binaries
run: |
export HOME=/home/runner
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
@@ -148,36 +137,39 @@ jobs:
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/neon/bin/
mkdir -p /tmp/neon/test_bin/
mkdir -p /tmp/neon/etc/
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/bin
mkdir -p /tmp/neon/test_bin
mkdir -p /tmp/neon/etc
# Install target binaries
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
cp $SRC $DST
echo $DST >> /tmp/neon/etc/binaries.list
done
# Install test executables and write list of all binaries (for code coverage)
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
cp "$SRC" "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
cp $SRC $DST
echo $DST >> /tmp/neon/etc/binaries.list
done
fi
- name: Install postgres binaries
run: cp -a tmp_install /tmp/neon/pg_install
- name: Merge coverage data
run: |
export HOME=/home/runner
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage merge
fi
- name: Prepare neon artifact
run: tar -C /tmp/neon/ -czf ./neon.tgz .
@@ -189,17 +181,38 @@ jobs:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon.tgz
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
matrix:
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .
pg_regress-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -218,15 +231,10 @@ jobs:
test_selection: batch_pg_regress
needs_postgres_source: true
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
other-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -244,15 +252,10 @@ jobs:
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_others
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ release ]
rust_toolchain: [ 1.58 ]
@@ -270,120 +273,4 @@ jobs:
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
coverage-report:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ other-tests, pg_regress-tests ]
strategy:
fail-fast: false
matrix:
build_type: [ debug ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Restore cargo deps cache
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
~/.cargo/git/
target/
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon-artifact/
- name: Extract Neon artifact
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Restore coverage data
uses: actions/download-artifact@v3
with:
name: coverage-data-artifact
path: /tmp/coverage/
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Build and upload coverage report
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
scripts/coverage \
--dir=/tmp/coverage report \
--input-objects=/tmp/coverage/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
scripts/git-upload \
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"success\",
\"context\": \"neon-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
trigger-e2e-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
REMOTE_REPO="${{ github.repository_owner }}/cloud"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\"
}
}"
# save_perf_report: true

View File

@@ -1,74 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
runs-on: [ ubuntu-latest ]
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -ex {0}
run: ./scripts/pysync
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
shell: bash -ex {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Post to a Slack channel
if: failure()
id: slack
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,4 +1,4 @@
name: Check code style and build
name: Build and Test
on:
push:
@@ -6,27 +6,15 @@ on:
- main
pull_request:
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
jobs:
check-codestyle-rust:
regression-check:
strategy:
fail-fast: false
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 50
timeout-minutes: 30
name: run regression test suite
runs-on: ${{ matrix.os }}
@@ -104,30 +92,5 @@ jobs:
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --all --all-targets
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .
- name: Run cargo test
run: cargo test --all --all-targets

Cargo.lock (generated)
View File

@@ -461,7 +461,6 @@ dependencies = [
"tar",
"tokio",
"tokio-postgres",
"url",
"workspace_hack",
]

View File

@@ -1,5 +1,5 @@
# Build Postgres
FROM neondatabase/rust:1.58 AS pg-build
FROM zimg/rust:1.58 AS pg-build
WORKDIR /pg
USER root
@@ -14,7 +14,7 @@ RUN set -e \
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
FROM neondatabase/rust:1.58 AS build
FROM zimg/rust:1.58 AS build
ARG GIT_VERSION=local
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
@@ -46,9 +46,9 @@ RUN set -e \
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy /usr/local/bin
COPY --from=pg-build /pg/tmp_install/ /usr/local/
COPY --from=pg-build /postgres_install.tar.gz /data/

View File

@@ -1,6 +1,6 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .circle/config.yml
FROM neondatabase/rust:1.58 AS rust-build
FROM zimg/rust:1.58 AS rust-build
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
@@ -15,4 +15,4 @@ RUN set -e \
# Final image that only has one binary
FROM debian:buster-slim
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl

View File

@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl
brew install protobuf etcd
```
2. [Install Rust](https://www.rust-lang.org/tools/install)

View File

@@ -18,5 +18,4 @@ serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -33,7 +33,7 @@ use std::process::exit;
use std::sync::{Arc, RwLock};
use std::{thread, time::Duration};
use anyhow::{Context, Result};
use anyhow::Result;
use chrono::Utc;
use clap::Arg;
use log::{error, info};
@@ -45,7 +45,6 @@ use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> {
// TODO: re-use `utils::logging` later
@@ -132,7 +131,7 @@ fn main() -> Result<()> {
let compute_state = ComputeNode {
start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
connstr: connstr.to_string(),
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
spec,
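
This hunk (together with the `url = "2.2.2"` removal from compute_tools' Cargo.toml above) swaps the parsed `url::Url` connection string for a plain `String`. A minimal sketch of the two representations, with illustrative names rather than the commit's code:

```
// Hypothetical helpers contrasting the two connstr representations.
use anyhow::Context;
use url::Url; // only needed for the parsed variant

// One side of the diff: validate up front and keep structure (user, host, path, ...).
fn connstr_as_url(connstr: &str) -> anyhow::Result<Url> {
    Url::parse(connstr).context("cannot parse connstr as a URL")
}

// The other side: store the raw text and defer validation to whoever connects with it.
fn connstr_as_string(connstr: &str) -> String {
    connstr.to_string()
}
```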

View File

@@ -1,3 +1,5 @@
use std::sync::Arc;
use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
@@ -21,8 +23,9 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
Ok(())
}
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
let connstr = &compute.connstr;
let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
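
The signature change to `&Arc<ComputeNode>` follows the usual pattern for state shared with spawned tasks; a small sketch (simplified `ComputeNode`, not the real struct) of why the `Arc` matters:

```
// Sketch: an Arc can be cloned and moved into a spawned thread or task,
// while a plain &ComputeNode cannot outlive the caller's stack frame.
use std::sync::Arc;

struct ComputeNode {
    connstr: String,
}

fn spawn_check(compute: &Arc<ComputeNode>) {
    let compute = Arc::clone(compute); // cheap refcount bump
    std::thread::spawn(move || {
        // the owned Arc keeps ComputeNode alive for as long as this thread runs
        println!("checking writability at {}", compute.connstr);
    });
}
```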

View File

@@ -35,8 +35,7 @@ use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
// Url type maintains proper escaping
pub connstr: url::Url,
pub connstr: String,
pub pgdata: String,
pub pgbin: String,
pub spec: ComputeSpec,
@@ -269,32 +268,27 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin`name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
let mut client = match Client::connect(&self.connstr, NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = self.connstr.clone();
let zenith_admin_connstr = self.connstr.replacen("cloud_admin", "zenith_admin", 1);
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
let mut client = Client::connect(&zenith_admin_connstr, NoTls)?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
Client::connect(&self.connstr, NoTls)?
}
Ok(client) => client,
};
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(&self.spec, &mut client)?;
create_writablity_check_data(&mut client)?;
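
The fallback path above connects as `zenith_admin` when `cloud_admin` does not exist yet; with the connstr now a `String`, the user swap is plain text replacement. A condensed sketch of that retry logic (error handling trimmed, not the exact code):

```
// Sketch of the admin-bootstrap fallback, assuming the connstr's user is
// literally "cloud_admin" so replacen() swaps only the user name component.
use postgres::{Client, NoTls};

fn connect_as_cloud_admin(connstr: &str) -> anyhow::Result<Client> {
    match Client::connect(connstr, NoTls) {
        Ok(client) => Ok(client),
        Err(_) => {
            // Old clusters only have zenith_admin: connect as it, create the
            // new superuser, then reconnect with the original connstr.
            let admin_connstr = connstr.replacen("cloud_admin", "zenith_admin", 1);
            let mut client = Client::connect(&admin_connstr, NoTls)?;
            client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
            client.simple_query("GRANT zenith_admin TO cloud_admin")?;
            drop(client);
            Ok(Client::connect(connstr, NoTls)?)
        }
    }
}
```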

View File

@@ -13,11 +13,11 @@ const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &ComputeNode) {
fn watch_compute_activity(compute: &Arc<ComputeNode>) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.as_str();
let connstr = compute.connstr.clone();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let mut client = Client::connect(&connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
@@ -32,7 +32,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
client = Client::connect(&connstr, NoTls);
continue;
}
@@ -93,7 +93,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
client = Client::connect(&connstr, NoTls);
}
}
}
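
The monitor keeps one blocking `postgres` client alive and reconnects whenever it goes away; a stripped-down sketch of that loop (the activity query and shared-state update are omitted):

```
// Sketch of the reconnect pattern: probe the connection, and on any failure
// re-run Client::connect with the same connstr on the next iteration.
use std::{thread, time::Duration};
use postgres::{Client, NoTls};

fn watch(connstr: &str) {
    let mut client = Client::connect(connstr, NoTls);
    loop {
        let healthy = match client.as_mut() {
            Ok(c) => !c.is_closed() && c.simple_query("SELECT 1").is_ok(),
            Err(_) => false,
        };
        if !healthy {
            // closed, failed, or never connected: establish a fresh connection
            client = Client::connect(connstr, NoTls);
        }
        thread::sleep(Duration::from_millis(500));
    }
}
```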

View File

@@ -1,4 +1,3 @@
use std::fmt::Write;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{SocketAddr, TcpStream};
@@ -139,11 +138,9 @@ impl Role {
// Now we also support SCRAM-SHA-256 and to preserve compatibility
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
write!(params, " PASSWORD '{pass}'")
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" PASSWORD '{}'", pass));
} else {
write!(params, " PASSWORD 'md5{pass}'")
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
}
} else {
params.push_str(" PASSWORD NULL");
@@ -161,8 +158,7 @@ impl Database {
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.quote())
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" OWNER {}", &self.owner.quote()));
params
}
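
Both sides of this hunk append formatted SQL options to a `String`; a short sketch of the two idioms, since the `expect()` message only makes sense once you know that `fmt::Write` for `String` cannot fail:

```
// Sketch: two equivalent ways to append formatted text to a String.
use std::fmt::Write;

// Variant kept on one side of the diff: build a temporary String via format!.
fn append_with_format(params: &mut String, pass: &str) {
    params.push_str(&format!(" PASSWORD '{}'", pass));
}

// Variant on the other side: write! straight into the buffer, no temporary.
// The Write impl for String always returns Ok, hence the expect() message.
fn append_with_write(params: &mut String, pass: &str) {
    write!(params, " PASSWORD '{}'", pass).expect("writing to a String never fails");
}
```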

View File

@@ -2,10 +2,9 @@ use std::path::Path;
use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::{Client, NoTls};
use postgres::Client;
use serde::Deserialize;
use crate::compute::ComputeNode;
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -98,13 +97,18 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing role renames");
info!("processing delta operations on roles");
for op in ops {
match op.action.as_ref() {
// We do not check either role exists or not,
// Postgres will take care of it for us
"delete_role" => {
// no-op now, roles will be deleted at the end of configuration
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
// Renaming role drops its password, since role name is
// Renaming role drops its password, since tole name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
@@ -178,7 +182,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
"grant pg_read_all_data, pg_write_all_data to {}",
name.quote()
);
xact.execute(grant_query.as_str(), &[])?;
@@ -193,70 +197,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations {
info!("reassigning dependent objects of to-be-deleted roles");
for op in ops {
if op.action == "delete_role" {
reassign_owned_objects(node, &op.name)?;
}
}
}
// Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions");
for op in ops {
// We do not check either role exists or not,
// Postgres will take care of it for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
}
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut connstr = node.connstr.clone();
// database name is always the last and the only component of the path
connstr.set_path(&db.name);
let mut client = Client::connect(connstr.as_str(), NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.quote(),
db.owner.quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name, &db.name, &db.owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
Ok(())
}
/// It follows mostly the same logic as `handle_roles()` excepting that we
/// does not use an explicit transactions block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
@@ -354,26 +294,13 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
// via the web interface and proxy link auth. And also we grant a
// read / write all data privilege to every role. So also grant
// create to everyone.
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.quote(),
roles.join(", ")
db.owner.quote()
);
info!("grant query {}", &query);

View File

@@ -403,6 +403,16 @@ impl LocalEnv {
self.pg_distrib_dir.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{}' in zenith distrib dir '{}'",
binary,
self.zenith_distrib_dir.display()
);
}
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
@@ -411,6 +421,12 @@ impl LocalEnv {
);
}
}
if !self.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_distrib_dir.display()
);
}
fs::create_dir(&base_path)?;
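
The validation above checks each required binary before creating the repo directory; a generic sketch of the same idea (hypothetical helper, not from the commit):

```
// Sketch: fail fast with a descriptive error if a required binary is missing
// from the distribution directory.
use std::path::Path;
use anyhow::bail;

fn ensure_binaries_exist(distrib_dir: &Path, binaries: &[&str]) -> anyhow::Result<()> {
    for binary in binaries {
        if !distrib_dir.join(binary).exists() {
            bail!(
                "Can't find binary '{}' in distrib dir '{}'",
                binary,
                distrib_dir.display()
            );
        }
    }
    Ok(())
}
```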

View File

@@ -12,10 +12,8 @@ use std::{
borrow::Cow,
collections::HashMap,
ffi::OsStr,
fmt::Debug,
num::{NonZeroU32, NonZeroUsize},
path::{Path, PathBuf},
pin::Pin,
};
use anyhow::{bail, Context};
@@ -72,7 +70,11 @@ pub trait RemoteStorage: Send + Sync {
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError>;
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>>;
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
@@ -81,49 +83,12 @@ pub trait RemoteStorage: Send + Sync {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError>;
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>>;
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
}
pub struct Download {
pub download_stream: Pin<Box<dyn io::AsyncRead + Unpin + Send>>,
/// Extra key-value data, associated with the current remote file.
pub metadata: Option<StorageMetadata>,
}
impl Debug for Download {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Download")
.field("metadata", &self.metadata)
.finish()
}
}
#[derive(Debug)]
pub enum DownloadError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The file was found in the remote storage, but the download failed.
Other(anyhow::Error),
}
impl std::fmt::Display for DownloadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DownloadError::BadInput(e) => {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e}"),
}
}
}
impl std::error::Error for DownloadError {}
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
pub enum GenericRemoteStorage {
@@ -215,7 +180,7 @@ pub struct S3Config {
pub concurrency_limit: NonZeroUsize,
}
impl Debug for S3Config {
impl std::fmt::Debug for S3Config {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("S3Config")
.field("bucket_name", &self.bucket_name)

View File

@@ -17,7 +17,7 @@ use tokio::{
};
use tracing::*;
use crate::{path_with_suffix_extension, Download, DownloadError};
use crate::path_with_suffix_extension;
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
@@ -192,56 +192,14 @@ impl RemoteStorage for LocalFs {
Ok(())
}
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&file_path)
.await
.with_context(|| {
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
);
let metadata = self
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
Ok(Download {
metadata,
download_stream: Box::pin(source),
})
} else {
Err(DownloadError::NotFound)
}
}
async fn download_byte_range(
async fn download(
&self,
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
if end_exclusive <= start_inclusive {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
};
if start_inclusive == end_exclusive.saturating_sub(1) {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
let file_path = self.resolve_in_storage(from)?;
if file_path.exists() && file_path.is_file() {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
@@ -252,31 +210,81 @@ impl RemoteStorage for LocalFs {
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
})?,
);
io::copy(&mut source, to).await.with_context(|| {
format!(
"Failed to download file '{}' from the local storage",
file_path.display()
)
})?;
source.flush().await?;
self.read_storage_metadata(&file_path).await
} else {
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
}
}
async fn download_byte_range(
&self,
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
if let Some(end_exclusive) = end_exclusive {
ensure!(
end_exclusive > start_inclusive,
"Invalid range, start ({}) is bigger then end ({:?})",
start_inclusive,
end_exclusive
);
if start_inclusive == end_exclusive.saturating_sub(1) {
return Ok(None);
}
}
let file_path = self.resolve_in_storage(from)?;
if file_path.exists() && file_path.is_file() {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&file_path)
.await
.with_context(|| {
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})?,
);
source
.seek(io::SeekFrom::Start(start_inclusive))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
.context("Failed to seek to the range start in a local storage file")?;
match end_exclusive {
Some(end_exclusive) => {
io::copy(&mut source.take(end_exclusive - start_inclusive), to).await
}
None => io::copy(&mut source, to).await,
}
.with_context(|| {
format!(
"Failed to download file '{}' range from the local storage",
file_path.display()
)
})?;
Ok(match end_exclusive {
Some(end_exclusive) => Download {
metadata,
download_stream: Box::pin(source.take(end_exclusive - start_inclusive)),
},
None => Download {
metadata,
download_stream: Box::pin(source),
},
})
self.read_storage_metadata(&file_path).await
} else {
Err(DownloadError::NotFound)
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
}
}
@@ -344,19 +352,6 @@ async fn create_target_directory(target_file_path: &Path) -> anyhow::Result<()>
Ok(())
}
fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
if file_path.exists() {
ensure!(
file_path.is_file(),
"file path '{}' is not a file",
file_path.display()
);
Ok(true)
} else {
Ok(false)
}
}
#[cfg(test)]
mod pure_tests {
use tempfile::tempdir;
@@ -523,31 +518,6 @@ mod fs_tests {
use std::{collections::HashMap, io::Write};
use tempfile::tempdir;
async fn read_and_assert_remote_file_contents(
storage: &LocalFs,
#[allow(clippy::ptr_arg)]
// have to use &PathBuf due to `storage.local_path` parameter requirements
remote_storage_path: &PathBuf,
expected_metadata: Option<&StorageMetadata>,
) -> anyhow::Result<String> {
let mut download = storage
.download(remote_storage_path)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
download.metadata.as_ref() == expected_metadata,
"Unexpected metadata returned for the downloaded file"
);
let mut contents = String::new();
download
.download_stream
.read_to_string(&mut contents)
.await
.context("Failed to read remote file contents into string")?;
Ok(contents)
}
#[tokio::test]
async fn upload_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
@@ -598,7 +568,15 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage.download(&upload_target, &mut content_bytes).await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
content_bytes.flush().await?;
let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
assert_eq!(
dummy_contents(upload_name),
contents,
@@ -606,9 +584,13 @@ mod fs_tests {
);
let non_existing_path = PathBuf::from("somewhere").join("else");
match storage.download(&non_existing_path).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
match storage.download(&non_existing_path, &mut io::sink()).await {
Ok(_) => panic!("Should not allow downloading non-existing storage files"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
assert!(error_string.contains(&non_existing_path.display().to_string()));
}
}
Ok(())
}
@@ -621,31 +603,58 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
let mut full_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(&upload_target, 0, None, &mut full_range_bytes)
.await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
full_range_bytes.flush().await?;
assert_eq!(
dummy_contents(upload_name),
full_range_download_contents,
String::from_utf8(full_range_bytes.into_inner().into_inner())?,
"Download full range should return the whole upload"
);
let mut zero_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let same_byte = 1_000_000_000;
let metadata = storage
.download_byte_range(
&upload_target,
same_byte,
Some(same_byte + 1), // exclusive end
&mut zero_range_bytes,
)
.await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
zero_range_bytes.flush().await?;
assert!(
zero_range_bytes.into_inner().into_inner().is_empty(),
"Zero byte range should not download any part of the file"
);
let uploaded_bytes = dummy_contents(upload_name).into_bytes();
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let mut first_part_download = storage
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&mut first_part_remote,
)
.await?;
assert!(
first_part_download.metadata.is_none(),
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut first_part_download.download_stream,
&mut first_part_remote,
)
.await?;
first_part_remote.flush().await?;
let first_part_remote = first_part_remote.into_inner().into_inner();
assert_eq!(
@@ -654,24 +663,20 @@ mod fs_tests {
"First part bytes should be returned when requested"
);
let mut second_part_download = storage
let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(
&upload_target,
first_part_local.len() as u64,
Some((first_part_local.len() + second_part_local.len()) as u64),
&mut second_part_remote,
)
.await?;
assert!(
second_part_download.metadata.is_none(),
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut second_part_download.download_stream,
&mut second_part_remote,
)
.await?;
second_part_remote.flush().await?;
let second_part_remote = second_part_remote.into_inner().into_inner();
assert_eq!(
@@ -691,30 +696,11 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let start = 1_000_000_000;
let end = start + 1;
match storage
.download_byte_range(
&upload_target,
start,
Some(end), // exclusive end
)
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("zero bytes"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
let start = 10000;
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end))
.download_byte_range(&upload_target, start, Some(end), &mut io::sink())
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
@@ -726,6 +712,18 @@ mod fs_tests {
}
}
let non_existing_path = PathBuf::from("somewhere").join("else");
match storage
.download_byte_range(&non_existing_path, 1, Some(3), &mut io::sink())
.await
{
Ok(_) => panic!("Should not allow downloading non-existing storage file ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
assert!(error_string.contains(&non_existing_path.display().to_string()));
}
}
Ok(())
}
@@ -764,26 +762,35 @@ mod fs_tests {
let upload_target =
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let full_download_metadata = storage.download(&upload_target, &mut content_bytes).await?;
content_bytes.flush().await?;
let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
assert_eq!(
dummy_contents(upload_name),
full_range_download_contents,
contents,
"We should upload and download the same contents"
);
assert_eq!(
full_download_metadata.as_ref(),
Some(&metadata),
"We should get the same metadata back for full download"
);
let uploaded_bytes = dummy_contents(upload_name).into_bytes();
let (first_part_local, _) = uploaded_bytes.split_at(3);
let mut partial_download_with_metadata = storage
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
.await?;
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut partial_download_with_metadata.download_stream,
&mut first_part_remote,
)
.await?;
let partial_download_metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&mut first_part_remote,
)
.await?;
first_part_remote.flush().await?;
let first_part_remote = first_part_remote.into_inner().into_inner();
assert_eq!(
@@ -793,8 +800,8 @@ mod fs_tests {
);
assert_eq!(
partial_download_with_metadata.metadata,
Some(metadata),
partial_download_metadata.as_ref(),
Some(&metadata),
"We should get the same metadata back for partial download"
);
@@ -836,7 +843,7 @@ mod fs_tests {
}
fn dummy_contents(name: &str) -> String {
format!("contents for {name}")
format!("contents for {}", name)
}
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<PathBuf>> {
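
Underneath both versions of `download_byte_range` the local-file logic is the same: validate the range, seek, then copy either everything or `end - start` bytes. A condensed sketch (tokio file I/O, illustrative function name):

```
// Sketch of the range copy: seek to start_inclusive, then stream either the
// whole rest of the file or exactly end_exclusive - start_inclusive bytes.
use anyhow::{ensure, Context};
use std::path::Path;
use tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWrite};

async fn copy_range(
    path: &Path,
    start_inclusive: u64,
    end_exclusive: Option<u64>,
    to: &mut (impl AsyncWrite + Unpin),
) -> anyhow::Result<u64> {
    if let Some(end) = end_exclusive {
        ensure!(end > start_inclusive, "Invalid range {start_inclusive}..{end}");
    }
    let mut source = io::BufReader::new(tokio::fs::File::open(path).await?);
    source
        .seek(io::SeekFrom::Start(start_inclusive))
        .await
        .context("Failed to seek to the range start")?;
    let copied = match end_exclusive {
        Some(end) => io::copy(&mut source.take(end - start_inclusive), to).await?,
        None => io::copy(&mut source, to).await?,
    };
    Ok(copied)
}
```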

View File

@@ -9,17 +9,17 @@ use std::path::{Path, PathBuf};
use anyhow::Context;
use rusoto_core::{
credential::{InstanceMetadataProvider, StaticProvider},
HttpClient, Region, RusotoError,
HttpClient, Region,
};
use rusoto_s3::{
DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest,
S3Client, StreamingBody, S3,
DeleteObjectRequest, GetObjectRequest, ListObjectsV2Request, PutObjectRequest, S3Client,
StreamingBody, S3,
};
use tokio::{io, sync::Semaphore};
use tokio_util::io::ReaderStream;
use tracing::debug;
use crate::{strip_path_prefix, Download, DownloadError, RemoteStorage, S3Config};
use crate::{strip_path_prefix, RemoteStorage, S3Config};
use super::StorageMetadata;
@@ -187,39 +187,6 @@ impl S3Bucket {
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
})
}
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 download")
.map_err(DownloadError::Other)?;
metrics::inc_get_object();
match self.client.get_object(request).await {
Ok(object_output) => match object_output.body {
None => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Got no body for the S3 object given"
)))
}
Some(body) => Ok(Download {
metadata: object_output.metadata.map(StorageMetadata),
download_stream: Box::pin(io::BufReader::new(body.into_async_read())),
}),
},
Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound),
Err(e) => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Failed to download S3 object: {e}"
)))
}
}
}
}
#[async_trait::async_trait]
@@ -316,13 +283,38 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
..GetObjectRequest::default()
})
.await
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 download")?;
metrics::inc_get_object();
let object_output = self
.client
.get_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
..GetObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_get_object_fail();
e
})?;
if let Some(body) = object_output.body {
let mut from = io::BufReader::new(body.into_async_read());
io::copy(&mut from, to).await?;
}
Ok(object_output.metadata.map(StorageMetadata))
}
async fn download_byte_range(
@@ -330,7 +322,8 @@ impl RemoteStorage for S3Bucket {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be exclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
@@ -338,14 +331,34 @@ impl RemoteStorage for S3Bucket {
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
None => format!("bytes={}-", start_inclusive),
});
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 range download")?;
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
range,
..GetObjectRequest::default()
})
.await
metrics::inc_get_object();
let object_output = self
.client
.get_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
range,
..GetObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_get_object_fail();
e
})?;
if let Some(body) = object_output.body {
let mut from = io::BufReader::new(body.into_async_read());
io::copy(&mut from, to).await?;
}
Ok(object_output.metadata.map(StorageMetadata))
}
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
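
In the new interface shown above, download and download_byte_range no longer hand back a Download stream; they drain the object body into an AsyncWrite supplied by the caller. A reduced sketch of that pattern (illustrative names, not the remote_storage crate's real API):

use tokio::io::{self, AsyncRead, AsyncWrite, BufReader};

// Drain a downloaded body into whatever sink the caller provides.
async fn drain_into<R, W>(body: R, to: &mut W) -> io::Result<u64>
where
    R: AsyncRead + Unpin,
    W: AsyncWrite + Unpin + ?Sized,
{
    let mut reader = BufReader::new(body);
    io::copy(&mut reader, to).await
}

Callers that want the bytes in memory can simply pass a Vec<u8> as the sink, which is what the storage_sync hunks further below do for the index part file.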

View File

@@ -263,8 +263,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
// start profiler (if enabled)
let profiler_guard = profiling::init_profiler(conf);
pageserver::tenant_tasks::init_tenant_task_pool()?;
// initialize authentication for incoming connections
let auth = match &conf.auth_type {
AuthType::Trust | AuthType::MD5 => None,

View File

@@ -158,18 +158,6 @@ pub struct LayeredRepository {
// Global pageserver config parameters
pub conf: &'static PageServerConf,
// Allows us to gracefully cancel operations that edit the directory
// that backs this layered repository. Usage:
//
// Use `let _guard = file_lock.try_read()` while writing any files.
// Use `let _guard = file_lock.write().unwrap()` to wait for all writes to finish.
//
// TODO try_read this lock during checkpoint as well to prevent race
// between checkpoint and detach/delete.
// TODO try_read this lock for all gc/compaction operations, not just
// ones scheduled by the tenant task manager.
pub file_lock: RwLock<()>,
// Overridden tenant-specific config parameters.
// We keep TenantConfOpt struct here to preserve the information
// about parameters that are not set.
@@ -232,32 +220,23 @@ impl Repository for LayeredRepository {
fn create_empty_timeline(
&self,
timeline_id: ZTimelineId,
timelineid: ZTimelineId,
initdb_lsn: Lsn,
) -> Result<Arc<LayeredTimeline>> {
let mut timelines = self.timelines.lock().unwrap();
let vacant_timeline_entry = match timelines.entry(timeline_id) {
Entry::Occupied(_) => bail!("Timeline already exists"),
Entry::Vacant(vacant_entry) => vacant_entry,
};
let timeline_path = self.conf.timeline_path(&timeline_id, &self.tenant_id);
if timeline_path.exists() {
bail!("Timeline directory already exists, but timeline is missing in repository map. This is a bug.")
}
// Create the timeline directory, and write initial metadata to file.
crashsafe_dir::create_dir_all(timeline_path)?;
crashsafe_dir::create_dir_all(self.conf.timeline_path(&timelineid, &self.tenant_id))?;
let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), initdb_lsn, initdb_lsn);
Self::save_metadata(self.conf, timeline_id, self.tenant_id, &metadata, true)?;
Self::save_metadata(self.conf, timelineid, self.tenant_id, &metadata, true)?;
let timeline = LayeredTimeline::new(
self.conf,
Arc::clone(&self.tenant_conf),
metadata,
None,
timeline_id,
timelineid,
self.tenant_id,
Arc::clone(&self.walredo_mgr),
self.upload_layers,
@@ -266,7 +245,12 @@ impl Repository for LayeredRepository {
// Insert if not exists
let timeline = Arc::new(timeline);
vacant_timeline_entry.insert(LayeredTimelineEntry::Loaded(Arc::clone(&timeline)));
match timelines.entry(timelineid) {
Entry::Occupied(_) => bail!("Timeline already exists"),
Entry::Vacant(vacant) => {
vacant.insert(LayeredTimelineEntry::Loaded(Arc::clone(&timeline)))
}
};
Ok(timeline)
}
@@ -353,12 +337,16 @@ impl Repository for LayeredRepository {
// compactions. We don't want to block everything else while the
// compaction runs.
let timelines = self.timelines.lock().unwrap();
let timelines_to_compact = timelines
let mut timelines_to_compact = timelines
.iter()
.map(|(timelineid, timeline)| (*timelineid, timeline.clone()))
.collect::<Vec<_>>();
drop(timelines);
// Sort to prevent deadlock
timelines_to_compact.sort_by(|a, b| a.0.cmp(&b.0));
// Compact all timelines in order
for (timelineid, timeline) in &timelines_to_compact {
let _entered =
info_span!("compact", timeline = %timelineid, tenant = %self.tenant_id).entered();
@@ -701,7 +689,6 @@ impl LayeredRepository {
) -> LayeredRepository {
LayeredRepository {
tenant_id,
file_lock: RwLock::new(()),
conf,
tenant_conf: Arc::new(RwLock::new(tenant_conf)),
timelines: Mutex::new(HashMap::new()),
@@ -1927,28 +1914,15 @@ impl LayeredTimeline {
} else {
Lsn(0)
};
// Let's consider an example:
//
// delta layer with LSN range 71-81
// delta layer with LSN range 81-91
// delta layer with LSN range 91-101
// image layer at LSN 100
//
// If 'lsn' is still 100, i.e. no new WAL has been processed since the last image layer,
// there's no need to create a new one. We check this case explicitly, to avoid passing
// a bogus range to count_deltas below, with start > end. It's even possible that there
// are some delta layers *later* than current 'lsn', if more WAL was processed and flushed
// after we read last_record_lsn, which is passed here in the 'lsn' argument.
if img_lsn < lsn {
let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
debug!(
"key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
img_range.start, img_range.end, num_deltas, img_lsn, lsn
);
if num_deltas >= self.get_image_creation_threshold() {
return Ok(true);
}
let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
debug!(
"range {}-{}, has {} deltas on this timeline",
img_range.start, img_range.end, num_deltas
);
if num_deltas >= self.get_image_creation_threshold() {
return Ok(true);
}
}
}
@@ -2240,9 +2214,6 @@ impl LayeredTimeline {
LsnForTimestamp::Past(lsn) => {
debug!("past({})", lsn);
}
LsnForTimestamp::NoData(lsn) => {
debug!("nodata({})", lsn);
}
}
debug!("pitr_cutoff_lsn = {:?}", pitr_cutoff_lsn)
}

View File

@@ -34,7 +34,7 @@ pub trait BlobCursor {
) -> Result<(), std::io::Error>;
}
impl<R> BlobCursor for BlockCursor<R>
impl<'a, R> BlobCursor for BlockCursor<R>
where
R: BlockReader,
{

View File

@@ -445,10 +445,7 @@ impl ImageLayerWriter {
},
);
info!("new image layer {}", path.display());
let mut file = VirtualFile::open_with_options(
&path,
std::fs::OpenOptions::new().write(true).create_new(true),
)?;
let mut file = VirtualFile::create(&path)?;
// make room for the header block
file.seek(SeekFrom::Start(PAGE_SZ as u64))?;
let blob_writer = WriteBlobWriter::new(file, PAGE_SZ as u64);

View File

@@ -13,7 +13,7 @@ pub mod repository;
pub mod storage_sync;
pub mod tenant_config;
pub mod tenant_mgr;
pub mod tenant_tasks;
pub mod tenant_threads;
pub mod thread_mgr;
pub mod timelines;
pub mod virtual_file;

View File

@@ -554,7 +554,7 @@ impl PageServerHandler {
// Create empty timeline
info!("creating new timeline");
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
let timeline = repo.create_empty_timeline(timeline_id, base_lsn)?;
let timeline = repo.create_empty_timeline(timeline_id, Lsn(0))?;
let repartition_distance = repo.get_checkpoint_distance();
let mut datadir_timeline =
DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
@@ -1151,7 +1151,6 @@ impl postgres_backend::Handler for PageServerHandler {
LsnForTimestamp::Present(lsn) => format!("{}", lsn),
LsnForTimestamp::Future(_lsn) => "future".into(),
LsnForTimestamp::Past(_lsn) => "past".into(),
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
};
pgb.write_message_noflush(&BeMessage::DataRow(&[Some(result.as_bytes())]))?;
pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;

View File

@@ -51,7 +51,6 @@ pub enum LsnForTimestamp {
Present(Lsn),
Future(Lsn),
Past(Lsn),
NoData(Lsn),
}
impl<R: Repository> DatadirTimeline<R> {
@@ -264,7 +263,7 @@ impl<R: Repository> DatadirTimeline<R> {
(false, false) => {
// This can happen if no commit records have been processed yet, e.g.
// just after importing a cluster.
Ok(LsnForTimestamp::NoData(max_lsn))
bail!("no commit timestamps found");
}
(true, false) => {
// Didn't find any commit timestamps larger than the request

View File

@@ -81,12 +81,6 @@ mod profiling_impl {
pub struct DummyProfilerGuard;
impl Drop for DummyProfilerGuard {
fn drop(&mut self) {
// do nothing, this exists to calm Clippy down
}
}
pub fn profpoint_start(
_conf: &PageServerConf,
_point: ProfilingConfig,

View File

@@ -225,7 +225,7 @@ pub trait Repository: Send + Sync {
/// Initdb lsn is provided for timeline impl to be able to perform checks for some operations against it.
fn create_empty_timeline(
&self,
timeline_id: ZTimelineId,
timelineid: ZTimelineId,
initdb_lsn: Lsn,
) -> Result<Arc<Self::Timeline>>;
@@ -636,19 +636,6 @@ mod tests {
Ok(())
}
#[test]
fn no_duplicate_timelines() -> Result<()> {
let repo = RepoHarness::create("no_duplicate_timelines")?.load();
let _ = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
match repo.create_empty_timeline(TIMELINE_ID, Lsn(0)) {
Ok(_) => panic!("duplicate timeline creation should fail"),
Err(e) => assert_eq!(e.to_string(), "Timeline already exists"),
}
Ok(())
}
/// Convenience function to create a page image with given string as the only content
pub fn test_value(s: &str) -> Value {
let mut buf = BytesMut::new();

View File

@@ -44,23 +44,13 @@ where
index_part_path.display()
)
})?;
let mut index_part_download =
storage
.download(&part_storage_path)
.await
.with_context(|| {
format!("Failed to open download stream for for storage path {part_storage_path:?}")
})?;
let mut index_part_bytes = Vec::new();
io::copy(
&mut index_part_download.download_stream,
&mut index_part_bytes,
)
.await
.with_context(|| {
format!("Failed to download an index part from storage path {part_storage_path:?}")
})?;
storage
.download(&part_storage_path, &mut index_part_bytes)
.await
.with_context(|| {
format!("Failed to download an index part from storage path {part_storage_path:?}")
})?;
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes).with_context(|| {
format!("Failed to deserialize index part file from storage path '{part_storage_path:?}'")
@@ -172,19 +162,15 @@ where
temp_file_path.display()
)
})?;
let mut download = storage
.download(&layer_storage_path)
storage
.download(&layer_storage_path, &mut destination_file)
.await
.with_context(|| {
format!(
"Failed to open a download stream for layer with remote storage path '{layer_storage_path:?}'"
"Failed to download a layer from storage path '{layer_storage_path:?}'"
)
})?;
io::copy(&mut download.download_stream, &mut destination_file).await.with_context(|| {
format!(
"Failed to download layer with remote storage path '{layer_storage_path:?}' into file '{}'", temp_file_path.display()
)
})?;
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
// A file will not be closed immediately when it goes out of scope if there are any IO operations

View File

@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
}
/// Per-tenant configuration options

View File

@@ -230,6 +230,8 @@ pub fn shutdown_all_tenants() {
drop(m);
thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiverManager), None, None);
thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), None, None);
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), None, None);
// Ok, no background threads running anymore. Flush any remaining data in
// memory to disk.
@@ -328,12 +330,44 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
}
(TenantState::Idle, TenantState::Active) => {
info!("activating tenant {tenant_id}");
let compactor_spawn_result = thread_mgr::spawn(
ThreadKind::Compactor,
Some(tenant_id),
None,
"Compactor thread",
false,
move || crate::tenant_threads::compact_loop(tenant_id),
);
if compactor_spawn_result.is_err() {
let mut m = tenants_state::write_tenants();
m.get_mut(&tenant_id)
.with_context(|| format!("Tenant not found for id {tenant_id}"))?
.state = old_state;
drop(m);
}
compactor_spawn_result?;
// Spawn gc and compaction loops. The loops will shut themselves
// down when they notice that the tenant is inactive.
// TODO maybe use tokio::sync::watch instead?
crate::tenant_tasks::start_compaction_loop(tenant_id)?;
crate::tenant_tasks::start_gc_loop(tenant_id)?;
let gc_spawn_result = thread_mgr::spawn(
ThreadKind::GarbageCollector,
Some(tenant_id),
None,
"GC thread",
false,
move || crate::tenant_threads::gc_loop(tenant_id),
)
.map(|_thread_id| ()) // update the `Result::Ok` type to match the outer function's return signature
.with_context(|| format!("Failed to launch GC thread for tenant {tenant_id}"));
if let Err(e) = &gc_spawn_result {
let mut m = tenants_state::write_tenants();
m.get_mut(&tenant_id)
.with_context(|| format!("Tenant not found for id {tenant_id}"))?
.state = old_state;
drop(m);
error!("Failed to start GC thread for tenant {tenant_id}, stopping its checkpointer thread: {e:?}");
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
return gc_spawn_result;
}
}
(TenantState::Idle, TenantState::Stopping) => {
info!("stopping idle tenant {tenant_id}");
@@ -345,10 +379,8 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
Some(tenant_id),
None,
);
// Wait until all gc/compaction tasks finish
let repo = get_repository_for_tenant(tenant_id)?;
let _guard = repo.file_lock.write().unwrap();
thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), Some(tenant_id), None);
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
}
}

View File

@@ -1,288 +0,0 @@
//! This module contains functions to serve per-tenant background processes,
//! such as compaction and GC
use std::collections::HashMap;
use std::ops::ControlFlow;
use std::time::Duration;
use crate::repository::Repository;
use crate::tenant_mgr::TenantState;
use crate::thread_mgr::ThreadKind;
use crate::{tenant_mgr, thread_mgr};
use anyhow::{self, Context};
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use metrics::{register_int_counter_vec, IntCounterVec};
use once_cell::sync::{Lazy, OnceCell};
use tokio::sync::mpsc;
use tokio::sync::watch;
use tracing::*;
use utils::zid::ZTenantId;
static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_task_events",
"Number of task start/stop/fail events.",
&["event"],
)
.expect("Failed to register tenant_task_events metric")
});
///
/// Compaction task's main loop
///
async fn compaction_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
loop {
trace!("waking up");
// Run blocking part of the task
let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
// Break if tenant is not active
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
return Ok(ControlFlow::Break(()));
}
// Break if we're not allowed to write to disk
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// TODO do this inside repo.compaction_iteration instead.
let _guard = match repo.file_lock.try_read() {
Ok(g) => g,
Err(_) => return Ok(ControlFlow::Break(())),
};
// Run compaction
let compaction_period = repo.get_compaction_period();
repo.compaction_iteration()?;
Ok(ControlFlow::Continue(compaction_period))
})
.await;
// Decide whether to sleep or break
let sleep_duration = match period {
Ok(Ok(ControlFlow::Continue(period))) => period,
Ok(Ok(ControlFlow::Break(()))) => break,
Ok(Err(e)) => {
error!("Compaction failed, retrying: {}", e);
Duration::from_secs(2)
}
Err(e) => {
error!("Compaction join error, retrying: {}", e);
Duration::from_secs(2)
}
};
// Sleep
tokio::select! {
_ = cancel.changed() => {
trace!("received cancellation request");
break;
},
_ = tokio::time::sleep(sleep_duration) => {},
}
}
trace!(
"compaction loop stopped. State is {:?}",
tenant_mgr::get_tenant_state(tenantid)
);
}
static START_GC_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
static START_COMPACTION_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
/// Spawn a task that will periodically schedule garbage collection until
/// the tenant becomes inactive. This should be called on tenant
/// activation.
pub fn start_gc_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
START_GC_LOOP
.get()
.context("Failed to get START_GC_LOOP")?
.blocking_send(tenantid)
.context("Failed to send to START_GC_LOOP channel")?;
Ok(())
}
/// Spawn a task that will periodically schedule compaction until
/// the tenant becomes inactive. This should be called on tenant
/// activation.
pub fn start_compaction_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
START_COMPACTION_LOOP
.get()
.context("failed to get START_COMPACTION_LOOP")?
.blocking_send(tenantid)
.context("failed to send to START_COMPACTION_LOOP")?;
Ok(())
}
/// Spawn the TenantTaskManager
/// This needs to be called before start_gc_loop or start_compaction_loop
pub fn init_tenant_task_pool() -> anyhow::Result<()> {
let runtime = tokio::runtime::Builder::new_multi_thread()
.thread_name("tenant-task-worker")
.worker_threads(40) // Way more than necessary
.max_blocking_threads(100) // Way more than necessary
.enable_all()
.build()?;
let (gc_send, mut gc_recv) = mpsc::channel::<ZTenantId>(100);
START_GC_LOOP
.set(gc_send)
.expect("Failed to set START_GC_LOOP");
let (compaction_send, mut compaction_recv) = mpsc::channel::<ZTenantId>(100);
START_COMPACTION_LOOP
.set(compaction_send)
.expect("Failed to set START_COMPACTION_LOOP");
// TODO this is getting repetitive
let mut gc_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
let mut compaction_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
thread_mgr::spawn(
ThreadKind::TenantTaskManager,
None,
None,
"Tenant task manager main thread",
true,
move || {
runtime.block_on(async move {
let mut futures = FuturesUnordered::new();
loop {
tokio::select! {
_ = thread_mgr::shutdown_watcher() => {
// Send cancellation to all tasks
for (_, cancel) in gc_loops.drain() {
cancel.send(()).ok();
}
for (_, cancel) in compaction_loops.drain() {
cancel.send(()).ok();
}
// Exit after all tasks finish
while let Some(result) = futures.next().await {
match result {
Ok(()) => {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
},
Err(e) => {
TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
error!("loop join error {}", e)
},
}
}
break;
},
tenantid = gc_recv.recv() => {
let tenantid = tenantid.expect("Gc task channel closed unexpectedly");
// Spawn a new task and request cancellation of the old one, if it exists
let (cancel_send, cancel_recv) = watch::channel(());
let handle = tokio::spawn(gc_loop(tenantid, cancel_recv)
.instrument(info_span!("gc loop", tenant = %tenantid)));
if let Some(old_cancel_send) = gc_loops.insert(tenantid, cancel_send) {
old_cancel_send.send(()).ok();
}
// Update metrics, remember handle
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
futures.push(handle);
},
tenantid = compaction_recv.recv() => {
let tenantid = tenantid.expect("Compaction task channel closed unexpectedly");
// Spawn a new task and request cancellation of the old one, if it exists
let (cancel_send, cancel_recv) = watch::channel(());
let handle = tokio::spawn(compaction_loop(tenantid, cancel_recv)
.instrument(info_span!("compaction loop", tenant = %tenantid)));
if let Some(old_cancel_send) = compaction_loops.insert(tenantid, cancel_send) {
old_cancel_send.send(()).ok();
}
// Update metrics, remember handle
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
futures.push(handle);
},
result = futures.next() => {
// Log and count any unhandled panics
match result {
Some(Ok(())) => {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
},
Some(Err(e)) => {
TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
error!("loop join error {}", e)
},
None => {},
};
},
}
}
});
Ok(())
},
)?;
Ok(())
}
///
/// GC task's main loop
///
async fn gc_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
loop {
trace!("waking up");
// Run blocking part of the task
let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
// Break if tenant is not active
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
return Ok(ControlFlow::Break(()));
}
// Break if we're not allowed to write to disk
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// TODO do this inside repo.gc_iteration instead.
let _guard = match repo.file_lock.try_read() {
Ok(g) => g,
Err(_) => return Ok(ControlFlow::Break(())),
};
// Run gc
let gc_period = repo.get_gc_period();
let gc_horizon = repo.get_gc_horizon();
if gc_horizon > 0 {
repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
}
Ok(ControlFlow::Continue(gc_period))
})
.await;
// Decide whether to sleep or break
let sleep_duration = match period {
Ok(Ok(ControlFlow::Continue(period))) => period,
Ok(Ok(ControlFlow::Break(()))) => break,
Ok(Err(e)) => {
error!("Gc failed, retrying: {}", e);
Duration::from_secs(2)
}
Err(e) => {
error!("Gc join error, retrying: {}", e);
Duration::from_secs(2)
}
};
// Sleep
tokio::select! {
_ = cancel.changed() => {
trace!("received cancellation request");
break;
},
_ = tokio::time::sleep(sleep_duration) => {},
}
}
trace!(
"GC loop stopped. State is {:?}",
tenant_mgr::get_tenant_state(tenantid)
);
}
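
Both loops in this module (removed in this diff) follow the same shape: run one blocking iteration of work, then sleep for the configured period, but wake immediately if the cancellation channel fires. A stripped-down sketch of that select!-based cancellation, assuming only tokio and not the pageserver's actual task plumbing:

use std::time::Duration;
use tokio::sync::watch;

async fn periodic_task(mut cancel: watch::Receiver<()>, period: Duration) {
    loop {
        // ... one iteration of gc/compaction work would go here ...
        tokio::select! {
            _ = cancel.changed() => break,        // cancellation requested
            _ = tokio::time::sleep(period) => {}  // otherwise wait out the period
        }
    }
}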

View File

@@ -0,0 +1,79 @@
//! This module contains functions to serve per-tenant background processes,
//! such as compaction and GC
use crate::repository::Repository;
use crate::tenant_mgr;
use crate::tenant_mgr::TenantState;
use anyhow::Result;
use std::time::Duration;
use tracing::*;
use utils::zid::ZTenantId;
///
/// Compaction thread's main loop
///
pub fn compact_loop(tenantid: ZTenantId) -> Result<()> {
if let Err(err) = compact_loop_ext(tenantid) {
error!("compact loop terminated with error: {:?}", err);
Err(err)
} else {
Ok(())
}
}
fn compact_loop_ext(tenantid: ZTenantId) -> Result<()> {
loop {
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
break;
}
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
let compaction_period = repo.get_compaction_period();
std::thread::sleep(compaction_period);
trace!("compaction thread for tenant {} waking up", tenantid);
// Compact timelines
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
repo.compaction_iteration()?;
}
trace!(
"compaction thread stopped for tenant {} state is {:?}",
tenantid,
tenant_mgr::get_tenant_state(tenantid)
);
Ok(())
}
///
/// GC thread's main loop
///
pub fn gc_loop(tenantid: ZTenantId) -> Result<()> {
loop {
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
break;
}
trace!("gc thread for tenant {} waking up", tenantid);
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
let gc_horizon = repo.get_gc_horizon();
// Garbage collect old files that are not needed for PITR anymore
if gc_horizon > 0 {
repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
}
// TODO Rewrite this in a more adequate way, e.g. using
// condvar.wait_timeout() or something similar (see the sketch after this function)
let mut sleep_time = repo.get_gc_period().as_secs();
while sleep_time > 0 && tenant_mgr::get_tenant_state(tenantid) == Some(TenantState::Active)
{
sleep_time -= 1;
std::thread::sleep(Duration::from_secs(1));
}
}
trace!(
"GC thread stopped for tenant {} state is {:?}",
tenantid,
tenant_mgr::get_tenant_state(tenantid)
);
Ok(())
}
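
The TODO above asks for a less busy way to wait than sleeping one second at a time. One possible shape, sketched with std's Condvar::wait_timeout_while; this is an assumption about how it could look, not code that exists in the repository:

use std::sync::{Condvar, Mutex};
use std::time::Duration;

struct Shutdown {
    requested: Mutex<bool>,
    cvar: Condvar,
}

impl Shutdown {
    // Returns true if shutdown was requested while waiting.
    fn wait_for(&self, period: Duration) -> bool {
        let guard = self.requested.lock().unwrap();
        // Wake up either when `period` elapses or when request() flips the flag.
        let (guard, _timed_out) = self
            .cvar
            .wait_timeout_while(guard, period, |stop| !*stop)
            .unwrap();
        *guard
    }

    fn request(&self) {
        *self.requested.lock().unwrap() = true;
        self.cvar.notify_all();
    }
}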

View File

@@ -94,8 +94,11 @@ pub enum ThreadKind {
// Main walreceiver manager thread that ensures that every timeline spawns a connection to safekeeper, to fetch WAL.
WalReceiverManager,
// Thread that schedules new compaction and gc jobs
TenantTaskManager,
// Thread that handles compaction of all timelines for a tenant.
Compactor,
// Thread that handles GC of a tenant
GarbageCollector,
// Thread that flushes frozen in-memory layers to disk
LayerFlushThread,

View File

@@ -178,7 +178,7 @@ async fn shutdown_all_wal_connections(
/// That may lead to certain events not being observed by the listener.
#[derive(Debug)]
struct TaskHandle<E> {
handle: JoinHandle<Result<(), String>>,
handle: JoinHandle<()>,
events_receiver: watch::Receiver<TaskEvent<E>>,
cancellation: watch::Sender<()>,
}
@@ -205,8 +205,8 @@ impl<E: Clone> TaskHandle<E> {
let sender = Arc::clone(&events_sender);
let handle = tokio::task::spawn(async move {
events_sender.send(TaskEvent::Started).ok();
task(sender, cancellation_receiver).await
let task_result = task(sender, cancellation_receiver).await;
events_sender.send(TaskEvent::End(task_result)).ok();
});
TaskHandle {
@@ -216,16 +216,6 @@ impl<E: Clone> TaskHandle<E> {
}
}
async fn next_task_event(&mut self) -> TaskEvent<E> {
select! {
next_task_event = self.events_receiver.changed() => match next_task_event {
Ok(()) => self.events_receiver.borrow().clone(),
Err(_task_channel_part_dropped) => join_on_handle(&mut self.handle).await,
},
task_completion_result = join_on_handle(&mut self.handle) => task_completion_result,
}
}
/// Aborts current task, waiting for it to finish.
async fn shutdown(self) {
self.cancellation.send(()).ok();
@@ -235,19 +225,6 @@ impl<E: Clone> TaskHandle<E> {
}
}
async fn join_on_handle<E>(handle: &mut JoinHandle<Result<(), String>>) -> TaskEvent<E> {
match handle.await {
Ok(task_result) => TaskEvent::End(task_result),
Err(e) => {
if e.is_cancelled() {
TaskEvent::End(Ok(()))
} else {
TaskEvent::End(Err(format!("WAL receiver task panicked: {e}")))
}
}
}
}
/// A step to process timeline attach/detach events to enable/disable the corresponding WAL receiver machinery.
/// In addition to WAL streaming management, the step ensures that corresponding tenant has its service threads enabled or disabled.
/// This is done here, since only walreceiver knows when a certain tenant has no streaming enabled.
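
The TaskHandle change above makes the spawned future publish its own completion through the same watch channel used for progress events, so observers only ever call receiver.changed() and never have to join the handle. A self-contained sketch of that pattern, with simplified types rather than the walreceiver's real TaskHandle:

use tokio::sync::watch;
use tokio::task::JoinHandle;

#[derive(Clone, Debug)]
enum TaskEvent {
    Started,
    End(Result<(), String>),
}

fn spawn_observed<F>(task: F) -> (JoinHandle<()>, watch::Receiver<TaskEvent>)
where
    F: std::future::Future<Output = Result<(), String>> + Send + 'static,
{
    let (events_sender, events_receiver) = watch::channel(TaskEvent::Started);
    let handle = tokio::spawn(async move {
        // The task itself reports both its start and its end result.
        events_sender.send(TaskEvent::Started).ok();
        let task_result = task.await;
        events_sender.send(TaskEvent::End(task_result)).ok();
    });
    (handle, events_receiver)
}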

View File

@@ -104,29 +104,49 @@ async fn connection_manager_loop_step(
Some(wal_connection_update) = async {
match walreceiver_state.wal_connection.as_mut() {
Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await),
Some(wal_connection) => {
let receiver = &mut wal_connection.connection_task.events_receiver;
Some(match receiver.changed().await {
Ok(()) => receiver.borrow().clone(),
Err(_cancellation_error) => TaskEvent::End(Ok(())),
})
}
None => None,
}
} => {
let wal_connection = walreceiver_state.wal_connection.as_mut().expect("Should have a connection, as checked by the corresponding select! guard");
match &wal_connection_update {
TaskEvent::Started => {
wal_connection.latest_connection_update = Utc::now().naive_utc();
*walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0) += 1;
},
TaskEvent::NewEvent(replication_feedback) => {
wal_connection.latest_connection_update = DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc();
// reset connection attempts here only: this is the only place where both nodes
// explicitly confirm with replication feedback that they are connected to each other
walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
},
let (connection_update, reset_connection_attempts) = match &wal_connection_update {
TaskEvent::Started => (Some(Utc::now().naive_utc()), true),
TaskEvent::NewEvent(replication_feedback) => (Some(DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc()), true),
TaskEvent::End(end_result) => {
match end_result {
Ok(()) => debug!("WAL receiving task finished"),
Err(e) => warn!("WAL receiving task failed: {e}"),
let should_reset_connection_attempts = match end_result {
Ok(()) => {
debug!("WAL receiving task finished");
true
},
Err(e) => {
warn!("WAL receiving task failed: {e}");
false
},
};
walreceiver_state.wal_connection = None;
(None, should_reset_connection_attempts)
},
};
if let Some(connection_update) = connection_update {
match &mut walreceiver_state.wal_connection {
Some(wal_connection) => {
wal_connection.latest_connection_update = connection_update;
let attempts_entry = walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0);
if reset_connection_attempts {
*attempts_entry = 0;
} else {
*attempts_entry += 1;
}
},
None => error!("Received connection update for WAL connection that is not active, update: {wal_connection_update:?}"),
}
}
},
@@ -386,8 +406,10 @@ impl WalreceiverState {
Some(existing_wal_connection) => {
let connected_sk_node = existing_wal_connection.sk_id;
let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) =
self.select_connection_candidate(Some(connected_sk_node))?;
let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) = self
.applicable_connection_candidates()
.filter(|&(sk_id, _, _)| sk_id != connected_sk_node)
.max_by_key(|(_, info, _)| info.commit_lsn)?;
let now = Utc::now().naive_utc();
if let Ok(latest_interaciton) =
@@ -440,8 +462,9 @@ impl WalreceiverState {
}
}
None => {
let (new_sk_id, _, new_wal_producer_connstr) =
self.select_connection_candidate(None)?;
let (new_sk_id, _, new_wal_producer_connstr) = self
.applicable_connection_candidates()
.max_by_key(|(_, info, _)| info.commit_lsn)?;
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
@@ -453,49 +476,6 @@ impl WalreceiverState {
None
}
/// Selects the best possible candidate, based on the data collected from etcd updates about the safekeepers.
/// Optionally, omits the given node, to support gracefully switching from a healthy safekeeper to another.
///
/// The candidate that is chosen:
/// * has fewest connection attempts from pageserver to safekeeper node (reset every time the WAL replication feedback is sent)
/// * has greatest data Lsn among the ones that are left
///
/// NOTE:
/// We evict timeline data received from etcd, together with its connection attempt counters, based on how much time has passed since it was registered;
/// apart from that eviction, resetting the connection attempts requires a successful connection to that node.
/// That won't happen until every node with fewer connection attempts has been tried first, which can leave the safekeeper node with the most advanced state ignored.
fn select_connection_candidate(
&self,
node_to_omit: Option<NodeId>,
) -> Option<(NodeId, &SkTimelineInfo, String)> {
let all_candidates = self
.applicable_connection_candidates()
.filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)
.collect::<Vec<_>>();
let smallest_attempts_allowed = all_candidates
.iter()
.map(|(sk_id, _, _)| {
self.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.min()?;
all_candidates
.into_iter()
.filter(|(sk_id, _, _)| {
smallest_attempts_allowed
>= self
.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.max_by_key(|(_, info, _)| info.commit_lsn)
}
fn applicable_connection_candidates(
&self,
) -> impl Iterator<Item = (NodeId, &SkTimelineInfo, String)> {
@@ -520,25 +500,15 @@ impl WalreceiverState {
}
fn cleanup_old_candidates(&mut self) {
let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());
self.wal_stream_candidates.retain(|node_id, etcd_info| {
self.wal_stream_candidates.retain(|_, etcd_info| {
if let Ok(time_since_latest_etcd_update) =
(Utc::now().naive_utc() - etcd_info.latest_update).to_std()
{
let should_retain = time_since_latest_etcd_update < self.lagging_wal_timeout;
if !should_retain {
node_ids_to_remove.push(*node_id);
}
should_retain
time_since_latest_etcd_update < self.lagging_wal_timeout
} else {
true
}
});
for node_id in node_ids_to_remove {
self.wal_connection_attempts.remove(&node_id);
}
}
}
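
For reference, the selection rule documented on the removed select_connection_candidate reduces to two steps: keep only the applicable safekeepers with the minimal number of failed connection attempts, then take the one with the highest commit_lsn. A condensed sketch of that filter with plain iterators, using simplified stand-in types rather than the real WalreceiverState fields:

use std::collections::HashMap;

// `candidates` holds (safekeeper id, commit_lsn) pairs; `attempts` counts
// failed connection attempts per safekeeper.
fn pick_candidate(
    candidates: &[(u64, u64)],
    attempts: &HashMap<u64, u32>,
) -> Option<u64> {
    let tries = |id: &u64| attempts.get(id).copied().unwrap_or(0);
    // Step 1: only nodes with the minimal number of failed attempts stay in.
    let min_tries = candidates.iter().map(|(id, _)| tries(id)).min()?;
    // Step 2: among those, take the most advanced commit_lsn.
    candidates
        .iter()
        .filter(|(id, _)| tries(id) == min_tries)
        .max_by_key(|(_, commit_lsn)| *commit_lsn)
        .map(|(id, _)| *id)
}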
@@ -873,64 +843,6 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn candidate_with_many_connection_failures() -> anyhow::Result<()> {
let harness = RepoHarness::create("candidate_with_many_connection_failures")?;
let mut state = dummy_state(&harness);
let now = Utc::now().naive_utc();
let current_lsn = Lsn(100_000).align();
let bigger_lsn = Lsn(current_lsn.0 + 100).align();
state.wal_connection = None;
state.wal_stream_candidates = HashMap::from([
(
NodeId(0),
EtcdSkTimeline {
timeline: SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(bigger_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
etcd_version: 0,
latest_update: now,
},
),
(
NodeId(1),
EtcdSkTimeline {
timeline: SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
etcd_version: 0,
latest_update: now,
},
),
]);
state.wal_connection_attempts = HashMap::from([(NodeId(0), 1), (NodeId(1), 0)]);
let candidate_with_less_errors = state
.next_connection_candidate()
.expect("Expected one candidate selected, but got none");
assert_eq!(
candidate_with_less_errors.safekeeper_id,
NodeId(1),
"Should select the node with less connection errors"
);
Ok(())
}
#[tokio::test]
async fn connection_no_etcd_data_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("connection_no_etcd_data_candidate")?;

View File

@@ -49,12 +49,6 @@ impl UserFacingError for ConsoleAuthError {
}
}
impl From<&auth::credentials::ClientCredsParseError> for ConsoleAuthError {
fn from(e: &auth::credentials::ClientCredsParseError) -> Self {
ConsoleAuthError::BadProjectName(e.clone())
}
}
// TODO: convert into an enum with "error"
#[derive(Serialize, Deserialize, Debug)]
struct GetRoleSecretResponse {
@@ -98,9 +92,14 @@ impl<'a> Api<'a> {
async fn get_auth_info(&self) -> Result<AuthInfo> {
let mut url = self.endpoint.clone();
let project_name = self
.creds
.project_name
.as_ref()
.map_err(|e| ConsoleAuthError::BadProjectName(e.clone()))?;
url.path_segments_mut().push("proxy_get_role_secret");
url.query_pairs_mut()
.append_pair("project", self.creds.project_name.as_ref()?)
.append_pair("project", project_name)
.append_pair("role", &self.creds.user);
// TODO: use a proper logger
@@ -122,8 +121,12 @@ impl<'a> Api<'a> {
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(&self) -> Result<DatabaseInfo> {
let mut url = self.endpoint.clone();
let project_name = self
.creds
.project_name
.as_ref()
.map_err(|e| ConsoleAuthError::BadProjectName(e.clone()))?;
url.path_segments_mut().push("proxy_wake_compute");
let project_name = self.creds.project_name.as_ref()?;
url.query_pairs_mut().append_pair("project", project_name);
// TODO: use a proper logger

View File

@@ -115,7 +115,7 @@ mod tests {
Ok(())
});
waiter.await?;
let () = waiter.await?;
notifier.await?
}
}

View File

@@ -5,11 +5,6 @@ use anyhow::Context;
use anyhow::Error;
use anyhow::Result;
use etcd_broker::subscription_value::SkTimelineInfo;
use etcd_broker::LeaseKeepAliveStream;
use etcd_broker::LeaseKeeper;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::time::Duration;
use tokio::spawn;
use tokio::task::JoinHandle;
@@ -26,7 +21,7 @@ use utils::zid::{NodeId, ZTenantTimelineId};
const RETRY_INTERVAL_MSEC: u64 = 1000;
const PUSH_INTERVAL_MSEC: u64 = 1000;
const LEASE_TTL_SEC: i64 = 10;
const LEASE_TTL_SEC: i64 = 5;
pub fn thread_main(conf: SafeKeeperConf) {
let runtime = runtime::Builder::new_current_thread()
@@ -159,48 +154,13 @@ pub fn get_candiate_name(system_id: NodeId) -> String {
format!("id_{system_id}")
}
async fn push_sk_info(
zttid: ZTenantTimelineId,
mut client: Client,
key: String,
sk_info: SkTimelineInfo,
mut lease: Lease,
) -> anyhow::Result<(ZTenantTimelineId, Lease)> {
let put_opts = PutOptions::new().with_lease(lease.id);
client
.put(
key.clone(),
serde_json::to_string(&sk_info)?,
Some(put_opts),
)
.await
.with_context(|| format!("failed to push safekeeper info to {}", key))?;
// revive the lease
lease
.keeper
.keep_alive()
.await
.context("failed to send LeaseKeepAliveRequest")?;
lease
.ka_stream
.message()
.await
.context("failed to receive LeaseKeepAliveResponse")?;
Ok((zttid, lease))
}
struct Lease {
id: i64,
keeper: LeaseKeeper,
ka_stream: LeaseKeepAliveStream,
}
/// Push once in a while data about all active timelines to the broker.
async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
let mut client = Client::connect(&conf.broker_endpoints, None).await?;
let mut leases: HashMap<ZTenantTimelineId, Lease> = HashMap::new();
// Get and maintain lease to automatically delete obsolete data
let lease = client.lease_grant(LEASE_TTL_SEC, None).await?;
let (mut keeper, mut ka_stream) = client.lease_keep_alive(lease.id()).await?;
let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);
loop {
@@ -208,46 +168,33 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
// is under plain mutex. That's ok, all this code is not performance
// sensitive and there is no risk of deadlock as we don't await while
// lock is held.
let active_tlis = GlobalTimelines::get_active_timelines();
// Get and maintain (if not yet granted) a per-timeline lease to automatically delete obsolete data.
for zttid in active_tlis.iter() {
if let Entry::Vacant(v) = leases.entry(*zttid) {
let lease = client.lease_grant(LEASE_TTL_SEC, None).await?;
let (keeper, ka_stream) = client.lease_keep_alive(lease.id()).await?;
v.insert(Lease {
id: lease.id(),
keeper,
ka_stream,
});
for zttid in GlobalTimelines::get_active_timelines() {
if let Some(tli) = GlobalTimelines::get_loaded(zttid) {
let sk_info = tli.get_public_info(&conf)?;
let put_opts = PutOptions::new().with_lease(lease.id());
client
.put(
timeline_safekeeper_path(
conf.broker_etcd_prefix.clone(),
zttid,
conf.my_id,
),
serde_json::to_string(&sk_info)?,
Some(put_opts),
)
.await
.context("failed to push safekeeper info")?;
}
}
leases.retain(|zttid, _| active_tlis.contains(zttid));
// Push data concurrently to avoid latency problems; with many timelines a sequential push can be slow.
let handles = active_tlis
.iter()
.filter_map(|zttid| GlobalTimelines::get_loaded(*zttid))
.map(|tli| {
let sk_info = tli.get_public_info(&conf);
let key = timeline_safekeeper_path(
conf.broker_etcd_prefix.clone(),
tli.zttid,
conf.my_id,
);
let lease = leases.remove(&tli.zttid).unwrap();
tokio::spawn(push_sk_info(tli.zttid, client.clone(), key, sk_info, lease))
})
.collect::<Vec<_>>();
for h in handles {
let (zttid, lease) = h.await??;
// It is ugly to pull leases out of the hash map and then put them back, but
// otherwise we would have to resort to long-lived per-timeline tasks (which
// would generate a lot of errors when etcd is down), since a spawned task
// requires 'static objects and we cannot borrow into it.
leases.insert(zttid, lease);
}
// revive the lease
keeper
.keep_alive()
.await
.context("failed to send LeaseKeepAliveRequest")?;
ka_stream
.message()
.await
.context("failed to receive LeaseKeepAliveResponse")?;
sleep(push_interval).await;
}
}
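
The push loop above keeps a single lease for the whole process: every put is attached to it, and the lease is revived once per push cycle so the keys expire automatically if the safekeeper stops. A condensed sketch of that lease dance, assuming the etcd_client-style API visible in the hunk (the endpoints, key and value below are placeholders):

use std::time::Duration;
use etcd_client::{Client, PutOptions};

async fn push_with_lease(endpoints: &[String], ttl: i64) -> anyhow::Result<()> {
    let mut client = Client::connect(endpoints, None).await?;
    // Grant one lease up front; every key we put is attached to it.
    let lease = client.lease_grant(ttl, None).await?;
    let (mut keeper, mut ka_stream) = client.lease_keep_alive(lease.id()).await?;
    loop {
        let opts = PutOptions::new().with_lease(lease.id());
        client.put("demo_key", "demo_value", Some(opts)).await?;
        // Revive the lease so its TTL keeps getting extended while we are alive.
        keeper.keep_alive().await?;
        ka_stream.message().await?;
        tokio::time::sleep(Duration::from_millis(1000)).await;
    }
}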

View File

@@ -239,19 +239,6 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
remote_consistent_lsn: Lsn(0),
peers: Peers(vec![]),
});
} else if version == 5 {
info!("reading safekeeper control file version {}", version);
let mut oldstate = SafeKeeperState::des(&buf[..buf.len()])?;
if oldstate.timeline_start_lsn != Lsn(0) {
return Ok(oldstate);
}
// set special timeline_start_lsn because we don't know the real one
info!("setting timeline_start_lsn and local_start_lsn to Lsn(1)");
oldstate.timeline_start_lsn = Lsn(1);
oldstate.local_start_lsn = Lsn(1);
return Ok(oldstate);
}
bail!("unsupported safekeeper control file version {}", version)
}

View File

@@ -28,7 +28,7 @@ use utils::{
};
pub const SK_MAGIC: u32 = 0xcafeceefu32;
pub const SK_FORMAT_VERSION: u32 = 6;
pub const SK_FORMAT_VERSION: u32 = 5;
const SK_PROTOCOL_VERSION: u32 = 2;
const UNKNOWN_SERVER_VERSION: u32 = 0;

View File

@@ -11,7 +11,7 @@ use serde::Serialize;
use tokio::sync::watch;
use std::cmp::{max, min};
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::fs::{self};
use std::sync::{Arc, Mutex, MutexGuard};
@@ -445,9 +445,9 @@ impl Timeline {
}
/// Prepare public safekeeper info for reporting.
pub fn get_public_info(&self, conf: &SafeKeeperConf) -> SkTimelineInfo {
pub fn get_public_info(&self, conf: &SafeKeeperConf) -> anyhow::Result<SkTimelineInfo> {
let shared_state = self.mutex.lock().unwrap();
SkTimelineInfo {
Ok(SkTimelineInfo {
last_log_term: Some(shared_state.sk.get_epoch()),
flush_lsn: Some(shared_state.sk.wal_store.flush_lsn()),
// note: this value is not flushed to control file yet and can be lost
@@ -460,7 +460,7 @@ impl Timeline {
peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
}
})
}
/// Update timeline state with peer safekeeper data.
@@ -625,8 +625,6 @@ impl GlobalTimelines {
zttid: ZTenantTimelineId,
create: bool,
) -> Result<Arc<Timeline>> {
let _enter = info_span!("", timeline = %zttid.tenant_id).entered();
let mut state = TIMELINES_STATE.lock().unwrap();
match state.timelines.get(&zttid) {
@@ -669,7 +667,7 @@ impl GlobalTimelines {
}
/// Get ZTenantTimelineIDs of all active timelines.
pub fn get_active_timelines() -> HashSet<ZTenantTimelineId> {
pub fn get_active_timelines() -> Vec<ZTenantTimelineId> {
let state = TIMELINES_STATE.lock().unwrap();
state
.timelines

View File

@@ -2,16 +2,18 @@ use anyhow::{Context, Result};
use etcd_broker::subscription_key::{
NodeKind, OperationKind, SkOperationKind, SubscriptionKey, SubscriptionKind,
};
use tokio::io::AsyncRead;
use tokio::task::JoinHandle;
use std::cmp::min;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use postgres_ffi::xlog_utils::{XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr, PG_TLI};
use postgres_ffi::xlog_utils::{
XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr, MAX_SEND_SIZE, PG_TLI,
};
use remote_storage::{GenericRemoteStorage, RemoteStorage};
use tokio::fs::File;
use tokio::runtime::Builder;
@@ -450,41 +452,45 @@ async fn backup_object(source_file: &Path, size: usize) -> Result<()> {
pub async fn read_object(
file_path: PathBuf,
offset: u64,
) -> anyhow::Result<Pin<Box<dyn tokio::io::AsyncRead>>> {
let download = match REMOTE_STORAGE
.get()
.context("Failed to get remote storage")?
.as_ref()
.context("No remote storage configured")?
{
GenericRemoteStorage::Local(local_storage) => {
let source = local_storage.remote_object_id(&file_path)?;
) -> (impl AsyncRead, JoinHandle<Result<()>>) {
let storage = REMOTE_STORAGE.get().expect("failed to get remote storage");
info!(
"local download about to start from {} at offset {}",
source.display(),
offset
);
local_storage
.download_byte_range(&source, offset, None)
.await
let (mut pipe_writer, pipe_reader) = tokio::io::duplex(MAX_SEND_SIZE);
let copy_result = tokio::spawn(async move {
let res = match storage.as_ref().unwrap() {
GenericRemoteStorage::Local(local_storage) => {
let source = local_storage.remote_object_id(&file_path)?;
info!(
"local download about to start from {} at offset {}",
source.display(),
offset
);
local_storage
.download_byte_range(&source, offset, None, &mut pipe_writer)
.await
}
GenericRemoteStorage::S3(s3_storage) => {
let s3key = s3_storage.remote_object_id(&file_path)?;
info!(
"S3 download about to start from {:?} at offset {}",
s3key, offset
);
s3_storage
.download_byte_range(&s3key, offset, None, &mut pipe_writer)
.await
}
};
if let Err(e) = res {
error!("failed to download WAL segment from remote storage: {}", e);
Err(e)
} else {
Ok(())
}
GenericRemoteStorage::S3(s3_storage) => {
let s3key = s3_storage.remote_object_id(&file_path)?;
});
info!(
"S3 download about to start from {:?} at offset {}",
s3key, offset
);
s3_storage.download_byte_range(&s3key, offset, None).await
}
}
.with_context(|| {
format!(
"Failed to open WAL segment download stream for local storage path {}",
file_path.display()
)
})?;
Ok(download.download_stream)
(pipe_reader, copy_result)
}
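
The rewrite above exposes the remote download as a plain AsyncRead by pairing an in-memory duplex pipe with a background task: the spawned task writes the downloaded bytes into one half of the pipe while the caller reads from the other half. A minimal illustration of that trick, with a placeholder producer instead of the real remote-storage download:

use tokio::io::{self, AsyncRead, AsyncWriteExt};
use tokio::task::JoinHandle;

fn spawn_producer(buf_size: usize) -> (impl AsyncRead, JoinHandle<io::Result<()>>) {
    let (mut writer, reader) = io::duplex(buf_size);
    let handle = tokio::spawn(async move {
        // In the real code this is the remote-storage download writing into the pipe.
        writer.write_all(b"hello from the other side").await?;
        writer.shutdown().await?;
        Ok(())
    });
    (reader, handle)
}

The returned reader can then be boxed and handed to the WAL reader, as the hunk that follows does, while the join handle carries any download error.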

View File

@@ -604,7 +604,8 @@ impl WalReader {
// Try to open remote file, if remote reads are enabled
if self.enable_remote_read {
return read_object(wal_file_path, xlogoff as u64).await;
let (reader, _) = read_object(wal_file_path, xlogoff as u64).await;
return Ok(Box::pin(reader));
}
bail!("WAL segment is not found")

View File

@@ -1,438 +0,0 @@
import os
import shutil
from pathlib import Path
import tempfile
from contextlib import closing
import psycopg2
import subprocess
import argparse
### utils copied from test fixtures
from typing import Any, List
from psycopg2.extensions import connection as PgConnection
import asyncpg
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
Env = Dict[str, str]
_global_counter = 0
def global_counter() -> int:
""" A really dumb global counter.
This is useful for giving output files a unique number, so if we run the
same command multiple times we can keep their output separate.
"""
global _global_counter
_global_counter += 1
return _global_counter
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
""" Run a process and capture its output
Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
where "cmd" is the name of the program and NNN is an incrementing
counter.
If those files already exist, we will overwrite them.
Returns basepath for files with captured output.
"""
assert type(cmd) is list
base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
basepath = os.path.join(capture_dir, base)
stdout_filename = basepath + '.stdout'
stderr_filename = basepath + '.stderr'
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
print('(capturing output to "{}.stdout")'.format(base))
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
return basepath
class PgBin:
""" A helper class for executing postgres binaries """
def __init__(self, log_dir: Path, pg_distrib_dir):
self.log_dir = log_dir
self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin')
self.env = os.environ.copy()
self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib')
def _fixpath(self, command: List[str]):
if '/' not in command[0]:
command[0] = os.path.join(self.pg_bin_path, command[0])
def _build_env(self, env_add: Optional[Env]) -> Env:
if env_add is None:
return self.env
env = self.env.copy()
env.update(env_add)
return env
def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None):
"""
Run one of the postgres binaries.
The command should be in list form, e.g. ['pgbench', '-p', '55432']
All the necessary environment variables will be set.
If the first argument (the command name) doesn't include a path (no '/'
characters present), then it will be edited to include the correct path.
If you want stdout/stderr captured to files, use `run_capture` instead.
"""
self._fixpath(command)
print('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
subprocess.run(command, env=env, cwd=cwd, check=True)
def run_capture(self,
command: List[str],
env: Optional[Env] = None,
cwd: Optional[str] = None,
**kwargs: Any) -> str:
"""
Run one of the postgres binaries, with stderr and stdout redirected to a file.
This is just like `run`, but for chatty programs. Returns basepath for files
with captured output.
"""
self._fixpath(command)
print('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
return subprocess_capture(str(self.log_dir),
command,
env=env,
cwd=cwd,
check=True,
**kwargs)
class PgProtocol:
""" Reusable connection logic """
def __init__(self, **kwargs):
self.default_options = kwargs
def connstr(self, **kwargs) -> str:
"""
Build a libpq connection string for the Postgres instance.
"""
return str(make_dsn(**self.conn_options(**kwargs)))
def conn_options(self, **kwargs):
conn_options = self.default_options.copy()
if 'dsn' in kwargs:
conn_options.update(parse_dsn(kwargs['dsn']))
conn_options.update(kwargs)
# Individual statement timeout in seconds. 2 minutes should be
# enough for our tests, but if you need a longer one, you can
# change it by calling "SET statement_timeout" after
# connecting.
if 'options' in conn_options:
conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options']
else:
conn_options['options'] = "-cstatement_timeout=120s"
return conn_options
# autocommit=True here by default because that's what we need most of the time
def connect(self, autocommit=True, **kwargs) -> PgConnection:
"""
Connect to the node.
Returns psycopg2's connection object.
This method passes all extra params to connstr.
"""
conn = psycopg2.connect(**self.conn_options(**kwargs))
# WARNING: this setting affects *all* tests!
conn.autocommit = autocommit
return conn
async def connect_async(self, **kwargs) -> asyncpg.Connection:
"""
Connect to the node from async python.
Returns asyncpg's connection object.
"""
# asyncpg takes slightly different options than psycopg2. Try
# to convert the defaults from the psycopg2 format.
# The psycopg2 option 'dbname' is called 'database' in asyncpg
conn_options = self.conn_options(**kwargs)
if 'dbname' in conn_options:
conn_options['database'] = conn_options.pop('dbname')
# Convert options='-c<key>=<val>' to server_settings
if 'options' in conn_options:
options = conn_options.pop('options')
for match in re.finditer(r'-c(\w*)=(\w*)', options):
key = match.group(1)
val = match.group(2)
if 'server_settings' in conn_options:
conn_options['server_settings'].update({key: val})
else:
conn_options['server_settings'] = {key: val}
return await asyncpg.connect(**conn_options)
def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]:
"""
Execute query against the node and return all rows.
This method passes all extra params to connstr.
"""
return self.safe_psql_many([query], **kwargs)[0]
def safe_psql_many(self, queries: List[str], **kwargs: Any) -> List[List[Tuple[Any, ...]]]:
"""
Execute queries against the node and return all rows.
This method passes all extra params to connstr.
"""
result: List[List[Any]] = []
with closing(self.connect(**kwargs)) as conn:
with conn.cursor() as cur:
for query in queries:
print(f"Executing query: {query}")
cur.execute(query)
if cur.description is None:
result.append([]) # query didn't return data
else:
result.append(cast(List[Any], cur.fetchall()))
return result
class VanillaPostgres(PgProtocol):
def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
super().__init__(host='localhost', port=port, dbname='postgres')
self.pgdatadir = pgdatadir
self.pg_bin = pg_bin
self.running = False
if init:
self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
"""Append lines into postgresql.conf file."""
assert not self.running
with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
conf_file.write("\n".join(options))
def start(self, log_path: Optional[str] = None):
assert not self.running
self.running = True
if log_path is None:
log_path = os.path.join(self.pgdatadir, "pg.log")
self.pg_bin.run_capture(
['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])
def stop(self):
assert self.running
self.running = False
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])
def get_subdir_size(self, subdir) -> int:
"""Return size of pgdatadir subdirectory in bytes."""
return get_dir_size(os.path.join(self.pgdatadir, subdir))
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
if self.running:
self.stop()
### actual code
def get_rel_paths(log_dir, pg_bin, base_tar):
"""Yeild list of relation paths"""
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
port = "55439" # Probably free
with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
vanilla_pg.configure([f"port={port}"])
vanilla_pg.start()
# Create database based on template0 because we can't connect to template0
query = "create database template0copy template template0"
vanilla_pg.safe_psql(query, user="cloud_admin")
vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")
# Get all databases
query = "select oid, datname from pg_database"
oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
template0_oid = [
oid
for (oid, database) in oid_dbname_pairs
if database == "template0"
][0]
# Get rel paths for each database
for oid, database in oid_dbname_pairs:
if database == "template0":
# We can't connect to template0
continue
query = "select relname, pg_relation_filepath(oid) from pg_class"
result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
for relname, filepath in result:
if filepath is not None:
if database == "template0copy":
# Add all template0copy paths to template0
prefix = f"base/{oid}/"
if filepath.startswith(prefix):
suffix = filepath[len(prefix):]
yield f"base/{template0_oid}/{suffix}"
elif filepath.startswith("global"):
print(f"skipping {database} global file {filepath}")
else:
raise AssertionError
else:
yield filepath
def pack_base(log_dir, restored_dir, output_tar):
tmp_tar_name = "tmp.tar"
tmp_tar_path = os.path.join(restored_dir, tmp_tar_name)
cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir)
subprocess_capture(log_dir, cmd, cwd=restored_dir)
shutil.move(tmp_tar_path, output_tar)
def get_files_in_tar(log_dir, tar):
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir])
# Walk the unpacked tree and yield each file path relative to its root
empty_files = []
for root, dirs, files in os.walk(restored_dir):
for name in files:
file_path = os.path.join(root, name)
yield file_path[len(restored_dir) + 1:]
def corrupt(log_dir, base_tar, output_tar):
"""Remove all empty files and repackage. Return paths of files removed."""
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
# Find empty files
empty_files = []
for root, dirs, files in os.walk(restored_dir):
for name in files:
file_path = os.path.join(root, name)
file_size = os.path.getsize(file_path)
if file_size == 0:
empty_files.append(file_path)
# Delete empty files (just to see if they get recreated)
for empty_file in empty_files:
os.remove(empty_file)
# Repackage
pack_base(log_dir, restored_dir, output_tar)
# Return relative paths
return {
empty_file[len(restored_dir) + 1:]
for empty_file in empty_files
}
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])
# Touch files that don't exist
for path in paths:
absolute_path = os.path.join(restored_dir, path)
exists = os.path.exists(absolute_path)
if not exists:
print("File {absolute_path} didn't exist. Creating..")
Path(absolute_path).touch()
# Repackage
pack_base(log_dir, restored_dir, output_tar)
# TODO this test is not currently called. It needs any ordinary base.tar path as input
def test_add_missing_rels(base_tar):
output_tar = base_tar + ".fixed"
# Create new base tar with missing empty files
corrupt_tar = os.path.join(test_output_dir, "psql_2-corrupted.stdout")
deleted_files = corrupt(test_output_dir, base_tar, corrupt_tar)
assert len(set(get_files_in_tar(test_output_dir, base_tar)) -
set(get_files_in_tar(test_output_dir, corrupt_tar))) > 0
# Reconstruct paths from the corrupted tar, assert it covers everything important
reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, corrupt_tar))
paths_missed = deleted_files - reconstructed_paths
assert paths_missed.issubset({
"postgresql.auto.conf",
"pg_ident.conf",
})
# Recreate the correct tar by touching files, compare with original tar
touch_missing_rels(test_output_dir, corrupt_tar, output_tar, reconstructed_paths)
paths_missed = (set(get_files_in_tar(test_output_dir, base_tar)) -
set(get_files_in_tar(test_output_dir, output_tar)))
assert paths_missed.issubset({
"postgresql.auto.conf",
"pg_ident.conf",
})
# Example command:
# poetry run python scripts/add_missing_rels.py \
# --base-tar /home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/psql_2.stdout \
# --output-tar output-base.tar \
#     --log-dir /home/bojan/tmp \
# --pg-distrib-dir /home/bojan/src/neondatabase/neon/tmp_install/
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--base-tar',
dest='base_tar',
required=True,
help='base.tar file to add missing rels to (file will not be modified)',
)
parser.add_argument(
'--output-tar',
dest='output_tar',
required=True,
help='path and name for the output base.tar file',
)
parser.add_argument(
'--log-dir',
dest='log_dir',
required=True,
help='directory to save log files in',
)
parser.add_argument(
'--pg-distrib-dir',
dest='pg_distrib_dir',
required=True,
help='directory where postgres is installed',
)
args = parser.parse_args()
base_tar = args.base_tar
output_tar = args.output_tar
log_dir = args.log_dir
pg_bin = PgBin(log_dir, args.pg_distrib_dir)
reconstructed_paths = set(get_rel_paths(log_dir, pg_bin, base_tar))
touch_missing_rels(log_dir, base_tar, output_tar, reconstructed_paths)

View File

@@ -1,232 +0,0 @@
#
# Simple script to export nodes from one pageserver
# and import them into another page server
#
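# A minimal invocation sketch (the host names, ports and tenant id below are
# illustrative placeholders, not values from a real environment):
#
#   python export_import_betwen_pageservers.py \
#       --tenant-id 0123456789abcdef0123456789abcdef \
#       --from-host old-pageserver.local --from-http-port 9898 --from-pg-port 6400 \
#       --to-host new-pageserver.local --to-http-port 9898 --to-pg-port 6400 \
#       --psql-path /usr/local/bin/psql
#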
from os import path
import os
import requests
import uuid
import subprocess
import argparse
from pathlib import Path
# directory to save exported tar files to
basepath = path.dirname(path.abspath(__file__))
class NeonPageserverApiException(Exception):
pass
class NeonPageserverHttpClient(requests.Session):
def __init__(self, host, port):
super().__init__()
self.host = host
self.port = port
def verbose_error(self, res: requests.Response):
try:
res.raise_for_status()
except requests.RequestException as e:
try:
msg = res.json()['msg']
            except Exception:
msg = ''
raise NeonPageserverApiException(msg) from e
def check_status(self):
self.get(f"http://{self.host}:{self.port}/v1/status").raise_for_status()
def tenant_list(self):
res = self.get(f"http://{self.host}:{self.port}/v1/tenant")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
res = self.post(
f"http://{self.host}:{self.port}/v1/tenant",
json={
'new_tenant_id': new_tenant_id.hex,
},
)
if res.status_code == 409:
if ok_if_exists:
print(f'could not create tenant: already exists for id {new_tenant_id}')
else:
res.raise_for_status()
elif res.status_code == 201:
print(f'created tenant {new_tenant_id}')
else:
self.verbose_error(res)
return new_tenant_id
def timeline_list(self, tenant_id: uuid.UUID):
res = self.get(f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
import pytest
import os
def add_missing_empty_rels(base_tar, output_tar):
os.environ['INPUT_BASE_TAR'] = base_tar
os.environ['OUTPUT_BASE_TAR'] = output_tar
pytest.main(["-s", "-k", "test_main_hack"])
def main(args: argparse.Namespace):
old_pageserver_host = args.old_pageserver_host
new_pageserver_host = args.new_pageserver_host
tenants = args.tenants
old_http_client = NeonPageserverHttpClient(old_pageserver_host, args.old_pageserver_http_port)
old_http_client.check_status()
old_pageserver_connstr = f"postgresql://{old_pageserver_host}:{args.old_pageserver_pg_port}"
new_http_client = NeonPageserverHttpClient(new_pageserver_host, args.new_pageserver_http_port)
new_http_client.check_status()
new_pageserver_connstr = f"postgresql://{new_pageserver_host}:{args.new_pageserver_pg_port}"
psql_env = {**os.environ, 'LD_LIBRARY_PATH': '/usr/local/lib/'}
for tenant_id in tenants:
print(f"Tenant: {tenant_id}")
timelines = old_http_client.timeline_list(uuid.UUID(tenant_id))
print(f"Timelines: {timelines}")
# Create tenant in new pageserver
if args.only_import is False:
new_http_client.tenant_create(uuid.UUID(tenant_id), args.ok_if_exists)
for timeline in timelines:
# Export timelines from old pageserver
if args.only_import is False:
query = f"fullbackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']}"
cmd = [args.psql_path, "--no-psqlrc", old_pageserver_connstr, "-c", query]
print(f"Running: {cmd}")
tar_filename = path.join(basepath,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
stderr_filename = path.join(
basepath, f"{timeline['tenant_id']}_{timeline['timeline_id']}.stderr")
with open(tar_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
print(f"(capturing output to {tar_filename})")
subprocess.run(cmd, stdout=stdout_f, stderr=stderr_f, env=psql_env)
                # add_missing_empty_rels(incomplete_tar_filename, tar_filename)
print(f"Done export: {tar_filename}")
# Import timelines to new pageserver
psql_path = Path(args.psql_path)
import_cmd = f"import basebackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']} {timeline['local']['last_record_lsn']}"
tar_filename = path.join(basepath,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
full_cmd = rf"""cat {tar_filename} | {psql_path} {new_pageserver_connstr} -c '{import_cmd}' """
stderr_filename2 = path.join(
basepath, f"import_{timeline['tenant_id']}_{timeline['timeline_id']}.stderr")
stdout_filename = path.join(
basepath, f"import_{timeline['tenant_id']}_{timeline['timeline_id']}.stdout")
print(f"Running: {full_cmd}")
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename2, 'w') as stderr_f:
print(f"(capturing output to {stdout_filename})")
subprocess.run(full_cmd,
stdout=stdout_f,
stderr=stderr_f,
env=psql_env,
shell=True)
print(f"Done import")
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--tenant-id',
dest='tenants',
required=True,
nargs='+',
help='Id of the tenant to migrate. You can pass multiple arguments',
)
parser.add_argument(
'--from-host',
dest='old_pageserver_host',
required=True,
help='Host of the pageserver to migrate data from',
)
parser.add_argument(
'--from-http-port',
dest='old_pageserver_http_port',
required=False,
type=int,
default=9898,
help='HTTP port of the pageserver to migrate data from. Default: 9898',
)
parser.add_argument(
'--from-pg-port',
dest='old_pageserver_pg_port',
required=False,
type=int,
default=6400,
help='pg port of the pageserver to migrate data from. Default: 6400',
)
parser.add_argument(
'--to-host',
dest='new_pageserver_host',
required=True,
help='Host of the pageserver to migrate data to',
)
parser.add_argument(
'--to-http-port',
dest='new_pageserver_http_port',
required=False,
default=9898,
type=int,
help='HTTP port of the pageserver to migrate data to. Default: 9898',
)
parser.add_argument(
'--to-pg-port',
dest='new_pageserver_pg_port',
required=False,
default=6400,
type=int,
help='pg port of the pageserver to migrate data to. Default: 6400',
)
parser.add_argument(
'--ignore-tenant-exists',
dest='ok_if_exists',
required=False,
help=
        'Ignore the error if the tenant we are trying to create already exists. This can be dangerous if the existing tenant already contains data.',
)
parser.add_argument(
'--psql-path',
dest='psql_path',
required=False,
default='/usr/local/bin/psql',
help='Path to the psql binary. Default: /usr/local/bin/psql',
)
parser.add_argument(
'--only-import',
dest='only_import',
required=False,
default=False,
action='store_true',
help='Skip export and tenant creation part',
)
args = parser.parse_args()
main(args)

View File

@@ -28,10 +28,6 @@ strict = true
# There is some work in progress, though: https://github.com/MagicStack/asyncpg/pull/577
ignore_missing_imports = true
[mypy-pg8000.*]
# Used only in testing clients
ignore_missing_imports = true
[mypy-cached_property.*]
ignore_missing_imports = true

View File

@@ -45,7 +45,7 @@ If you want to run all tests that have the string "bench" in their names:
Useful environment variables:
`NEON_BIN`: The directory where neon binaries can be found.
`ZENITH_BIN`: The directory where zenith binaries can be found.
`POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found.
`TEST_OUTPUT`: Set the directory where test state and test output files
should go.

View File

@@ -1,3 +1,6 @@
from contextlib import closing
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException

View File

@@ -1,6 +1,8 @@
from contextlib import closing
from uuid import uuid4
from typing import Iterator
from uuid import UUID, uuid4
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
from requests.exceptions import HTTPError
import pytest

View File

@@ -1,9 +1,11 @@
from contextlib import closing, contextmanager
import psycopg2.extras
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.neon_fixtures import PgProtocol, NeonEnvBuilder
from fixtures.log_helper import log
import os
import time
import asyncpg
from fixtures.neon_fixtures import Postgres
import threading

View File

@@ -1,6 +1,8 @@
import pytest
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#

View File

@@ -1,3 +1,4 @@
import subprocess
from contextlib import closing
import psycopg2.extras

View File

@@ -1,167 +0,0 @@
from fixtures.neon_fixtures import VanillaPostgres
from fixtures.utils import subprocess_capture
import os
import shutil
from pathlib import Path
import tempfile
def get_rel_paths(log_dir, pg_bin, base_tar):
    """Yield the relation file paths contained in the base tar"""
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
port = "55439" # Probably free
with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
vanilla_pg.configure([f"port={port}"])
vanilla_pg.start()
# Create database based on template0 because we can't connect to template0
query = "create database template0copy template template0"
vanilla_pg.safe_psql(query, user="cloud_admin")
vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")
# Get all databases
query = "select oid, datname from pg_database"
oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
template0_oid = [
oid
for (oid, database) in oid_dbname_pairs
if database == "template0"
][0]
# Get rel paths for each database
for oid, database in oid_dbname_pairs:
if database == "template0":
# We can't connect to template0
continue
query = "select relname, pg_relation_filepath(oid) from pg_class"
result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
for relname, filepath in result:
if filepath is not None:
if database == "template0copy":
# Add all template0copy paths to template0
prefix = f"base/{oid}/"
if filepath.startswith(prefix):
suffix = filepath[len(prefix):]
yield f"base/{template0_oid}/{suffix}"
elif filepath.startswith("global"):
print(f"skipping {database} global file {filepath}")
else:
raise AssertionError
else:
yield filepath
def pack_base(log_dir, restored_dir, output_tar):
tmp_tar_name = "tmp.tar"
tmp_tar_path = os.path.join(restored_dir, tmp_tar_name)
cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir)
subprocess_capture(log_dir, cmd, cwd=restored_dir)
shutil.move(tmp_tar_path, output_tar)
def get_files_in_tar(log_dir, tar):
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir])
        # Yield every file path, relative to the tar root
for root, dirs, files in os.walk(restored_dir):
for name in files:
file_path = os.path.join(root, name)
yield file_path[len(restored_dir) + 1:]
def corrupt(log_dir, base_tar, output_tar):
"""Remove all empty files and repackage. Return paths of files removed."""
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
# Find empty files
empty_files = []
for root, dirs, files in os.walk(restored_dir):
for name in files:
file_path = os.path.join(root, name)
file_size = os.path.getsize(file_path)
if file_size == 0:
empty_files.append(file_path)
# Delete empty files (just to see if they get recreated)
for empty_file in empty_files:
os.remove(empty_file)
# Repackage
pack_base(log_dir, restored_dir, output_tar)
# Return relative paths
return {
empty_file[len(restored_dir) + 1:]
for empty_file in empty_files
}
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
with tempfile.TemporaryDirectory() as restored_dir:
# Unpack the base tar
subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])
# Touch files that don't exist
for path in paths:
absolute_path = os.path.join(restored_dir, path)
exists = os.path.exists(absolute_path)
if not exists:
            print(f"File {absolute_path} didn't exist. Creating...")
Path(absolute_path).touch()
# Repackage
pack_base(log_dir, restored_dir, output_tar)
def test_complete(test_output_dir, pg_bin):
# Specify directories
# TODO make a basebackup instead of using one from another test
work_dir = "/home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/"
base_tar = os.path.join(work_dir, "psql_2.stdout")
output_tar = os.path.join(work_dir, "psql_2-completed.stdout")
# Create new base tar with missing empty files
corrupt_tar = os.path.join(test_output_dir, "psql_2-corrupted.stdout")
deleted_files = corrupt(test_output_dir, base_tar, corrupt_tar)
assert len(set(get_files_in_tar(test_output_dir, base_tar)) -
set(get_files_in_tar(test_output_dir, corrupt_tar))) > 0
# Reconstruct paths from the corrupted tar, assert it covers everything important
reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, corrupt_tar))
paths_missed = deleted_files - reconstructed_paths
assert paths_missed.issubset({
"postgresql.auto.conf",
"pg_ident.conf",
})
# Recreate the correct tar by touching files, compare with original tar
touch_missing_rels(test_output_dir, corrupt_tar, output_tar, reconstructed_paths)
paths_missed = (set(get_files_in_tar(test_output_dir, base_tar)) -
set(get_files_in_tar(test_output_dir, output_tar)))
assert paths_missed.issubset({
"postgresql.auto.conf",
"pg_ident.conf",
})
# HACK this script relies on test fixtures, but you can run it with
# poetry run pytest -k test_main_hack and pass inputs via envvars
#
# The script takes a base tar, infers what empty rel files might be missing
# and creates a new base tar with those files included. It does not modify
# the original file.
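# For example (the tar paths below are illustrative placeholders):
#
#   INPUT_BASE_TAR=/path/to/base.tar OUTPUT_BASE_TAR=/path/to/base-fixed.tar \
#       poetry run pytest -s -k test_main_hack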
def test_main_hack(test_output_dir, pg_bin, pytestconfig):
base_tar = os.environ['INPUT_BASE_TAR']
output_tar = os.environ['OUTPUT_BASE_TAR']
reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, base_tar))
touch_missing_rels(test_output_dir, base_tar, output_tar, reconstructed_paths)

View File

@@ -1,10 +1,16 @@
import subprocess
from contextlib import closing
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, PortDistributor, VanillaPostgres
from fixtures.neon_fixtures import pg_distrib_dir
import os
from fixtures.utils import subprocess_capture
from fixtures.utils import mkdir_if_needed, subprocess_capture
import shutil
import getpass
import pwd
num_rows = 1000
@@ -40,20 +46,19 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder,
psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
# Get and unpack fullbackup from pageserver
restored_dir_path = env.repo_dir / "restored_datadir"
restored_dir_path = os.path.join(env.repo_dir, "restored_datadir")
os.mkdir(restored_dir_path, 0o750)
query = f"fullbackup {env.initial_tenant.hex} {timeline} {lsn}"
cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
result_basepath = pg_bin.run_capture(cmd, env=psql_env)
tar_output_file = result_basepath + ".stdout"
subprocess_capture(str(env.repo_dir),
["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)])
subprocess_capture(str(env.repo_dir), ["tar", "-xf", tar_output_file, "-C", restored_dir_path])
# HACK
# fullbackup returns neon specific pg_control and first WAL segment
# use resetwal to overwrite it
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, 'pg_resetwal')
cmd = [pg_resetwal_path, "-D", str(restored_dir_path)]
cmd = [pg_resetwal_path, "-D", restored_dir_path]
pg_bin.run_capture(cmd, env=psql_env)
# Restore from the backup and find the data we inserted

View File

@@ -191,8 +191,3 @@ def test_import_from_pageserver(test_output_dir, pg_bin, vanilla_pg, neon_env_bu
# Check it's the same as the first fullbackup
# TODO pageserver should be checking checksum
assert os.path.getsize(tar_output_file) == os.path.getsize(new_tar_output_file)
# Check that gc works
psconn = env.pageserver.connect()
pscur = psconn.cursor()
pscur.execute(f"do_gc {tenant.hex} {timeline} 0")

View File

@@ -1,5 +1,5 @@
# It's possible to run any regular test with the local fs remote storage via
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ......
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/zenith_zzz/'}" poetry ......
import shutil, os
from contextlib import closing

View File

@@ -11,7 +11,7 @@ import signal
import pytest
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir
from fixtures.utils import lsn_from_hex, subprocess_capture
from fixtures.utils import lsn_from_hex
def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
@@ -101,6 +101,10 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
log.info('load thread stopped')
@pytest.mark.skip(
    reason=
    "needs to replace the callmemaybe call with a better way to migrate timelines between pageservers"
)
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
port_distributor: PortDistributor,
@@ -184,38 +188,30 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
new_pageserver_http_port,
neon_env_builder.broker):
        # Migrate either by attaching from s3 or by import/export of a basebackup
relocation_method = "import"
if relocation_method == "import":
scripts_dir = "/home/bojan/src/neondatabase/neon/scripts/"
cmd = [
"python",
os.path.join(scripts_dir, "export_import_betwen_pageservers.py"),
"--tenant-id", tenant.hex,
"--from-host", "localhost",
"--from-http-port", str(pageserver_http.port),
"--from-pg-port", str(env.pageserver.service_port.pg),
"--to-host", "localhost",
"--to-http-port", str(new_pageserver_http_port),
"--to-pg-port", str(new_pageserver_pg_port),
"--psql-path", os.path.join(pg_distrib_dir, "bin", "psql"),
]
subprocess_capture(env.repo_dir, cmd, check=True)
elif relocation_method == "attach":
# call to attach timeline to new pageserver
new_pageserver_http.timeline_attach(tenant, timeline)
# call to attach timeline to new pageserver
new_pageserver_http.timeline_attach(tenant, timeline)
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint
new_timeline_detail = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: assert_local(new_pageserver_http, tenant, timeline))
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there was no new writes since checkpoint
new_timeline_detail = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: assert_local(new_pageserver_http, tenant, timeline))
# when load is active these checks can break because lsns are not static
# so lets check with some margin
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
0.03)
# when load is active these checks can break because lsns are not static
# so lets check with some margin
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
0.03)
# callmemaybe to start replication from safekeeper to the new pageserver
# when there is no load there is a clean checkpoint and no wal delta
# needs to be streamed to the new pageserver
# TODO (rodionov) use attach to start replication
with pg_cur(PgProtocol(host='localhost', port=new_pageserver_pg_port)) as cur:
# "callmemaybe {} {} host={} port={} options='-c ztimelineid={} ztenantid={}'"
safekeeper_connstring = f"host=localhost port={env.safekeepers[0].port.pg} options='-c ztimelineid={timeline} ztenantid={tenant} pageserver_connstr=postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
cur.execute("callmemaybe {} {} {}".format(tenant.hex,
timeline.hex,
safekeeper_connstring))
tenant_pg.stop()

View File

@@ -1,70 +0,0 @@
from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
from uuid import UUID
import time
def get_only_element(l):
assert len(l) == 1
return l[0]
# Test that gc and compaction tenant tasks start and stop correctly
def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
# The gc and compaction loops don't bother to watch for tenant state
# changes while sleeping, so we use small periods to make this test
# run faster. With default settings we'd have to wait longer for tasks
# to notice state changes and shut down.
# TODO fix this behavior in the pageserver
tenant_config = "{gc_period = '1 s', compaction_period = '1 s'}"
neon_env_builder.pageserver_config_override = f"tenant_config={tenant_config}"
name = "test_tenant_tasks"
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
def get_state(tenant):
all_states = client.tenant_list()
matching = [t for t in all_states if t["id"] == tenant.hex]
return get_only_element(matching)["state"]
def get_metric_value(name):
metrics = client.get_metrics()
relevant = [line for line in metrics.splitlines() if line.startswith(name)]
if len(relevant) == 0:
return 0
line = get_only_element(relevant)
value = line.lstrip(name).strip()
return int(value)
def detach_all_timelines(tenant):
timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
for t in timelines:
client.timeline_detach(tenant, t)
def assert_idle(tenant):
assert get_state(tenant) == "Idle"
# Create tenant, start compute
tenant, _ = env.neon_cli.create_tenant()
timeline = env.neon_cli.create_timeline(name, tenant_id=tenant)
pg = env.postgres.create_start(name, tenant_id=tenant)
assert (get_state(tenant) == "Active")
# Stop compute
pg.stop()
# Detach all tenants and wait for them to go idle
# TODO they should be already idle since there are no active computes
for tenant_info in client.tenant_list():
tenant_id = UUID(tenant_info["id"])
detach_all_timelines(tenant_id)
wait_until(10, 0.2, lambda: assert_idle(tenant_id))
# Assert that all tasks finish quickly after tenants go idle
def assert_tasks_finish():
tasks_started = get_metric_value('pageserver_tenant_task_events{event="start"}')
tasks_ended = get_metric_value('pageserver_tenant_task_events{event="stop"}')
tasks_panicked = get_metric_value('pageserver_tenant_task_events{event="panic"}')
assert tasks_started == tasks_ended
assert tasks_panicked == 0
wait_until(10, 0.2, assert_tasks_finish)

View File

@@ -1,4 +1,3 @@
import pathlib
import pytest
import random
import time
@@ -15,7 +14,7 @@ from dataclasses import dataclass, field
from multiprocessing import Process, Value
from pathlib import Path
from fixtures.neon_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol
from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex
from fixtures.utils import get_dir_size, lsn_to_hex, mkdir_if_needed, lsn_from_hex
from fixtures.log_helper import log
from typing import List, Optional, Any
from uuid import uuid4
@@ -646,7 +645,7 @@ class ProposerPostgres(PgProtocol):
def create_dir_config(self, safekeepers: str):
""" Create dir and config for running --sync-safekeepers """
pathlib.Path(self.pg_data_dir_path()).mkdir(exist_ok=True)
mkdir_if_needed(self.pg_data_dir_path())
with open(self.config_file_path(), "w") as f:
cfg = [
"synchronous_standby_names = 'walproposer'\n",
@@ -829,7 +828,7 @@ class SafekeeperEnv:
self.timeline_id = uuid.uuid4()
self.tenant_id = uuid.uuid4()
self.repo_dir.mkdir(exist_ok=True)
mkdir_if_needed(str(self.repo_dir))
# Create config and a Safekeeper object for each safekeeper
self.safekeepers = []
@@ -848,8 +847,8 @@ class SafekeeperEnv:
http=self.port_distributor.get_port(),
)
safekeeper_dir = self.repo_dir / f"sk{i}"
safekeeper_dir.mkdir(exist_ok=True)
safekeeper_dir = os.path.join(self.repo_dir, f"sk{i}")
mkdir_if_needed(safekeeper_dir)
args = [
self.bin_safekeeper,
@@ -858,7 +857,7 @@ class SafekeeperEnv:
"--listen-http",
f"127.0.0.1:{port.http}",
"-D",
str(safekeeper_dir),
safekeeper_dir,
"--id",
str(i),
"--broker-endpoints",

View File

@@ -1,17 +1,19 @@
import os
from pathlib import Path
import subprocess
from fixtures.neon_fixtures import (NeonEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
vanilla_pg,
pg_distrib_dir)
from fixtures.log_helper import log
def test_wal_restore(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
test_output_dir: Path,
test_output_dir,
port_distributor: PortDistributor):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
@@ -20,13 +22,13 @@ def test_wal_restore(neon_env_builder: NeonEnvBuilder,
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
env.neon_cli.pageserver_stop()
port = port_distributor.get_port()
data_dir = test_output_dir / 'pgsql.restored'
data_dir = os.path.join(test_output_dir, 'pgsql.restored')
with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:
pg_bin.run_capture([
os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'),
os.path.join(pg_distrib_dir, 'bin'),
str(test_output_dir / 'repo' / 'safekeepers' / 'sk1' / str(tenant_id) / '*'),
str(data_dir),
os.path.join(test_output_dir, 'repo/safekeepers/sk1/{}/*'.format(tenant_id)),
data_dir,
str(port)
])
restored.start()

View File

@@ -1,13 +1,13 @@
import os
from pathlib import Path
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir
# The isolation tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys):
def test_isolation(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.neon_cli.create_branch("test_isolation", "empty")
@@ -17,8 +17,9 @@ def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, caps
pg.safe_psql('CREATE DATABASE isolation_regression')
# Create some local directories for pg_isolation_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = os.path.join(test_output_dir, 'regress')
mkdir_if_needed(runpath)
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_isolation_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/isolation')

View File

@@ -1,6 +1,6 @@
import os
from pathlib import Path
from fixtures.utils import mkdir_if_needed
from fixtures.neon_fixtures import (NeonEnv,
check_restored_datadir_content,
base_dir,
@@ -8,7 +8,7 @@ from fixtures.neon_fixtures import (NeonEnv,
from fixtures.log_helper import log
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys):
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.neon_cli.create_branch("test_neon_regress", "empty")
@@ -17,8 +17,9 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c
pg.safe_psql('CREATE DATABASE regression')
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = os.path.join(test_output_dir, 'regress')
mkdir_if_needed(runpath)
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_regress will need.
# This test runs neon specific tests

View File

@@ -1,13 +1,13 @@
import os
import pathlib
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
# The pg_regress tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_bin, capsys):
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: str, pg_bin, capsys):
env = neon_simple_env
env.neon_cli.create_branch("test_pg_regress", "empty")
@@ -16,8 +16,9 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
pg.safe_psql('CREATE DATABASE regression')
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = os.path.join(test_output_dir, 'regress')
mkdir_if_needed(runpath)
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
@@ -50,7 +51,7 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
# checkpoint one more time to ensure that the lsn we get is the latest one
pg.safe_psql('CHECKPOINT')
pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)

View File

@@ -35,7 +35,12 @@ from typing_extensions import Literal
import requests
import backoff # type: ignore
from .utils import (etcd_path, get_self_dir, subprocess_capture, lsn_from_hex, lsn_to_hex)
from .utils import (etcd_path,
get_self_dir,
mkdir_if_needed,
subprocess_capture,
lsn_from_hex,
lsn_to_hex)
from fixtures.log_helper import log
"""
This file contains pytest fixtures. A fixture is a test resource that can be
@@ -45,7 +50,7 @@ A fixture is created with the decorator @pytest.fixture decorator.
See docs: https://docs.pytest.org/en/6.2.x/fixture.html
There are several environment variables that can control the running of tests:
NEON_BIN, POSTGRES_DISTRIB_DIR, etc. See README.md for more information.
ZENITH_BIN, POSTGRES_DISTRIB_DIR, etc. See README.md for more information.
There's no need to import this file to use it. It should be declared as a plugin
inside conftest.py, and that makes it available to all tests.
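For example, a minimal conftest.py (illustrative, not necessarily the repository's
actual file) can register these fixtures as a plugin with a single line:

    pytest_plugins = ("fixtures.neon_fixtures", )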
@@ -122,7 +127,7 @@ def pytest_configure(config):
top_output_dir = env_test_output
else:
top_output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR)
pathlib.Path(top_output_dir).mkdir(exist_ok=True)
mkdir_if_needed(top_output_dir)
# Find the postgres installation.
global pg_distrib_dir
@@ -146,7 +151,7 @@ def pytest_configure(config):
return
# Find the neon binaries.
global neon_binpath
env_neon_bin = os.environ.get('NEON_BIN')
env_neon_bin = os.environ.get('ZENITH_BIN')
if env_neon_bin:
neon_binpath = env_neon_bin
else:
@@ -1311,7 +1316,7 @@ def append_pageserver_param_overrides(
class PgBin:
""" A helper class for executing postgres binaries """
def __init__(self, log_dir: Path):
def __init__(self, log_dir: str):
self.log_dir = log_dir
self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin')
self.env = os.environ.copy()
@@ -1362,27 +1367,22 @@ class PgBin:
self._fixpath(command)
log.info('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
return subprocess_capture(str(self.log_dir),
command,
env=env,
cwd=cwd,
check=True,
**kwargs)
return subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True, **kwargs)
@pytest.fixture(scope='function')
def pg_bin(test_output_dir: Path) -> PgBin:
def pg_bin(test_output_dir: str) -> PgBin:
return PgBin(test_output_dir)
class VanillaPostgres(PgProtocol):
def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
def __init__(self, pgdatadir: str, pg_bin: PgBin, port: int, init=True):
super().__init__(host='localhost', port=port, dbname='postgres')
self.pgdatadir = pgdatadir
self.pg_bin = pg_bin
self.running = False
if init:
self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
@@ -1398,13 +1398,12 @@ class VanillaPostgres(PgProtocol):
if log_path is None:
log_path = os.path.join(self.pgdatadir, "pg.log")
self.pg_bin.run_capture(
['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', self.pgdatadir, '-l', log_path, 'start'])
def stop(self):
assert self.running
self.running = False
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', self.pgdatadir, 'stop'])
def get_subdir_size(self, subdir) -> int:
"""Return size of pgdatadir subdirectory in bytes."""
@@ -1419,9 +1418,9 @@ class VanillaPostgres(PgProtocol):
@pytest.fixture(scope='function')
def vanilla_pg(test_output_dir: Path,
def vanilla_pg(test_output_dir: str,
port_distributor: PortDistributor) -> Iterator[VanillaPostgres]:
pgdatadir = test_output_dir / "pgdata-vanilla"
pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla")
pg_bin = PgBin(test_output_dir)
port = port_distributor.get_port()
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
@@ -1458,7 +1457,7 @@ class RemotePostgres(PgProtocol):
@pytest.fixture(scope='function')
def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]:
def remote_pg(test_output_dir: str) -> Iterator[RemotePostgres]:
pg_bin = PgBin(test_output_dir)
connstr = os.getenv("BENCHMARK_CONNSTR")
@@ -1925,12 +1924,9 @@ class Etcd:
datadir: str
port: int
peer_port: int
binary_path: Path = field(init=False)
binary_path: Path = etcd_path()
handle: Optional[subprocess.Popen[Any]] = None # handle of running daemon
def __post_init__(self):
self.binary_path = etcd_path()
def client_url(self):
return f'http://127.0.0.1:{self.port}'
@@ -1984,13 +1980,11 @@ class Etcd:
self.handle.wait()
def get_test_output_dir(request: Any) -> pathlib.Path:
def get_test_output_dir(request: Any) -> str:
""" Compute the working directory for an individual test. """
test_name = request.node.name
test_dir = pathlib.Path(top_output_dir) / test_name.replace("/", "-")
test_dir = os.path.join(str(top_output_dir), test_name)
log.info(f'get_test_output_dir is {test_dir}')
# make mypy happy
assert isinstance(test_dir, pathlib.Path)
return test_dir
@@ -2004,14 +1998,14 @@ def get_test_output_dir(request: Any) -> pathlib.Path:
# this fixture ensures that the directory exists. That works because
# 'autouse' fixtures are run before other fixtures.
@pytest.fixture(scope='function', autouse=True)
def test_output_dir(request: Any) -> pathlib.Path:
def test_output_dir(request: Any) -> str:
""" Create the working directory for an individual test. """
# one directory per test
test_dir = get_test_output_dir(request)
log.info(f'test_output_dir is {test_dir}')
shutil.rmtree(test_dir, ignore_errors=True)
test_dir.mkdir()
mkdir_if_needed(test_dir)
return test_dir
@@ -2057,7 +2051,7 @@ def should_skip_file(filename: str) -> bool:
#
# Test helpers
#
def list_files_to_compare(pgdata_dir: pathlib.Path):
def list_files_to_compare(pgdata_dir: str):
pgdata_files = []
for root, _file, filenames in os.walk(pgdata_dir):
for filename in filenames:
@@ -2074,7 +2068,7 @@ def list_files_to_compare(pgdata_dir: pathlib.Path):
# pg is the existing and running compute node, that we want to compare with a basebackup
def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Postgres):
def check_restored_datadir_content(test_output_dir: str, env: NeonEnv, pg: Postgres):
# Get the timeline ID. We need it for the 'basebackup' command
with closing(pg.connect()) as conn:
@@ -2086,8 +2080,8 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post
pg.stop()
# Take a basebackup from pageserver
restored_dir_path = env.repo_dir / f"{pg.node_name}_restored_datadir"
restored_dir_path.mkdir(exist_ok=True)
restored_dir_path = os.path.join(env.repo_dir, f"{pg.node_name}_restored_datadir")
mkdir_if_needed(restored_dir_path)
pg_bin = PgBin(test_output_dir)
psql_path = os.path.join(pg_bin.pg_bin_path, 'psql')
@@ -2114,7 +2108,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post
# list files we're going to compare
assert pg.pgdata_dir
pgdata_files = list_files_to_compare(pathlib.Path(pg.pgdata_dir))
pgdata_files = list_files_to_compare(pg.pgdata_dir)
restored_files = list_files_to_compare(restored_dir_path)
# check that file sets are equal
@@ -2146,7 +2140,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post
assert (mismatch, error) == ([], [])
def wait_until(number_of_iterations: int, interval: float, func):
def wait_until(number_of_iterations: int, interval: int, func):
"""
Wait until 'func' returns successfully, without exception. Returns the last return value
    from the function.

View File

@@ -12,6 +12,18 @@ def get_self_dir() -> str:
return os.path.dirname(os.path.abspath(__file__))
def mkdir_if_needed(path: str) -> None:
""" Create a directory if it doesn't already exist
Note this won't try to create intermediate directories.
"""
try:
os.mkdir(path)
except FileExistsError:
pass
assert os.path.isdir(path)
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
""" Run a process and capture its output

View File

@@ -80,7 +80,6 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
thread.join()
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("n_tables", [5])
@pytest.mark.parametrize("scale", get_scales_matrix(5))
@pytest.mark.parametrize("num_iters", [10])
@@ -122,7 +121,6 @@ def start_pgbench_simple_update_workload(env: PgCompare, duration: int):
env.flush()
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("scale", get_scales_matrix(100))
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, duration: int):
@@ -160,7 +158,6 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int):
])
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("scale", get_scales_matrix(1000))
def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int):
env = pg_compare

View File

@@ -1,2 +0,0 @@
bin/
obj/

View File

@@ -1,2 +0,0 @@
bin/
obj/

View File

@@ -1,14 +0,0 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build
WORKDIR /source
COPY *.csproj .
RUN dotnet restore
COPY . .
RUN dotnet publish -c release -o /app --no-restore
FROM mcr.microsoft.com/dotnet/runtime:6.0
WORKDIR /app
COPY --from=build /app .
ENTRYPOINT ["dotnet", "csharp-npgsql.dll"]

View File

@@ -1,19 +0,0 @@
using Npgsql;
var host = Environment.GetEnvironmentVariable("NEON_HOST");
var database = Environment.GetEnvironmentVariable("NEON_DATABASE");
var user = Environment.GetEnvironmentVariable("NEON_USER");
var password = Environment.GetEnvironmentVariable("NEON_PASSWORD");
var connString = $"Host={host};Username={user};Password={password};Database={database}";
await using var conn = new NpgsqlConnection(connString);
await conn.OpenAsync();
await using (var cmd = new NpgsqlCommand("SELECT 1", conn))
await using (var reader = await cmd.ExecuteReaderAsync())
{
while (await reader.ReadAsync())
Console.WriteLine(reader.GetInt32(0));
}
await conn.CloseAsync();

View File

@@ -1,14 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="6.0.5" />
</ItemGroup>
</Project>

View File

@@ -1 +0,0 @@

View File

@@ -1,10 +0,0 @@
FROM openjdk:17
WORKDIR /source
COPY . .
WORKDIR /app
RUN curl --output postgresql.jar https://jdbc.postgresql.org/download/postgresql-42.4.0.jar && \
javac -d /app /source/Example.java
CMD ["java", "-cp", "/app/postgresql.jar:.", "Example"]

View File

@@ -1,31 +0,0 @@
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;
public class Example
{
public static void main( String[] args ) throws Exception
{
String host = System.getenv("NEON_HOST");
String database = System.getenv("NEON_DATABASE");
String user = System.getenv("NEON_USER");
String password = System.getenv("NEON_PASSWORD");
String url = "jdbc:postgresql://%s/%s".formatted(host, database);
Properties props = new Properties();
props.setProperty("user", user);
props.setProperty("password", password);
Connection conn = DriverManager.getConnection(url, props);
Statement st = conn.createStatement();
ResultSet rs = st.executeQuery("SELECT 1");
while (rs.next())
{
System.out.println(rs.getString(1));
}
rs.close();
st.close();
}
}

View File

@@ -1,8 +0,0 @@
FROM python:3.10
WORKDIR /source
COPY . .
RUN python3 -m pip install --no-cache-dir -r requirements.txt
CMD ["python3", "asyncpg_example.py"]

View File

@@ -1,30 +0,0 @@
#! /usr/bin/env python3
import asyncio
import os
import asyncpg
async def run(**kwargs) -> asyncpg.Record:
conn = await asyncpg.connect(
**kwargs,
        statement_cache_size=0,  # Prepared statements don't work with pgbouncer
)
rv = await conn.fetchrow("SELECT 1")
await conn.close()
return rv
if __name__ == "__main__":
kwargs = {
k.lstrip("NEON_").lower(): v
for k in ("NEON_HOST", "NEON_DATABASE", "NEON_USER", "NEON_PASSWORD")
if (v := os.environ.get(k, None)) is not None
}
loop = asyncio.new_event_loop()
row = loop.run_until_complete(run(**kwargs))
print(row[0])

View File

@@ -1 +0,0 @@
asyncpg==0.25.0

View File

@@ -1,8 +0,0 @@
FROM python:3.10
WORKDIR /source
COPY . .
RUN python3 -m pip install --no-cache-dir -r requirements.txt
CMD ["python3", "pg8000_example.py"]

View File

@@ -1,23 +0,0 @@
#! /usr/bin/env python3
import os
import ssl
import pg8000.dbapi
if __name__ == "__main__":
kwargs = {
k.lstrip("NEON_").lower(): v
for k in ("NEON_HOST", "NEON_DATABASE", "NEON_USER", "NEON_PASSWORD")
if (v := os.environ.get(k, None)) is not None
}
conn = pg8000.dbapi.connect(
**kwargs,
ssl_context=True,
)
cursor = conn.cursor()
cursor.execute("SELECT 1")
row = cursor.fetchone()
print(row[0])
conn.close()

View File

@@ -1 +0,0 @@
pg8000==1.29.1

View File

@@ -1 +0,0 @@
.build/

View File

@@ -1,11 +0,0 @@
FROM swift:5.6 AS build
RUN apt-get -q update && apt-get -q install -y libssl-dev
WORKDIR /source
COPY . .
RUN swift build --configuration release
FROM swift:5.6
WORKDIR /app
COPY --from=build /source/.build/release/release .
CMD ["/app/PostgresClientKitExample"]

View File

@@ -1,41 +0,0 @@
{
"pins" : [
{
"identity" : "bluesocket",
"kind" : "remoteSourceControl",
"location" : "https://github.com/IBM-Swift/BlueSocket.git",
"state" : {
"revision" : "dd924c3bc2c1c144c42b8dda3896f1a03115ded4",
"version" : "2.0.2"
}
},
{
"identity" : "bluesslservice",
"kind" : "remoteSourceControl",
"location" : "https://github.com/IBM-Swift/BlueSSLService",
"state" : {
"revision" : "c249988fb748749739144e7f554710552acdc0bd",
"version" : "2.0.1"
}
},
{
"identity" : "postgresclientkit",
"kind" : "remoteSourceControl",
"location" : "https://github.com/codewinsdotcom/PostgresClientKit.git",
"state" : {
"branch" : "v1.4.3",
"revision" : "beafedaea6dc9f04712e9a8547b77f47c406a47e"
}
},
{
"identity" : "swift-argument-parser",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-argument-parser",
"state" : {
"revision" : "6b2aa2748a7881eebb9f84fb10c01293e15b52ca",
"version" : "0.5.0"
}
}
],
"version" : 2
}

View File

@@ -1,17 +0,0 @@
// swift-tools-version:5.6
import PackageDescription
let package = Package(
name: "PostgresClientKitExample",
dependencies: [
.package(
url: "https://github.com/codewinsdotcom/PostgresClientKit.git",
revision: "v1.4.3"
)
],
targets: [
.target(
name: "PostgresClientKitExample",
dependencies: [ "PostgresClientKit" ])
]
)

View File

@@ -1,38 +0,0 @@
import Foundation
import PostgresClientKit
do {
var configuration = PostgresClientKit.ConnectionConfiguration()
let env = ProcessInfo.processInfo.environment
if let host = env["NEON_HOST"] {
configuration.host = host
}
if let database = env["NEON_DATABASE"] {
configuration.database = database
}
if let user = env["NEON_USER"] {
configuration.user = user
}
if let password = env["NEON_PASSWORD"] {
configuration.credential = .scramSHA256(password: password)
}
let connection = try PostgresClientKit.Connection(configuration: configuration)
defer { connection.close() }
let text = "SELECT 1;"
let statement = try connection.prepareStatement(text: text)
defer { statement.close() }
let cursor = try statement.execute(parameterValues: [ ])
defer { cursor.close() }
for row in cursor {
let columns = try row.get().columns
print(columns[0])
}
} catch {
print(error)
}

View File

@@ -1,54 +0,0 @@
import os
import shutil
import subprocess
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.parse import urlparse
import pytest
from fixtures.neon_fixtures import RemotePostgres
@pytest.mark.remote_cluster
@pytest.mark.parametrize(
"client",
[
"csharp/npgsql",
"java/jdbc",
"python/asyncpg",
pytest.param(
"python/pg8000", # See https://github.com/neondatabase/neon/pull/2008#discussion_r912264281
            marks=pytest.mark.xfail(reason="Handles SSL in a way incompatible with Neon")),
pytest.param(
"swift/PostgresClientKit", # See https://github.com/neondatabase/neon/pull/2008#discussion_r911896592
            marks=pytest.mark.xfail(reason="Neither SNI nor parameters are supported")),
"typescript/postgresql-client",
],
)
def test_pg_clients(remote_pg: RemotePostgres, client: str):
conn_options = remote_pg.conn_options()
env_file = None
with NamedTemporaryFile(mode="w", delete=False) as f:
env_file = f.name
f.write(f"""
NEON_HOST={conn_options["host"]}
NEON_DATABASE={conn_options["dbname"]}
NEON_USER={conn_options["user"]}
NEON_PASSWORD={conn_options["password"]}
""")
image_tag = client.lower()
docker_bin = shutil.which("docker")
if docker_bin is None:
raise RuntimeError("docker is required for running this test")
build_cmd = [
docker_bin, "build", "--quiet", "--tag", image_tag, f"{Path(__file__).parent / client}"
]
run_cmd = [docker_bin, "run", "--rm", "--env-file", env_file, image_tag]
subprocess.run(build_cmd, check=True)
result = subprocess.run(run_cmd, check=True, capture_output=True, text=True)
assert result.stdout.strip() == "1"

View File

@@ -1 +0,0 @@
node_modules/

View File

@@ -1,7 +0,0 @@
FROM node:16
WORKDIR /source
COPY . .
RUN npm clean-install
CMD ["/source/index.js"]

Some files were not shown because too many files have changed in this diff.