Compare commits

..

2 Commits

Author          SHA1         Message                                      Date
Dmitry Ivanov   0ce4538e67   Rework build profiles in root Cargo.toml     2022-07-08 02:08:53 +03:00
Dmitry Ivanov   7042f9d12d   Minor fixes to scripts/coverage              2022-07-08 02:08:28 +03:00
107 changed files with 2265 additions and 4734 deletions

View File

@@ -1,13 +0,0 @@
# The binaries are really slow, if you compile them in 'dev' mode with the defaults.
# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
# optimizations enabled by "opt-level=1" don't affect debuggability too much.
#
# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
#
[profile.dev.package."*"]
# Set the default for dependencies in Development mode.
opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1
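A side note on the profile settings in this hunk: Cargo can also override profile keys per invocation through environment variables (its documented `CARGO_PROFILE_<name>_<key>` mapping), which is a quick way to experiment with the opt-level trade-off without touching any manifest. A minimal sketch, not part of the diff:

```
# Override the dev profile's opt-level for a single build, roughly equivalent
# to setting [profile.dev] opt-level = 1 in a manifest.
CARGO_PROFILE_DEV_OPT_LEVEL=1 cargo build
CARGO_PROFILE_DEV_OPT_LEVEL=1 cargo test
```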

View File

@@ -1,7 +1,6 @@
[pageservers]
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1
zenith-1-ps-3 console_region_id=1
[safekeepers]
zenith-1-sk-1 console_region_id=1

View File

@@ -5,10 +5,10 @@ executors:
resource_class: xlarge
docker:
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
- image: neondatabase/rust:1.58
- image: zimg/rust:1.58
neon-executor:
docker:
- image: neondatabase/rust:1.58
- image: zimg/rust:1.58
jobs:
# A job to build postgres
@@ -37,7 +37,7 @@ jobs:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
# Build postgres if the restore_cache didn't find a build.
# `make` can't figure out whether the cache is valid, since
@@ -54,7 +54,7 @@ jobs:
- save_cache:
name: Save postgres cache
key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
key: v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
paths:
- tmp_install
@@ -85,7 +85,7 @@ jobs:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- restore_cache:
name: Restore rust cache
@@ -93,7 +93,7 @@ jobs:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
# Build the rust code, including test binaries
- run:
@@ -107,7 +107,7 @@ jobs:
export CARGO_INCREMENTAL=0
export CACHEPOT_BUCKET=zenith-rust-cachepot
export RUSTC_WRAPPER=""
export RUSTC_WRAPPER=cachepot
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
@@ -115,7 +115,7 @@ jobs:
- save_cache:
name: Save rust cache
key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
paths:
- ~/.cargo/registry
- ~/.cargo/git
@@ -142,6 +142,11 @@ jobs:
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/zenith/bin
mkdir -p /tmp/zenith/test_bin
mkdir -p /tmp/zenith/etc
@@ -325,6 +330,274 @@ jobs:
paths:
- "*"
# Build neondatabase/neon:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:latest
# Build neondatabase/compute-node:latest image and push it to Docker hub
docker-image-compute:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Build and push compute-tools Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/compute-tools:local \
--tag neondatabase/compute-tools:latest \
-f Dockerfile.compute-tools .
# Only push :latest image
docker push neondatabase/compute-tools:latest
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
--tag neondatabase/compute-node:latest vendor/postgres \
--build-arg COMPUTE_TOOLS_TAG=local
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:latest
# Build production neondatabase/neon:release image and push it to Docker hub
docker-image-release:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:release
# Build production neondatabase/compute-node:release image and push it to Docker hub
docker-image-compute-release:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Build and push compute-tools Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/compute-tools:release \
--tag neondatabase/compute-tools:local \
-f Dockerfile.compute-tools .
# Only push :release image
docker push neondatabase/compute-tools:release
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
--tag neondatabase/compute-node:release vendor/postgres \
--build-arg COMPUTE_TOOLS_TAG=local
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:release
deploy-staging:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i staging.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-staging-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${STAGING_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-neon-stress:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i neon-stress.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-neon-stress-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${NEON_STRESS_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
deploy-release:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
RELEASE=true ./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i production.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-release-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${PRODUCTION_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG="release-$(git log --oneline|wc -l)"
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
workflows:
build_and_test:
jobs:
@@ -367,3 +640,103 @@ workflows:
save_perf_report: true
requires:
- build-neon-release
- docker-image:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-staging:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-staging-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-neon-stress:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-neon-stress-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- docker-image-release:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- release
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute-release:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- release
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-release:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- release
requires:
- docker-image-release
- deploy-release-proxy:
# deploy only for commits to main
filters:
branches:
only:
- release
requires:
- docker-image-release
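One detail worth calling out in this CircleCI config: the Docker image tag is just the number of commits reachable from HEAD. For orientation, the two commands below are equivalent ways to compute it; the first is what the config above uses, the second is what the GitHub Actions workflow later in this compare uses.

```
# Both print the commit count on the current branch.
DOCKER_TAG=$(git log --oneline | wc -l)
DOCKER_TAG=$(git rev-list --count HEAD)
echo "$DOCKER_TAG"
```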

View File

@@ -31,13 +31,6 @@ inputs:
runs:
using: "composite"
steps:
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
@@ -48,14 +41,15 @@ runs:
shell: bash -ex {0}
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tar.zst -C /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
# Restore the parts of the 'build' directory that were included in the
# tarball. This includes the regression test modules in
# src/test/regress/*.so.
mkdir -p build/
cp -a /tmp/neon/pg_build/* build/
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry

View File

@@ -11,7 +11,7 @@ on:
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '36 4 * * *' # run once a day, timezone is utc
- cron: '36 7 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
@@ -26,11 +26,11 @@ jobs:
runs-on: [self-hosted, zenith-benchmarker]
env:
POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"
POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"
steps:
- name: Checkout zenith repo
uses: actions/checkout@v3
uses: actions/checkout@v2
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
@@ -88,7 +88,7 @@ jobs:
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
PLATFORM: "zenith-staging"
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
@@ -96,7 +96,7 @@ jobs:
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
- name: Submit result
env:
@@ -104,12 +104,3 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,10 +1,9 @@
name: Test and Deploy
name: Test
on:
push:
branches:
- main
- release
pull_request:
defaults:
@@ -12,17 +11,61 @@ defaults:
shell: bash -ex {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
build-postgres:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
env:
BUILD_TYPE: ${{ matrix.build_type }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v3
with:
path: tmp_install/
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
# actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
- name: Prepare postgres artifact
run: tar -C tmp_install/ -czf ./pg.tgz .
- name: Upload postgres artifact
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: ./pg.tgz
build-neon:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-postgres ]
strategy:
fail-fast: false
matrix:
@@ -39,80 +82,80 @@ jobs:
submodules: true
fetch-depth: 1
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
#
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
#
- name: Set env variables
- name: Get postgres artifact for restoration
uses: actions/download-artifact@v3
with:
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: ./postgres-artifact/
- name: Extract postgres artifact
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FEATURES=""
CARGO_FLAGS=""
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FEATURES="--features profiling"
CARGO_FLAGS="--release $CARGO_FEATURES"
fi
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
mkdir ./tmp_install/
tar -xf ./postgres-artifact/pg.tgz -C ./tmp_install/
rm -rf ./postgres-artifact/
# Don't include the ~/.cargo/registry/src directory. It contains just
# uncompressed versions of the crates in ~/.cargo/registry/cache
# directory, and it's faster to let 'cargo' to rebuild it from the
# compressed crates.
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
!~/.cargo/registry/src
~/.cargo/git/
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v3
with:
path: |
tmp_install/
build/src/test/regress/*.so
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
- name: Run cargo build
run: |
${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
- name: Run cargo test
run: |
${cov_prefix} cargo test $CARGO_FLAGS
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
- name: Install rust binaries
run: |
# Install target binaries
mkdir -p /tmp/neon/bin/
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/neon/bin/
mkdir -p /tmp/neon/test_bin/
mkdir -p /tmp/neon/etc/
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
# Install target binaries
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
@@ -121,39 +164,22 @@ jobs:
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/test_bin/
test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
# We don't need debug symbols for code coverage, so strip them out to make
# the artifact smaller.
strip "$SRC" -o "$DST"
cp "$SRC" "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
- name: Install postgres binaries
run: |
cp -a tmp_install /tmp/neon/pg_install
# Include modules needed by the Postgres regression tests
mkdir -p /tmp/neon/pg_build/src/test/regress
cp -a build/src/test/regress/*.so /tmp/neon/pg_build/src/test/regress
run: cp -a tmp_install /tmp/neon/pg_install
- name: Prepare neon artifact
run: ZSTD_NBTHREADS=0 tar -C /tmp/neon/ -cf ./neon.tar.zst --zstd .
run: tar -C /tmp/neon/ -czf ./neon.tgz .
- name: Upload neon binaries
uses: actions/upload-artifact@v3
@@ -161,7 +187,7 @@ jobs:
retention-days: 7
if-no-files-found: error
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon.tar.zst
path: ./neon.tgz
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
@@ -272,10 +298,9 @@ jobs:
with:
path: |
~/.cargo/registry/
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
@@ -286,7 +311,7 @@ jobs:
- name: Extract Neon artifact
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tar.zst -C /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Restore coverage data
@@ -365,253 +390,3 @@ jobs:
\"remote_repo\": \"${{ github.repository }}\"
}
}"
docker-image:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build neon Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
pull: true
push: true
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
docker-image-compute:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: false
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:local
- name: Push compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: true
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
- name: Build compute-node Docker image
uses: docker/build-push-action@v2
with:
context: ./vendor/postgres/
build-args:
COMPUTE_TOOLS_TAG=local
push: true
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
matrix-include: ${{ steps.set-matrix.outputs.include }}
steps:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$STAGING]"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$PRODUCTION]"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# We need both storage **and** compute images for deploy, because control plane
# picks the compute version based on the storage version. If it notices a fresh
# storage it may bump the compute version. And if compute image failed to build
# it may break things badly.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Setup ansible
run: |
pip install --progress-bar off --user ansible boto3
- name: Redeploy
run: |
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
eval $(ssh-agent)
echo "${{ secrets.TELEPORT_SSH_KEY }}" | tr -d '\n'| base64 --decode >ssh-key
echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
# to run all deploy jobs consistently.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
env:
KUBECONFIG: .kubeconfig
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- name: Setup helm v3
run: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
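A note on the `cov_prefix` change that runs through the build steps earlier in this workflow: one side of the diff keeps the coverage wrapper in a plain string expanded as `${cov_prefix}`, the other uses a bash array expanded as `"${cov_prefix[@]}"`. The sketch below (illustrative commands only) shows why the array form is the safer pattern: an empty array expands to zero words, so the wrapped command runs unmodified, with no stray empty argument and no word-splitting surprises.

```
# Empty array => no prefix; non-empty array => prefixed command.
cov_prefix=()                        # e.g. release build, no wrapper
"${cov_prefix[@]}" cargo --version   # runs `cargo --version` as-is

cov_prefix=(scripts/coverage run)    # e.g. debug build, run under the wrapper
"${cov_prefix[@]}" cargo --version   # runs `scripts/coverage run cargo --version`
```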

View File

@@ -11,9 +11,8 @@ defaults:
shell: bash -ex {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
@@ -98,10 +97,9 @@ jobs:
with:
path: |
~/.cargo/registry
!~/.cargo/registry/src
~/.cargo/git
target
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh

View File

@@ -13,9 +13,8 @@ on:
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
@@ -49,6 +48,9 @@ jobs:
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
shell: bash -ex {0}
run: |
# Test framework expects we have psql binary;

Cargo.lock (generated, 780 lines changed)

File diff suppressed because it is too large

View File

@@ -11,10 +11,17 @@ members = [
]
[profile.release]
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
strip = "debuginfo"
[profile.perf]
inherits = "release"
debug = true
[profile.release-coverage]
inherits = "release"
overflow-checks = true
debug-assertions = true
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
[patch.crates-io]
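For reference on the `perf` and `release-coverage` profiles defined above: custom named profiles are selected with Cargo's `--profile` flag (available on the 1.58 toolchain used elsewhere in this compare), and each gets its own output directory under `target/`. A usage sketch, assuming this reworked Cargo.toml:

```
# Build with the custom profiles from the root Cargo.toml.
cargo build --profile perf               # artifacts under target/perf/
cargo build --profile release-coverage   # artifacts under target/release-coverage/
```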

View File

@@ -1,8 +1,3 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./tmp_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/tmp_install
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
@@ -46,73 +41,69 @@ CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
#
# Top level Makefile to build Neon and PostgreSQL
# Top level Makefile to build Zenith and PostgreSQL
#
.PHONY: all
all: neon postgres
all: zenith postgres
### Neon Rust bits
### Zenith Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers
+@echo "Compiling Neon"
.PHONY: zenith
zenith: postgres-headers
+@echo "Compiling Zenith"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts
#
# Postgres is built in the 'build' directory, and installed into
# $(POSTGRES_INSTALL_DIR), which defaults to 'tmp_install'
#
build/config.status:
tmp_install/build/config.status:
+@echo "Configuring postgres build"
mkdir -p build
(cd build && \
$(ROOT_PROJECT_DIR)/vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
mkdir -p tmp_install/build
(cd tmp_install/build && \
../../vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
$(SECCOMP) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR)) > configure.log)
--prefix=$(abspath tmp_install) > configure.log)
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: build/config.status
postgres-configure: tmp_install/build/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/include
# Install the PostgreSQL header files into tmp_install/include
.PHONY: postgres-headers
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C build/src/include MAKELEVEL=0 install
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/neon
.PHONY: postgres
postgres: postgres-configure \
postgres-headers # to prevent `make install` conflicts with neon's `postgres-headers`
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+@echo "Compiling PostgreSQL"
$(MAKE) -C build MAKELEVEL=0 install
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
+@echo "Compiling contrib/neon"
$(MAKE) -C build/contrib/neon install
$(MAKE) -C tmp_install/build/contrib/neon install
+@echo "Compiling contrib/neon_test_utils"
$(MAKE) -C build/contrib/neon_test_utils install
$(MAKE) -C tmp_install/build/contrib/neon_test_utils install
+@echo "Compiling pg_buffercache"
$(MAKE) -C build/contrib/pg_buffercache install
$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
+@echo "Compiling pageinspect"
$(MAKE) -C build/contrib/pageinspect install
$(MAKE) -C tmp_install/build/contrib/pageinspect install
.PHONY: postgres-clean
postgres-clean:
$(MAKE) -C build MAKELEVEL=0 clean
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean:
cd build && $(MAKE) clean
cd tmp_install/build && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
.PHONY: distclean
distclean:
rm -rf build $(POSTGRES_INSTALL_DIR)
rm -rf tmp_install
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
@@ -121,4 +112,4 @@ fmt:
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
ln -s -f ../../pre-commit.py .git/hooks/pre-commit
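As a usage note for the Makefile targets touched above (a sketch, assuming the variant of the Makefile that exposes `POSTGRES_INSTALL_DIR`): a local build runs the Postgres and Rust halves via separate targets, and the install prefix can be redirected away from the default `./tmp_install`.

```
# Build vendored Postgres into a custom prefix, then the Rust components.
make postgres POSTGRES_INSTALL_DIR=$PWD/pg_install -j"$(nproc)"
make neon    # the other side of this diff names the same target `zenith`
```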

View File

@@ -295,7 +295,7 @@ impl ComputeNode {
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(self, &mut client)?;
handle_grants(&self.spec, &mut client)?;
create_writablity_check_data(&mut client)?;
// 'Close' connection

View File

@@ -349,11 +349,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// Grant CREATE ON DATABASE to the database owner
// to allow clients create trusted extensions.
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
@@ -382,47 +380,5 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
client.execute(query.as_str(), &[])?;
}
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
let mut db_connstr = node.connstr.clone();
for db in &node.spec.cluster.databases {
// database name is always the last and the only component of the path
db_connstr.set_path(&db.name);
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it owner of the `public` schema will be `cloud_admin`
// and database owner cannot do anything with it. SQL procedure ensures
// that it won't error out if schema `public` doesn't exist.
let alter_query = format!(
"DO $$\n\
DECLARE\n\
schema_owner TEXT;\n\
BEGIN\n\
IF EXISTS(\n\
SELECT nspname\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
)\n\
THEN\n\
SELECT nspowner::regrole::text\n\
FROM pg_catalog.pg_namespace\n\
WHERE nspname = 'public'\n\
INTO schema_owner;\n\
\n\
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\
THEN\n\
ALTER SCHEMA public OWNER TO {};\n\
END IF;\n\
END IF;\n\
END\n\
$$;",
db.owner.quote()
);
db_client.simple_query(&alter_query)?;
}
Ok(())
}
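The SQL removed in this hunk adjusts ownership of the `public` schema per database. For orientation, the core of that check can be run by hand; the connection string below is a placeholder, and the query is the same one embedded in the removed Rust code.

```
# Hypothetical manual check: who currently owns the `public` schema?
psql "$DB_CONNSTR" -c \
  "SELECT nspowner::regrole::text FROM pg_catalog.pg_namespace WHERE nspname = 'public';"
```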

docs/.gitignore (vendored, 1 line changed)
View File

@@ -1 +0,0 @@
book

docs/README.md (14 lines changed)
View File

@@ -0,0 +1,14 @@
# Zenith documentation
## Table of contents
- [authentication.md](authentication.md) — pageserver JWT authentication.
- [docker.md](docker.md) — Docker images and building pipeline.
- [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
- [safekeeper/README.md](/safekeeper/README.md) — WAL service overview.
- [core_changes.md](core_changes.md) - Description of Zenith changes in Postgres core

View File

@@ -1,84 +0,0 @@
# Summary
[Introduction]()
- [Separation of Compute and Storage](./separation-compute-storage.md)
# Architecture
- [Compute]()
- [WAL proposer]()
- [WAL Backpressure]()
- [Postgres changes](./core_changes.md)
- [Pageserver](./pageserver.md)
- [Services](./pageserver-services.md)
- [Thread management](./pageserver-thread-mgmt.md)
- [WAL Redo](./pageserver-walredo.md)
- [Page cache](./pageserver-pagecache.md)
- [Storage](./pageserver-storage.md)
- [Datadir mapping]()
- [Layer files]()
- [Branching]()
- [Garbage collection]()
- [Cloud Storage]()
- [Processing a GetPage request](./pageserver-processing-getpage.md)
- [Processing WAL](./pageserver-processing-wal.md)
- [Management API]()
- [Tenant Rebalancing]()
- [WAL Service](walservice.md)
- [Consensus protocol](safekeeper-protocol.md)
- [Management API]()
- [Rebalancing]()
- [Control Plane]()
- [Proxy]()
- [Source view](./sourcetree.md)
- [docker.md](./docker.md) — Docker images and building pipeline.
- [Error handling and logging]()
- [Testing]()
- [Unit testing]()
- [Integration testing]()
- [Benchmarks]()
- [Glossary](./glossary.md)
# Uncategorized
- [authentication.md](./authentication.md)
- [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [settings.md](./settings.md)
#FIXME: move these under sourcetree.md
#- [pageserver/README.md](/pageserver/README.md)
#- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)
#- [test_runner/README.md](/test_runner/README.md)
#- [safekeeper/README.md](/safekeeper/README.md)
# RFCs
- [RFCs](./rfcs/README.md)
- [002-storage](rfcs/002-storage.md)
- [003-laptop-cli](rfcs/003-laptop-cli.md)
- [004-durability](rfcs/004-durability.md)
- [005-zenith_local](rfcs/005-zenith_local.md)
- [006-laptop-cli-v2-CLI](rfcs/006-laptop-cli-v2-CLI.md)
- [006-laptop-cli-v2-repository-structure](rfcs/006-laptop-cli-v2-repository-structure.md)
- [007-serverless-on-laptop](rfcs/007-serverless-on-laptop.md)
- [008-push-pull](rfcs/008-push-pull.md)
- [009-snapshot-first-storage-cli](rfcs/009-snapshot-first-storage-cli.md)
- [009-snapshot-first-storage](rfcs/009-snapshot-first-storage.md)
- [009-snapshot-first-storage-pitr](rfcs/009-snapshot-first-storage-pitr.md)
- [010-storage_details](rfcs/010-storage_details.md)
- [011-retention-policy](rfcs/011-retention-policy.md)
- [012-background-tasks](rfcs/012-background-tasks.md)
- [013-term-history](rfcs/013-term-history.md)
- [014-safekeepers-gossip](rfcs/014-safekeepers-gossip.md)
- [014-storage-lsm](rfcs/014-storage-lsm.md)
- [015-storage-messaging](rfcs/015-storage-messaging.md)
- [016-connection-routing](rfcs/016-connection-routing.md)
- [cluster-size-limits](rfcs/cluster-size-limits.md)

View File

@@ -1,5 +0,0 @@
[book]
language = "en"
multilingual = false
src = "."
title = "Neon architecture"

View File

@@ -1,519 +1,202 @@
# Postgres core changes
1. Add t_cid to XLOG record
- Why?
The cmin/cmax on a heap page is a real bummer. I don't see any other way to fix that than bite the bullet and modify the WAL-logging routine to include the cmin/cmax.
This lists all the changes that have been made to the PostgreSQL
source tree, as a somewhat logical set of patches. The long-term goal
is to eliminate all these changes, by submitting patches to upstream
and refactoring code into extensions, so that you can run unmodified
PostgreSQL against Neon storage.
To recap, the problem is that the XLOG_HEAP_INSERT record does not include the command id of the inserted row, and the same goes for deletion/update. So in the primary, a row is inserted with the current xmin + cmin, but in the replica the cmin is always set to 1. That works, because the command id is only relevant to the inserting transaction itself; after commit/abort, no one cares about it anymore.
In Neon, we run PostgreSQL in the compute nodes, but we also run a special WAL redo process in the
page server. We currently use the same binary for both, with --wal-redo runtime flag to launch it in
the WAL redo mode. Some PostgreSQL changes are needed in the compute node, while others are just for
the WAL redo process.
- Alternatives?
I don't know
In addition to core PostgreSQL changes, there is a Neon extension in contrib/neon, to hook into the
smgr interface. Once all the core changes have been submitted to upstream or eliminated some other
way, the extension could live outside the postgres repository and build against vanilla PostgreSQL.
2. Add PD_WAL_LOGGED.
- Why?
Postgres sometimes writes data to a page before it is WAL-logged. If such a page is swapped out, we will lose this change. The problem is currently solved by setting the PD_WAL_LOGGED bit in the page header. When a page without this bit set is written to the SMGR, it is forced to be written to the WAL as an FPI using the log_newpage_copy() function.
Below is a list of all the PostgreSQL source code changes, categorized into changes needed for
compute, and changes needed for the WAL redo process:
There was a wrong assumption that this can happen only during construction of some exotic indexes (like GiST). That is not true: the same situation can happen with COPY, VACUUM, and when record hint bits are set.
# Changes for Compute node
- Discussion:
https://discord.com/channels/869525774699462656/882681420986851359
## Add t_cid to heap WAL records
- Alternatives:
Do not store this flag in the page header, but associate the bit with the shared buffer. Logically this is more correct, but in practice it gives no advantage in either space or CPU overhead.
```
src/backend/access/heap/heapam.c | 26 +-
src/include/access/heapam_xlog.h | 6 +-
```
We have added a new t_cid field to heap WAL records. This changes the WAL record format, making Neon WAL format incompatible with vanilla PostgreSQL!
3. XLogReadBufferForRedo does not always load and pin the requested buffer, so we need to add extra checks that the buffer is really pinned. Also, do not use BufferGetBlockNumber on a buffer returned by XLogReadBufferForRedo.
- Why?
XLogReadBufferForRedo does not pin pages that were not requested by wal-redo. This is specific to the wal-redo Postgres.
### Problem we're trying to solve
- Alternatives?
No
The problem is that the XLOG_HEAP_INSERT record does not include the command id of the inserted row. And same with deletion/update. So in the primary, a row is inserted with current xmin + cmin. But in the replica, the cmin is always set to 1. That works in PostgreSQL, because the command id is only relevant to the inserting transaction itself. After commit/abort, no one cares about it anymore. But with Neon, we rely on WAL replay to reconstruct the page, even while the original transaction is still running.
### How to get rid of the patch
4. Eliminate reporting of some warnings related to hint bits, for example
"page is not marked all-visible but visibility map bit is set in relation".
- Why?
Hint bits may not be WAL-logged.
Bite the bullet and submit the patch to PostgreSQL, to add the t_cid to the WAL records. It makes the WAL records larger, which could make this unpopular in the PostgreSQL community. However, it might simplify some logical decoding code; Andres Freund briefly mentioned in PGCon 2022 discussion on Heikki's Neon presentation that logical decoding currently needs to jump through some hoops to reconstruct the same information.
- Alternative?
Always wal log any page changes.
### Alternatives
Perhaps we could write an extra WAL record with the t_cid information, when a page is evicted that contains rows that were touched a transaction that's still running. However, that seems very complicated.
5. Maintain last written LSN.
- Why?
When the compute node requests a page from the page server, we need to specify an LSN. Ideally it should be the LSN
of the WAL record that last updated this page, but we do not know it, because we do not have the page.
We could use the current WAL flush position, but in that case there is a high probability that the page server
will be blocked until that piece of WAL is delivered.
As a better approximation we can keep the max LSN of written pages. It would be better to take into account only the LSNs
of evicted pages, but the SMGR API doesn't provide such knowledge.
## ginfast.c
- Alternatives?
Maintain map of LSNs of evicted pages.
```
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index e0d9940946..2d964c02e9 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -285,6 +285,17 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
memset(&sublist, 0, sizeof(GinMetaPageData));
makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+ if (metadata->head != InvalidBlockNumber)
+ {
+ /*
+ * ZENITH: Get buffer before XLogBeginInsert() to avoid recursive call
+ * of XLogBeginInsert(). Reading a new buffer might evict a dirty page from
+ * the buffer cache, and if that page happens to be an FSM or VM page, zenith_write()
+ * will try to WAL-log an image of the page.
+ */
+ buffer = ReadBuffer(index, metadata->tail);
+ }
+
if (needWal)
XLogBeginInsert();
@@ -316,7 +327,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
data.prevTail = metadata->tail;
data.newRightlink = sublist.head;
- buffer = ReadBuffer(index, metadata->tail);
LockBuffer(buffer, GIN_EXCLUSIVE);
page = BufferGetPage(buffer);
```
The problem is explained in the comment above
6. Launching Postgres without WAL.
- Why?
According to the Zenith architecture, the compute node is stateless. So when we launch a
compute node, we need to provide some dummy PG_DATADIR. Relation pages
can be requested on demand from the page server, but Postgres still needs some non-relational data:
control and configuration files, SLRUs, ...
This is currently implemented using basebackup (not to be confused with pg_basebackup), which is created
by the pageserver. This tarball includes config/control files, SLRUs, and the required directories.
Since the pageserver does not have the original (non-scattered) WAL segments, it includes in
the tarball a dummy WAL segment that contains only a SHUTDOWN_CHECKPOINT record at the beginning of the segment,
whose redo field points to the end of WAL. This allows the checkpoint record to be loaded in a more or less
standard way with minimal changes to Postgres, but some special handling is then needed,
including restoring the previous record position from the zenith.signal file.
We also have to correctly initialize the header of the last WAL page (pointed to by checkpoint.redo)
to pass the checks performed by XLogReader.
### How to get rid of the patch
- Alternatives?
We could skip including a fake WAL segment in the tarball altogether and modify xlog.c to load the checkpoint record
in a special way, but that would only increase the number of changes in xlog.c.
Can we stop WAL-logging FSM or VM pages? Or delay the WAL logging until we're out of the critical
section or something.
7. Add redo_read_buffer_filter callback to XLogReadBufferForRedoExtended
- Why?
We need a way for the wal-redo Postgres to ignore pages that were not requested by the pageserver.
That way the wal-redo Postgres reconstructs only the requested page and returns BLK_DONE for all others,
which means that recovery for them is not needed.
Maybe some bigger rewrite of FSM and VM would help to avoid WAL-logging FSM and VM page images?
- Alternatives?
No
8. Enforce WAL logging of sequence updates.
- Why?
For performance reasons Postgres doesn't want to log each fetch of a value from a sequence,
so we pre-log a few fetches in advance. In the event of a crash we can lose
(skip over) as many values as we pre-logged.
But this doesn't work with Zenith, because the page with the sequence value can be evicted from the buffer cache
and we will get a gap in sequence values even without a crash.
## Mark index builds that use buffer manager without logging explicitly
- Alternatives:
Do not try to preserve sequential order but avoid performance penalty.
```
src/backend/access/gin/gininsert.c | 7 +
src/backend/access/gist/gistbuild.c | 15 +-
src/backend/access/spgist/spginsert.c | 8 +-
also some changes in src/backend/storage/smgr/smgr.c
```
9. Treat unlogged tables as normal (permanent) tables.
- Why?
Unlogged tables are not transient, so they have to survive a node restart (unlike temporary tables).
But since the compute node is stateless, we need to persist their data to the storage node,
and that can only be done through the WAL.
When a GIN index is built, for example, it is built by inserting the entries into the index more or
less normally, but without WAL-logging anything. After the index has been built, we iterate through
all pages and write them to the WAL. That doesn't work for Neon, because if a page is not WAL-logged
and is evicted from the buffer cache, it is lost. We have a check to catch that in the Neon
extension. To fix that, we've added a few functions to track explicitly when we're performing such
an operation: `smgr_start_unlogged_build`, `smgr_finish_unlogged_build_phase_1` and
`smgr_end_unlogged_build`.
### How to get rid of the patch
I think it would make sense to be more explicit about that in PostgreSQL too. So extract these
changes to a patch and post to pgsql-hackers.
- Alternatives?
* Store unlogged tables locally (violates requirement of stateless compute nodes).
* Prohibit unlogged tables at all.
## Track last-written page LSN
10. Support start Postgres in wal-redo mode
- Why?
To be able to apply WAL records and reconstruct pages on the page server.
```
src/backend/commands/dbcommands.c | 17 +-
- Alternatives?
* Rewrite redo handlers in Rust
* Do not reconstruct pages at page server at all and do it at compute node.
Also one call to SetLastWrittenPageLSN() in spginsert.c, maybe elsewhere too
```
Whenever a page is evicted from the buffer cache, we remember its LSN, so that we can use the same
LSN in the GetPage@LSN request when reading the page back from the page server. The value is
conservative: it would be correct to always use the last-inserted LSN, but it would be slow because
then the page server would need to wait for the recent WAL to be streamed and processed, before
responding to any GetPage@LSN request.
11. WAL proposer
- Why?
The WAL proposer communicates with the safekeepers and ensures WAL durability through quorum writes.
It is currently implemented as a patch to the standard WAL sender.
The last-written page LSN is mostly tracked in the smgrwrite() function, without core code changes,
but there are a few exceptions where we've had to add explicit calls to the Neon-specific
SetLastWrittenPageLSN() function.
- Alternatives?
Could be moved to an extension if some extra callbacks were added to the walsender code.
There's an open PR to track the LSN in a more-fine grained fashion:
https://github.com/neondatabase/postgres/pull/177
PostgreSQL v15 introduces a new method to do CREATE DATABASE that WAL-logs the database instead of
relying on copying files and a checkpoint. With that method, we probably won't need any special handling.
The old method is still available, though.
12. Secure Computing BPF API wrapper.
- Why?
Pageserver delegates complex WAL decoding duties to Postgres,
which means that the latter might fall victim to carefully designed
malicious WAL records and start doing harmful things to the system.
To prevent this, it has been decided to limit possible interactions
with the outside world using the Secure Computing BPF mode.
- Alternatives:
* Rewrite redo handlers in Rust.
* Add more checks to guarantee correctness of WAL records.
* Move seccomp.c to extension
* Many other approaches to neutralizing vulnerabilities from incorrect WAL records have been discussed.
13. Callbacks for replica feedbacks
- Why?
Allowing the walproposer to interact with the walsender code.
- Alternatives
Copy walsender code to walproposer.
14. Support multiple SMGR implementations.
- Why?
Postgres provides an abstract API for the storage manager, but it has only one implementation
and provides no way to replace it with a custom storage manager.
- Alternatives?
None.
15. Calculate database size as sum of all database relations.
- Why?
Postgres calculates the database size by traversing the data directory,
but since the Zenith compute node is stateless, we cannot do that.
- Alternatives?
Send this request directly to the pageserver and calculate the real (physical) size
of the Zenith representation of the database/timeline, rather than summing the logical sizes of all relations.
### How to get rid of the patch
Wait until v15?
-----------------------------------------------
Not currently committed but proposed:
1. Disable ring buffer buffer manager strategies
- Why?
Postgres tries to avoid cache flushing by bulk operations (copy, seqscan, vacuum, ...):
even if there is free space in the buffer cache, pages may be evicted.
The negative effect of this can be somewhat compensated by the file system cache, but in case of Zenith
the cost of requesting a page from the page server is much higher.
## Cache relation sizes
- Alternatives?
Instead of just prohibiting the ring buffer we may try to implement a more flexible eviction policy,
for example copying an evicted page from the ring buffer to some other buffer if there is free space
in the buffer cache.
The Neon extension contains a little cache for smgrnblocks() and smgrexists() calls, to avoid going
to the page server every time. It might be useful to cache those in PostgreSQL, maybe in the
relcache? (I think we do cache nblocks in relcache already, check why that's not good enough for
Neon)
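For illustration, such a cache could be as small as the following hedged sketch; all names and the exact layout are assumptions, not the actual Neon extension code:
```
#include "postgres.h"
#include "storage/block.h"
#include "storage/relfilenode.h"
#include "utils/hsearch.h"

/* Hedged sketch: per-backend cache of relation sizes keyed by relfilenode+fork. */
typedef struct RelSizeCacheKey
{
    RelFileNode rnode;
    ForkNumber  forknum;
} RelSizeCacheKey;

typedef struct RelSizeCacheEntry
{
    RelSizeCacheKey key;
    BlockNumber     nblocks;
} RelSizeCacheEntry;

static HTAB *relsize_cache;     /* created lazily with hash_create() */

static bool
lookup_cached_nblocks(RelFileNode rnode, ForkNumber forknum, BlockNumber *nblocks)
{
    RelSizeCacheKey key;
    RelSizeCacheEntry *entry;

    memset(&key, 0, sizeof(key));
    key.rnode = rnode;
    key.forknum = forknum;

    entry = hash_search(relsize_cache, &key, HASH_FIND, NULL);
    if (entry == NULL)
        return false;           /* miss: fall back to asking the page server */

    *nblocks = entry->nblocks;
    return true;
}
```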
2. Disable marking page as dirty when hint bits are set.
- Why?
Postgres has to modify a page twice: the first time when some tuple is updated and the second time when
hint bits are set. WAL-logging hint bit updates requires an FPI (full-page image), which significantly increases the size of the WAL.
- Alternatives?
Add a special WAL record for setting page hints.
## Misc change in vacuumlazy.c
3. Prefetching
- Why?
Since pages in Zenith are loaded on demand, to reduce node startup time
and also speed up some massive queries we need some mechanism for bulk loading to
reduce the page request round-trip overhead.
```
index 8aab6e324e..c684c4fbee 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1487,7 +1487,10 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
else if (all_visible_according_to_vm && !PageIsAllVisible(page)
&& VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
{
- elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
+ /* ZENITH-XXX: all visible hint is not wal-logged
+ * FIXME: Replay visibilitymap changes in pageserver
+ */
+ elog(DEBUG1, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
vacrel->relname, blkno);
visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
VISIBILITYMAP_VALID_BITS);
```
Currently Postgres supports prefetching only for bitmap scans.
In Zenith we also use prefetching for sequential and index scans. For sequential scans we prefetch
some number of following pages. For index scans we prefetch the heap pages addressed by TIDs.
Is this still needed? If that WARNING happens, it looks like potential corruption that we should
fix!
## Use buffer manager when extending VM or FSM
```
src/backend/storage/freespace/freespace.c | 14 +-
src/backend/access/heap/visibilitymap.c | 15 +-
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index e198df65d8..addfe93eac 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -652,10 +652,19 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
/* Now extend the file */
while (vm_nblocks_now < vm_nblocks)
{
- PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
+ /*
+ * ZENITH: Initialize VM pages through buffer cache to prevent loading
+ * them from pageserver.
+ */
+ Buffer buffer = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, P_NEW,
+ RBM_ZERO_AND_LOCK, NULL);
+ Page page = BufferGetPage(buffer);
+
+ PageInit((Page) page, BLCKSZ, 0);
+ PageSetChecksumInplace(page, vm_nblocks_now);
+ MarkBufferDirty(buffer);
+ UnlockReleaseBuffer(buffer);
- smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
- pg.data, false);
vm_nblocks_now++;
}
```
### Problem we're trying to solve
???
### How to get rid of the patch
Maybe this would be a reasonable change in PostgreSQL too?
## Allow startup without reading checkpoint record
In Neon, the compute node is stateless. So when we launch a compute node, we need to provide
some dummy PG_DATADIR. Relation pages can be requested on demand from the page server. But Postgres
still needs some non-relational data: control and configuration files, SLRUs, etc. This is currently
implemented using a basebackup (not to be confused with pg_basebackup) which is created by the pageserver.
The tarball includes the config/control files, SLRUs, and required directories.
As pageserver does not have the original WAL segments, the basebackup tarball includes an empty WAL
segment to bootstrap the WAL writing, but it doesn't contain the checkpoint record. There are some
changes in xlog.c, to allow starting the compute node without reading the last checkpoint record
from WAL.
This includes code to read the `zenith.signal` file, which tells the startup code the LSN to start
at. When the `zenith.signal` file is present, the startup uses that LSN instead of the last
checkpoint's LSN. The system is known to be consistent at that LSN, without any WAL redo.
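For illustration only, the startup-side check could look like the following hedged sketch; the file format (a single textual LSN) is an assumption, only the file name and its purpose come from the description above:
```
#include "postgres.h"
#include <stdio.h>
#include "access/xlogdefs.h"

/* Hedged sketch: if zenith.signal exists, read the LSN to start at from it
 * instead of locating and reading the last checkpoint record. */
static bool
read_zenith_signal_lsn(XLogRecPtr *start_lsn)
{
    FILE   *f = fopen("zenith.signal", "r");
    uint32  hi;
    uint32  lo;
    bool    found = false;

    if (f == NULL)
        return false;           /* regular startup path */

    if (fscanf(f, "%X/%X", &hi, &lo) == 2)
    {
        *start_lsn = ((uint64) hi << 32) | lo;
        found = true;
    }
    fclose(f);
    return found;
}
```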
### How to get rid of the patch
???
### Alternatives
Include a fake checkpoint record in the tarball. Creating fake WAL is a bit risky, though; I'm
afraid it might accidentally get streamed to the safekeepers and overwrite or corrupt the real WAL.
## Disable sequence caching
```
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 0415df9ccb..9f9db3c8bc 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -53,7 +53,9 @@
* so we pre-log a few fetches in advance. In the event of
* crash we can lose (skip over) as many values as we pre-logged.
*/
-#define SEQ_LOG_VALS 32
+/* Zenith XXX: to ensure sequence order of sequence in Zenith we need to WAL log each sequence update. */
+/* #define SEQ_LOG_VALS 32 */
+#define SEQ_LOG_VALS 0
```
For performance reasons, Postgres doesn't want to log each fetch of a value from a sequence, so
it pre-logs a few fetches in advance. In the event of a crash we can lose (skip over) as many values
as we pre-logged. But with Neon, because the page holding the sequence value can be evicted from the
buffer cache, we can get a gap in sequence values even without a crash.
### How to get rid of the patch
Maybe we can just remove it, and accept the gaps. Or add some special handling for sequence
relations in the Neon extension, to WAL log the sequence page when it's about to be evicted. It
would be weird if the sequence moved backwards though, think of PITR.
Or add a GUC to PostgreSQL for the number of values to pre-log, and force it to 1 in Neon.
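For illustration, the GUC variant could look roughly like this hedged sketch; the GUC name is made up, and it is shown as a custom variable purely for compactness:
```
#include "postgres.h"
#include "utils/guc.h"

/* Hedged sketch: make the number of pre-logged sequence values configurable,
 * so Neon could force it down to 1 (WAL-log every fetch). */
static int seq_log_vals = 32;   /* would replace the SEQ_LOG_VALS constant */

void
define_seq_log_vals_guc(void)
{
    DefineCustomIntVariable("neon.seq_log_vals",    /* assumed name */
                            "Number of sequence fetches to pre-log in WAL.",
                            NULL,
                            &seq_log_vals,
                            32,     /* boot value: current PostgreSQL behaviour */
                            1,      /* minimum: log every fetch */
                            1024,   /* arbitrary upper bound */
                            PGC_SUSET,
                            0,
                            NULL, NULL, NULL);
}
```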
## Walproposer
```
src/Makefile | 1 +
src/backend/replication/libpqwalproposer/Makefile | 37 +
src/backend/replication/libpqwalproposer/libpqwalproposer.c | 416 ++++++++++++
src/backend/postmaster/bgworker.c | 4 +
src/backend/postmaster/postmaster.c | 6 +
src/backend/replication/Makefile | 4 +-
src/backend/replication/walproposer.c | 2350 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
src/backend/replication/walproposer_utils.c | 402 +++++++++++
src/backend/replication/walreceiver.c | 7 +
src/backend/replication/walsender.c | 320 ++++++---
src/backend/storage/ipc/ipci.c | 6 +
src/include/replication/walproposer.h | 565 ++++++++++++++++
```
The WAL proposer communicates with the safekeepers and ensures WAL durability by quorum writes. It is
currently implemented as a patch to the standard WAL sender.
### How to get rid of the patch
Refactor into an extension. Submit hooks or APIs into upstream if necessary.
@MMeent did some work on this already: https://github.com/neondatabase/postgres/pull/96
## Ignore unexpected data beyond EOF in bufmgr.c
```
@@ -922,11 +928,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
if (!PageIsNew((Page) bufBlock))
- ereport(ERROR,
+ {
+ // XXX-ZENITH
+ MemSet((char *) bufBlock, 0, BLCKSZ);
+ ereport(DEBUG1,
(errmsg("unexpected data beyond EOF in block %u of relation %s",
blockNum, relpath(smgr->smgr_rnode, forkNum)),
errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
-
+ }
/*
* We *must* do smgrextend before succeeding, else the page will not
* be reserved by the kernel, and the next P_NEW call will decide to
```
PostgreSQL is a bit sloppy with extending relations. Usually, the relation is extended with zeros
first, then the page is filled, and finally the new page is WAL-logged. But if multiple backends extend
a relation at the same time, the pages can be WAL-logged in a different order.
I'm not sure what scenario exactly required this change in Neon, though.
### How to get rid of the patch
Submit patches to pgsql-hackers, to tighten up the WAL-logging around relation extension. It's a bit
confusing even in PostgreSQL. Maybe WAL log the intention to extend first, then extend the relation,
and finally WAL-log that the extension succeeded.
## Make smgr interface available to extensions
```
src/backend/storage/smgr/smgr.c | 203 +++---
src/include/storage/smgr.h | 72 +-
```
### How to get rid of the patch
Submit to upstream. This could be useful for the Disk Encryption patches too, or for compression.
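To give a flavour of what an extensible smgr API could look like, here is a hedged sketch; the idea of registering an alternative implementation is the patch's, but the exact names below are assumptions and the callback bodies are omitted:
```
#include "postgres.h"
#include "storage/smgr.h"

/* Forward declarations of the extension's implementations (bodies omitted). */
static void neon_open(SMgrRelation reln);
static bool neon_exists(SMgrRelation reln, ForkNumber forknum);
static void neon_read(SMgrRelation reln, ForkNumber forknum,
                      BlockNumber blocknum, char *buffer);
static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);

/* Hedged sketch: the extension supplies a function table resembling the
 * f_smgr dispatch table that is currently private to smgr.c. */
static const f_smgr neon_smgr = {
    .smgr_open = neon_open,
    .smgr_exists = neon_exists,
    .smgr_read = neon_read,
    .smgr_nblocks = neon_nblocks,
    /* ... remaining callbacks ... */
};

void
_PG_init(void)
{
    /* Assumed registration hook exposed by the patched smgr.c. */
    register_smgr(&neon_smgr);
}
```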
## Added relpersistence argument to smgropen()
```
src/backend/access/heap/heapam_handler.c | 2 +-
src/backend/catalog/storage.c | 10 +-
src/backend/commands/tablecmds.c | 2 +-
src/backend/storage/smgr/md.c | 4 +-
src/include/utils/rel.h | 3 +-
```
Neon needs to treat unlogged relations differently from others, so the smgrread(), smgrwrite() etc.
implementations need to know the 'relpersistence' of the relation. To get that information where
it's needed, we added the 'relpersistence' field to smgropen().
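For illustration, the changed declaration and a typical call site might look like this hedged sketch; the extra argument is what the text above describes, everything else is illustrative:
```
#include "postgres.h"
#include "storage/smgr.h"
#include "utils/rel.h"

/* Hedged sketch of the patched declaration; vanilla smgropen() takes only
 * (RelFileNode, BackendId). */
extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend,
                             char relpersistence);

/* A caller forwards pg_class.relpersistence, so that smgrread()/smgrwrite()
 * implementations can later treat unlogged relations specially. */
static SMgrRelation
open_rel_storage(Relation rel)
{
    return smgropen(rel->rd_node, rel->rd_backend,
                    rel->rd_rel->relpersistence);
}
```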
### How to get rid of the patch
Maybe 'relpersistence' would be useful in PostgreSQL for debugging purposes? Or simply for the
benefit of extensions like Neon. We should consider this in the patch to make the smgr API usable by
extensions.
## Alternatives
Currently in Neon, unlogged tables live on local disk in the compute node, and are wiped away on
compute node restart. One alternative would be to instead WAL-log even unlogged tables, essentially
ignoring the UNLOGGED option. Or prohibit UNLOGGED tables completely. But would we still need the
relpersistence argument to handle index builds? See item on "Mark index builds that use buffer
manager without logging explicitly".
## Use smgr and dbsize_hook for size calculations
```
src/backend/utils/adt/dbsize.c | 61 +-
```
In PostgreSQL, the rel and db-size functions scan the data directory directly. That won't work in Neon.
### How to get rid of the patch
Send patch to PostgreSQL, to use smgr API functions for relation size calculation instead. Maybe as
part of the general smgr API patch.
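A minimal sketch of the smgr-based calculation, assuming the size is derived from smgrnblocks() rather than from files on disk (names are illustrative):
```
#include "postgres.h"
#include "storage/smgr.h"
#include "utils/rel.h"

/* Hedged sketch: size of one relation fork computed through the smgr API,
 * which works even when there are no data files on local disk. */
static int64
relation_fork_size(Relation rel, ForkNumber forknum)
{
    SMgrRelation srel = RelationGetSmgr(rel);

    if (!smgrexists(srel, forknum))
        return 0;

    return (int64) smgrnblocks(srel, forknum) * BLCKSZ;
}
```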
# WAL redo process changes
Pageserver delegates complex WAL decoding duties to Postgres, which means that the latter might fall
victim to carefully designed malicious WAL records and start doing harmful things to the system. To
prevent this, the redo functions are executed in a separate process that is sandboxed with Linux
Secure Computing mode (see seccomp(2) man page).
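The kind of filter involved looks roughly like the following hedged sketch using libseccomp; the real allow-list in Neon's seccomp.c is longer and is not reproduced here:
```
#include <stdlib.h>
#include <seccomp.h>

/* Hedged sketch: kill the process on any syscall that is not explicitly
 * allowed; the WAL redo loop only needs to talk over its pipes and exit. */
static void
enter_seccomp_mode(void)
{
    scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_KILL);

    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(read), 0);
    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(write), 0);
    seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(exit_group), 0);

    if (seccomp_load(ctx) != 0)
        abort();                /* refuse to run unsandboxed */

    seccomp_release(ctx);
}
```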
As an alternative to having a separate WAL redo process, we could rewrite all redo handlers in Rust.
That is not really feasible: it would take a lot of effort to rewrite them, to ensure that the
rewrite is correct, and once that is done, it would be a lot of ongoing maintenance effort to
keep the rewritten code in sync over time, across new PostgreSQL versions. That's why we want to
leverage the existing PostgreSQL code.
Another alternative would be to harden all the PostgreSQL WAL redo functions so that it would be
safe to call them directly from Rust code, without needing the security sandbox. That's not feasible
for similar reasons as rewriting them in Rust.
## Don't replay changes in XLogReadBufferForRedo that are not for the target page we're replaying
```
src/backend/access/gin/ginxlog.c | 19 +-
Also some changes in xlog.c and xlogutils.c
Example:
@@ -415,21 +416,27 @@ ginRedoSplit(XLogReaderState *record)
if (!isLeaf)
ginRedoClearIncompleteSplit(record, 3);
- if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
+ action = XLogReadBufferForRedo(record, 0, &lbuffer);
+ if (action != BLK_RESTORED && action != BLK_DONE)
elog(ERROR, "GIN split record did not contain a full-page image of left page");
```
### Problem we're trying to solve
In PostgreSQL, if a WAL redo function calls XLogReadBufferForRedo() for a page that has a full-page
image, it always succeeds. However, the Neon WAL redo process is only concerned with replaying changes
to a single page, so replaying any changes for other pages is a waste of cycles. We have modified
XLogReadBufferForRedo() to return BLK_DONE for all other pages, to avoid the overhead. That is
unexpected by code like the above.
### How to get rid of the patch
Submit the changes to upstream, hope the community accepts them. There's no harm to PostgreSQL from
these changes, although it doesn't have any benefit either.
To make these changes useful to upstream PostgreSQL, we could implement a feature to look ahead the
WAL, and detect truncated relations. Even in PostgreSQL, it is a waste of cycles to replay changes
to pages that are later truncated away, so we could have XLogReadBufferForRedo() return BLK_DONE or
BLK_NOTFOUND for pages that are known to be truncated away later in the WAL stream.
### Alternatives
Maybe we could revert this optimization, and restore pages other than the target page too.
## Add predefined_sysidentifier flag to initdb
```
src/backend/bootstrap/bootstrap.c | 13 +-
src/bin/initdb/initdb.c | 4 +
And some changes in xlog.c
```
This is used to help with restoring a database when you have all the WAL, all the way back to
initdb, but no backup. You can reconstruct the missing backup by running initdb again, with the same
sysidentifier.
### How to get rid of the patch
Ignore it. This is only needed for disaster recovery, so once we've eliminated all other Postgres
patches, we can just keep it around as a patch or as separate branch in a repo.
# Not currently committed but proposed
## Disable ring buffer buffer manager strategies
### Why?
Postgres tries to avoid cache flushing by bulk operations (copy, seqscan, vacuum, ...):
even if there is free space in the buffer cache, pages may be evicted.
The negative effect of this can be somewhat compensated by the file system cache, but in Neon
the cost of requesting a page from the page server is much higher.
### Alternatives?
Instead of just prohibiting the ring buffer we may try to implement a more flexible eviction policy,
for example copying an evicted page from the ring buffer to some other buffer if there is free space
in the buffer cache.
## Disable marking page as dirty when hint bits are set.
### Why?
Postgres has to modify a page twice: the first time when some tuple is updated and the second time when
hint bits are set. WAL-logging hint bit updates requires an FPI (full-page image), which significantly increases the size of the WAL.
### Alternatives?
Add a special WAL record for setting page hints.
## Prefetching
### Why?
Since pages in Neon are loaded on demand, to reduce node startup time
and also speed up some massive queries we need some mechanism for bulk loading to
reduce the page request round-trip overhead.
Currently Postgres supports prefetching only for bitmap scans.
In Neon we should also use prefetching for sequential and index scans, because the OS is not doing it for us.
For sequential scans we could prefetch some number of following pages. For index scans we could prefetch
the heap pages addressed by TIDs.
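As an illustration, sequential-scan readahead can be expressed with the existing PrefetchBuffer() API; this is a hedged sketch, and the distance constant and helper are made up for the example:
```
#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

#define SEQSCAN_PREFETCH_DISTANCE 8     /* illustrative tuning knob */

/* Hedged sketch: while scanning block 'current', ask for the next few blocks
 * so they are already in flight when the scan reaches them. */
static void
prefetch_ahead(Relation rel, BlockNumber current, BlockNumber nblocks)
{
    BlockNumber limit = Min(current + SEQSCAN_PREFETCH_DISTANCE, nblocks);
    BlockNumber blkno;

    for (blkno = current + 1; blkno < limit; blkno++)
        PrefetchBuffer(rel, MAIN_FORKNUM, blkno);
}
```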
## Prewarming
### Why?
Short downtime (in other words, fast compute node restart) is one of the key features of Zenith.
But the overhead of a request-response round trip for loading pages on demand can make the warm-up of a freshly started node quite slow.
We can capture the state of the compute node's buffer cache and send a bulk request for those pages at startup.
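Conceptually the warm-up could be driven by something like the following hedged sketch; the dump-file format and names are assumptions, and a real implementation would more likely send one batched request to the page server:
```
#include "postgres.h"
#include <stdio.h>
#include "access/relation.h"
#include "storage/bufmgr.h"
#include "storage/lockdefs.h"

/* Hedged sketch: re-request previously cached blocks after a restart, reading
 * "<relation oid> <block number>" pairs from an assumed dump file. */
static void
prewarm_from_saved_list(const char *path)
{
    FILE       *f = fopen(path, "r");
    Oid         relid;
    BlockNumber blkno;

    if (f == NULL)
        return;

    while (fscanf(f, "%u %u", &relid, &blkno) == 2)
    {
        Relation rel = try_relation_open(relid, AccessShareLock);

        if (rel == NULL)
            continue;
        PrefetchBuffer(rel, MAIN_FORKNUM, blkno);
        relation_close(rel, AccessShareLock);
    }
    fclose(f);
}
```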
View File
@@ -1,9 +0,0 @@
# Page Service
The Page Service listens for GetPage@LSN requests from the Compute Nodes,
and responds with pages from the repository. On each GetPage@LSN request,
it calls into the Repository function
A separate thread is spawned for each incoming connection to the page
service. The page service uses the libpq protocol to communicate with
the client. The client is a Compute Postgres instance.
View File
@@ -1,8 +0,0 @@
# Page cache
TODO:
- shared across tenants
- store pages from layer files
- store pages from "in-memory layer"
- store materialized pages
View File
@@ -1,4 +0,0 @@
# Processing a GetPage request
TODO:
- sequence diagram that shows how a GetPage@LSN request is processed
View File
@@ -1,5 +0,0 @@
# Processing WAL
TODO:
- diagram that shows how incoming WAL is processed
- explain durability, what is fsync'd when, disk_consistent_lsn
View File
@@ -1,26 +0,0 @@
## Thread management
Each thread in the system is tracked by the `thread_mgr` module. It
maintains a registry of threads, and which tenant or timeline they are
operating on. This is used for safe shutdown of a tenant, or the whole
system.
### Handling shutdown
When a tenant or timeline is deleted, we need to shut down all threads
operating on it, before deleting the data on disk. A thread registered
in the thread registry can check if it has been requested to shut down,
by calling `is_shutdown_requested()`. For async operations, there's also
a `shutdown_watcher()` async task that can be used to wake up on shutdown.
### Sync vs async
The primary programming model in the page server is synchronous,
blocking code. However, there are some places where async code is
used. Be very careful when mixing sync and async code.
Async is primarily used to wait for incoming data on network
connections. For example, all WAL receivers have a shared thread pool,
with one async Task for each connection. Once a piece of WAL has been
received from the network, the thread calls the blocking functions in
the Repository to process the WAL.
View File
@@ -1,77 +0,0 @@
# WAL Redo
To reconstruct a particular page version from an image of the page and
some WAL records, the pageserver needs to replay the WAL records. This
happens on-demand, when a GetPage@LSN request comes in, or as part of
background jobs that reorganize data for faster access.
It's important that data cannot leak from one tenant to another, and
that a corrupt WAL record on one timeline doesn't affect other tenants
or timelines.
## Multi-tenant security
If you have direct access to the WAL directory, or if you have
superuser access to a running PostgreSQL server, it's easy to
construct a malicious or corrupt WAL record that causes the WAL redo
functions to crash, or to execute arbitrary code. That is not a
security problem for PostgreSQL; if you have superuser access, you
have full access to the system anyway.
The Neon pageserver, however, is multi-tenant. It needs to execute WAL
belonging to different tenants in the same system, and malicious WAL
in one tenant must not affect other tenants.
A separate WAL redo process is launched for each tenant, and the
process uses the seccomp(2) system call to restrict its access to the
bare minimum needed to replay WAL records. The process does not have
access to the filesystem or network. It can only communicate with the
parent pageserver process through a pipe.
If an attacker creates a malicious WAL record and injects it into the
WAL stream of a timeline, he can take control of the WAL redo process
in the pageserver. However, the WAL redo process cannot access the
rest of the system. And because there is a separate WAL redo process
for each tenant, the hijacked WAL redo process can only see WAL and
data belonging to the same tenant, which the attacker would have
access to anyway.
## WAL-redo process communication
The WAL redo process runs the 'postgres' executable, launched with a
Neon-specific command-line option to put it into WAL-redo process
mode. The pageserver controls the lifetime of the WAL redo processes,
launching them as needed. If a tenant is detached from the pageserver,
any WAL redo processes for that tenant are killed.
The pageserver communicates with each WAL redo process over its
stdin/stdout/stderr. It works in request-response model with a simple
custom protocol, described in walredo.rs. To replay a set of WAL
records for a page, the pageserver sends the "before" image of the
page and the WAL records over 'stdin', followed by a command to
perform the replay. The WAL redo process responds with an "after"
image of the page.
## Special handling of some records
Some WAL record types are handled directly in the pageserver, by
bespoke Rust code, and are not sent over to the WAL redo process.
This includes SLRU-related WAL records, like commit records. SLRUs
don't use the standard Postgres buffer manager, so dealing with them
in the Neon WAL redo mode would require quite a few changes to
Postgres code and special handling in the protocol anyway.
Some record types that include a full-page-image (e.g. XLOG_FPI) are
also handled specially when incoming WAL is processed already, and are
stored as page images rather than WAL records.
## Records that modify multiple pages
Some Postgres WAL records modify multiple pages. Such WAL records are
duplicated, so that a copy is stored for each affected page. This is
somewhat wasteful, but because most WAL records only affect one page,
the overhead is acceptable.
The WAL redo always happens for one particular page. If the WAL record
contains changes to other pages, they are ignored.
View File
@@ -1,11 +0,0 @@
# Page server architecture
The Page Server has a few different duties:
- Respond to GetPage@LSN requests from the Compute Nodes
- Receive WAL from WAL safekeeper, and store it
- Upload data to S3 to make it durable, download files from S3 as needed
S3 is the main fault-tolerant storage of all data, as there are no Page Server
replicas. We use a separate fault-tolerant WAL service to reduce latency. It
keeps track of WAL records which are not synced to S3 yet.
View File
@@ -1,8 +0,0 @@
# Separation of Compute and Storage
TODO:
- Read path
- Write path
- Durability model
- API auth
View File
@@ -23,7 +23,7 @@ workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
env_logger = "0.9"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
wal_craft = { path = "wal_craft" }
wal_generate = { path = "wal_generate" }
[build-dependencies]
bindgen = "0.59.1"
View File
@@ -2,7 +2,6 @@ extern crate bindgen;
use std::env;
use std::path::PathBuf;
use std::process::Command;
use bindgen::callbacks::ParseCallbacks;
@@ -46,43 +45,6 @@ fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=pg_control_ffi.h");
// Finding the location of C headers for the Postgres server:
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/tmp_install/include/postgresql/server`
let mut pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR")
{
postgres_install_dir.into()
} else {
PathBuf::from("tmp_install")
};
if pg_install_dir.is_relative() {
let cwd = env::current_dir().unwrap();
pg_install_dir = cwd.join("..").join("..").join(pg_install_dir);
}
let pg_config_bin = pg_install_dir.join("bin").join("pg_config");
let inc_server_path: String = if pg_config_bin.exists() {
let output = Command::new(pg_config_bin)
.arg("--includedir-server")
.output()
.expect("failed to execute `pg_config --includedir-server`");
if !output.status.success() {
panic!("`pg_config --includedir-server` failed")
}
String::from_utf8(output.stdout).unwrap().trim_end().into()
} else {
pg_install_dir
.join("include")
.join("postgresql")
.join("server")
.into_os_string()
.into_string()
.unwrap()
};
// The bindgen::Builder is the main entry point
// to bindgen, and lets you build up options for
// the resulting bindings.
@@ -119,7 +81,15 @@ fn main() {
// explicit padding fields.
.explicit_padding(true)
//
.clang_arg(format!("-I{inc_server_path}"))
// Path the server include dir. It is in tmp_install/include/server, if you did
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
// and used DESTDIR to move it into tmp_install, then it's in
// tmp_install/include/postgres/server
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
// but this will do for now.
//
.clang_arg("-I../../tmp_install/include/server")
.clang_arg("-I../../tmp_install/include/postgresql/server")
//
// Finish the builder and generate the bindings.
//
View File
@@ -82,17 +82,7 @@ impl WalStreamDecoder {
// that cross page boundaries.
loop {
// parse and verify page boundaries as we go
if self.padlen > 0 {
// We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
// parse long header
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
@@ -138,6 +128,15 @@ impl WalStreamDecoder {
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
continue;
} else if self.padlen > 0 {
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.contlen == 0 {
assert!(self.recordbuf.is_empty());
@@ -227,10 +226,10 @@ impl WalStreamDecoder {
self.padlen = self.lsn.calc_padding(8u32) as u32;
}
// We should return LSN of the next record, not the last byte of this record or
// the byte immediately after. Note that this handles both XLOG_SWITCH and usual
// records, the former "spans" until the next WAL segment (see test_xlog_switch).
let result = (self.lsn + self.padlen as u64, recordbuf);
// Always align resulting LSN on 0x8 boundary -- that is important for getPage()
// and WalReceiver integration. Since this code is used both for WalReceiver and
// initial WAL import let's force alignment right here.
let result = (self.lsn.align(), recordbuf);
Ok(Some(result))
}
}
View File
@@ -15,7 +15,6 @@ use crate::XLogPageHeaderData;
use crate::XLogRecord;
use crate::XLOG_PAGE_MAGIC;
use crate::pg_constants::WAL_SEGMENT_SIZE;
use anyhow::{bail, ensure};
use byteorder::{ByteOrder, LittleEndian};
use bytes::BytesMut;
@@ -462,7 +461,8 @@ pub fn find_end_of_wal(
pub fn main() {
let mut data_dir = PathBuf::new();
data_dir.push(".");
let (wal_end, tli) = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, true, Lsn(0)).unwrap();
let wal_seg_size = 16 * 1024 * 1024;
let (wal_end, tli) = find_end_of_wal(&data_dir, wal_seg_size, true, Lsn(0)).unwrap();
println!(
"wal_end={:>08X}{:>08X}, tli={}",
(wal_end >> 32) as u32,
@@ -597,18 +597,20 @@ mod tests {
fn init_logging() {
let _ = env_logger::Builder::from_env(
env_logger::Env::default()
.default_filter_or("wal_craft=info,postgres_ffi::xlog_utils=trace"),
.default_filter_or("wal_generate=info,postgres_ffi::xlog_utils=trace"),
)
.is_test(true)
.try_init();
}
fn test_end_of_wal<C: wal_craft::Crafter>(
fn test_end_of_wal(
test_name: &str,
generate_wal: impl Fn(&mut postgres::Client) -> anyhow::Result<postgres::types::PgLsn>,
expected_end_of_wal_non_partial: Lsn,
last_segment: &str,
) {
use wal_craft::*;
// Craft some WAL
use wal_generate::*;
// 1. Generate some WAL
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("..");
@@ -620,72 +622,25 @@ mod tests {
fs::remove_dir_all(&cfg.datadir).unwrap();
}
cfg.initdb().unwrap();
let srv = cfg.start_server().unwrap();
let (intermediate_lsns, expected_end_of_wal_partial) =
C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap();
let intermediate_lsns: Vec<Lsn> = intermediate_lsns
.iter()
.map(|&lsn| u64::from(lsn).into())
.collect();
let expected_end_of_wal_partial: Lsn = u64::from(expected_end_of_wal_partial).into();
let mut srv = cfg.start_server().unwrap();
let expected_wal_end: Lsn =
u64::from(generate_wal(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
srv.kill();
// Check find_end_of_wal on the initial WAL
let last_segment = cfg
.wal_dir()
.read_dir()
.unwrap()
.map(|f| f.unwrap().file_name().into_string().unwrap())
.filter(|fname| IsXLogFileName(fname))
.max()
.unwrap();
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal_partial);
for start_lsn in std::iter::once(Lsn(0))
.chain(intermediate_lsns)
.chain(std::iter::once(expected_end_of_wal_partial))
{
// Erase all WAL before `start_lsn` to ensure it's not used by `find_end_of_wal`.
// We assume that `start_lsn` is non-decreasing.
info!(
"Checking with start_lsn={}, erasing WAL before it",
start_lsn
);
for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() {
let fname = file.file_name().into_string().unwrap();
if !IsXLogFileName(&fname) {
continue;
}
let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
if seg_start_lsn > u64::from(start_lsn) {
continue;
}
let mut f = File::options().write(true).open(file.path()).unwrap();
const ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];
f.write_all(
&ZEROS[0..min(
WAL_SEGMENT_SIZE,
(u64::from(start_lsn) - seg_start_lsn) as usize,
)],
)
.unwrap();
}
check_end_of_wal(
&cfg,
&last_segment,
start_lsn,
expected_end_of_wal_non_partial,
expected_end_of_wal_partial,
);
}
}
// 2. Pick WAL generated by initdb
let wal_dir = cfg.datadir.join("pg_wal");
let wal_seg_size = 16 * 1024 * 1024;
fn check_pg_waldump_end_of_wal(
cfg: &wal_craft::Conf,
last_segment: &str,
expected_end_of_wal: Lsn,
) {
// Get the actual end of WAL by pg_waldump
// 3. Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
let wal_end = Lsn(wal_end);
info!(
"find_end_of_wal returned (wal_end={}, tli={})",
wal_end, tli
);
assert_eq!(wal_end, expected_end_of_wal_non_partial);
// 4. Get the actual end of WAL by pg_waldump
let waldump_output = cfg
.pg_waldump("000000010000000000000001", last_segment)
.unwrap()
@@ -704,66 +659,44 @@ mod tests {
let waldump_wal_end = Lsn::from_str(caps.get(1).unwrap().as_str()).unwrap();
info!(
"waldump erred on {}, expected wal end at {}",
waldump_wal_end, expected_end_of_wal
waldump_wal_end, expected_wal_end
);
assert_eq!(waldump_wal_end, expected_end_of_wal);
}
assert_eq!(waldump_wal_end, expected_wal_end);
fn check_end_of_wal(
cfg: &wal_craft::Conf,
last_segment: &str,
start_lsn: Lsn,
expected_end_of_wal_non_partial: Lsn,
expected_end_of_wal_partial: Lsn,
) {
// Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
let (wal_end, tli) =
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
let wal_end = Lsn(wal_end);
info!(
"find_end_of_wal returned (wal_end={}, tli={}) with non-partial WAL segment",
wal_end, tli
);
assert_eq!(wal_end, expected_end_of_wal_non_partial);
// Rename file to partial to actually find last valid lsn, then rename it back.
// 5. Rename file to partial to actually find last valid lsn
fs::rename(
cfg.wal_dir().join(&last_segment),
cfg.wal_dir().join(format!("{}.partial", last_segment)),
wal_dir.join(last_segment),
wal_dir.join(format!("{}.partial", last_segment)),
)
.unwrap();
let (wal_end, tli) =
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
let wal_end = Lsn(wal_end);
info!(
"find_end_of_wal returned (wal_end={}, tli={}) with partial WAL segment",
"find_end_of_wal returned (wal_end={}, tli={})",
wal_end, tli
);
assert_eq!(wal_end, expected_end_of_wal_partial);
fs::rename(
cfg.wal_dir().join(format!("{}.partial", last_segment)),
cfg.wal_dir().join(last_segment),
)
.unwrap();
assert_eq!(wal_end, waldump_wal_end);
}
const_assert!(WAL_SEGMENT_SIZE == 16 * 1024 * 1024);
#[test]
pub fn test_find_end_of_wal_simple() {
init_logging();
test_end_of_wal::<wal_craft::Simple>(
test_end_of_wal(
"test_find_end_of_wal_simple",
wal_generate::generate_simple,
"0/2000000".parse::<Lsn>().unwrap(),
"000000010000000000000001",
);
}
#[test]
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
init_logging();
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
test_end_of_wal(
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
wal_generate::generate_wal_record_crossing_segment_followed_by_small_one,
"0/3000000".parse::<Lsn>().unwrap(),
"000000010000000000000002",
);
}
@@ -771,9 +704,11 @@ mod tests {
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
pub fn test_find_end_of_wal_last_crossing_segment() {
init_logging();
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
test_end_of_wal(
"test_find_end_of_wal_last_crossing_segment",
wal_generate::generate_last_wal_record_crossing_segment,
"0/3000000".parse::<Lsn>().unwrap(),
"000000010000000000000002",
);
}
View File
@@ -1,103 +0,0 @@
use anyhow::*;
use clap::{App, Arg, ArgMatches};
use std::str::FromStr;
use wal_craft::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
.init();
let type_arg = &Arg::new("type")
.takes_value(true)
.help("Type of WAL to craft")
.possible_values([
Simple::NAME,
LastWalRecordXlogSwitch::NAME,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
LastWalRecordCrossingSegment::NAME,
])
.required(true);
let arg_matches = App::new("Postgres WAL crafter")
.about("Crafts Postgres databases with specific WAL properties")
.subcommand(
App::new("print-postgres-config")
.about("Print the configuration required for PostgreSQL server before running this script")
)
.subcommand(
App::new("with-initdb")
.about("Craft WAL in a new data directory first initialized with initdb")
.arg(type_arg)
.arg(
Arg::new("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
.default_value("/usr/local")
)
)
.subcommand(
App::new("in-existing")
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
.arg(type_arg)
.arg(
Arg::new("connection")
.takes_value(true)
.help("Connection string to the Postgres database to populate")
.required(true)
)
)
.get_matches();
let wal_craft = |arg_matches: &ArgMatches, client| {
let (intermediate_lsns, end_of_wal_lsn) = match arg_matches.value_of("type").unwrap() {
Simple::NAME => Simple::craft(client)?,
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
LastWalRecordXlogSwitchEndsOnPageBoundary::craft(client)?
}
WalRecordCrossingSegmentFollowedBySmallOne::NAME => {
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
}
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
a => panic!("Unknown --type argument: {}", a),
};
for lsn in intermediate_lsns {
println!("intermediate_lsn = {}", lsn);
}
println!("end_of_wal = {}", end_of_wal_lsn);
Ok(())
};
match arg_matches.subcommand() {
None => panic!("No subcommand provided"),
Some(("print-postgres-config", _)) => {
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
println!("{}", cfg);
}
Ok(())
}
Some(("with-initdb", arg_matches)) => {
let cfg = Conf {
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let srv = cfg.start_server()?;
wal_craft(arg_matches, &mut srv.connect_with_timeout()?)?;
srv.kill();
Ok(())
}
Some(("in-existing", arg_matches)) => wal_craft(
arg_matches,
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
.connect(postgres::NoTls)?,
),
Some(_) => panic!("Unknown subcommand"),
}
}
View File
@@ -1,5 +1,5 @@
[package]
name = "wal_craft"
name = "wal_generate"
version = "0.1.0"
edition = "2021"
@@ -10,7 +10,5 @@ anyhow = "1.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
once_cell = "1.8.0"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres_ffi = { path = "../" }
tempfile = "3.2"
View File
@@ -0,0 +1,58 @@
use anyhow::*;
use clap::{App, Arg};
use wal_generate::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(
env_logger::Env::default().default_filter_or("wal_generate=info"),
)
.init();
let arg_matches = App::new("Postgres WAL generator")
.about("Generates Postgres databases with specific WAL properties")
.arg(
Arg::new("datadir")
.short('D')
.long("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
.default_value("/usr/local")
)
.arg(
Arg::new("type")
.long("type")
.takes_value(true)
.help("Type of WAL to generate")
.possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
.required(true)
)
.get_matches();
let cfg = Conf {
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let mut srv = cfg.start_server()?;
let lsn = match arg_matches.value_of("type").unwrap() {
"simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
"last_wal_record_crossing_segment" => {
generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
}
"wal_record_crossing_segment_followed_by_small_one" => {
generate_wal_record_crossing_segment_followed_by_small_one(
&mut srv.connect_with_timeout()?,
)?
}
a => panic!("Unknown --type argument: {}", a),
};
println!("end_of_wal = {}", lsn);
srv.kill();
Ok(())
}
View File
@@ -1,13 +1,8 @@
use anyhow::*;
use core::time::Duration;
use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::pg_constants::WAL_SEGMENT_SIZE;
use postgres_ffi::xlog_utils::{
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
};
use std::cmp::Ordering;
use std::fs;
use std::path::{Path, PathBuf};
@@ -27,16 +22,6 @@ pub struct PostgresServer {
client_config: postgres::Config,
}
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
vec![
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
]
});
impl Conf {
fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("bin")
@@ -46,10 +31,6 @@ impl Conf {
self.pg_distrib_dir.join("lib")
}
pub fn wal_dir(&self) -> PathBuf {
self.datadir.join("pg_wal")
}
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
let path = self.pg_bin_dir().join(command);
ensure!(path.exists(), "Command {:?} does not exist", path);
@@ -104,8 +85,12 @@ impl Conf {
.arg(unix_socket_dir_path.as_os_str())
.arg("-D")
.arg(self.datadir.as_os_str())
.args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
.args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg]))
.args(&["-c", "shared_preload_libraries=neon"]) // can only be loaded at startup
// Disable background processes as much as possible
.args(&["-c", "wal_writer_delay=10s"])
.args(&["-c", "autovacuum=off"])
.stderr(Stdio::from(log_file))
.spawn()?;
let server = PostgresServer {
@@ -159,7 +144,7 @@ impl PostgresServer {
bail!("Connection timed out");
}
pub fn kill(mut self) {
pub fn kill(&mut self) {
self.process.kill().unwrap();
self.process.wait().unwrap();
}
@@ -196,16 +181,12 @@ pub trait PostgresClientExt: postgres::GenericClient {
impl<C: postgres::GenericClient> PostgresClientExt for C {}
pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result<()> {
fn generate_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
) -> Result<PgLsn> {
client.execute("create extension if not exists neon_test_utils", &[])?;
let wal_keep_size: String = client.query_one("SHOW wal_keep_size", &[])?.get(0);
ensure!(wal_keep_size == "50MB");
let wal_writer_delay: String = client.query_one("SHOW wal_writer_delay", &[])?.get(0);
ensure!(wal_writer_delay == "10s");
let autovacuum: String = client.query_one("SHOW autovacuum", &[])?.get(0);
ensure!(autovacuum == "off");
let wal_segment_size = client.query_one(
"select cast(setting as bigint) as setting, unit \
from pg_settings where name = 'wal_segment_size'",
@@ -216,160 +197,44 @@ pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result
"Unexpected wal_segment_size unit"
);
ensure!(
wal_segment_size.get::<_, i64>("setting") == WAL_SEGMENT_SIZE as i64,
wal_segment_size.get::<_, i64>("setting") == 16 * 1024 * 1024,
"Unexpected wal_segment_size in bytes"
);
Ok(())
}
pub trait Crafter {
const NAME: &'static str;
/// Generates WAL using the client `client`. Returns a pair of:
/// * A vector of some valid "interesting" intermediate LSNs which one may start reading from.
/// May include or exclude Lsn(0) and the end-of-wal.
/// * The expected end-of-wal LSN.
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)>;
}
fn craft_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> Result<(Vec<PgLsn>, Option<PgLsn>)>,
) -> Result<(Vec<PgLsn>, PgLsn)> {
ensure_server_config(client)?;
let initial_lsn = client.pg_current_wal_insert_lsn()?;
info!("LSN initial = {}", initial_lsn);
let (mut intermediate_lsns, last_lsn) = f(client, initial_lsn)?;
let last_lsn = match last_lsn {
let last_lsn = match f(client, initial_lsn)? {
None => client.pg_current_wal_insert_lsn()?,
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
Ordering::Less => bail!("Some records were inserted after the generated WAL"),
Ordering::Equal => last_lsn,
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
},
};
if !intermediate_lsns.starts_with(&[initial_lsn]) {
intermediate_lsns.insert(0, initial_lsn);
}
// Some records may be not flushed, e.g. non-transactional logical messages.
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
Ordering::Less => bail!("Some records were flushed after the crafted WAL"),
Ordering::Less => bail!("Some records were flushed after the generated WAL"),
Ordering::Equal => {}
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
}
Ok((intermediate_lsns, last_lsn))
Ok(last_lsn)
}
pub struct Simple;
impl Crafter for Simple {
const NAME: &'static str = "simple";
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
Ok((Vec::new(), None))
})
}
}
pub struct LastWalRecordXlogSwitch;
impl Crafter for LastWalRecordXlogSwitch {
const NAME: &'static str = "last_wal_record_xlog_switch";
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
generate_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
let before_xlog_switch = client.pg_current_wal_insert_lsn()?;
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
after_xlog_switch <= next_segment,
"XLOG_SWITCH message ended after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
}
Ok(None)
})
}
pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
client.execute("CREATE table t(x int)", &[])?;
// Add padding so the XLOG_SWITCH record ends exactly on XLOG_BLCKSZ boundary.
// We will use logical message as the padding. We start with detecting how much WAL
// it takes for one logical message, considering all alignments and headers.
let base_wal_advance = {
let before_lsn = client.pg_current_wal_insert_lsn()?;
// Small non-empty message bigger than few bytes is more likely than an empty
// message to have the same format as the big padding message.
client.execute(
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', 10))",
&[],
)?;
// The XLOG_SWITCH record has no data => its size is exactly XLOG_SIZE_OF_XLOG_RECORD.
(u64::from(client.pg_current_wal_insert_lsn()?) - u64::from(before_lsn)) as usize
+ XLOG_SIZE_OF_XLOG_RECORD
};
let mut remaining_lsn =
XLOG_BLCKSZ - u64::from(client.pg_current_wal_insert_lsn()?) as usize % XLOG_BLCKSZ;
if remaining_lsn < base_wal_advance {
remaining_lsn += XLOG_BLCKSZ;
}
let repeats = 10 + remaining_lsn - base_wal_advance;
info!(
"current_wal_insert_lsn={}, remaining_lsn={}, base_wal_advance={}, repeats={}",
client.pg_current_wal_insert_lsn()?,
remaining_lsn,
base_wal_advance,
repeats
);
client.execute(
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', $1))",
&[&(repeats as i32)],
)?;
info!(
"current_wal_insert_lsn={}, XLOG_SIZE_OF_XLOG_RECORD={}",
client.pg_current_wal_insert_lsn()?,
XLOG_SIZE_OF_XLOG_RECORD
);
// Emit the XLOG_SWITCH
let before_xlog_switch = client.pg_current_wal_insert_lsn()?;
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
after_xlog_switch < next_segment,
"XLOG_SWITCH message ended on or after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
ensure!(
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
"XLOG_SWITCH message ended not on page boundary: {}",
after_xlog_switch
);
Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
}
}
fn craft_single_logical_message(
fn generate_single_logical_message(
client: &mut impl postgres::GenericClient,
transactional: bool,
) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_internal(client, |client, initial_lsn| {
) -> Result<PgLsn> {
generate_internal(client, |client, initial_lsn| {
ensure!(
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
"Initial LSN is too far in the future"
@@ -400,25 +265,21 @@ fn craft_single_logical_message(
message_lsn < after_message_lsn,
"No record found after the emitted message"
);
Ok((vec![message_lsn], Some(after_message_lsn)))
Ok(Some(after_message_lsn))
} else {
Ok((Vec::new(), Some(message_lsn)))
Ok(Some(message_lsn))
}
})
}
pub struct WalRecordCrossingSegmentFollowedBySmallOne;
impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_single_logical_message(client, true)
}
pub fn generate_wal_record_crossing_segment_followed_by_small_one(
client: &mut impl postgres::GenericClient,
) -> Result<PgLsn> {
generate_single_logical_message(client, true)
}
pub struct LastWalRecordCrossingSegment;
impl Crafter for LastWalRecordCrossingSegment {
const NAME: &'static str = "last_wal_record_crossing_segment";
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_single_logical_message(client, false)
}
pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
client: &mut C,
) -> Result<PgLsn> {
generate_single_logical_message(client, false)
}
View File
@@ -42,19 +42,13 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
pub trait RemoteObjectName {
// Needed to retrieve last component for RemoteObjectId.
// In other words a file name
fn object_name(&self) -> Option<&str>;
}
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
#[async_trait::async_trait]
pub trait RemoteStorage: Send + Sync {
/// A way to uniquely reference a file in the remote storage.
type RemoteObjectId: RemoteObjectName;
type RemoteObjectId;
/// Attempts to derive the storage path out of the local path, if the latter is correct.
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
@@ -65,12 +59,6 @@ pub trait RemoteStorage: Send + Sync {
/// Lists all items the storage has right now.
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Streams the local file contents into remote into the remote storage entry.
async fn upload(
&self,
View File
@@ -5,7 +5,6 @@
//! volume is mounted to the local FS.
use std::{
borrow::Cow,
future::Future,
path::{Path, PathBuf},
pin::Pin,
@@ -18,16 +17,10 @@ use tokio::{
};
use tracing::*;
use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectName};
use crate::{path_with_suffix_extension, Download, DownloadError};
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
impl RemoteObjectName for PathBuf {
fn object_name(&self) -> Option<&str> {
self.file_stem().and_then(|n| n.to_str())
}
}
pub struct LocalFs {
working_directory: PathBuf,
storage_root: PathBuf,
@@ -108,18 +101,7 @@ impl RemoteStorage for LocalFs {
}
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
get_all_files(&self.storage_root, true).await
}
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
None => Cow::Borrowed(&self.storage_root),
};
get_all_files(path.as_ref(), false).await
get_all_files(&self.storage_root).await
}
async fn upload(
@@ -317,7 +299,6 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
fn get_all_files<'a, P>(
directory_path: P,
recursive: bool,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
where
P: AsRef<Path> + Send + Sync + 'a,
@@ -334,11 +315,7 @@ where
if file_type.is_symlink() {
debug!("{:?} us a symlink, skipping", entry_path)
} else if file_type.is_dir() {
if recursive {
paths.extend(get_all_files(entry_path, true).await?.into_iter())
} else {
paths.push(dir_entry.path())
}
paths.extend(get_all_files(entry_path).await?.into_iter())
} else {
paths.push(dir_entry.path());
}
View File
@@ -19,9 +19,7 @@ use tokio::{io, sync::Semaphore};
use tokio_util::io::ReaderStream;
use tracing::debug;
use crate::{
strip_path_prefix, Download, DownloadError, RemoteObjectName, RemoteStorage, S3Config,
};
use crate::{strip_path_prefix, Download, DownloadError, RemoteStorage, S3Config};
use super::StorageMetadata;
@@ -119,25 +117,6 @@ impl S3ObjectKey {
}
}
impl RemoteObjectName for S3ObjectKey {
/// Turn a/b/c or a/b/c/ into c
fn object_name(&self) -> Option<&str> {
// corner case, char::to_string is not const, thats why this is more verbose than it needs to be
// see https://github.com/rust-lang/rust/issues/88674
if self.0.len() == 1 && self.0.chars().next().unwrap() == S3_PREFIX_SEPARATOR {
return None;
}
if self.0.ends_with(S3_PREFIX_SEPARATOR) {
self.0.rsplit(S3_PREFIX_SEPARATOR).nth(1)
} else {
self.0
.rsplit_once(S3_PREFIX_SEPARATOR)
.map(|(_, last)| last)
}
}
}
/// AWS S3 storage.
pub struct S3Bucket {
workdir: PathBuf,
@@ -304,77 +283,6 @@ impl RemoteStorage for S3Bucket {
Ok(document_keys)
}
/// Note: it wont include empty "directories"
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let list_prefix = match prefix {
Some(prefix) => {
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
// if there is no trailing / in default prefix and
// supplied prefix does not start with "/" insert it
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
{
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
prefix_in_bucket.push_str(&prefix.0);
// required to end with a separator
// otherwise request will return only the entry of a prefix
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
Some(prefix_in_bucket)
}
None => self.prefix_in_bucket.clone(),
};
let mut document_keys = Vec::new();
let mut continuation_token = None;
loop {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 list")?;
metrics::inc_list_objects();
let fetch_response = self
.client
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: list_prefix.clone(),
continuation_token,
delimiter: Some(S3_PREFIX_SEPARATOR.to_string()),
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
e
})?;
document_keys.extend(
fetch_response
.common_prefixes
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(S3ObjectKey(o.prefix?))),
);
match fetch_response.continuation_token {
Some(new_token) => continuation_token = Some(new_token),
None => break,
}
}
Ok(document_keys)
}
async fn upload(
&self,
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
@@ -470,25 +378,6 @@ mod tests {
use super::*;
#[test]
fn object_name() {
let k = S3ObjectKey("a/b/c".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = S3ObjectKey("a/b/c/".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = S3ObjectKey("a/".to_owned());
assert_eq!(k.object_name(), Some("a"));
// XXX is it impossible to have an empty key?
let k = S3ObjectKey("".to_owned());
assert_eq!(k.object_name(), None);
let k = S3ObjectKey("/".to_owned());
assert_eq!(k.object_name(), None);
}
#[test]
fn download_destination() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
View File
@@ -537,13 +537,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
match tenant_match.subcommand() {
Some(("list", _)) => {
for t in pageserver.tenant_list()? {
println!(
"{} {}",
t.id,
t.state
.map(|s| s.to_string())
.unwrap_or_else(|| String::from(""))
);
println!("{} {}", t.id, t.state);
}
}
Some(("create", create_match)) => {

View File

@@ -1,4 +1,15 @@
# Services
## Page server architecture
The Page Server has a few different duties:
- Respond to GetPage@LSN requests from the Compute Nodes
- Receive WAL from WAL safekeeper
- Replay WAL that's applicable to the chunks that the Page Server maintains
- Backup to S3
S3 is the main fault-tolerant storage of all data, as there are no Page Server
replicas. We use a separate fault-tolerant WAL service to reduce latency. It
keeps track of WAL records which are not synced to S3 yet.
The Page Server consists of multiple threads that operate on a shared
repository of page versions:
@@ -10,22 +21,18 @@ repository of page versions:
| WAL receiver |
| |
+--------------+
......
+---------+ +--------+ . .
| | | | . .
GetPage@LSN | | | backup | -------> . S3 .
-------------> | Page | repository | | . .
| Service | +--------+ . .
page | | ......
+----+
+---------+ .......... | |
| | . . | |
GetPage@LSN | | . backup . -------> | S3 |
-------------> | Page | repository . . | |
| Service | .......... | |
page | | +----+
<------------- | |
+---------+ +-----------+ +--------------------+
| WAL redo | | Checkpointing, |
+----------+ | processes | | Garbage collection |
| | +-----------+ +--------------------+
| HTTP |
| mgmt API |
| |
+----------+
+---------+ +--------------------+
| Checkpointing / |
| Garbage collection |
+--------------------+
Legend:
@@ -33,77 +40,28 @@ Legend:
| | A thread or multi-threaded service
+--+
....
. . Component in its early development phase.
....
---> Data flow
<---
```
## Page Service
Page Service
------------
The Page Service listens for GetPage@LSN requests from the Compute Nodes,
and responds with pages from the repository. On each GetPage@LSN request,
it calls into the Repository function
A separate thread is spawned for each incoming connection to the page
service. The page service uses the libpq protocol to communicate with
the client. The client is a Compute Postgres instance.
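As a rough, purely illustrative sketch of that one-thread-per-connection model (this is not the actual page service code: the real service speaks the libpq protocol and serves repository data, while the port and function names below are made up):

```rust
use std::io::{Read, Write};
use std::net::{TcpListener, TcpStream};
use std::thread;

// Placeholder handler: the real page service would perform the libpq
// handshake here and then serve GetPage@LSN / basebackup requests.
fn handle_connection(mut stream: TcpStream) -> std::io::Result<()> {
    let mut buf = [0u8; 1024];
    let n = stream.read(&mut buf)?;
    stream.write_all(&buf[..n])?; // echo back instead of serving pages
    Ok(())
}

fn main() -> std::io::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:64000")?; // hypothetical port
    for conn in listener.incoming() {
        let stream = conn?;
        // One dedicated thread per incoming compute connection.
        thread::spawn(move || {
            if let Err(e) = handle_connection(stream) {
                eprintln!("connection error: {e}");
            }
        });
    }
    Ok(())
}
```

A dedicated thread per compute connection keeps one slow or blocked client from stalling the others.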
## WAL Receiver
The WAL receiver connects to the external WAL safekeeping service
using PostgreSQL physical streaming replication, and continuously
receives WAL. It decodes the WAL records, and stores them to the
repository.
and responds with pages from the repository.
## Backup service
WAL Receiver
------------
The backup service is responsible for storing pageserver recovery data externally.
Currently, the pageserver stores its files in a filesystem directory it's pointed to.
That working directory can be rather ephemeral, for example when a pageserver pod runs in k8s with no persistent volumes attached.
Therefore, the server interacts with external, more reliable storage to back up and restore its state.
The code for storage support is extensible and can support arbitrary backends as long as they implement a certain Rust trait.
The following implementations are present:
* local filesystem — used mainly in tests
* AWS S3 — used in production
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and the corresponding Rust file docs; parameters documentation can be found in the [settings docs](../docs/settings.md).
The backup service is disabled by default and can be enabled to interact with a single remote storage.
CLI examples:
* Local FS: `${PAGESERVER_BIN} -c "remote_storage={local_path='/some/local/path/'}"`
* AWS S3 : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`
For Amazon AWS S3, a key id and secret access key can be found in `~/.aws/credentials` (if awscli was ever configured to work with the desired bucket) or on the AWS Settings page for a given user. Also note that bucket names do not contain any protocol prefix when used on AWS.
For local S3 installations, refer to their documentation for name format and credentials.
Similar to other pageserver settings, a TOML config file can be used to configure either of the storage backends as the backup target.
Required sections are:
```toml
[remote_storage]
local_path = '/Users/someonetoignore/Downloads/tmp_dir/'
```
or
```toml
[remote_storage]
bucket_name = 'some-sample-bucket'
bucket_region = 'eu-north-1'
prefix_in_bucket = '/test_prefix/'
```
`AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` env variables can be used to specify the S3 credentials if needed.
## Repository background tasks
The Repository also has a few different background threads and tokio tasks that perform
background duties like dumping accumulated WAL data from memory to disk, reorganizing
files for performance (compaction), and garbage collecting old files.
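As a rough illustration of how one such periodic background duty could be wired up (a sketch only, assuming the `tokio` crate with the `macros` and `time` features; the interval and the task body are hypothetical and not the pageserver's actual code):

```rust
use std::time::Duration;

#[tokio::main]
async fn main() {
    let checkpoint_period = Duration::from_secs(10); // made-up interval
    let task = tokio::spawn(async move {
        let mut ticker = tokio::time::interval(checkpoint_period);
        loop {
            ticker.tick().await;
            // In the real server this slot would flush in-memory WAL data to
            // disk, run compaction, or garbage-collect old layer files.
            println!("checkpoint / compaction / gc iteration");
        }
    });
    // Let the background loop run for a while in this demo, then stop it.
    tokio::time::sleep(Duration::from_secs(30)).await;
    task.abort();
}
```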
The WAL receiver connects to the external WAL safekeeping service (or
directly to the primary) using PostgreSQL physical streaming
replication, and continuously receives WAL. It decodes the WAL records,
and stores them to the repository.
Repository
@@ -158,6 +116,48 @@ Remove old on-disk layer files that are no longer needed according to the
PITR retention policy
### Backup service
The backup service is responsible for storing pageserver recovery data externally.
Currently, the pageserver stores its files in a filesystem directory it's pointed to.
That working directory can be rather ephemeral, for example when a pageserver pod runs in k8s with no persistent volumes attached.
Therefore, the server interacts with external, more reliable storage to back up and restore its state.
The code for storage support is extensible and can support arbitrary backends as long as they implement a certain Rust trait.
The following implementations are present:
* local filesystem — used mainly in tests
* AWS S3 — used in production
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and the corresponding Rust file docs; parameters documentation can be found in the [settings docs](../docs/settings.md).
The backup service is disabled by default and can be enabled to interact with a single remote storage.
CLI examples:
* Local FS: `${PAGESERVER_BIN} -c "remote_storage={local_path='/some/local/path/'}"`
* AWS S3 : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`
For Amazon AWS S3, a key id and secret access key can be found in `~/.aws/credentials` (if awscli was ever configured to work with the desired bucket) or on the AWS Settings page for a given user. Also note that bucket names do not contain any protocol prefix when used on AWS.
For local S3 installations, refer to their documentation for name format and credentials.
Similar to other pageserver settings, a TOML config file can be used to configure either of the storage backends as the backup target.
Required sections are:
```toml
[remote_storage]
local_path = '/Users/someonetoignore/Downloads/tmp_dir/'
```
or
```toml
[remote_storage]
bucket_name = 'some-sample-bucket'
bucket_region = 'eu-north-1'
prefix_in_bucket = '/test_prefix/'
```
`AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` env variables can be used to specify the S3 credentials if needed.
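To illustrate the extensibility point, here is a simplified sketch of what such a storage trait and a local-filesystem backend could look like (all names are hypothetical; the real trait in this codebase is `RemoteStorage` and its methods differ):

```rust
use std::path::{Path, PathBuf};

// Hypothetical trait: any backend that can upload, download, and list
// objects can serve as a backup target.
pub trait BackupStorage {
    fn upload(&self, local: &Path, remote_key: &str) -> anyhow::Result<()>;
    fn download(&self, remote_key: &str, local: &Path) -> anyhow::Result<()>;
    fn list(&self, prefix: &str) -> anyhow::Result<Vec<String>>;
}

/// A trivial local-filesystem backend, the kind used mainly in tests.
pub struct LocalFs {
    pub root: PathBuf,
}

impl BackupStorage for LocalFs {
    fn upload(&self, local: &Path, remote_key: &str) -> anyhow::Result<()> {
        let dest = self.root.join(remote_key);
        if let Some(parent) = dest.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::copy(local, dest)?;
        Ok(())
    }

    fn download(&self, remote_key: &str, local: &Path) -> anyhow::Result<()> {
        std::fs::copy(self.root.join(remote_key), local)?;
        Ok(())
    }

    fn list(&self, prefix: &str) -> anyhow::Result<Vec<String>> {
        let mut keys = Vec::new();
        for entry in std::fs::read_dir(self.root.join(prefix))? {
            keys.push(entry?.file_name().to_string_lossy().into_owned());
        }
        Ok(keys)
    }
}
```

An S3 backend would implement the same trait against the S3 API, which is what lets the rest of the pageserver stay agnostic about where the backups live.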
TODO: Sharding
--------------------

View File

@@ -60,7 +60,6 @@ where
write: W,
timeline: &'a Arc<DatadirTimelineImpl>,
req_lsn: Option<Lsn>,
prev_lsn: Option<Lsn>,
full_backup: bool,
) -> Result<Basebackup<'a, W>> {
// Compute postgres doesn't have any previous WAL files, but the first
@@ -97,26 +96,16 @@ where
(end_of_timeline.prev, end_of_timeline.last)
};
// Consolidate the derived and the provided prev_lsn values
let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
if backup_prev != Lsn(0) {
ensure!(backup_prev == provided_prev_lsn)
}
provided_prev_lsn
} else {
backup_prev
};
info!(
"taking basebackup lsn={}, prev_lsn={} (full_backup={})",
backup_lsn, prev_lsn, full_backup
backup_lsn, backup_prev, full_backup
);
Ok(Basebackup {
ar: Builder::new(AbortableWrite::new(write)),
timeline,
lsn: backup_lsn,
prev_record_lsn: prev_lsn,
prev_record_lsn: backup_prev,
full_backup,
finished: false,
})

View File

@@ -22,49 +22,6 @@ paths:
properties:
id:
type: integer
/v1/tenant/{tenant_id}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
get:
description: Get tenant status
responses:
"200":
description: Currently returns a flag indicating whether the tenant has in-progress timeline downloads
content:
application/json:
schema:
$ref: "#/components/schemas/TenantInfo"
"400":
description: Error when no tenant id found in path or no timeline id
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline:
parameters:
- name: tenant_id
@@ -113,7 +70,6 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}:
parameters:
- name: tenant_id
@@ -128,14 +84,13 @@ paths:
schema:
type: string
format: hex
- name: include-non-incremental-logical-size
in: query
schema:
type: string
description: Controls calculation of current_logical_size_non_incremental
get:
description: Get info about the timeline
parameters:
- name: include-non-incremental-logical-size
in: query
schema:
type: string
description: Controls calculation of current_logical_size_non_incremental
responses:
"200":
description: TimelineInfo
@@ -167,35 +122,6 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
delete:
description: "Attempts to delete specified timeline. On 500 errors should be retried"
responses:
"200":
description: Ok
"400":
description: Error when no tenant id found in path or no timeline id
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
parameters:
@@ -245,7 +171,7 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/attach:
/v1/tenant/{tenant_id}/timeline/{timeline_id}/attach:
parameters:
- name: tenant_id
in: path
@@ -253,13 +179,19 @@ paths:
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Schedules an attach operation to happen in the background for the given tenant
description: Attach remote timeline
responses:
"202":
description: Tenant attaching scheduled
"200":
description: Timeline attaching scheduled
"400":
description: Error when no tenant id found in path parameters
description: Error when no tenant id found in path or no timeline id
content:
application/json:
schema:
@@ -283,7 +215,7 @@ paths:
schema:
$ref: "#/components/schemas/NotFoundError"
"409":
description: Tenant download is already in progress
description: Timeline download is already in progress
content:
application/json:
schema:
@@ -295,6 +227,7 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach:
parameters:
- name: tenant_id
@@ -310,11 +243,10 @@ paths:
type: string
format: hex
post:
description: Deprecated, use DELETE /v1/tenant/{tenant_id}/timeline/{timeline_id} instead
deprecated: true
description: Detach local timeline
responses:
"200":
description: Ok
description: Timeline detached
"400":
description: Error when no tenant id found in path or no timeline id
content:
@@ -340,43 +272,6 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/detach:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Detach local tenant
responses:
"200":
description: Tenant detached
"400":
description: Error when no tenant id found in path parameters
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/:
parameters:
@@ -572,13 +467,12 @@ components:
type: object
required:
- id
- state
properties:
id:
type: string
state:
type: string
has_in_progress_downloads:
type: boolean
TenantCreateInfo:
type: object
properties:
@@ -673,7 +567,6 @@ components:
type: integer
current_logical_size_non_incremental:
type: integer
WalReceiverEntry:
type: object
required:

View File

@@ -14,7 +14,6 @@ use crate::repository::Repository;
use crate::storage_sync;
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
use crate::tenant_config::TenantConfOpt;
use crate::tenant_mgr::TenantInfo;
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
use crate::{config::PageServerConf, tenant_mgr, timelines};
use utils::{
@@ -210,9 +209,9 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
.await;
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
return Err(ApiError::NotFound(format!(
"Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
)));
return Err(ApiError::NotFound(
"Timeline is not found neither locally nor remotely".to_string(),
));
}
let timeline_info = TimelineInfo {
@@ -242,157 +241,123 @@ async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Bod
json_response(StatusCode::OK, &wal_receiver_entry)
}
// TODO: it makes sense to provide the tenant config right away, the same way as it is handled in tenant_create
async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
info!("Handling tenant attach {}", tenant_id,);
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
info!(
"Handling timeline {} attach for tenant: {}",
timeline_id, tenant_id,
);
tokio::task::spawn_blocking(move || {
if tenant_mgr::get_tenant_state(tenant_id).is_some() {
anyhow::bail!("Tenant is already present locally")
if tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id).is_ok() {
// TODO: maybe answer with 304 Not Modified here?
anyhow::bail!("Timeline is already present locally")
};
Ok(())
})
.await
.map_err(ApiError::from_err)??;
let sync_id = ZTenantTimelineId {
tenant_id,
timeline_id,
};
let state = get_state(&request);
let remote_index = &state.remote_index;
let mut index_accessor = remote_index.write().await;
if let Some(tenant_entry) = index_accessor.tenant_entry_mut(&tenant_id) {
if tenant_entry.has_in_progress_downloads() {
if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
if remote_timeline.awaits_download {
return Err(ApiError::Conflict(
"Tenant download is already in progress".to_string(),
"Timeline download is already in progress".to_string(),
));
}
for (timeline_id, remote_timeline) in tenant_entry.iter_mut() {
storage_sync::schedule_layer_download(tenant_id, *timeline_id);
remote_timeline.awaits_download = true;
}
return json_response(StatusCode::ACCEPTED, ());
}
// no tenant in the index, release the lock before the potentially lengthy download operation
drop(index_accessor);
// download index parts for every tenant timeline
let remote_timelines = match gather_tenant_timelines_index_parts(state, tenant_id).await {
Ok(Some(remote_timelines)) => remote_timelines,
Ok(None) => return Err(ApiError::NotFound("Unknown remote tenant".to_string())),
Err(e) => {
error!("Failed to retrieve remote tenant data: {:?}", e);
return Err(ApiError::NotFound(
"Failed to retrieve remote tenant".to_string(),
));
}
};
// recheck that download is not in progress because
// we've released the lock to avoid holding it during the download
let mut index_accessor = remote_index.write().await;
let tenant_entry = match index_accessor.tenant_entry_mut(&tenant_id) {
Some(tenant_entry) => {
if tenant_entry.has_in_progress_downloads() {
return Err(ApiError::Conflict(
"Tenant download is already in progress".to_string(),
));
}
tenant_entry
}
None => index_accessor.add_tenant_entry(tenant_id),
};
// populate remote index with the data from index part and create directories on the local filesystem
for (timeline_id, mut remote_timeline) in remote_timelines {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
remote_timeline.awaits_download = true;
tenant_entry.insert(timeline_id, remote_timeline);
// schedule actual download
storage_sync::schedule_layer_download(tenant_id, timeline_id);
return json_response(StatusCode::ACCEPTED, ());
} else {
// no timeline in the index, release the lock before the potentially lengthy download operation
drop(index_accessor);
}
let new_timeline = match try_download_index_part_data(state, sync_id).await {
Ok(Some(mut new_timeline)) => {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
new_timeline.awaits_download = true;
new_timeline
}
Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
Err(e) => {
error!("Failed to retrieve remote timeline data: {:?}", e);
return Err(ApiError::NotFound(
"Failed to retrieve remote timeline".to_string(),
));
}
};
let mut index_accessor = remote_index.write().await;
match index_accessor.timeline_entry_mut(&sync_id) {
Some(remote_timeline) => {
if remote_timeline.awaits_download {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
));
}
remote_timeline.awaits_download = true;
}
None => index_accessor.add_timeline_entry(sync_id, new_timeline),
}
storage_sync::schedule_layer_download(tenant_id, timeline_id);
json_response(StatusCode::ACCEPTED, ())
}
/// Note: this is expensive from an S3 access perspective;
/// for details see the comment on `storage_sync::gather_tenant_timelines_index_parts`
async fn gather_tenant_timelines_index_parts(
async fn try_download_index_part_data(
state: &State,
tenant_id: ZTenantId,
) -> anyhow::Result<Option<Vec<(ZTimelineId, RemoteTimeline)>>> {
let index_parts = match state.remote_storage.as_ref() {
sync_id: ZTenantTimelineId,
) -> anyhow::Result<Option<RemoteTimeline>> {
let index_part = match state.remote_storage.as_ref() {
Some(GenericRemoteStorage::Local(local_storage)) => {
storage_sync::gather_tenant_timelines_index_parts(state.conf, local_storage, tenant_id)
.await
storage_sync::download_index_part(state.conf, local_storage, sync_id).await
}
// FIXME here s3 storage contains its own limits, that are separate from sync storage thread ones
// because it is a different instance. We can move this limit to some global static
// or use one instance everywhere.
Some(GenericRemoteStorage::S3(s3_storage)) => {
storage_sync::gather_tenant_timelines_index_parts(state.conf, s3_storage, tenant_id)
.await
storage_sync::download_index_part(state.conf, s3_storage, sync_id).await
}
None => return Ok(None),
}
.with_context(|| format!("Failed to download index parts for tenant {tenant_id}"))?;
.with_context(|| format!("Failed to download index part for timeline {sync_id}"))?;
let mut remote_timelines = Vec::with_capacity(index_parts.len());
for (timeline_id, index_part) in index_parts {
let timeline_path = state.conf.timeline_path(&timeline_id, &tenant_id);
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
.with_context(|| {
format!("Failed to convert index part into remote timeline for timeline {tenant_id}/{timeline_id}")
})?;
remote_timelines.push((timeline_id, remote_timeline));
}
Ok(Some(remote_timelines))
let timeline_path = state
.conf
.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
RemoteTimeline::from_index_part(&timeline_path, index_part)
.map(Some)
.with_context(|| {
format!("Failed to convert index part into remote timeline for timeline {sync_id}")
})
}
async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
let state = get_state(&request);
tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
tenant_mgr::delete_timeline(tenant_id, timeline_id)
let _enter =
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
.entered();
let state = get_state(&request);
tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
})
.await
.map_err(ApiError::from_err)??;
let mut remote_index = state.remote_index.write().await;
remote_index.remove_timeline_entry(ZTenantTimelineId {
tenant_id,
timeline_id,
});
json_response(StatusCode::OK, ())
}
async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
let conf = state.conf;
tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
tenant_mgr::detach_tenant(conf, tenant_id)
})
.await
.map_err(ApiError::from_err)??;
let mut remote_index = state.remote_index.write().await;
remote_index.remove_tenant_entry(&tenant_id);
json_response(StatusCode::OK, ())
}
@@ -400,13 +365,9 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
// check for management permission
check_permission(&request, None)?;
let state = get_state(&request);
// clone to avoid holding the lock while awaiting for blocking task
let remote_index = state.remote_index.read().await.clone();
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_list").entered();
crate::tenant_mgr::list_tenants(&remote_index)
crate::tenant_mgr::list_tenants()
})
.await
.map_err(ApiError::from_err)?;
@@ -414,34 +375,6 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
json_response(StatusCode::OK, response_data)
}
async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
// if the tenant is still in the process of downloading, it can be absent from the global tenant map
let tenant_state = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant_state(tenant_id))
.await
.map_err(ApiError::from_err)?;
let state = get_state(&request);
let remote_index = &state.remote_index;
let index_accessor = remote_index.read().await;
let has_in_progress_downloads = index_accessor
.tenant_entry(&tenant_id)
.ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))?
.has_in_progress_downloads();
json_response(
StatusCode::OK,
TenantInfo {
id: tenant_id,
state: tenant_state,
has_in_progress_downloads: Some(has_in_progress_downloads),
},
)
}
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
// check for management permission
check_permission(&request, None)?;
@@ -587,28 +520,24 @@ pub fn make_router(
.get("/v1/status", status_handler)
.get("/v1/tenant", tenant_list_handler)
.post("/v1/tenant", tenant_create_handler)
.get("/v1/tenant/:tenant_id", tenant_status)
.put("/v1/tenant/config", tenant_config_handler)
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_detail_handler,
)
.delete(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_delete_handler,
)
// for backward compatibility
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
timeline_delete_handler,
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
wal_receiver_get_handler,
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/attach",
timeline_attach_handler,
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
timeline_detach_handler,
)
.any(handler_404))
}

View File

@@ -57,7 +57,6 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
pg_control = Some(control_file);
}
modification.flush()?;
}
}
@@ -318,7 +317,6 @@ pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
// We found the pg_control file.
pg_control = Some(res);
}
modification.flush()?;
}
tar::EntryType::Directory => {
debug!("directory {:?}", file_path);

File diff suppressed because it is too large

View File

@@ -1,4 +1,4 @@
# Pageserver storage
# Overview
The main responsibility of the Page Server is to process the incoming WAL, and
reprocess it into a format that allows reasonably quick access to any page

View File

@@ -267,13 +267,13 @@ impl InMemoryLayer {
/// Common subroutine of the public put_wal_record() and put_page_image() functions.
/// Adds the page version to the in-memory tree
pub fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
pub fn put_value(&self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
trace!("put_value key {} at {}/{}", key, self.timelineid, lsn);
let mut inner = self.inner.write().unwrap();
inner.assert_writeable();
let off = inner.file.write_blob(&Value::ser(val)?)?;
let off = inner.file.write_blob(&Value::ser(&val)?)?;
let vec_map = inner.index.entry(key).or_default();
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;

View File

@@ -772,7 +772,6 @@ impl PageServerHandler {
pgb: &mut PostgresBackend,
timelineid: ZTimelineId,
lsn: Option<Lsn>,
prev_lsn: Option<Lsn>,
tenantid: ZTenantId,
full_backup: bool,
) -> anyhow::Result<()> {
@@ -797,8 +796,7 @@ impl PageServerHandler {
{
let mut writer = CopyDataSink { pgb };
let basebackup =
basebackup::Basebackup::new(&mut writer, &timeline, lsn, prev_lsn, full_backup)?;
let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn, full_backup)?;
span.record("lsn", &basebackup.lsn.to_string().as_str());
basebackup.send_tarball()?;
}
@@ -901,67 +899,33 @@ impl postgres_backend::Handler for PageServerHandler {
};
// Check that the timeline exists
self.handle_basebackup_request(pgb, timelineid, lsn, None, tenantid, false)?;
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, false)?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
// return pair of prev_lsn and last_lsn
else if query_string.starts_with("get_last_record_rlsn ") {
let (_, params_raw) = query_string.split_at("get_last_record_rlsn ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
ensure!(
params.len() == 2,
"invalid param number for get_last_record_rlsn command"
);
let tenantid = ZTenantId::from_str(params[0])?;
let timelineid = ZTimelineId::from_str(params[1])?;
self.check_permission(Some(tenantid))?;
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
.context("Cannot load local timeline")?;
let end_of_timeline = timeline.tline.get_last_record_rlsn();
pgb.write_message_noflush(&BeMessage::RowDescription(&[
RowDescriptor::text_col(b"prev_lsn"),
RowDescriptor::text_col(b"last_lsn"),
]))?
.write_message_noflush(&BeMessage::DataRow(&[
Some(end_of_timeline.prev.to_string().as_bytes()),
Some(end_of_timeline.last.to_string().as_bytes()),
]))?
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
// same as basebackup, but result includes relational data as well
else if query_string.starts_with("fullbackup ") {
let (_, params_raw) = query_string.split_at("fullbackup ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
ensure!(
params.len() >= 2,
params.len() == 3,
"invalid param number for fullbackup command"
);
let tenantid = ZTenantId::from_str(params[0])?;
let timelineid = ZTimelineId::from_str(params[1])?;
// The caller is responsible for providing correct lsn and prev_lsn.
let lsn = if params.len() > 2 {
Some(Lsn::from_str(params[2])?)
} else {
None
};
let prev_lsn = if params.len() > 3 {
Some(Lsn::from_str(params[3])?)
} else {
None
};
self.check_permission(Some(tenantid))?;
// Lsn is required for fullbackup, because otherwise we would not know
// at which lsn to upload this backup.
//
// The caller is responsible for providing a valid lsn
// and using it in the subsequent import.
let lsn = Some(Lsn::from_str(params[2])?);
// Check that the timeline exists
self.handle_basebackup_request(pgb, timelineid, lsn, prev_lsn, tenantid, true)?;
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, true)?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("import basebackup ") {
// Import the `base` section (everything but the wal) of a basebackup.

View File

@@ -902,57 +902,6 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
Ok(())
}
///
/// Flush changes accumulated so far to the underlying repository.
///
/// Usually, changes made in DatadirModification are atomic, but this allows
/// you to flush them to the underlying repository before the final `commit`.
/// That allows freeing up the memory used to hold the pending changes.
///
/// Currently only used during bulk import of a data directory. In that
/// context, breaking the atomicity is OK. If the import is interrupted, the
/// whole import fails and the timeline will be deleted anyway.
/// (Or to be precise, it will be left behind for debugging purposes and
/// ignored, see https://github.com/neondatabase/neon/pull/1809)
///
/// Note: A consequence of flushing the pending operations is that they
/// won't be visible to subsequent operations until `commit`. The function
/// retains all the metadata, but data pages are flushed. That's again OK
/// for bulk import, where you are just loading data pages and won't try to
/// modify the same pages twice.
pub fn flush(&mut self) -> Result<()> {
// Unless we have accumulated a decent amount of changes, it's not worth it
// to scan through the pending_updates list.
let pending_nblocks = self.pending_nblocks;
if pending_nblocks < 10000 {
return Ok(());
}
let writer = self.tline.tline.writer();
// Flush relation and SLRU data blocks, keep metadata.
let mut result: Result<()> = Ok(());
self.pending_updates.retain(|&key, value| {
if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
result = writer.put(key, self.lsn, value);
false
} else {
true
}
});
result?;
if pending_nblocks != 0 {
self.tline.current_logical_size.fetch_add(
pending_nblocks * pg_constants::BLCKSZ as isize,
Ordering::SeqCst,
);
self.pending_nblocks = 0;
}
Ok(())
}
///
/// Finish this atomic update, writing all the updated keys to the
/// underlying timeline.
@@ -963,7 +912,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
let pending_nblocks = self.pending_nblocks;
for (key, value) in self.pending_updates {
writer.put(key, self.lsn, &value)?;
writer.put(key, self.lsn, value)?;
}
for key_range in self.pending_deletions {
writer.delete(key_range.clone(), self.lsn)?;
@@ -1368,10 +1317,6 @@ pub fn key_to_rel_block(key: Key) -> Result<(RelTag, BlockNumber)> {
})
}
fn is_rel_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0
}
pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
@@ -1390,12 +1335,6 @@ pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
})
}
fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
//
//-- Tests that should work the same with any Repository/Timeline implementation.
//

View File

@@ -7,6 +7,7 @@ use byteorder::{ByteOrder, BE};
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::Display;
use std::ops::{AddAssign, Range};
use std::sync::{Arc, RwLockReadGuard};
use std::time::Duration;
@@ -181,6 +182,20 @@ impl Value {
}
}
#[derive(Clone, Copy, Debug)]
pub enum TimelineSyncStatusUpdate {
Downloaded,
}
impl Display for TimelineSyncStatusUpdate {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
TimelineSyncStatusUpdate::Downloaded => "Downloaded",
};
f.write_str(s)
}
}
///
/// A repository corresponds to one .neon directory. One repository holds multiple
/// timelines, forked off from the same initial call to 'initdb'.
@@ -189,7 +204,11 @@ pub trait Repository: Send + Sync {
/// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization.
/// See [`crate::remote_storage`] for more details about the synchronization.
fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
fn apply_timeline_remote_sync_status_update(
&self,
timeline_id: ZTimelineId,
timeline_sync_status_update: TimelineSyncStatusUpdate,
) -> Result<()>;
/// Get Timeline handle for given zenith timeline ID.
/// This function is idempotent. It doesn't change internal state in any way.
@@ -211,12 +230,7 @@ pub trait Repository: Send + Sync {
) -> Result<Arc<Self::Timeline>>;
/// Branch a timeline
fn branch_timeline(
&self,
src: ZTimelineId,
dst: ZTimelineId,
start_lsn: Option<Lsn>,
) -> Result<()>;
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()>;
/// Flush all data to disk.
///
@@ -246,10 +260,10 @@ pub trait Repository: Send + Sync {
/// api's 'compact' command.
fn compaction_iteration(&self) -> Result<()>;
/// removes timeline-related in-memory data
fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()>;
/// detaches timeline-related in-memory data.
fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
/// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn.
// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn.
fn get_remote_index(&self) -> &RemoteIndex;
}
@@ -393,7 +407,7 @@ pub trait TimelineWriter<'a> {
///
/// This will implicitly extend the relation, if the page is beyond the
/// current end-of-file.
fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()>;
fn put(&self, key: Key, lsn: Lsn, value: Value) -> Result<()>;
fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> Result<()>;
@@ -523,7 +537,7 @@ pub mod repo_harness {
TenantConfOpt::from(self.tenant_conf),
walredo_mgr,
self.tenant_id,
RemoteIndex::default(),
RemoteIndex::empty(),
false,
);
// populate repo with locally available timelines
@@ -539,7 +553,10 @@ pub mod repo_harness {
.parse()
.unwrap();
repo.attach_timeline(timeline_id)?;
repo.apply_timeline_remote_sync_status_update(
timeline_id,
TimelineSyncStatusUpdate::Downloaded,
)?;
}
Ok(repo)
@@ -603,12 +620,12 @@ mod tests {
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
let writer = tline.writer();
writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
writer.put(*TEST_KEY, Lsn(0x10), Value::Image(TEST_IMG("foo at 0x10")))?;
writer.finish_write(Lsn(0x10));
drop(writer);
let writer = tline.writer();
writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
writer.put(*TEST_KEY, Lsn(0x20), Value::Image(TEST_IMG("foo at 0x20")))?;
writer.finish_write(Lsn(0x20));
drop(writer);
@@ -655,24 +672,24 @@ mod tests {
let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap();
// Insert a value on the timeline
writer.put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))?;
writer.put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))?;
writer.put(TEST_KEY_A, Lsn(0x20), test_value("foo at 0x20"))?;
writer.put(TEST_KEY_B, Lsn(0x20), test_value("foobar at 0x20"))?;
writer.finish_write(Lsn(0x20));
writer.put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))?;
writer.put(TEST_KEY_A, Lsn(0x30), test_value("foo at 0x30"))?;
writer.finish_write(Lsn(0x30));
writer.put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))?;
writer.put(TEST_KEY_A, Lsn(0x40), test_value("foo at 0x40"))?;
writer.finish_write(Lsn(0x40));
//assert_current_logical_size(&tline, Lsn(0x40));
// Branch the history, modify relation differently on the new timeline
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)
.expect("Should have a local timeline");
let new_writer = newtline.writer();
new_writer.put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))?;
new_writer.put(TEST_KEY_A, Lsn(0x40), test_value("bar at 0x40"))?;
new_writer.finish_write(Lsn(0x40));
// Check page contents on both branches
@@ -703,14 +720,14 @@ mod tests {
writer.put(
*TEST_KEY,
lsn,
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
lsn += 0x10;
writer.put(
*TEST_KEY,
lsn,
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
lsn += 0x10;
@@ -721,14 +738,14 @@ mod tests {
writer.put(
*TEST_KEY,
lsn,
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
lsn += 0x10;
writer.put(
*TEST_KEY,
lsn,
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
}
@@ -749,7 +766,7 @@ mod tests {
repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?;
// try to branch at lsn 25, should fail because we already garbage collected the data
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
Ok(_) => panic!("branching should have failed"),
Err(err) => {
assert!(err.to_string().contains("invalid branch start lsn"));
@@ -770,7 +787,7 @@ mod tests {
repo.create_empty_timeline(TIMELINE_ID, Lsn(0x50))?;
// try to branch at lsn 0x25, should fail because initdb lsn is 0x50
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
Ok(_) => panic!("branching should have failed"),
Err(err) => {
assert!(&err.to_string().contains("invalid branch start lsn"));
@@ -815,7 +832,7 @@ mod tests {
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
make_some_layers(tline.as_ref(), Lsn(0x20))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)
.expect("Should have a local timeline");
@@ -831,7 +848,7 @@ mod tests {
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
make_some_layers(tline.as_ref(), Lsn(0x20))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)
.expect("Should have a local timeline");
@@ -889,7 +906,7 @@ mod tests {
make_some_layers(tline.as_ref(), Lsn(0x20))?;
tline.checkpoint(CheckpointConfig::Forced)?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)

View File

@@ -178,8 +178,9 @@ use crate::{
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
LayeredRepository,
},
repository::TimelineSyncStatusUpdate,
storage_sync::{self, index::RemoteIndex},
tenant_mgr::attach_downloaded_tenants,
tenant_mgr::apply_timeline_sync_status_updates,
thread_mgr,
thread_mgr::ThreadKind,
};
@@ -190,8 +191,7 @@ use metrics::{
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
use self::download::download_index_parts;
pub use self::download::gather_tenant_timelines_index_parts;
pub use self::download::download_index_part;
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
lazy_static! {
@@ -301,7 +301,7 @@ pub fn start_local_timeline_sync(
}
Ok(SyncStartupData {
local_timeline_init_statuses,
remote_index: RemoteIndex::default(),
remote_index: RemoteIndex::empty(),
})
}
}
@@ -835,7 +835,7 @@ where
.build()
.context("Failed to create storage sync runtime")?;
let applicable_index_parts = runtime.block_on(download_index_parts(
let applicable_index_parts = runtime.block_on(try_fetch_index_parts(
conf,
&storage,
local_timeline_files.keys().copied().collect(),
@@ -918,48 +918,16 @@ fn storage_sync_loop<P, S>(
});
match loop_step {
ControlFlow::Continue(updated_tenants) => {
if updated_tenants.is_empty() {
debug!("Sync loop step completed, no new tenant states");
ControlFlow::Continue(new_timeline_states) => {
if new_timeline_states.is_empty() {
debug!("Sync loop step completed, no new timeline states");
} else {
info!(
"Sync loop step completed, {} new tenant state update(s)",
updated_tenants.len()
"Sync loop step completed, {} new timeline state update(s)",
new_timeline_states.len()
);
let mut sync_status_updates: HashMap<ZTenantId, HashSet<ZTimelineId>> =
HashMap::new();
let index_accessor = runtime.block_on(index.read());
for tenant_id in updated_tenants {
let tenant_entry = match index_accessor.tenant_entry(&tenant_id) {
Some(tenant_entry) => tenant_entry,
None => {
error!(
"cannot find tenant in remote index for timeline sync update"
);
continue;
}
};
if tenant_entry.has_in_progress_downloads() {
info!("Tenant {tenant_id} has pending timeline downloads, skipping repository registration");
continue;
} else {
info!(
"Tenant {tenant_id} download completed. Picking to register in repository"
);
// Here we assume that if the tenant has no in-progress downloads, it
// means that the last completed timeline download triggered this
// sync status update. So we look at the index for the available timelines
// and register them all at once in the repository, so that the download
// registration is submitted to the repository in a single operation
// and it can apply them to its internal timeline map at once.
sync_status_updates
.insert(tenant_id, tenant_entry.keys().copied().collect());
}
}
drop(index_accessor);
// Batch the timeline download registration to ensure that the external registration code won't block any already running tasks.
attach_downloaded_tenants(conf, &index, sync_status_updates);
apply_timeline_sync_status_updates(conf, &index, new_timeline_states);
}
}
ControlFlow::Break(()) => {
@@ -970,14 +938,6 @@ fn storage_sync_loop<P, S>(
}
}
// needed to check whether the download happened
// more informative than just a bool
#[derive(Debug)]
enum DownloadMarker {
Downloaded,
Nothing,
}
async fn process_batches<P, S>(
conf: &'static PageServerConf,
max_sync_errors: NonZeroU32,
@@ -985,7 +945,7 @@ async fn process_batches<P, S>(
index: &RemoteIndex,
batched_tasks: HashMap<ZTenantTimelineId, SyncTaskBatch>,
sync_queue: &SyncQueue,
) -> HashSet<ZTenantId>
) -> HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncStatusUpdate>>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1010,19 +970,22 @@ where
})
.collect::<FuturesUnordered<_>>();
let mut downloaded_timelines = HashSet::new();
let mut new_timeline_states: HashMap<
ZTenantId,
HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
> = HashMap::new();
while let Some((sync_id, download_marker)) = sync_results.next().await {
debug!(
"Finished storage sync task for sync id {sync_id} download marker {:?}",
download_marker
);
if matches!(download_marker, DownloadMarker::Downloaded) {
downloaded_timelines.insert(sync_id.tenant_id);
while let Some((sync_id, state_update)) = sync_results.next().await {
debug!("Finished storage sync task for sync id {sync_id}");
if let Some(state_update) = state_update {
new_timeline_states
.entry(sync_id.tenant_id)
.or_default()
.insert(sync_id.timeline_id, state_update);
}
}
downloaded_timelines
new_timeline_states
}
async fn process_sync_task_batch<P, S>(
@@ -1031,7 +994,7 @@ async fn process_sync_task_batch<P, S>(
max_sync_errors: NonZeroU32,
sync_id: ZTenantTimelineId,
batch: SyncTaskBatch,
) -> DownloadMarker
) -> Option<TimelineSyncStatusUpdate>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1116,7 +1079,7 @@ where
}
}
}
DownloadMarker::Nothing
None
}
.instrument(info_span!("download_timeline_data")),
);
@@ -1170,7 +1133,7 @@ async fn download_timeline_data<P, S>(
new_download_data: SyncData<LayersDownload>,
sync_start: Instant,
task_name: &str,
) -> DownloadMarker
) -> Option<TimelineSyncStatusUpdate>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1199,7 +1162,7 @@ where
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
Ok(()) => {
register_sync_status(sync_id, sync_start, task_name, Some(true));
return DownloadMarker::Downloaded;
return Some(TimelineSyncStatusUpdate::Downloaded);
}
Err(e) => {
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
@@ -1215,7 +1178,7 @@ where
}
}
DownloadMarker::Nothing
None
}
async fn update_local_metadata(
@@ -1495,6 +1458,35 @@ async fn validate_task_retries<T>(
ControlFlow::Continue(sync_data)
}
async fn try_fetch_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
keys: HashSet<ZTenantTimelineId>,
) -> HashMap<ZTenantTimelineId, IndexPart>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let mut index_parts = HashMap::with_capacity(keys.len());
let mut part_downloads = keys
.into_iter()
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
.collect::<FuturesUnordered<_>>();
while let Some((id, part_upload_result)) = part_downloads.next().await {
match part_upload_result {
Ok(index_part) => {
debug!("Successfully fetched index part for {id}");
index_parts.insert(id, index_part);
}
Err(e) => warn!("Failed to fetch index part for {id}: {e}"),
}
}
index_parts
}
fn schedule_first_sync_tasks(
index: &mut RemoteTimelineIndex,
sync_queue: &SyncQueue,
@@ -1557,7 +1549,6 @@ fn schedule_first_sync_tasks(
local_timeline_init_statuses
}
/// bool in return value stands for awaits_download
fn compare_local_and_remote_timeline(
new_sync_tasks: &mut VecDeque<(ZTenantTimelineId, SyncTask)>,
sync_id: ZTenantTimelineId,
@@ -1567,6 +1558,14 @@ fn compare_local_and_remote_timeline(
) -> (LocalTimelineInitStatus, bool) {
let remote_files = remote_entry.stored_files();
// TODO: we probably need more sophisticated logic here.
// If more data is available remotely, can we just download what's there
// without trying to upload anything? It may be tricky and needs further investigation.
// For now it looks strange that we can request an upload
// and a download for the same timeline simultaneously
// (the upload needs to cover only previously unsynced files, not the whole timeline dir).
// If one of the tasks fails, they will be reordered in the queue, which can lead
// to the timeline being stuck in an evicted state.
let number_of_layers_to_download = remote_files.difference(&local_files).count();
let (initial_timeline_status, awaits_download) = if number_of_layers_to_download > 0 {
new_sync_tasks.push_back((

View File

@@ -1,15 +1,10 @@
//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
mem,
path::Path,
};
use std::{collections::HashSet, fmt::Debug, path::Path};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use remote_storage::{path_with_suffix_extension, DownloadError, RemoteObjectName, RemoteStorage};
use remote_storage::{path_with_suffix_extension, RemoteStorage};
use tokio::{
fs,
io::{self, AsyncWriteExt},
@@ -19,7 +14,7 @@ use tracing::{debug, error, info, warn};
use crate::{
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
use utils::zid::ZTenantTimelineId;
use super::{
index::{IndexPart, RemoteTimeline},
@@ -28,155 +23,12 @@ use super::{
pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
// We collect the timelines available remotely for each tenant.
// If we failed to gather all index parts (due to an error),
// the Poisoned variant is returned.
// When the data is received successfully without errors, the Present variant is used.
pub enum TenantIndexParts {
Poisoned {
present: HashMap<ZTimelineId, IndexPart>,
missing: HashSet<ZTimelineId>,
},
Present(HashMap<ZTimelineId, IndexPart>),
}
impl TenantIndexParts {
fn add_poisoned(&mut self, timeline_id: ZTimelineId) {
match self {
TenantIndexParts::Poisoned { missing, .. } => {
missing.insert(timeline_id);
}
TenantIndexParts::Present(present) => {
*self = TenantIndexParts::Poisoned {
present: mem::take(present),
missing: HashSet::from([timeline_id]),
}
}
}
}
}
impl Default for TenantIndexParts {
fn default() -> Self {
TenantIndexParts::Present(HashMap::default())
}
}
pub async fn download_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
keys: HashSet<ZTenantTimelineId>,
) -> HashMap<ZTenantId, TenantIndexParts>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let mut index_parts: HashMap<ZTenantId, TenantIndexParts> = HashMap::new();
let mut part_downloads = keys
.into_iter()
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
.collect::<FuturesUnordered<_>>();
while let Some((id, part_upload_result)) = part_downloads.next().await {
match part_upload_result {
Ok(index_part) => {
debug!("Successfully fetched index part for {id}");
match index_parts.entry(id.tenant_id).or_default() {
TenantIndexParts::Poisoned { present, .. } => {
present.insert(id.timeline_id, index_part);
}
TenantIndexParts::Present(parts) => {
parts.insert(id.timeline_id, index_part);
}
}
}
Err(download_error) => {
match download_error {
DownloadError::NotFound => {
// that's ok because it means, for example, that we didn't upload something we have locally
}
e => {
let tenant_parts = index_parts.entry(id.tenant_id).or_default();
tenant_parts.add_poisoned(id.timeline_id);
error!(
"Failed to fetch index part for {id}: {e} poisoning tenant index parts"
);
}
}
}
}
}
index_parts
}
/// Note: The function is rather expensive from an S3 access point of view: it will execute ceil(N/1000) + N requests.
/// At least one request to obtain the list of tenant timelines (more requests if there are more than 1000 timelines),
/// and then it will attempt to download all index files that belong to these timelines.
pub async fn gather_tenant_timelines_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
tenant_id: ZTenantId,
) -> anyhow::Result<HashMap<ZTimelineId, IndexPart>>
where
P: RemoteObjectName + Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let tenant_path = conf.timelines_path(&tenant_id);
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
format!(
"Failed to get tenant storage path for local path '{}'",
tenant_path.display()
)
})?;
let timelines = storage
.list_prefixes(Some(tenant_storage_path))
.await
.with_context(|| {
format!(
"Failed to list tenant storage path to get remote timelines to download: {}",
tenant_id
)
})?;
let mut sync_ids = HashSet::new();
for timeline_remote_storage_key in timelines {
let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| {
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}")
})?;
let timeline_id: ZTimelineId = object_name
.parse()
.with_context(|| {
format!("failed to parse object name into timeline id for tenant {tenant_id} '{object_name}'")
})?;
sync_ids.insert(ZTenantTimelineId {
tenant_id,
timeline_id,
});
}
match download_index_parts(conf, storage, sync_ids)
.await
.remove(&tenant_id)
.ok_or_else(|| anyhow::anyhow!("Missing tenant index parts. This is a bug."))?
{
TenantIndexParts::Poisoned { missing, .. } => {
anyhow::bail!("Failed to download index parts for all timelines. Missing {missing:?}")
}
TenantIndexParts::Present(parts) => Ok(parts),
}
}
/// Retrieves index data from the remote storage for a given timeline.
async fn download_index_part<P, S>(
pub async fn download_index_part<P, S>(
conf: &'static PageServerConf,
storage: &S,
sync_id: ZTenantTimelineId,
) -> Result<IndexPart, DownloadError>
) -> anyhow::Result<IndexPart>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -191,11 +43,15 @@ where
"Failed to get the index part storage path for local path '{}'",
index_part_path.display()
)
})
.map_err(DownloadError::BadInput)?;
let mut index_part_download = storage.download(&part_storage_path).await?;
})?;
let mut index_part_download =
storage
.download(&part_storage_path)
.await
.with_context(|| {
format!("Failed to open download stream for for storage path {part_storage_path:?}")
})?;
let mut index_part_bytes = Vec::new();
io::copy(
&mut index_part_download.download_stream,
@@ -204,16 +60,11 @@ where
.await
.with_context(|| {
format!("Failed to download an index part from storage path {part_storage_path:?}")
})
.map_err(DownloadError::Other)?;
})?;
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)
.with_context(|| {
format!(
"Failed to deserialize index part file from storage path '{part_storage_path:?}'"
)
})
.map_err(DownloadError::Other)?;
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes).with_context(|| {
format!("Failed to deserialize index part file from storage path '{part_storage_path:?}'")
})?;
let missing_files = index_part.missing_files();
if !missing_files.is_empty() {

View File

@@ -2,7 +2,6 @@
//! Able to restore itself from the storage index parts that are located in every timeline's remote directory and contain all data about
//! remote timeline layers and their metadata.
use std::ops::{Deref, DerefMut};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
@@ -13,15 +12,9 @@ use anyhow::{anyhow, Context, Ok};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use tokio::sync::RwLock;
use tracing::log::warn;
use crate::{config::PageServerConf, layered_repository::metadata::TimelineMetadata};
use utils::{
lsn::Lsn,
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
};
use super::download::TenantIndexParts;
use utils::{lsn::Lsn, zid::ZTenantTimelineId};
/// A part of the filesystem path, that needs a root to become a path again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
@@ -48,74 +41,38 @@ impl RelativePath {
}
}
#[derive(Debug, Clone, Default)]
pub struct TenantEntry(HashMap<ZTimelineId, RemoteTimeline>);
impl TenantEntry {
pub fn has_in_progress_downloads(&self) -> bool {
self.values()
.any(|remote_timeline| remote_timeline.awaits_download)
}
}
impl Deref for TenantEntry {
type Target = HashMap<ZTimelineId, RemoteTimeline>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for TenantEntry {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl From<HashMap<ZTimelineId, RemoteTimeline>> for TenantEntry {
fn from(inner: HashMap<ZTimelineId, RemoteTimeline>) -> Self {
Self(inner)
}
}
/// An index to track tenant files that exist on the remote storage.
#[derive(Debug, Clone, Default)]
#[derive(Debug, Clone)]
pub struct RemoteTimelineIndex {
entries: HashMap<ZTenantId, TenantEntry>,
timeline_entries: HashMap<ZTenantTimelineId, RemoteTimeline>,
}
/// A wrapper that synchronizes access to the index; it should be created and used before dealing with any [`RemoteTimelineIndex`].
#[derive(Default)]
pub struct RemoteIndex(Arc<RwLock<RemoteTimelineIndex>>);
impl RemoteIndex {
pub fn empty() -> Self {
Self(Arc::new(RwLock::new(RemoteTimelineIndex {
timeline_entries: HashMap::new(),
})))
}
pub fn from_parts(
conf: &'static PageServerConf,
index_parts: HashMap<ZTenantId, TenantIndexParts>,
index_parts: HashMap<ZTenantTimelineId, IndexPart>,
) -> anyhow::Result<Self> {
let mut entries: HashMap<ZTenantId, TenantEntry> = HashMap::new();
let mut timeline_entries = HashMap::new();
for (tenant_id, index_parts) in index_parts {
match index_parts {
// TODO: should we schedule a retry so it can be recovered? otherwise we can revive it only through detach/attach or pageserver restart
TenantIndexParts::Poisoned { missing, ..} => warn!("skipping tenant_id set up for remote index because the index download has failed for timeline(s): {missing:?}"),
TenantIndexParts::Present(timelines) => {
for (timeline_id, index_part) in timelines {
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
let remote_timeline =
RemoteTimeline::from_index_part(&timeline_path, index_part)
.context("Failed to restore remote timeline data from index part")?;
entries
.entry(tenant_id)
.or_default()
.insert(timeline_id, remote_timeline);
}
},
}
for (sync_id, index_part) in index_parts {
let timeline_path = conf.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
.context("Failed to restore remote timeline data from index part")?;
timeline_entries.insert(sync_id, remote_timeline);
}
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex { entries }))))
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex {
timeline_entries,
}))))
}
pub async fn read(&self) -> tokio::sync::RwLockReadGuard<'_, RemoteTimelineIndex> {
@@ -134,67 +91,20 @@ impl Clone for RemoteIndex {
}
impl RemoteTimelineIndex {
pub fn timeline_entry(
&self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: &ZTenantTimelineId,
) -> Option<&RemoteTimeline> {
self.entries.get(tenant_id)?.get(timeline_id)
pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&RemoteTimeline> {
self.timeline_entries.get(id)
}
pub fn timeline_entry_mut(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: &ZTenantTimelineId,
) -> Option<&mut RemoteTimeline> {
self.entries.get_mut(tenant_id)?.get_mut(timeline_id)
pub fn timeline_entry_mut(&mut self, id: &ZTenantTimelineId) -> Option<&mut RemoteTimeline> {
self.timeline_entries.get_mut(id)
}
pub fn add_timeline_entry(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: ZTenantTimelineId,
entry: RemoteTimeline,
) {
self.entries
.entry(tenant_id)
.or_default()
.insert(timeline_id, entry);
pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: RemoteTimeline) {
self.timeline_entries.insert(id, entry);
}
pub fn remove_timeline_entry(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: ZTenantTimelineId,
) -> Option<RemoteTimeline> {
self.entries
.entry(tenant_id)
.or_default()
.remove(&timeline_id)
}
pub fn tenant_entry(&self, tenant_id: &ZTenantId) -> Option<&TenantEntry> {
self.entries.get(tenant_id)
}
pub fn tenant_entry_mut(&mut self, tenant_id: &ZTenantId) -> Option<&mut TenantEntry> {
self.entries.get_mut(tenant_id)
}
pub fn add_tenant_entry(&mut self, tenant_id: ZTenantId) -> &mut TenantEntry {
self.entries.entry(tenant_id).or_default()
}
pub fn remove_tenant_entry(&mut self, tenant_id: &ZTenantId) -> Option<TenantEntry> {
self.entries.remove(tenant_id)
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
self.timeline_entries.keys().copied()
}
pub fn set_awaits_download(

View File

@@ -4,8 +4,8 @@
use crate::config::PageServerConf;
use crate::layered_repository::{load_metadata, LayeredRepository};
use crate::pgdatadir_mapping::DatadirTimeline;
use crate::repository::Repository;
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
use crate::repository::{Repository, TimelineSyncStatusUpdate};
use crate::storage_sync::index::RemoteIndex;
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
use crate::tenant_config::TenantConfOpt;
use crate::thread_mgr::ThreadKind;
@@ -13,11 +13,11 @@ use crate::timelines::CreateRepo;
use crate::walredo::PostgresRedoManager;
use crate::{thread_mgr, timelines, walreceiver};
use crate::{DatadirTimelineImpl, RepositoryImpl};
use anyhow::Context;
use anyhow::{bail, Context};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::fmt;
use std::sync::Arc;
use tokio::sync::mpsc;
@@ -157,13 +157,7 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
// loading a tenant is serious, but it's better to complete the startup and
// serve other tenants, than fail completely.
error!("Failed to initialize local tenant {tenant_id}: {:?}", err);
if let Err(err) = set_tenant_state(tenant_id, TenantState::Broken) {
error!(
"Failed to set tenant state to broken {tenant_id}: {:?}",
err
);
}
set_tenant_state(tenant_id, TenantState::Broken)?;
}
}
@@ -171,51 +165,44 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
}
pub enum LocalTimelineUpdate {
Detach {
id: ZTenantTimelineId,
// used to signal to the detach caller that walreceiver successfully terminated for specified id
join_confirmation_sender: std::sync::mpsc::Sender<()>,
},
Attach {
id: ZTenantTimelineId,
datadir: Arc<DatadirTimelineImpl>,
},
Detach(ZTenantTimelineId),
Attach(ZTenantTimelineId, Arc<DatadirTimelineImpl>),
}
impl std::fmt::Debug for LocalTimelineUpdate {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Detach { id, .. } => f.debug_tuple("Remove").field(id).finish(),
Self::Attach { id, .. } => f.debug_tuple("Add").field(id).finish(),
Self::Detach(ttid) => f.debug_tuple("Remove").field(ttid).finish(),
Self::Attach(ttid, _) => f.debug_tuple("Add").field(ttid).finish(),
}
}
}
/// Updates tenants' repositories, changing their timelines state in memory.
pub fn attach_downloaded_tenants(
pub fn apply_timeline_sync_status_updates(
conf: &'static PageServerConf,
remote_index: &RemoteIndex,
sync_status_updates: HashMap<ZTenantId, HashSet<ZTimelineId>>,
sync_status_updates: HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncStatusUpdate>>,
) {
if sync_status_updates.is_empty() {
debug!("No sync status updates to apply");
debug!("no sync status updates to apply");
return;
}
for (tenant_id, downloaded_timelines) in sync_status_updates {
info!(
"Registering downlloaded timelines for {tenant_id} {} timelines",
downloaded_timelines.len()
);
debug!("Downloaded timelines: {downloaded_timelines:?}");
info!(
"Applying sync status updates for {} timelines",
sync_status_updates.len()
);
debug!("Sync status updates: {sync_status_updates:?}");
for (tenant_id, status_updates) in sync_status_updates {
let repo = match load_local_repo(conf, tenant_id, remote_index) {
Ok(repo) => repo,
Err(e) => {
error!("Failed to load repo for tenant {tenant_id} Error: {e:?}");
error!("Failed to load repo for tenant {tenant_id} Error: {e:?}",);
continue;
}
};
match attach_downloaded_tenant(&repo, downloaded_timelines) {
match apply_timeline_remote_sync_status_updates(&repo, status_updates) {
Ok(()) => info!("successfully applied sync status updates for tenant {tenant_id}"),
Err(e) => error!(
"Failed to apply timeline sync timeline status updates for tenant {tenant_id}: {e:?}"
@@ -400,86 +387,33 @@ pub fn get_local_timeline_with_load(
}
}
pub fn delete_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow::Result<()> {
// Start with the shutdown of timeline tasks (this shuts down the walreceiver).
// It is important that we do not take locks here, and do not check whether the timeline exists,
// because if we hold tenants_state::write_tenants() while awaiting for the threads to join
// we cannot create new timelines or tenants, and that can take quite some time;
// it can even become stuck due to a bug, making the whole pageserver unavailable for some operations.
// So this is how we deal with concurrent delete requests: shut everything down, wait for confirmation,
// and only then try to remove the timeline from the in-memory state. That is the point where concurrent
// requests synchronize and either fail with a not-found error or succeed.
pub fn detach_timeline(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<()> {
// shutdown the timeline threads (this shuts down the walreceiver)
thread_mgr::shutdown_threads(None, Some(tenant_id), Some(timeline_id));
let (sender, receiver) = std::sync::mpsc::channel::<()>();
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
id: ZTenantTimelineId::new(tenant_id, timeline_id),
join_confirmation_sender: sender,
});
debug!("waiting for wal receiver to shutdown");
let _ = receiver.recv();
debug!("wal receiver shutdown confirmed");
debug!("waiting for threads to shutdown");
thread_mgr::shutdown_threads(None, None, Some(timeline_id));
debug!("thread shutdown completed");
match tenants_state::write_tenants().get_mut(&tenant_id) {
Some(tenant) => {
tenant.repo.delete_timeline(timeline_id)?;
tenant
.repo
.detach_timeline(timeline_id)
.context("Failed to detach inmem tenant timeline")?;
tenant.local_timelines.remove(&timeline_id);
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach(
ZTenantTimelineId::new(tenant_id, timeline_id),
));
}
None => anyhow::bail!("Tenant {tenant_id} not found in local tenant state"),
None => bail!("Tenant {tenant_id} not found in local tenant state"),
}
Ok(())
}
pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> anyhow::Result<()> {
set_tenant_state(tenant_id, TenantState::Stopping)?;
// shutdown the tenant and timeline threads: gc, compaction, page service threads)
thread_mgr::shutdown_threads(None, Some(tenant_id), None);
// FIXME should we protect somehow from starting new threads/walreceivers when tenant is in stopping state?
// send stop signal to wal receiver and collect join handles while holding the lock
let walreceiver_join_handles = {
let tenants = tenants_state::write_tenants();
let tenant = tenants.get(&tenant_id).context("tenant not found")?;
let mut walreceiver_join_handles = Vec::with_capacity(tenant.local_timelines.len());
for timeline_id in tenant.local_timelines.keys() {
let (sender, receiver) = std::sync::mpsc::channel::<()>();
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
id: ZTenantTimelineId::new(tenant_id, *timeline_id),
join_confirmation_sender: sender,
});
walreceiver_join_handles.push((*timeline_id, receiver));
}
// drop the tenants lock
walreceiver_join_handles
};
// wait for wal receivers to stop without holding the lock, because walreceiver
// will attempt to change tenant state which is protected by the same global tenants lock.
// TODO do we need a timeout here? how to handle it?
// recv_timeout is broken: https://github.com/rust-lang/rust/issues/94518#issuecomment-1057440631
// need to use crossbeam-channel
for (timeline_id, join_handle) in walreceiver_join_handles {
info!("waiting for wal receiver to shutdown timeline_id {timeline_id}");
join_handle.recv().context("failed to join walreceiver")?;
info!("wal receiver shutdown confirmed timeline_id {timeline_id}");
}
tenants_state::write_tenants().remove(&tenant_id);
// If removal fails there will be no way to successfully retry detach,
// because the tenant no longer exists in the in-memory map. And it needs to be removed from it
// before we remove files, because it holds references to the repository,
// which references ephemeral files that are deleted on drop. So if we kept these references,
// the code would attempt to remove files which no longer exist. This can be fixed by having a shutdown
// mechanism for the repository that cleans up temporary data, to avoid any references to ephemeral files.
let local_tenant_directory = conf.tenant_path(&tenant_id);
std::fs::remove_dir_all(&local_tenant_directory).with_context(|| {
let local_timeline_directory = conf.timeline_path(&timeline_id, &tenant_id);
std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
format!(
"Failed to remove local timeline directory '{}'",
local_tenant_directory.display()
local_timeline_directory.display()
)
})?;
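The comments above describe a locking protocol rather than an implementation detail: signal the walreceivers and collect their confirmation channels while holding the tenants lock, release the lock before waiting (the walreceivers themselves need that lock to change tenant state), and only then drop the tenant from the in-memory map and remove its files. A minimal, hedged Python sketch of that pattern, with purely illustrative names, might look like this:

import threading
import queue

# Illustrative stand-ins only; these names are not the pageserver's real API.
tenants_lock = threading.Lock()
tenants = {"tenant-a": {"tl-1": None, "tl-2": None}}

def walreceiver_loop(stop, confirm):
    stop.wait()
    # The real walreceiver may also take the tenants lock to update tenant
    # state, which is exactly why detach must not hold it while waiting.
    confirm.put(None)

def start_walreceivers():
    with tenants_lock:
        for timelines in tenants.values():
            for timeline_id in timelines:
                stop, confirm = threading.Event(), queue.Queue(maxsize=1)
                timelines[timeline_id] = (stop, confirm)
                threading.Thread(target=walreceiver_loop, args=(stop, confirm),
                                 daemon=True).start()

def detach_tenant(tenant_id):
    # Phase 1: under the lock, signal shutdown and collect confirmation channels.
    with tenants_lock:
        handles = list(tenants[tenant_id].values())
        for stop, _ in handles:
            stop.set()
    # Phase 2: wait for every walreceiver to confirm, without holding the lock.
    for _, confirm in handles:
        confirm.get()
    # Phase 3: only now drop the tenant from the shared map (and, in the real
    # code, remove its files on disk).
    with tenants_lock:
        tenants.pop(tenant_id, None)

start_walreceivers()
detach_tenant("tenant-a")
print(tenants)  # {}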
@@ -500,10 +434,10 @@ fn load_local_timeline(
));
page_tline.init_logical_size()?;
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach {
id: ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
datadir: Arc::clone(&page_tline),
});
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach(
ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
Arc::clone(&page_tline),
));
Ok(page_tline)
}
@@ -513,27 +447,15 @@ fn load_local_timeline(
pub struct TenantInfo {
#[serde_as(as = "DisplayFromStr")]
pub id: ZTenantId,
pub state: Option<TenantState>,
pub has_in_progress_downloads: Option<bool>,
pub state: TenantState,
}
pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
pub fn list_tenants() -> Vec<TenantInfo> {
tenants_state::read_tenants()
.iter()
.map(|(id, tenant)| {
let has_in_progress_downloads = remote_index
.tenant_entry(id)
.map(|entry| entry.has_in_progress_downloads());
if has_in_progress_downloads.is_none() {
error!("timeline is not found in remote index while it is present in the tenants registry")
}
TenantInfo {
id: *id,
state: Some(tenant.state),
has_in_progress_downloads,
}
.map(|(id, tenant)| TenantInfo {
id: *id,
state: tenant.state,
})
.collect()
}
@@ -545,73 +467,74 @@ pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
/// A timeline is categorized as broken when any of the following conditions is true:
/// - failed to load the timeline's metadata
/// - the timeline's disk consistent LSN is zero
fn check_broken_timeline(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<()> {
let metadata =
load_metadata(conf, timeline_id, tenant_id).context("failed to load metadata")?;
fn check_broken_timeline(repo: &LayeredRepository, timeline_id: ZTimelineId) -> anyhow::Result<()> {
let metadata = load_metadata(repo.conf, timeline_id, repo.tenant_id())
.context("failed to load metadata")?;
// A timeline with zero disk consistent LSN can happen when the page server
// failed to checkpoint the timeline import data when creating that timeline.
if metadata.disk_consistent_lsn() == Lsn::INVALID {
anyhow::bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
}
Ok(())
}
/// Note: all timelines are attached at once if and only if all of them are locally complete
fn init_local_repository(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
local_timeline_init_statuses: HashMap<ZTimelineId, LocalTimelineInitStatus>,
remote_index: &RemoteIndex,
) -> anyhow::Result<(), anyhow::Error> {
let mut timelines_to_attach = HashSet::new();
// initialize local tenant
let repo = load_local_repo(conf, tenant_id, remote_index)
.with_context(|| format!("Failed to load repo for tenant {tenant_id}"))?;
let mut status_updates = HashMap::with_capacity(local_timeline_init_statuses.len());
for (timeline_id, init_status) in local_timeline_init_statuses {
match init_status {
LocalTimelineInitStatus::LocallyComplete => {
debug!("timeline {timeline_id} for tenant {tenant_id} is locally complete, registering it in repository");
check_broken_timeline(conf, tenant_id, timeline_id)
.context("found broken timeline")?;
timelines_to_attach.insert(timeline_id);
if let Err(err) = check_broken_timeline(&repo, timeline_id) {
info!(
"Found a broken timeline {timeline_id} (err={err:?}), skip registering it in repository"
);
} else {
status_updates.insert(timeline_id, TimelineSyncStatusUpdate::Downloaded);
}
}
LocalTimelineInitStatus::NeedsSync => {
debug!(
"timeline {tenant_id} for tenant {timeline_id} needs sync, \
so skipped for adding into repository until sync is finished"
);
return Ok(());
}
}
}
// initialize local tenant
let repo = load_local_repo(conf, tenant_id, remote_index)
.with_context(|| format!("Failed to load repo for tenant {tenant_id}"))?;
// Let's fail here loudly to be on the safe side.
// XXX: It may be a better API to actually distinguish between repository startup
// and processing of newly downloaded timelines.
attach_downloaded_tenant(&repo, timelines_to_attach)
apply_timeline_remote_sync_status_updates(&repo, status_updates)
.with_context(|| format!("Failed to bootstrap timelines for tenant {tenant_id}"))?;
Ok(())
}
fn attach_downloaded_tenant(
fn apply_timeline_remote_sync_status_updates(
repo: &LayeredRepository,
downloaded_timelines: HashSet<ZTimelineId>,
status_updates: HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
) -> anyhow::Result<()> {
let mut registration_queue = Vec::with_capacity(downloaded_timelines.len());
let mut registration_queue = Vec::with_capacity(status_updates.len());
// first need to register the in-mem representations, to avoid missing ancestors during the local disk data registration
for timeline_id in downloaded_timelines {
repo.attach_timeline(timeline_id).with_context(|| {
format!("Failed to load timeline {timeline_id} into in-memory repository")
})?;
registration_queue.push(timeline_id);
for (timeline_id, status_update) in status_updates {
repo.apply_timeline_remote_sync_status_update(timeline_id, status_update)
.with_context(|| {
format!("Failed to load timeline {timeline_id} into in-memory repository")
})?;
match status_update {
TimelineSyncStatusUpdate::Downloaded => registration_queue.push(timeline_id),
}
}
for timeline_id in registration_queue {
@@ -619,7 +542,7 @@ fn attach_downloaded_tenant(
match tenants_state::write_tenants().get_mut(&tenant_id) {
Some(tenant) => match tenant.local_timelines.entry(timeline_id) {
Entry::Occupied(_) => {
anyhow::bail!("Local timeline {timeline_id} already registered")
bail!("Local timeline {timeline_id} already registered")
}
Entry::Vacant(v) => {
v.insert(load_local_timeline(repo, timeline_id).with_context(|| {
@@ -627,7 +550,7 @@ fn attach_downloaded_tenant(
})?);
}
},
None => anyhow::bail!(
None => bail!(
"Tenant {} not found in local tenant state",
repo.tenant_id()
),

View File

@@ -202,7 +202,7 @@ pub fn create_repo(
// anymore, but I think that could still happen.
let wal_redo_manager = Arc::new(crate::walredo::DummyRedoManager {});
(wal_redo_manager as _, RemoteIndex::default())
(wal_redo_manager as _, RemoteIndex::empty())
}
};
@@ -347,7 +347,7 @@ pub(crate) fn create_timeline(
tenant_id: ZTenantId,
new_timeline_id: Option<ZTimelineId>,
ancestor_timeline_id: Option<ZTimelineId>,
mut ancestor_start_lsn: Option<Lsn>,
ancestor_start_lsn: Option<Lsn>,
) -> Result<Option<TimelineInfo>> {
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
@@ -357,35 +357,41 @@ pub(crate) fn create_timeline(
return Ok(None);
}
let mut start_lsn = ancestor_start_lsn.unwrap_or(Lsn(0));
let new_timeline_info = match ancestor_timeline_id {
Some(ancestor_timeline_id) => {
let ancestor_timeline = repo
.get_timeline_load(ancestor_timeline_id)
.context("Cannot branch off the timeline that's not present locally")?;
if let Some(lsn) = ancestor_start_lsn.as_mut() {
if start_lsn == Lsn(0) {
// Find end of WAL on the old timeline
let end_of_wal = ancestor_timeline.get_last_record_lsn();
info!("branching at end of WAL: {}", end_of_wal);
start_lsn = end_of_wal;
} else {
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
*lsn = lsn.align();
ancestor_timeline.wait_lsn(*lsn)?;
ancestor_timeline.wait_lsn(start_lsn)?;
}
start_lsn = start_lsn.align();
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > *lsn {
// can we safely just branch from the ancestor instead?
anyhow::bail!(
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > start_lsn {
// can we safely just branch from the ancestor instead?
anyhow::bail!(
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
lsn,
start_lsn,
ancestor_timeline_id,
ancestor_ancestor_lsn,
);
}
}
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?;
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, start_lsn)?;
// load the timeline into memory
let loaded_timeline =
tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;
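The hunk above is essentially a policy for choosing and validating the branch start LSN: default to the ancestor's end of WAL when no LSN was requested, otherwise align the requested LSN (after waiting for the ancestor to catch up to it), and reject anything below the ancestor's own ancestor LSN. A hedged Python sketch of that decision logic; the function name, alignment granularity, and error text are illustrative assumptions, not the real API:

from typing import Optional

def resolve_branch_start_lsn(requested_lsn: Optional[int],
                             end_of_wal: int,
                             ancestor_ancestor_lsn: int) -> int:
    # Illustrative only: mirrors the checks above, not pageserver code.
    if not requested_lsn:
        # No explicit branch point: branch at the ancestor's end of WAL.
        start_lsn = end_of_wal
    else:
        # Stand-in for Lsn::align(); the real code also waits (wait_lsn) until
        # the ancestor has processed WAL up to this point before branching.
        start_lsn = (requested_lsn + 7) // 8 * 8
    if start_lsn < ancestor_ancestor_lsn:
        raise ValueError(
            f"invalid start lsn {start_lsn} for ancestor timeline: "
            f"less than timeline ancestor lsn {ancestor_ancestor_lsn}")
    return start_lsn

print(resolve_branch_start_lsn(None, end_of_wal=4096, ancestor_ancestor_lsn=1024))
print(resolve_branch_start_lsn(2000, end_of_wal=4096, ancestor_ancestor_lsn=1024))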

View File

@@ -264,10 +264,7 @@ async fn wal_receiver_main_thread_loop_step<'a>(
info!("Processing timeline update: {update:?}");
match update {
// Timeline got detached, stop all related tasks and remove public timeline data.
LocalTimelineUpdate::Detach {
id,
join_confirmation_sender,
} => {
LocalTimelineUpdate::Detach(id) => {
match local_timeline_wal_receivers.get_mut(&id.tenant_id) {
Some(wal_receivers) => {
if let hash_map::Entry::Occupied(o) = wal_receivers.entry(id.timeline_id) {
@@ -283,48 +280,44 @@ async fn wal_receiver_main_thread_loop_step<'a>(
};
{
WAL_RECEIVER_ENTRIES.write().await.remove(&id);
if let Err(e) = join_confirmation_sender.send(()) {
warn!("cannot send wal_receiver shutdown confirmation {e}")
} else {
info!("confirm walreceiver shutdown for {id}");
}
}
}
// Timeline got attached, retrieve all necessary information to start its broker loop and maintain this loop endlessly.
LocalTimelineUpdate::Attach { id, datadir } => {
LocalTimelineUpdate::Attach(new_id, new_timeline) => {
let timeline_connection_managers = local_timeline_wal_receivers
.entry(id.tenant_id)
.entry(new_id.tenant_id)
.or_default();
if timeline_connection_managers.is_empty() {
if let Err(e) = change_tenant_state(id.tenant_id, TenantState::Active).await
if let Err(e) =
change_tenant_state(new_id.tenant_id, TenantState::Active).await
{
error!("Failed to make tenant active for id {id}: {e:#}");
error!("Failed to make tenant active for id {new_id}: {e:#}");
return;
}
}
let vacant_connection_manager_entry =
match timeline_connection_managers.entry(id.timeline_id) {
match timeline_connection_managers.entry(new_id.timeline_id) {
hash_map::Entry::Occupied(_) => {
debug!("Attepted to readd an existing timeline {id}, ignoring");
debug!("Attepted to readd an existing timeline {new_id}, ignoring");
return;
}
hash_map::Entry::Vacant(v) => v,
};
let (wal_connect_timeout, lagging_wal_timeout, max_lsn_wal_lag) =
match fetch_tenant_settings(id.tenant_id).await {
match fetch_tenant_settings(new_id.tenant_id).await {
Ok(settings) => settings,
Err(e) => {
error!("Failed to fetch tenant settings for id {id}: {e:#}");
error!("Failed to fetch tenant settings for id {new_id}: {e:#}");
return;
}
};
{
WAL_RECEIVER_ENTRIES.write().await.insert(
id,
new_id,
WalReceiverEntry {
wal_producer_connstr: None,
last_received_msg_lsn: None,
@@ -335,10 +328,10 @@ async fn wal_receiver_main_thread_loop_step<'a>(
vacant_connection_manager_entry.insert(
connection_manager::spawn_connection_manager_task(
id,
new_id,
broker_prefix.to_owned(),
etcd_client.clone(),
datadir,
new_timeline,
wal_connect_timeout,
lagging_wal_timeout,
max_lsn_wal_lag,

View File

@@ -623,7 +623,6 @@ impl PostgresRedoProcess {
.env_clear()
.env("LD_LIBRARY_PATH", conf.pg_lib_dir())
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir())
.close_fds()
.output()
.map_err(|e| Error::new(e.kind(), format!("failed to execute initdb: {}", e)))?;

52
poetry.lock generated
View File

@@ -544,21 +544,20 @@ test = ["pytest (>=6.2.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pr
[[package]]
name = "docker"
version = "4.2.2"
version = "5.0.3"
description = "A Python library for the Docker Engine API."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
python-versions = ">=3.6"
[package.dependencies]
pypiwin32 = {version = "223", markers = "sys_platform == \"win32\" and python_version >= \"3.6\""}
pywin32 = {version = "227", markers = "sys_platform == \"win32\""}
requests = ">=2.14.2,<2.18.0 || >2.18.0"
six = ">=1.4.0"
websocket-client = ">=0.32.0"
[package.extras]
ssh = ["paramiko (>=2.4.2)"]
tls = ["pyOpenSSL (>=17.5.0)", "cryptography (>=1.3.4)", "idna (>=2.0.0)"]
tls = ["pyOpenSSL (>=17.5.0)", "cryptography (>=3.4.7)", "idna (>=2.0.0)"]
[[package]]
name = "ecdsa"
@@ -1004,17 +1003,6 @@ python-versions = ">=3.6"
[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
name = "pypiwin32"
version = "223"
description = ""
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
pywin32 = ">=223"
[[package]]
name = "pyrsistent"
version = "0.18.1"
@@ -1136,7 +1124,7 @@ python-versions = "*"
[[package]]
name = "pywin32"
version = "301"
version = "227"
description = "Python for Window Extensions"
category = "main"
optional = false
@@ -1513,8 +1501,8 @@ cryptography = [
{file = "cryptography-36.0.1.tar.gz", hash = "sha256:53e5c1dc3d7a953de055d77bef2ff607ceef7a2aac0353b5d630ab67f7423638"},
]
docker = [
{file = "docker-4.2.2-py2.py3-none-any.whl", hash = "sha256:03a46400c4080cb6f7aa997f881ddd84fef855499ece219d75fbdb53289c17ab"},
{file = "docker-4.2.2.tar.gz", hash = "sha256:26eebadce7e298f55b76a88c4f8802476c5eaddbdbe38dbc6cce8781c47c9b54"},
{file = "docker-5.0.3-py2.py3-none-any.whl", hash = "sha256:7a79bb439e3df59d0a72621775d600bc8bc8b422d285824cb37103eab91d1ce0"},
{file = "docker-5.0.3.tar.gz", hash = "sha256:d916a26b62970e7c2f554110ed6af04c7ccff8e9f81ad17d0d40c75637e227fb"},
]
ecdsa = [
{file = "ecdsa-0.17.0-py2.py3-none-any.whl", hash = "sha256:5cf31d5b33743abe0dfc28999036c849a69d548f994b535e527ee3cb7f3ef676"},
@@ -1814,10 +1802,6 @@ pyparsing = [
{file = "pyparsing-3.0.6-py3-none-any.whl", hash = "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4"},
{file = "pyparsing-3.0.6.tar.gz", hash = "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81"},
]
pypiwin32 = [
{file = "pypiwin32-223-py3-none-any.whl", hash = "sha256:67adf399debc1d5d14dffc1ab5acacb800da569754fafdc576b2a039485aa775"},
{file = "pypiwin32-223.tar.gz", hash = "sha256:71be40c1fbd28594214ecaecb58e7aa8b708eabfa0125c8a109ebd51edbd776a"},
]
pyrsistent = [
{file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"},
{file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"},
@@ -1874,16 +1858,18 @@ pytz = [
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
]
pywin32 = [
{file = "pywin32-301-cp35-cp35m-win32.whl", hash = "sha256:93367c96e3a76dfe5003d8291ae16454ca7d84bb24d721e0b74a07610b7be4a7"},
{file = "pywin32-301-cp35-cp35m-win_amd64.whl", hash = "sha256:9635df6998a70282bd36e7ac2a5cef9ead1627b0a63b17c731312c7a0daebb72"},
{file = "pywin32-301-cp36-cp36m-win32.whl", hash = "sha256:c866f04a182a8cb9b7855de065113bbd2e40524f570db73ef1ee99ff0a5cc2f0"},
{file = "pywin32-301-cp36-cp36m-win_amd64.whl", hash = "sha256:dafa18e95bf2a92f298fe9c582b0e205aca45c55f989937c52c454ce65b93c78"},
{file = "pywin32-301-cp37-cp37m-win32.whl", hash = "sha256:98f62a3f60aa64894a290fb7494bfa0bfa0a199e9e052e1ac293b2ad3cd2818b"},
{file = "pywin32-301-cp37-cp37m-win_amd64.whl", hash = "sha256:fb3b4933e0382ba49305cc6cd3fb18525df7fd96aa434de19ce0878133bf8e4a"},
{file = "pywin32-301-cp38-cp38-win32.whl", hash = "sha256:88981dd3cfb07432625b180f49bf4e179fb8cbb5704cd512e38dd63636af7a17"},
{file = "pywin32-301-cp38-cp38-win_amd64.whl", hash = "sha256:8c9d33968aa7fcddf44e47750e18f3d034c3e443a707688a008a2e52bbef7e96"},
{file = "pywin32-301-cp39-cp39-win32.whl", hash = "sha256:595d397df65f1b2e0beaca63a883ae6d8b6df1cdea85c16ae85f6d2e648133fe"},
{file = "pywin32-301-cp39-cp39-win_amd64.whl", hash = "sha256:87604a4087434cd814ad8973bd47d6524bd1fa9e971ce428e76b62a5e0860fdf"},
{file = "pywin32-227-cp27-cp27m-win32.whl", hash = "sha256:371fcc39416d736401f0274dd64c2302728c9e034808e37381b5e1b22be4a6b0"},
{file = "pywin32-227-cp27-cp27m-win_amd64.whl", hash = "sha256:4cdad3e84191194ea6d0dd1b1b9bdda574ff563177d2adf2b4efec2a244fa116"},
{file = "pywin32-227-cp35-cp35m-win32.whl", hash = "sha256:f4c5be1a293bae0076d93c88f37ee8da68136744588bc5e2be2f299a34ceb7aa"},
{file = "pywin32-227-cp35-cp35m-win_amd64.whl", hash = "sha256:a929a4af626e530383a579431b70e512e736e9588106715215bf685a3ea508d4"},
{file = "pywin32-227-cp36-cp36m-win32.whl", hash = "sha256:300a2db938e98c3e7e2093e4491439e62287d0d493fe07cce110db070b54c0be"},
{file = "pywin32-227-cp36-cp36m-win_amd64.whl", hash = "sha256:9b31e009564fb95db160f154e2aa195ed66bcc4c058ed72850d047141b36f3a2"},
{file = "pywin32-227-cp37-cp37m-win32.whl", hash = "sha256:47a3c7551376a865dd8d095a98deba954a98f326c6fe3c72d8726ca6e6b15507"},
{file = "pywin32-227-cp37-cp37m-win_amd64.whl", hash = "sha256:31f88a89139cb2adc40f8f0e65ee56a8c585f629974f9e07622ba80199057511"},
{file = "pywin32-227-cp38-cp38-win32.whl", hash = "sha256:7f18199fbf29ca99dff10e1f09451582ae9e372a892ff03a28528a24d55875bc"},
{file = "pywin32-227-cp38-cp38-win_amd64.whl", hash = "sha256:7c1ae32c489dc012930787f06244426f8356e129184a02c25aef163917ce158e"},
{file = "pywin32-227-cp39-cp39-win32.whl", hash = "sha256:c054c52ba46e7eb6b7d7dfae4dbd987a1bb48ee86debe3f245a2884ece46e295"},
{file = "pywin32-227-cp39-cp39-win_amd64.whl", hash = "sha256:f27cec5e7f588c3d1051651830ecc00294f90728d19c3bf6916e6dba93ea357c"},
]
pyyaml = [
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},

View File

@@ -83,9 +83,7 @@ impl ElectionLeader {
) -> Result<bool> {
let resp = self.client.leader(election_name).await?;
let kv = resp
.kv()
.ok_or_else(|| anyhow!("failed to get leader response"))?;
let kv = resp.kv().ok_or(anyhow!("failed to get leader response"))?;
let leader = kv.value_str()?;
Ok(leader == candidate_name)

View File

@@ -637,17 +637,6 @@ where
&mut self,
msg: &VoteRequest,
) -> Result<Option<AcceptorProposerMessage>> {
// Once voted, we won't accept data from older proposers; flush
// everything we've already received so that new proposer starts
// streaming at end of our WAL, without overlap. Currently we truncate
// WAL at streaming point, so this avoids truncating already committed
// WAL.
//
// TODO: it would be smoother to not truncate committed piece at
// handle_elected instead. Currently not a big deal, as proposer is the
// only source of WAL; with peer2peer recovery it would be more
// important.
self.wal_store.flush_wal()?;
// initialize with refusal
let mut resp = VoteResponse {
term: self.state.acceptor_state.term,

View File

@@ -94,9 +94,9 @@ class Cargo:
'test',
'--no-run',
'--message-format=json',
f'--profile={profile}',
]
env = dict(os.environ, PROFILE=profile)
output = subprocess.check_output(cmd, cwd=self.cwd, env=env, text=True)
output = subprocess.check_output(cmd, cwd=self.cwd, text=True)
for line in output.splitlines(keepends=False):
meta = json.loads(line)
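The hunk above switches how the build profile reaches cargo when listing test executables: via a PROFILE environment variable in one version and via cargo's own --profile flag in the other. Either way, the listing step boils down to parsing the JSON messages emitted by `cargo test --no-run`; a hedged, standalone sketch of that step (an approximation of the script, not the script itself):

import json
import subprocess

def list_test_executables(cwd: str, profile: str = "release") -> list:
    # `cargo test --no-run` compiles the test binaries; each JSON message for a
    # compiled target carries an "executable" field with its path.
    cmd = ["cargo", "test", "--no-run", "--message-format=json",
           f"--profile={profile}"]
    output = subprocess.check_output(cmd, cwd=cwd, text=True)
    exes = []
    for line in output.splitlines():
        meta = json.loads(line)
        if meta.get("executable"):
            exes.append(meta["executable"])
    return exes

# Example (assumes a Cargo workspace in the current directory):
# print(list_test_executables(".", profile="release"))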
@@ -210,7 +210,8 @@ class ProfDir:
@property
def files(self) -> List[Path]:
return [f for f in self.cwd.iterdir() if f.suffix in ('.profraw', '.profdata')]
exts = ('.profraw', '.profdata')
return sorted([f for f in self.cwd.iterdir() if f.suffix in exts])
@property
def file_names_hash(self) -> str:
@@ -546,7 +547,6 @@ self-contained example:
p_report = commands.add_parser('report', help='generate a coverage report')
p_report.add_argument('--profile',
default='debug',
choices=('debug', 'release'),
help='cargo build profile')
p_report.add_argument('--format',
default='html',

View File

@@ -105,3 +105,16 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
branch2_cur.execute('SELECT count(*) FROM foo')
assert branch2_cur.fetchone() == (300000, )
def test_ancestor_branch_detach(neon_simple_env: NeonEnv):
env = neon_simple_env
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_detach_parent", "empty")
env.neon_cli.create_branch("test_ancestor_branch_detach_branch1",
"test_ancestor_branch_detach_parent")
ps_http = env.pageserver.http_client()
with pytest.raises(NeonPageserverApiException, match="Failed to detach inmem tenant timeline"):
ps_http.timeline_detach(env.initial_tenant, parent_timeline_id)

View File

@@ -1,167 +0,0 @@
import threading
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import lsn_from_hex
# Test the GC implementation when running with branching.
# This test reproduces the issue https://github.com/neondatabase/neon/issues/707.
#
# Consider two LSNs `lsn1` and `lsn2` with some delta files as follows:
# ...
# p -> has an image layer xx_p with p < lsn1
# ...
# lsn1
# ...
# q -> has an image layer yy_q with lsn1 < q < lsn2
# ...
# lsn2
#
# Consider running a GC iteration such that the GC horizon is between p and lsn1
# ...
# p -> has an image layer xx_p with p < lsn1
# D_start -> is a delta layer D's start (e.g D = '...-...-D_start-D_end')
# ...
# GC_h -> is a gc horizon such that p < GC_h < lsn1
# ...
# lsn1
# ...
# D_end -> is a delta layer D's end
# ...
# q -> has an image layer yy_q with lsn1 < q < lsn2
# ...
# lsn2
#
# As described in the issue #707, the image layer xx_p will be deleted as
# its range is below the GC horizon and there exists a newer image layer yy_q (q > p).
# However, removing xx_p will corrupt any delta layers that depend on xx_p that
# are not deleted by GC. For example, the delta layer D is corrupted in the
# above example because D depends on the image layer xx_p for value reconstruction.
#
# Because the delta layer D covering lsn1 is corrupted, creating a branch
# starting from lsn1 should return an error as follows:
# could not find data for key ... at LSN ..., for request at LSN ...
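The layer-map reasoning above can be made concrete with a tiny model: keep image layers as full values at an LSN and delta layers as increments over an LSN range, and reconstruct a page at some LSN from the newest image at or below it plus the deltas after it. Deleting the image layer below the GC horizon then visibly breaks reconstruction inside the surviving delta layer D. A purely illustrative sketch, not the pageserver's data structures:

# Illustrative model: p = 10 has image layer xx_p, delta layer D spans 11..25
# (crossing the GC horizon at 12), and a newer image yy_q sits at q = 30.
images = {10: 100}            # xx_p
deltas = [(11, 25, 1)]        # delta layer D: (start_lsn, end_lsn, increment)
gc_horizon = 12

def reconstruct(lsn):
    # Newest image at or below lsn, plus deltas that start after it.
    base_lsn = max(l for l in images if l <= lsn)
    value = images[base_lsn]
    for start, _end, inc in deltas:
        if base_lsn < start <= lsn:
            value += inc
    return value

print(reconstruct(20))        # 101: needs xx_p as the reconstruction base

def naive_gc():
    # The buggy behaviour described above: drop every image layer below the
    # horizon as soon as a newer image exists, ignoring delta layers that
    # still rely on it as their base.
    images[30] = 200          # yy_q
    for l in [l for l in images if l < gc_horizon]:
        del images[l]

naive_gc()
try:
    reconstruct(20)
except ValueError:
    print("cannot reconstruct at lsn 20: no image layer at or below it")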
def test_branch_and_gc(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
'gc_period': '10 m',
'gc_horizon': f'{10 * 1024 ** 3}',
# small checkpoint distance to create more delta layer files
'checkpoint_distance': f'{1024 ** 2}',
# set the target size to be large to allow the image layer to cover the whole key space
'compaction_target_size': f'{1024 ** 3}',
# tweak the default settings to quickly create image layers and L1 layers
'compaction_period': '1 s',
'compaction_threshold': '2',
'image_creation_threshold': '1',
# set PITR interval to be small, so we can do GC
'pitr_interval': '1 s'
})
timeline_main = env.neon_cli.create_timeline(f'test_main', tenant_id=tenant)
pg_main = env.postgres.create_start('test_main', tenant_id=tenant)
main_cur = pg_main.connect().cursor()
main_cur.execute(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
)
main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
lsn1 = main_cur.fetchone()[0]
log.info(f'LSN1: {lsn1}')
main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
lsn2 = main_cur.fetchone()[0]
log.info(f'LSN2: {lsn2}')
# Set the GC horizon so that lsn1 is inside the horizon, which means
# we can create a new branch starting from lsn1.
env.pageserver.safe_psql(
f'''do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}'''
)
env.neon_cli.create_branch('test_branch',
'test_main',
tenant_id=tenant,
ancestor_start_lsn=lsn1)
pg_branch = env.postgres.create_start('test_branch', tenant_id=tenant)
branch_cur = pg_branch.connect().cursor()
branch_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
branch_cur.execute('SELECT count(*) FROM foo')
assert branch_cur.fetchone() == (200000, )
# This test simulates a race condition happening when branch creation and GC are performed concurrently.
#
# Suppose we want to create a new timeline 't' from a source timeline 's' starting
# from an LSN 'lsn'. Upon creating 't', if we don't hold the GC lock and compare 'lsn' with
# the latest GC information carefully, it's possible for GC to accidentally remove data
# needed by the new timeline.
#
# In this test, GC is requested before the branch creation but is delayed to happen after branch creation.
# As a result, when doing GC for the source timeline, we don't have any information about
# the upcoming new branches, so it's possible to remove data that may be needed by the new branches.
# It's the branch creation task's job to make sure the starting 'lsn' is not out of scope
# and prevent creating branches with invalid starting LSNs.
#
# For more details, see discussion in https://github.com/neondatabase/neon/pull/2101#issuecomment-1185273447.
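The race above is a check-then-act problem: branch creation picks a start LSN while GC advances its cutoff, and unless the start LSN is validated against the latest cutoff under the same lock GC uses, GC can remove data the new branch still needs. A hedged sketch of the guarded check; the names and locking granularity are illustrative, not the pageserver's:

import threading

gc_lock = threading.Lock()
latest_gc_cutoff_lsn = 0      # illustrative shared state, advanced by GC

def run_gc(new_cutoff):
    global latest_gc_cutoff_lsn
    with gc_lock:
        # Under the lock: publish the new cutoff, then (in the real system)
        # delete layers that no branch at or above the cutoff can need.
        latest_gc_cutoff_lsn = new_cutoff

def create_branch(start_lsn):
    with gc_lock:
        # Validate against the *latest* cutoff while holding the same lock, so
        # a concurrent GC cannot slip in between the check and the branch.
        if start_lsn < latest_gc_cutoff_lsn:
            raise ValueError(f"invalid branch start lsn {start_lsn}: earlier "
                             f"than latest GC cutoff {latest_gc_cutoff_lsn}")
        return f"branch@{start_lsn}"

run_gc(new_cutoff=1000)
print(create_branch(1500))    # ok
try:
    create_branch(500)        # rejected instead of silently losing data
except ValueError as exc:
    print(exc)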
def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
env = neon_simple_env
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
'gc_period': '10 m',
'gc_horizon': f'{10 * 1024 ** 3}',
# small checkpoint distance to create more delta layer files
'checkpoint_distance': f'{1024 ** 2}',
# set the target size to be large to allow the image layer to cover the whole key space
'compaction_target_size': f'{1024 ** 3}',
# tweak the default settings to quickly create image layers and L1 layers
'compaction_period': '1 s',
'compaction_threshold': '2',
'image_creation_threshold': '1',
# set PITR interval to be small, so we can do GC
'pitr_interval': '1 s'
})
b0 = env.neon_cli.create_branch('b0', tenant_id=tenant)
pg0 = env.postgres.create_start('b0', tenant_id=tenant)
res = pg0.safe_psql_many(queries=[
"CREATE TABLE t(key serial primary key)",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
"SELECT pg_current_wal_insert_lsn()",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
])
lsn = res[2][0][0]
# Use `failpoint=sleep` and `threading` to make the GC iteration trigger *before* the
# branch creation task, while the individual timeline GC iteration happens *after*
# the branch creation task.
env.pageserver.safe_psql(f"failpoints before-timeline-gc=sleep(2000)")
def do_gc():
env.pageserver.safe_psql(f"do_gc {tenant.hex} {b0.hex} 0")
thread = threading.Thread(target=do_gc, daemon=True)
thread.start()
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn"):
env.neon_cli.create_branch('b1', 'b0', tenant_id=tenant, ancestor_start_lsn=lsn)

View File

@@ -1,89 +0,0 @@
from typing import List
import threading
import pytest
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
import time
import random
from fixtures.log_helper import log
from performance.test_perf_pgbench import get_scales_matrix
# Test branch creation
#
# This test spawns pgbench in a thread in the background, and creates a branch while
# pgbench is running. Then it launches pgbench on the new branch, and creates another branch.
# Repeat `n_branches` times.
#
# If 'ty' == 'cascade', each branch is created from the previous branch, so that you end
# up with a branch of a branch of a branch ... of a branch. With 'ty' == 'flat',
# each branch is created from the root.
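The cascade/flat distinction described above only changes which ancestor each new branch uses; a tiny illustrative helper (not part of the test) makes the loop's choice explicit:

def parent_branch(i: int, ty: str) -> str:
    # 'cascade' chains each new branch off the previous one (b0 <- b1 <- b2 ...),
    # while 'flat' always branches from the root branch b0.
    return f'b{i}' if ty == 'cascade' else 'b0'

assert parent_branch(3, 'cascade') == 'b3'   # b4 would be created from b3
assert parent_branch(3, 'flat') == 'b0'      # b4 would be created from b0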
@pytest.mark.parametrize("n_branches", [10])
@pytest.mark.parametrize("scale", get_scales_matrix(1))
@pytest.mark.parametrize("ty", ["cascade", "flat"])
def test_branching_with_pgbench(neon_simple_env: NeonEnv,
pg_bin: PgBin,
n_branches: int,
scale: int,
ty: str):
env = neon_simple_env
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '5 s',
'gc_horizon': f'{1024 ** 2}',
'checkpoint_distance': f'{1024 ** 2}',
'compaction_target_size': f'{1024 ** 2}',
# set PITR interval to be small, so we can do GC
'pitr_interval': '5 s'
})
def run_pgbench(pg: Postgres):
connstr = pg.connstr()
log.info(f"Start a pgbench workload on pg {connstr}")
pg_bin.run_capture(['pgbench', '-i', f'-s{scale}', connstr])
pg_bin.run_capture(['pgbench', '-T15', connstr])
env.neon_cli.create_branch('b0', tenant_id=tenant)
pgs: List[Postgres] = []
pgs.append(env.postgres.create_start('b0', tenant_id=tenant))
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=(pgs[0], ), daemon=True))
threads[-1].start()
thread_limit = 4
for i in range(n_branches):
# pick a random delay in [0, 5] seconds
delay = random.random() * 5
time.sleep(delay)
log.info(f"Sleep {delay}s")
# If the number of concurrent threads exceeds a threshold,
# wait for all the threads to finish before spawning a new one.
# Because tests defined in `batch_others` are run concurrently in CI,
# we want to avoid the situation that one test exhausts resources for other tests.
if len(threads) >= thread_limit:
for thread in threads:
thread.join()
threads = []
if ty == "cascade":
env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(i), tenant_id=tenant)
else:
env.neon_cli.create_branch('b{}'.format(i + 1), 'b0', tenant_id=tenant)
pgs.append(env.postgres.create_start('b{}'.format(i + 1), tenant_id=tenant))
threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1], ), daemon=True))
threads[-1].start()
for thread in threads:
thread.join()
for pg in pgs:
res = pg.safe_psql('SELECT count(*) from pgbench_accounts')
assert res[0] == (100000 * scale, )

View File

@@ -110,6 +110,6 @@ def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv):
env.neon_cli.pageserver_stop(immediate=True)
env.neon_cli.pageserver_start()
# Check that tenant with "broken" timeline is not loaded.
with pytest.raises(Exception, match=f"Failed to get repo for tenant {tenant_id.hex}"):
env.neon_cli.list_timelines(tenant_id)
# Check that the "broken" timeline is not loaded
timelines = env.neon_cli.list_timelines(tenant_id)
assert len(timelines) == 1

View File

@@ -1,51 +0,0 @@
from contextlib import closing
import shutil
import time
import subprocess
import os.path
import threading
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
def lsof_path() -> str:
path_output = shutil.which("lsof")
if path_output is None:
raise RuntimeError('lsof not found in PATH')
else:
return path_output
# Makes sure that `pageserver.pid` is only held by `pageserve` command, not other commands.
# This is to test the changes in https://github.com/neondatabase/neon/pull/1834.
def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
env = neon_simple_env
def start_workload():
env.neon_cli.create_branch("test_lsof_pageserver_pid")
pg = env.postgres.create_start("test_lsof_pageserver_pid")
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE foo as SELECT x FROM generate_series(1,100000) x")
cur.execute("update foo set x=x+1")
workload_thread = threading.Thread(target=start_workload, args=(), daemon=True)
workload_thread.start()
path = os.path.join(env.repo_dir, "pageserver.pid")
lsof = lsof_path()
while workload_thread.is_alive():
res = subprocess.run([lsof, path],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# parse the `lsof` command's output to get only the list of commands
commands = [line.split(' ')[0] for line in res.stdout.strip().split('\n')[1:]]
if len(commands) > 0:
log.info(f"lsof commands: {commands}")
assert commands == ['pageserve']
time.sleep(1.0)

View File

@@ -1,63 +0,0 @@
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
from fixtures.log_helper import log
import pytest
# Restart nodes with WAL end having specially crafted shape, like last record
# crossing segment boundary, to test decoding issues.
@pytest.mark.parametrize('wal_type',
[
'simple',
'last_wal_record_xlog_switch',
'last_wal_record_xlog_switch_ends_on_page_boundary',
'last_wal_record_crossing_segment',
'wal_record_crossing_segment_followed_by_small_one',
])
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_crafted_wal_end')
pg = env.postgres.create('test_crafted_wal_end')
wal_craft = WalCraft(env)
pg.config(wal_craft.postgres_config())
pg.start()
res = pg.safe_psql_many(queries=[
'CREATE TABLE keys(key int primary key)',
'INSERT INTO keys SELECT generate_series(1, 100)',
'SELECT SUM(key) FROM keys'
])
assert res[-1][0] == (5050, )
wal_craft.in_existing(wal_type, pg.connstr())
log.info("Restarting all safekeepers and pageservers")
env.pageserver.stop()
env.safekeepers[0].stop()
env.safekeepers[0].start()
env.pageserver.start()
log.info("Trying more queries")
res = pg.safe_psql_many(queries=[
'SELECT SUM(key) FROM keys',
'INSERT INTO keys SELECT generate_series(101, 200)',
'SELECT SUM(key) FROM keys',
])
assert res[0][0] == (5050, )
assert res[-1][0] == (20100, )
log.info("Restarting all safekeepers and pageservers (again)")
env.pageserver.stop()
env.safekeepers[0].stop()
env.safekeepers[0].start()
env.pageserver.start()
log.info("Trying more queries (again)")
res = pg.safe_psql_many(queries=[
'SELECT SUM(key) FROM keys',
'INSERT INTO keys SELECT generate_series(201, 300)',
'SELECT SUM(key) FROM keys',
])
assert res[0][0] == (20100, )
assert res[-1][0] == (45150, )

View File

@@ -90,7 +90,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
# Clean up
# TODO it should clean itself
client = env.pageserver.http_client()
client.timeline_delete(tenant, timeline)
client.timeline_detach(tenant, timeline)
# Importing correct backup works
import_tar(base_tar, wal_tar)

View File

@@ -24,7 +24,7 @@ def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
assert res_2[0] == (5000050000, )
pg.stop()
pageserver_http.tenant_detach(tenant_id)
pageserver_http.timeline_detach(tenant_id, timeline_id)
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])

View File

@@ -6,7 +6,7 @@ from contextlib import closing
from pathlib import Path
import time
from uuid import UUID
from fixtures.neon_fixtures import NeonEnvBuilder, assert_timeline_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, lsn_to_hex
import pytest
@@ -91,14 +91,14 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto
# Introduce failpoint in download
env.pageserver.safe_psql(f"failpoints remote-storage-download-pre-rename=return")
client.tenant_attach(UUID(tenant_id))
client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
# is there a better way to assert that failpoint triggered?
# is there a better way to assert that fafilpoint triggered?
time.sleep(10)
# assert cannot attach timeline that is scheduled for download
with pytest.raises(Exception, match="Conflict: Tenant download is already in progress"):
client.tenant_attach(UUID(tenant_id))
with pytest.raises(Exception, match="Timeline download is already in progress"):
client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
log.info("Timeline detail with active failpoint: %s", detail)
@@ -109,12 +109,12 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto
env.pageserver.stop()
env.pageserver.start()
client.tenant_attach(UUID(tenant_id))
client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
log.info("waiting for timeline redownload")
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)))
func=lambda: assert_local(client, UUID(tenant_id), UUID(timeline_id)))
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
assert detail['local'] is not None

View File

@@ -1,64 +0,0 @@
from threading import Thread
from uuid import uuid4
import psycopg2
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
# first check for a non-existing tenant
tenant_id = uuid4()
with pytest.raises(expected_exception=NeonPageserverApiException,
match=f'Tenant not found for id {tenant_id.hex}'):
pageserver_http.tenant_detach(tenant_id)
# create a new tenant
tenant_id, timeline_id = env.neon_cli.create_tenant()
# assert tenant exists on disk
assert (env.repo_dir / "tenants" / tenant_id.hex).exists()
pg = env.postgres.create_start('main', tenant_id=tenant_id)
# we rely upon autocommit after each statement
pg.safe_psql_many(queries=[
'CREATE TABLE t(key int primary key, value text)',
'INSERT INTO t SELECT generate_series(1,100000), \'payload\'',
])
# gc should not even try to start
with pytest.raises(expected_exception=psycopg2.DatabaseError,
match='gc target timeline does not exist'):
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0')
# try to concurrently run gc and detach
gc_thread = Thread(
target=lambda: env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0'), )
gc_thread.start()
last_error = None
for i in range(3):
try:
pageserver_http.tenant_detach(tenant_id)
except Exception as e:
last_error = e
log.error(f"try {i} error detaching tenant: {e}")
continue
else:
break
# else is called if the loop finished without reaching "break"
else:
pytest.fail(f"could not detach timeline: {last_error}")
gc_thread.join(timeout=10)
# check that nothing is left on disk for deleted tenant
assert not (env.repo_dir / "tenants" / tenant_id.hex).exists()
with pytest.raises(expected_exception=psycopg2.DatabaseError,
match=f'Tenant {tenant_id.hex} not found'):
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')

View File

@@ -1,31 +1,16 @@
from contextlib import closing, contextmanager
import os
import pathlib
import signal
import subprocess
import threading
from contextlib import closing, contextmanager
from typing import Any, Dict, Optional, Tuple
import typing
from uuid import UUID
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Etcd,
NeonEnv,
NeonEnvBuilder,
NeonPageserverHttpClient,
PageserverPort,
PortDistributor,
Postgres,
assert_no_in_progress_downloads_for_tenant,
assert_timeline_local,
base_dir,
neon_binpath,
pg_distrib_dir,
wait_for_last_record_lsn,
wait_for_upload,
wait_until,
)
from typing import Optional
import signal
import pytest
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir, base_dir
from fixtures.utils import lsn_from_hex, subprocess_capture
@@ -116,109 +101,6 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
log.info('load thread stopped')
def populate_branch(
pg: Postgres,
tenant_id: UUID,
ps_http: NeonPageserverHttpClient,
create_table: bool,
expected_sum: Optional[int],
) -> Tuple[UUID, int]:
# insert some data
with pg_cur(pg) as cur:
cur.execute("SHOW neon.timeline_id")
timeline_id = UUID(cur.fetchone()[0])
log.info("timeline to relocate %s", timeline_id.hex)
cur.execute("SELECT pg_current_wal_flush_lsn()")
log.info("pg_current_wal_flush_lsn() %s", lsn_from_hex(cur.fetchone()[0]))
log.info("timeline detail %s",
ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id))
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
if create_table:
cur.execute("CREATE TABLE t(key int, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
if expected_sum is not None:
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (expected_sum, )
cur.execute("SELECT pg_current_wal_flush_lsn()")
current_lsn = lsn_from_hex(cur.fetchone()[0])
return timeline_id, current_lsn
def ensure_checkpoint(
pageserver_cur,
pageserver_http: NeonPageserverHttpClient,
tenant_id: UUID,
timeline_id: UUID,
current_lsn: int,
):
# run checkpoint manually to be sure that data landed in remote storage
pageserver_cur.execute(f"checkpoint {tenant_id.hex} {timeline_id.hex}")
# wait until pageserver successfully uploaded a checkpoint to remote storage
wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)
def check_timeline_attached(
new_pageserver_http_client: NeonPageserverHttpClient,
tenant_id: UUID,
timeline_id: UUID,
old_timeline_detail: Dict[str, Any],
old_current_lsn: int,
):
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there were no new writes since the checkpoint
new_timeline_detail = assert_timeline_local(new_pageserver_http_client, tenant_id, timeline_id)
# when load is active these checks can break because lsns are not static
# so lets check with some margin
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(old_timeline_detail['local']['disk_consistent_lsn']),
0.03)
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
old_current_lsn,
0.03)
def switch_pg_to_new_pageserver(env: NeonEnv,
pg: Postgres,
new_pageserver_port: int,
tenant_id: UUID,
timeline_id: UUID) -> pathlib.Path:
pg.stop()
pg_config_file_path = pathlib.Path(pg.config_file_path())
pg_config_file_path.open('a').write(
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'")
pg.start()
timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant_id.hex / 'timelines' / timeline_id.hex
files_before_detach = os.listdir(timeline_to_detach_local_path)
assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
but got: {files_before_detach}'
assert len(files_before_detach) >= 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
but got {files_before_detach}'
return timeline_to_detach_local_path
def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path: pathlib.Path):
with pg_cur(pg) as cur:
# check that data is still there
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (sum_before_migration, )
# check that we can write new data
cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (sum_before_migration + 1500500, )
assert not os.path.exists(old_local_path), f'After detach, local timeline dir {old_local_path} should be removed'
@pytest.mark.parametrize(
'method',
[
@@ -244,83 +126,61 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
# we use two branches to check that they are both relocated
# the first branch is used for load; the compute for the second one is used to
# check that data is not lost
tenant, _ = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
log.info("tenant to relocate %s", tenant)
# attach does not download ancestor branches (should it?), just use root branch for now
env.neon_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
tenant_pg = env.postgres.create_start(branch_name='test_tenant_relocation',
node_name='test_tenant_relocation',
tenant_id=tenant)
# insert some data
with closing(tenant_pg.connect()) as conn:
with conn.cursor() as cur:
# save timeline for later gc call
cur.execute("SHOW neon.timeline_id")
timeline = UUID(cur.fetchone()[0])
log.info("timeline to relocate %s", timeline.hex)
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (500500, )
cur.execute("SELECT pg_current_wal_flush_lsn()")
current_lsn = lsn_from_hex(cur.fetchone()[0])
pageserver_http = env.pageserver.http_client()
tenant_id, initial_timeline_id = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id)
env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
pg_main = env.postgres.create_start(branch_name='test_tenant_relocation_main',
tenant_id=tenant_id)
timeline_id_main, current_lsn_main = populate_branch(
pg_main,
tenant_id=tenant_id,
ps_http=pageserver_http,
create_table=True,
expected_sum=500500,
)
env.neon_cli.create_branch(
new_branch_name="test_tenant_relocation_second",
ancestor_branch_name="test_tenant_relocation_main",
tenant_id=tenant_id,
)
pg_second = env.postgres.create_start(branch_name='test_tenant_relocation_second',
tenant_id=tenant_id)
timeline_id_second, current_lsn_second = populate_branch(
pg_second,
tenant_id=tenant_id,
ps_http=pageserver_http,
create_table=False,
expected_sum=1001000,
)
# wait until pageserver receives that data
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_main, current_lsn_main)
timeline_detail_main = assert_timeline_local(pageserver_http, tenant_id, timeline_id_main)
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second)
timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second)
wait_for_last_record_lsn(pageserver_http, tenant, timeline, current_lsn)
timeline_detail = assert_local(pageserver_http, tenant, timeline)
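# wait_for_last_record_lsn comes from the fixtures and is not shown in this diff; a
# rough sketch of the idea (the 'last_record_lsn' field name is an assumption):
def wait_for_last_record_lsn_sketch(ps_http, tenant_id, timeline_id, lsn: int):
    # poll the pageserver until it has ingested WAL at least up to `lsn`
    def check():
        detail = ps_http.timeline_detail(tenant_id, timeline_id)
        assert lsn_from_hex(detail['local']['last_record_lsn']) >= lsn
    wait_until(number_of_iterations=10, interval=1, func=check)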
if with_load == 'with_load':
# create load table
with pg_cur(pg_main) as cur:
with pg_cur(tenant_pg) as cur:
cur.execute("CREATE TABLE load(value text)")
load_stop_event = threading.Event()
load_ok_event = threading.Event()
load_thread = threading.Thread(
target=load,
args=(pg_main, load_stop_event, load_ok_event),
args=(tenant_pg, load_stop_event, load_ok_event),
daemon=True, # To make sure the child dies when the parent errors
)
load_thread.start()
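# The load() target is defined elsewhere in this test module; a rough sketch of its
# likely shape, based only on how it is started and checked here (an assumption):
def load_sketch(pg, stop_event: threading.Event, ok_event: threading.Event):
    # keep writing to the load table until asked to stop; ok_event reports whether
    # the most recent writes went through
    while not stop_event.is_set():
        try:
            with pg_cur(pg) as cur:
                cur.execute("INSERT INTO load VALUES ('some payload')")
            ok_event.set()
        except Exception as e:
            log.info("load write failed: %s", e)
            ok_event.clear()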
# this requirement introduces a problem:
# if a user creates a branch during migration,
# it won't appear on the new pageserver
with pg_cur(env.pageserver) as cur:
ensure_checkpoint(
cur,
pageserver_http=pageserver_http,
tenant_id=tenant_id,
timeline_id=timeline_id_main,
current_lsn=current_lsn_main,
)
# run checkpoint manually to be sure that data landed in remote storage
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor() as pscur:
pscur.execute(f"checkpoint {tenant.hex} {timeline.hex}")
ensure_checkpoint(
cur,
pageserver_http=pageserver_http,
tenant_id=tenant_id,
timeline_id=timeline_id_second,
current_lsn=current_lsn_second,
)
# wait until pageserver successfully uploaded a checkpoint to remote storage
wait_for_upload(pageserver_http, tenant, timeline, current_lsn)
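# ensure_checkpoint is not shown in this hunk; judging by the inline code it replaces
# (a manual `checkpoint` via the pageserver connection plus wait_for_upload), a sketch:
def ensure_checkpoint_sketch(pscur, pageserver_http, tenant_id, timeline_id, current_lsn):
    # run checkpoint manually to be sure that data landed in remote storage
    pscur.execute(f"checkpoint {tenant_id.hex} {timeline_id.hex}")
    # wait until the pageserver has successfully uploaded a checkpoint to remote storage
    wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)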
log.info("inititalizing new pageserver")
# bootstrap second pageserver
@@ -347,7 +207,7 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
"python",
os.path.join(base_dir, "scripts/export_import_between_pageservers.py"),
"--tenant-id",
tenant_id.hex,
tenant.hex,
"--from-host",
"localhost",
"--from-http-port",
@@ -368,33 +228,22 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
subprocess_capture(str(env.repo_dir), cmd, check=True)
elif method == "minor":
# call to attach timeline to new pageserver
new_pageserver_http.tenant_attach(tenant_id)
new_pageserver_http.timeline_attach(tenant, timeline)
# check that it shows that download is in progress
tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id)
assert tenant_status.get('has_in_progress_downloads'), tenant_status
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there were no new writes since the checkpoint
new_timeline_detail = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: assert_local(new_pageserver_http, tenant, timeline))
# wait until tenant is downloaded
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_no_in_progress_downloads_for_tenant(
new_pageserver_http, tenant_id))
# when load is active these checks can break because LSNs are not static,
# so let's check with some margin
assert_abs_margin_ratio(
lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
0.03)
check_timeline_attached(
new_pageserver_http,
tenant_id,
timeline_id_main,
timeline_detail_main,
current_lsn_main,
)
check_timeline_attached(
new_pageserver_http,
tenant_id,
timeline_id_second,
timeline_detail_second,
current_lsn_second,
)
tenant_pg.stop()
# rewrite neon cli config to use the new pageserver for the basebackup needed to start the new compute
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
@@ -402,29 +251,33 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
(env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
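# Illustrative only: the last line of the generated `config` file is assumed to be the
# pageserver's listen_pg_addr entry, e.g.
#   listen_pg_addr = 'localhost:15001'
# and rewriting it makes `neon pg start` take its basebackup from the new pageserver.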
old_local_path_main = switch_pg_to_new_pageserver(
env,
pg_main,
new_pageserver_pg_port,
tenant_id,
timeline_id_main,
tenant_pg_config_file_path = pathlib.Path(tenant_pg.config_file_path())
tenant_pg_config_file_path.open('a').write(
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
)
old_local_path_second = switch_pg_to_new_pageserver(
env,
pg_second,
new_pageserver_pg_port,
tenant_id,
timeline_id_second,
)
tenant_pg.start()
timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant.hex / 'timelines' / timeline.hex
files_before_detach = os.listdir(timeline_to_detach_local_path)
assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
but got: {files_before_detach}'
assert len(files_before_detach) > 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
but got {files_before_detach}'
# detach the tenant from the old pageserver before we check
# that all the data is there, to be sure that the old pageserver
# is no longer involved; if it is, we will see errors
pageserver_http.tenant_detach(tenant_id)
pageserver_http.timeline_detach(tenant, timeline)
post_migration_check(pg_main, 500500, old_local_path_main)
post_migration_check(pg_second, 1001000, old_local_path_second)
with pg_cur(tenant_pg) as cur:
# check that data is still there
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (500500, )
# check that we can write new data
cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (2001000, )
if with_load == 'with_load':
assert load_ok_event.wait(3)
@@ -433,6 +286,8 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
load_thread.join(timeout=10)
log.info('load thread stopped')
assert not os.path.exists(timeline_to_detach_local_path), f'After detach, local timeline dir {timeline_to_detach_local_path} should be removed'
# bring the old pageserver back for a clean shutdown via neon cli;
# the new pageserver will be shut down by the context manager
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()

View File

@@ -35,10 +35,10 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
value = line.lstrip(name).strip()
return int(value)
def delete_all_timelines(tenant):
def detach_all_timelines(tenant):
timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
for t in timelines:
client.timeline_delete(tenant, t)
client.timeline_detach(tenant, t)
def assert_idle(tenant):
assert get_state(tenant) == "Idle"
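# get_state is defined earlier in this test and not visible in this hunk; a plausible
# sketch, assuming the tenant state is exposed via the pageserver HTTP API (the exact
# response field name is an assumption):
def get_state_sketch(tenant):
    return client.tenant_status(tenant)["state"]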
@@ -56,7 +56,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
# TODO they should already be idle since there are no active computes
for tenant_info in client.tenant_list():
tenant_id = UUID(tenant_info["id"])
delete_all_timelines(tenant_id)
detach_all_timelines(tenant_id)
wait_until(10, 0.2, lambda: assert_idle(tenant_id))
# Assert that all tasks finish quickly after tenants go idle

View File

@@ -1,60 +0,0 @@
from uuid import uuid4
import pytest
from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until
def test_timeline_delete(neon_simple_env: NeonEnv):
env = neon_simple_env
ps_http = env.pageserver.http_client()
# first try to delete a non-existent timeline
# for an existing tenant:
invalid_timeline_id = uuid4()
with pytest.raises(NeonPageserverApiException, match="timeline not found"):
ps_http.timeline_delete(tenant_id=env.initial_tenant, timeline_id=invalid_timeline_id)
# for a non-existent tenant:
invalid_tenant_id = uuid4()
with pytest.raises(NeonPageserverApiException,
match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state"):
ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id)
# construct a pair of branches to validate that the pageserver prohibits
# deletion of ancestor timelines when they have child branches
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1",
"test_ancestor_branch_delete_parent")
ps_http = env.pageserver.http_client()
with pytest.raises(NeonPageserverApiException,
match="Cannot detach timeline which has child timelines"):
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex
assert timeline_path.exists()
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
assert not timeline_path.exists()
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex
assert timeline_path.exists()
# retry deletes when compaction or gc is running in pageserver
wait_until(number_of_iterations=3,
interval=0.2,
func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id))
assert not timeline_path.exists()
# check 404
with pytest.raises(NeonPageserverApiException,
match="is not found neither locally nor remotely"):
ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
# FIXME leaves tenant without timelines, should we prevent deletion of root timeline?
wait_until(number_of_iterations=3,
interval=0.2,
func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id))

View File

@@ -1,7 +1,7 @@
from contextlib import closing
import psycopg2.extras
import psycopg2.errors
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local
from fixtures.log_helper import log
import time
@@ -11,7 +11,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
client = env.pageserver.http_client()
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
'current_logical_size_non_incremental']
@@ -29,13 +29,13 @@ def test_timeline_size(neon_simple_env: NeonEnv):
FROM generate_series(1, 10) g
""")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
res = assert_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
cur.execute("TRUNCATE foo")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
res = assert_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
@@ -46,7 +46,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
client = env.pageserver.http_client()
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
'current_logical_size_non_incremental']
@@ -57,7 +57,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
with conn.cursor() as cur:
cur.execute("SHOW neon.timeline_id")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
res = assert_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
@@ -73,14 +73,14 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
FROM generate_series(1, 10) g
""")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
res = assert_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
cur.execute('DROP DATABASE foodb')
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
res = assert_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
@@ -117,7 +117,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
client = env.pageserver.http_client()
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
res = assert_local(client, env.initial_tenant, new_timeline_id)
assert res['local']["current_logical_size"] == res['local'][
"current_logical_size_non_incremental"]

View File

@@ -302,8 +302,6 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder):
class BackgroundCompute(object):
MAX_QUERY_GAP_SECONDS = 2
def __init__(self, index: int, env: NeonEnv, branch: str):
self.index = index
self.env = env
@@ -341,7 +339,7 @@ class BackgroundCompute(object):
# With less sleep, there is a very high chance of committing nothing,
# or only one xact, during the test run.
await asyncio.sleep(random.uniform(0, self.MAX_QUERY_GAP_SECONDS))
await asyncio.sleep(2 * random.random())
self.running = False
@@ -358,34 +356,20 @@ async def run_concurrent_computes(env: NeonEnv,
background_tasks = [asyncio.create_task(compute.run()) for compute in computes]
await asyncio.sleep(run_seconds)
log.info("stopping all tasks but one")
for compute in computes[1:]:
compute.stopped = True
await asyncio.gather(*background_tasks[1:])
log.info("stopped all tasks but one")
# work for some time with only one compute -- it should be able to make some xacts
TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3
initial_queries_by_0 = len(computes[0].successful_queries)
log.info(f'Waiting for another query by computes[0], '
f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s')
for _ in range(10 * TIMEOUT_SECONDS):
current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0
if current_queries_by_0 >= 1:
log.info(f'Found {current_queries_by_0} successful queries '
f'by computes[0], completing the test')
break
await asyncio.sleep(0.1)
else:
assert False, "Timed out while waiting for another query by computes[0]"
await asyncio.sleep(8)
computes[0].stopped = True
await asyncio.gather(background_tasks[0])
await asyncio.gather(*background_tasks)
result = await exec_compute_query(env, branch, 'SELECT * FROM query_log')
# we should have inserted something while single compute was running
log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them '
f'by computes[0] after we started stopping the others')
assert len(result) >= 4
log.info(f'Executed {len(result)} queries')
for row in result:
log.info(f'{row[0]} {row[1]} {row[2]}')

View File

@@ -20,22 +20,18 @@ def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, caps
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
# Find the pg_isolation_regress binary
proc = pg_bin.run(['pg_config', '--libdir'], capture_output=True)
libdir = proc.stdout.decode().strip()
proc = pg_bin.run(['pg_config', '--bindir'], capture_output=True)
bindir = proc.stdout.decode().strip()
pg_isolation_regress = os.path.join(libdir,
'postgresql/pgxs/src/test/isolation/pg_isolation_regress')
# Compute all the file locations that pg_isolation_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/isolation')
src_path = os.path.join(base_dir, 'vendor/postgres/src/test/isolation')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'isolation_schedule')
pg_isolation_regress = os.path.join(build_path, 'pg_isolation_regress')
pg_isolation_regress_command = [
pg_isolation_regress,
'--use-existing',
'--bindir={}'.format(bindir),
'--dlpath={}'.format(build_path),
'--inputdir={}'.format(src_path),
'--schedule={}'.format(schedule),
]
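# The assembled command is presumably then run from `runpath` via pg_bin.run()
# (the rest of the test is outside this hunk), roughly:
#   pg_bin.run(pg_isolation_regress_command, env=..., cwd=str(runpath))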

View File

@@ -20,22 +20,19 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
# Find the pg_regress binary and --bindir option to pass to it.
proc = pg_bin.run(['pg_config', '--libdir'], capture_output=True)
libdir = proc.stdout.decode().strip()
proc = pg_bin.run(['pg_config', '--bindir'], capture_output=True)
bindir = proc.stdout.decode().strip()
pg_regress = os.path.join(libdir, 'postgresql/pgxs/src/test/regress/pg_regress')
# Compute all the file locations that pg_regress will need.
# This test runs neon-specific tests
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
src_path = os.path.join(base_dir, 'test_runner/neon_regress')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'parallel_schedule')
pg_regress = os.path.join(build_path, 'pg_regress')
pg_regress_command = [
pg_regress,
'--use-existing',
'--bindir={}'.format(bindir),
'--dlpath={}'.format(build_path),
'--schedule={}'.format(schedule),
'--inputdir={}'.format(src_path),
]

View File

@@ -19,23 +19,19 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
# Find the pg_regress binary and --bindir option to pass to it.
proc = pg_bin.run(['pg_config', '--libdir'], capture_output=True)
libdir = proc.stdout.decode().strip()
proc = pg_bin.run(['pg_config', '--bindir'], capture_output=True)
bindir = proc.stdout.decode().strip()
pg_regress = os.path.join(libdir, 'postgresql/pgxs/src/test/regress/pg_regress')
# Compute all the file locations that pg_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
src_path = os.path.join(base_dir, 'vendor/postgres/src/test/regress')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'parallel_schedule')
dlpath = os.path.join(base_dir, 'build/src/test/regress')
pg_regress = os.path.join(build_path, 'pg_regress')
pg_regress_command = [
pg_regress,
'--bindir=""',
'--use-existing',
'--bindir={}'.format(bindir),
'--dlpath={}'.format(dlpath),
'--dlpath={}'.format(build_path),
'--schedule={}'.format(schedule),
'--inputdir={}'.format(src_path),
]

View File

@@ -4,7 +4,6 @@ from dataclasses import field
from enum import Flag, auto
import textwrap
from cached_property import cached_property
import abc
import asyncpg
import os
import boto3
@@ -30,7 +29,7 @@ from dataclasses import dataclass
# Type-related stuff
from psycopg2.extensions import connection as PgConnection
from psycopg2.extensions import make_dsn, parse_dsn
from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, cast, Union, Tuple
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
from typing_extensions import Literal
import requests
@@ -325,7 +324,7 @@ class PgProtocol:
# Convert options='-c<key>=<val>' to server_settings
if 'options' in conn_options:
options = conn_options.pop('options')
for match in re.finditer(r'-c(\w*)=(\w*)', options):
for match in re.finditer('-c(\w*)=(\w*)', options):
key = match.group(1)
val = match.group(2)
if 'server_options' in conn_options:
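# A self-contained illustration of this conversion (the input values are made up
# for the example, not taken from the test suite):
import re
options = "-csearch_path=public -cstatement_timeout=0"
server_settings = {}
for match in re.finditer(r'-c(\w*)=(\w*)', options):
    server_settings[match.group(1)] = match.group(2)
assert server_settings == {'search_path': 'public', 'statement_timeout': '0'}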
@@ -796,49 +795,18 @@ class NeonPageserverHttpClient(requests.Session):
def check_status(self):
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
def tenant_list(self) -> List[Dict[Any, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/tenant")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def tenant_create(self, new_tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
def timeline_attach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
res = self.post(
f"http://localhost:{self.port}/v1/tenant",
json={
'new_tenant_id': new_tenant_id.hex if new_tenant_id else None,
},
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/attach",
)
self.verbose_error(res)
if res.status_code == 409:
raise Exception(f'could not create tenant: already exists for id {new_tenant_id}')
new_tenant_id = res.json()
assert isinstance(new_tenant_id, str)
return uuid.UUID(new_tenant_id)
def tenant_attach(self, tenant_id: uuid.UUID):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/attach")
def timeline_detach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
res = self.post(
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/detach",
)
self.verbose_error(res)
def tenant_detach(self, tenant_id: uuid.UUID):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/detach")
self.verbose_error(res)
def tenant_status(self, tenant_id: uuid.UUID) -> Dict[Any, Any]:
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[str, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def timeline_create(
self,
tenant_id: uuid.UUID,
@@ -863,6 +831,34 @@ class NeonPageserverHttpClient(requests.Session):
assert isinstance(res_json, dict)
return res_json
def tenant_list(self) -> List[Dict[Any, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/tenant")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def tenant_create(self, new_tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
res = self.post(
f"http://localhost:{self.port}/v1/tenant",
json={
'new_tenant_id': new_tenant_id.hex if new_tenant_id else None,
},
)
self.verbose_error(res)
if res.status_code == 409:
raise Exception(f'could not create tenant: already exists for id {new_tenant_id}')
new_tenant_id = res.json()
assert isinstance(new_tenant_id, str)
return uuid.UUID(new_tenant_id)
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}?include-non-incremental-logical-size=1"
@@ -872,14 +868,6 @@ class NeonPageserverHttpClient(requests.Session):
assert isinstance(res_json, dict)
return res_json
def timeline_delete(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
res = self.delete(
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}")
self.verbose_error(res)
res_json = res.json()
assert res_json is None
return res_json
def wal_receiver_get(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/wal_receiver"
@@ -909,89 +897,14 @@ TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P<branch_name>[^\s]+)\s\[(?P<timeline
re.MULTILINE)
class AbstractNeonCli(abc.ABC):
"""
A typed wrapper around an arbitrary Neon CLI tool.
Supports a way to run arbitrary command directly via CLI.
Do not use directly, use specific subclasses instead.
"""
def __init__(self, env: NeonEnv):
self.env = env
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
def raw_cli(self,
arguments: List[str],
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
"""
Run the command with the specified arguments.
Arguments must be in list form, e.g. ['pg', 'create']
Return both stdout and stderr, which can be accessed as
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
If `check_return_code`, on non-zero exit code logs failure and raises.
"""
assert type(arguments) == list
assert type(self.COMMAND) == str
bin_neon = os.path.join(str(neon_binpath), self.COMMAND)
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(' '.join(args)))
log.info(f'Running in "{self.env.repo_dir}"')
env_vars = os.environ.copy()
env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir)
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars['RUST_LOG'] = self.env.rust_log_override
for (extra_env_key, extra_env_value) in (extra_env_vars or {}).items():
env_vars[extra_env_key] = extra_env_value
# Pass coverage settings
var = 'LLVM_PROFILE_FILE'
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
res = subprocess.run(args,
env=env_vars,
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if not res.returncode:
log.info(f"Run success: {res.stdout}")
elif check_return_code:
# this way command output will be recorded and shown in the CI failure message
msg = f"""\
Run {res.args} failed:
stdout: {res.stdout}
stderr: {res.stderr}
"""
log.info(msg)
raise Exception(msg) from subprocess.CalledProcessError(res.returncode,
res.args,
res.stdout,
res.stderr)
return res
class NeonCli(AbstractNeonCli):
class NeonCli:
"""
A typed wrapper around the `neon` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
"""
COMMAND = 'neon_local'
def __init__(self, env: NeonEnv):
self.env = env
pass
def create_tenant(self,
tenant_id: Optional[uuid.UUID] = None,
@@ -1262,23 +1175,69 @@ class NeonCli(AbstractNeonCli):
return self.raw_cli(args, check_return_code=check_return_code)
def raw_cli(self,
arguments: List[str],
extra_env_vars: Optional[Dict[str, str]] = None,
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
"""
Run "neon" with the specified arguments.
class WalCraft(AbstractNeonCli):
"""
A typed wrapper around the `wal_craft` CLI tool.
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
"""
Arguments must be in list form, e.g. ['pg', 'create']
COMMAND = 'wal_craft'
Return both stdout and stderr, which can be accessed as
def postgres_config(self) -> List[str]:
res = self.raw_cli(["print-postgres-config"])
res.check_returncode()
return res.stdout.split('\n')
>>> result = env.neon_cli.raw_cli(...)
>>> assert result.stderr == ""
>>> log.info(result.stdout)
def in_existing(self, type: str, connection: str) -> None:
res = self.raw_cli(["in-existing", type, connection])
res.check_returncode()
If `check_return_code`, on non-zero exit code logs failure and raises.
"""
assert type(arguments) == list
bin_neon = os.path.join(str(neon_binpath), 'neon_local')
args = [bin_neon] + arguments
log.info('Running command "{}"'.format(' '.join(args)))
log.info(f'Running in "{self.env.repo_dir}"')
env_vars = os.environ.copy()
env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir)
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
if self.env.rust_log_override is not None:
env_vars['RUST_LOG'] = self.env.rust_log_override
for (extra_env_key, extra_env_value) in (extra_env_vars or {}).items():
env_vars[extra_env_key] = extra_env_value
# Pass coverage settings
var = 'LLVM_PROFILE_FILE'
val = os.environ.get(var)
if val:
env_vars[var] = val
# Intercept CalledProcessError and print more info
res = subprocess.run(args,
env=env_vars,
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if not res.returncode:
log.info(f"Run success: {res.stdout}")
elif check_return_code:
# this way command output will be recorded and shown in the CI failure message
msg = f"""\
Run {res.args} failed:
stdout: {res.stdout}
stderr: {res.stderr}
"""
log.info(msg)
raise Exception(msg) from subprocess.CalledProcessError(res.returncode,
res.args,
res.stdout,
res.stderr)
return res
class NeonPageserver(PgProtocol):
@@ -1372,10 +1331,7 @@ class PgBin:
env.update(env_add)
return env
def run(self,
command: List[str],
env: Optional[Env] = None,
**kwargs) -> 'subprocess.CompletedProcess[str]':
def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None):
"""
Run one of the postgres binaries.
@@ -1392,7 +1348,7 @@ class PgBin:
self._fixpath(command)
log.info('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
return subprocess.run(command, env=env, check=True, **kwargs)
subprocess.run(command, env=env, cwd=cwd, check=True)
def run_capture(self,
command: List[str],
@@ -2223,22 +2179,14 @@ def wait_until(number_of_iterations: int, interval: float, func):
raise Exception("timed out while waiting for %s" % func) from last_exception
def assert_timeline_local(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID):
def assert_local(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID):
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail
return timeline_detail
def assert_no_in_progress_downloads_for_tenant(
pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
):
tenant_status = pageserver_http_client.tenant_status(tenant)
assert tenant_status['has_in_progress_downloads'] is False, tenant_status
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:

View File

@@ -83,9 +83,6 @@ def get_dir_size(path: str) -> int:
totalbytes = 0
for root, dirs, files in os.walk(path):
for name in files:
try:
totalbytes += os.path.getsize(os.path.join(root, name))
except FileNotFoundError as e:
pass # file could be concurrently removed
totalbytes += os.path.getsize(os.path.join(root, name))
return totalbytes
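# Usage sketch (the path below is illustrative, not taken from the tests):
#   size = get_dir_size(str(env.repo_dir / 'tenants' / tenant.hex / 'timelines'))
# The try/except above makes the walk robust against files (e.g. layer files removed
# by GC or compaction) disappearing between os.walk() and os.path.getsize().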

View File

@@ -1,110 +0,0 @@
import random
import time
import statistics
import threading
import timeit
import pytest
from typing import List
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import NeonCompare
from fixtures.log_helper import log
def _record_branch_creation_durations(neon_compare: NeonCompare, durs: List[float]):
neon_compare.zenbenchmark.record("branch_creation_duration_max",
max(durs),
's',
MetricReport.LOWER_IS_BETTER)
neon_compare.zenbenchmark.record("branch_creation_duration_avg",
statistics.mean(durs),
's',
MetricReport.LOWER_IS_BETTER)
neon_compare.zenbenchmark.record("branch_creation_duration_stdev",
statistics.stdev(durs),
's',
MetricReport.LOWER_IS_BETTER)
@pytest.mark.parametrize("n_branches", [20])
# This test measures the latency of branch creation during a heavy [1] workload.
#
# [1]: to simulate a heavy workload, the test tweaks the GC and compaction settings
# to make those tasks run more often. The test runs `pgbench` in each new branch.
# Each branch is created from a randomly picked source branch.
def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int):
env = neon_compare.env
pg_bin = neon_compare.pg_bin
# Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '5 s',
'gc_horizon': f'{4 * 1024 ** 2}',
'checkpoint_distance': f'{2 * 1024 ** 2}',
'compaction_target_size': f'{1024 ** 2}',
'compaction_threshold': '2',
# set PITR interval to be small, so we can do GC
'pitr_interval': '5 s'
})
def run_pgbench(branch: str):
log.info(f"Start a pgbench workload on branch {branch}")
pg = env.postgres.create_start(branch, tenant_id=tenant)
connstr = pg.connstr()
pg_bin.run_capture(['pgbench', '-i', connstr])
pg_bin.run_capture(['pgbench', '-c10', '-T10', connstr])
pg.stop()
env.neon_cli.create_branch('b0', tenant_id=tenant)
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=('b0', ), daemon=True))
threads[-1].start()
branch_creation_durations = []
for i in range(n_branches):
time.sleep(1.0)
# pick a random source branch
p = random.randint(0, i)
timer = timeit.default_timer()
env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p), tenant_id=tenant)
dur = timeit.default_timer() - timer
log.info(f"Creating branch b{i+1} took {dur}s")
branch_creation_durations.append(dur)
threads.append(threading.Thread(target=run_pgbench, args=(f'b{i+1}', ), daemon=True))
threads[-1].start()
for thread in threads:
thread.join()
_record_branch_creation_durations(neon_compare, branch_creation_durations)
@pytest.mark.parametrize("n_branches", [1024])
# This test measures the latency of branch creation when creating a lot of branches.
def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int):
env = neon_compare.env
env.neon_cli.create_branch('b0')
pg = env.postgres.create_start('b0')
neon_compare.pg_bin.run_capture(['pgbench', '-i', '-s10', pg.connstr()])
branch_creation_durations = []
for i in range(n_branches):
# pick a random source branch
p = random.randint(0, i)
timer = timeit.default_timer()
env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p))
dur = timeit.default_timer() - timer
branch_creation_durations.append(dur)
_record_branch_creation_durations(neon_compare, branch_creation_durations)

Some files were not shown because too many files have changed in this diff.