Compare commits

..

111 Commits

Author SHA1 Message Date
Alexey Kondratov
12bac9c12b Wait for compute image before deploy in GitHub Action
We need both storage **and** compute images for deploy, because the control plane
picks the compute version based on the storage version. If it notices a fresh
storage version, it may bump the compute version, and if the compute image failed
to build, that may break things badly.
2022-07-14 11:27:16 +02:00
Kirill Bulatov
9a7427c203 Fill build-args for Docker builds via GH Actions context 2022-07-14 10:28:15 +03:00
Arthur Petukhovsky
968c20ca5f Add zenith-1-ps-3 to prod inventory (#2084) 2022-07-13 21:22:44 +03:00
Alexey Kondratov
f8a64512df [compute_tools] Set public schema owner to db owner (#2058)
Otherwise, the db owner does not have control over it, which is a reasonable thing
to have, and some users have already hit this.
2022-07-13 15:38:22 +02:00
Alexander Bayandin
07acd6ddde Fix clippy warnings in postgres_ffi/build.rs (#2081) 2022-07-13 14:12:11 +01:00
Sergey Melnikov
2b21d7b5bc Migrate from CircleCI to Github Actions: docker build and deploy (#1986) 2022-07-13 12:51:20 +03:00
Alexander Bayandin
61cc562822 Make POSTGRES_INSTALL_DIR configurable for build (#2067) 2022-07-13 09:18:11 +01:00
dhammika
7c041d9939 Add a test for gc dropping active layers (#707) (#1484)
This PR adds `test_branch_and_gc` test that reproduces https://github.com/neondatabase/neon/issues/707. It tests GC when running with branching.

Co-authored-by: Thang Pham <thang@neon.tech>
2022-07-12 15:53:22 -04:00
Thang Pham
7f048abf3b Add close_fds for initdb command and add close fd test (#2060)
This PR adds a test for https://github.com/neondatabase/neon/pull/1834 and fixes the error in https://app.circleci.com/pipelines/github/neondatabase/neon/7753/workflows/94d1b796-10a3-4989-b23c-4c1eb4a49cf5/jobs/79586, which happens because `pageserver.pid` is held by `initdb` command on restart.

Because the test requires `lsof` to be installed in the docker image, this PR also updates the caches and docker image specified in CircleCI config file.
2022-07-12 15:04:40 -04:00
Konstantin Knizhnik
5cf94a5848 Add test for cascade/flat branching (#1569) 2022-07-12 15:01:44 -04:00
bojanserafimov
5cf597044d Allow prev_lsn hint for fullbackup (#2052) 2022-07-11 10:31:14 -04:00
Heikki Linnakangas
95452e605a Optimize importing a physical backup
Before this patch, importing a physical backup followed the same path
as ingesting any WAL records:

1. All the data pages from the backup are first collected in the
   DatadirModification object.
2. Then, they are "committed" to the Repository and written to
   the in-memory layer.
3. Finally, the in-memory layer is frozen, and flushed to disk as an
   L0 delta layer file.

This was pretty inefficient. In step 1, the whole physical backup was
held in memory. If the backup is large, you simply run out of
memory. And in step 3, the resulting L0 delta layer file is large,
holding all the data again. That's a problem if the backup is larger
than 5 GB: Amazon S3 doesn't allow uploading files larger than 5 GB
(without using multi-part upload, see github issue #1910). So we want
to avoid that.

To alleviate those problems, optimize the codepath for importing a
physical backup. The basic flow is the same as before, but step 1
is optimized so that it doesn't accumulate all the data in memory,
and step 3 writes the data in image layers instead of one large delta
layer.
2022-07-11 17:03:58 +03:00
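To make the shape of that change concrete, here is a minimal, self-contained Rust sketch of the streaming idea: pages are flushed as image layers once a buffer threshold is reached, rather than accumulating the whole backup and emitting one oversized delta layer. The `ImportBuffer` type and the threshold below are illustrative, not the pageserver's actual implementation.

```rust
/// Hypothetical page buffer that is flushed as an image layer once it grows
/// past a size threshold, instead of accumulating the whole backup in memory
/// and emitting one huge L0 delta layer at the end.
struct ImportBuffer {
    pages: Vec<(u64, Vec<u8>)>, // (page key, 8 KiB page image)
    buffered_bytes: usize,
    flush_threshold: usize,
    layers_written: usize,
}

impl ImportBuffer {
    fn put_page(&mut self, key: u64, image: Vec<u8>) {
        self.buffered_bytes += image.len();
        self.pages.push((key, image));
        if self.buffered_bytes >= self.flush_threshold {
            self.flush_image_layer();
        }
    }

    fn flush_image_layer(&mut self) {
        // The real code writes an image layer file here; the sketch just counts.
        self.layers_written += 1;
        self.pages.clear();
        self.buffered_bytes = 0;
    }
}

fn main() {
    let mut buf = ImportBuffer {
        pages: Vec::new(),
        buffered_bytes: 0,
        // Tiny threshold for the demo; the real one would stay well under the
        // 5 GB S3 single-upload limit mentioned above.
        flush_threshold: 16 * 1024 * 1024,
        layers_written: 0,
    };
    for key in 0..10_000u64 {
        buf.put_page(key, vec![0u8; 8192]);
    }
    buf.flush_image_layer(); // flush the final partial layer
    println!("wrote {} image layers", buf.layers_written);
}
```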
Dmitry Rodionov
21da9199fa take Value by reference to avoid calling .clone 2022-07-11 17:03:58 +03:00
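A minimal sketch of the pattern this commit describes, with a hypothetical `Value` type rather than the pageserver's real one: accepting `&Value` lets callers keep ownership and skip the clone.

```rust
// Hypothetical stand-in for the repository's Value type.
#[derive(Clone)]
struct Value(Vec<u8>);

// Before: taking Value by value forces callers to clone if they still need it.
fn put_value_owned(_key: u64, value: Value) {
    let _ = value.0.len();
}

// After: borrowing the value lets callers keep ownership and skip the clone.
fn put_value_ref(_key: u64, value: &Value) {
    let _ = value.0.len();
}

fn main() {
    let v = Value(vec![0u8; 8192]);
    put_value_owned(1, v.clone()); // extra allocation and copy
    put_value_ref(1, &v);          // no copy at all
}
```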
Dmitry Rodionov
39d86ed29e debug branch failure 2022-07-09 00:42:45 +03:00
Egor Suvorov
f540f115a3 postgres_ffi/wal_craft: simplify API 2022-07-08 18:30:56 +02:00
Egor Suvorov
0b5b2e8e0b postgres_ffi/wal_craft: extract trait Crafter
Make the intent of the code clearer.
2022-07-08 18:30:56 +02:00
Egor Suvorov
60e5dc10e6 postgres_ffi/wal_generate: use 'craft' instead of 'generate'
It does very fine-tuned, byte-by-byte WAL crafting, not sloppy generation.
Hence 'craft' sounds like a better description.
2022-07-08 18:30:56 +02:00
Thang Pham
1f5918b36d Delay calculating the starting LSN when doing timeline branching (#2053)
Previously, upon branching, if no starting LSN is specified, we
determine the start LSN based on the source timeline's last record LSN
in `timelines::create_timeline` function, which then calls `Repository::branch_timeline`
to create the timeline.

Inside the `LayeredRepository::branch_timeline` function, to start branching,
we try to acquire a GC lock to prevent GC from removing data needed
for the new timeline. However, a GC iteration takes time, so the GC lock 
can be held for a long period of time. As a result, the previously determined 
starting LSN can become invalid because of GC.

This PR fixes the above issue by delaying the LSN calculation and moving it
inside the `LayeredRepository::branch_timeline` function.
2022-07-08 10:29:29 -04:00
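A schematic Rust sketch of the fixed ordering, using stand-in types (`Repository`, `SourceTimeline`) rather than the real `LayeredRepository`: the start LSN is read only after the GC lock is held, so GC cannot invalidate it in between.

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;

struct SourceTimeline {
    last_record_lsn: AtomicU64,
}

struct Repository {
    // Held by GC while it decides which layers can be dropped.
    gc_lock: Mutex<()>,
}

impl Repository {
    /// The start LSN is read only *after* the GC lock is held, so GC cannot
    /// garbage-collect the data behind it in between.
    fn branch_timeline(&self, src: &SourceTimeline, start_lsn: Option<u64>) -> u64 {
        let _guard = self.gc_lock.lock().unwrap();
        // Previously this value was computed by the caller, before the lock
        // was acquired, and could already be invalid by this point.
        start_lsn.unwrap_or_else(|| src.last_record_lsn.load(Ordering::Acquire))
    }
}

fn main() {
    let repo = Repository { gc_lock: Mutex::new(()) };
    let src = SourceTimeline { last_record_lsn: AtomicU64::new(0x16B9_FD80) };
    let lsn = repo.branch_timeline(&src, None);
    println!("branching at LSN {:#x}", lsn);
}
```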
Egor Suvorov
80b7a3b51a Test what happens when XLOG_SWITCH ends on page boundary, fix #1991 2022-07-08 15:37:26 +02:00
Egor Suvorov
85bda437de postgres_ffi/wal_generate: add last_wal_record_xlog_switch and use it in tests
Fix #1190: WalDecoder did not return the correct LSN of the next record after
processing an XLOG_SWITCH record
2022-07-08 15:37:26 +02:00
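A small sketch of the boundary rule behind this fix, assuming the default 16 MiB WAL segment size: after XLOG_SWITCH the rest of the segment is padding, so the LSN of the next record must be rounded up to the next segment boundary rather than computed by simply adding the record length.

```rust
/// WAL segment size used by the sketch (16 MiB is the Postgres default).
const WAL_SEGMENT_SIZE: u64 = 16 * 1024 * 1024;

/// After an XLOG_SWITCH record the remainder of the segment is padding, so the
/// next record starts at the beginning of the next segment. A decoder that
/// just adds the record length would report an LSN inside the padded area.
fn lsn_after_xlog_switch(record_end_lsn: u64) -> u64 {
    if record_end_lsn % WAL_SEGMENT_SIZE == 0 {
        // Already exactly on a segment boundary.
        record_end_lsn
    } else {
        (record_end_lsn / WAL_SEGMENT_SIZE + 1) * WAL_SEGMENT_SIZE
    }
}

fn main() {
    let end = 0x0100_0A28; // somewhere inside the second segment
    assert_eq!(lsn_after_xlog_switch(end), 0x0200_0000);
    println!("next record at {:#X}", lsn_after_xlog_switch(end));
}
```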
MMeent
52f445094a Update vendor/postgres to 14.4 (#2049)
Co-authored-by: Matthias van de Meent <matthias@neon.tech>
2022-07-08 14:51:44 +02:00
Egor Suvorov
bcdee3d3b5 test_runner: add test_crafted_wal_end.py
For some reason both non-`simple` tests spend about 10 seconds
in the post-restart `INSERT INTO` query on my machine, see #2023
2022-07-08 13:56:37 +02:00
Egor Suvorov
c08fa9d562 postgres_ffi/wal_generate: support generating WAL for an already running Postgres server
* The ensure_server_config() function is added to ensure the server does not have background processes
  which interfere with WAL generation
* Rework command line syntax
* Add `print-postgres-config` subcommand which prints the required server configuration
2022-07-08 13:56:37 +02:00
Alexander Bayandin
00c26ff3a3 Bring periodic perf tests on GitHub back (#2037)
* test/fixtures: fix DeprecationWarning
* workflows/benchmarking: increase timeout
* test: switch pgbench to default(simple) query mode
* test/performance: ensure we don't have tables that we're creating
* workflows/pg_clients: remove unused env var
* workflows/benchmarking: change platform name
2022-07-07 19:53:23 +01:00
Dmitry Rodionov
ec0faf3ac6 retry timeline delete 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
1a5af6d7a5 extend detach/delete tests 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
520ffb341b fix pageserver openapi spec 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
9f2b40645d review cleanup, point timeline/detach to timeline/delete 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
168214e0b6 use tenant status endpoint to check whether timelines were downloaded or not 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
d9d4ef12c3 review cleanup 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
e1e24336b7 review adjustments, bring back timeline_detach and rename it to timeline_delete 2022-07-07 21:20:04 +03:00
Dmitry Rodionov
4c54e4b37d switch to per-tenant attach/detach
Download operations for all timelines of one tenant are now grouped
together, so when attach is invoked the pageserver downloads all of them
and registers them in a single apply_sync_status_update call. This way,
branches can be used safely with attach/detach.
2022-07-07 21:20:04 +03:00
Andrey Taranik
ae116ff0a9 update timeout for proxy deploy (#2047) 2022-07-07 18:09:57 +03:00
Heikki Linnakangas
e6ea049165 If an error happens during import of base backup or WAL, log it.
We only sent the error to the client, with no trace in the pageserver log.
Log it, similar to how we log errors in GetPage@LSN requests.
2022-07-07 16:05:13 +03:00
Alexey Kondratov
747d009bb4 Fix panic while waiting for Postgres readiness in the compute_ctl (#2021)
We were reading the Postgres pid file and looking for the 'ready' status,
but the file could be empty or unreadable. So add checks for all these cases.
2022-07-07 11:56:58 +02:00
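A defensive-reading sketch in the spirit of this fix (the file layout and status string are assumptions, not the exact compute_ctl code): every failure mode of the pid file is treated as "not ready yet" instead of panicking.

```rust
use std::fs;
use std::path::Path;

/// Returns true only if the pid file exists, is readable, and its status
/// field says "ready". Any other state (missing file, empty file, partial
/// write) is treated as "not ready yet" instead of panicking.
fn postgres_is_ready(pidfile: &Path) -> bool {
    let contents = match fs::read_to_string(pidfile) {
        Ok(c) => c,
        Err(_) => return false, // the file may not exist yet
    };
    // postmaster.pid stores one field per line; the status is the last line.
    match contents.lines().last() {
        Some(status) => status.trim() == "ready",
        None => false, // file exists but is still empty
    }
}

fn main() {
    let ready = postgres_is_ready(Path::new("/tmp/pgdata/postmaster.pid"));
    println!("postgres ready: {}", ready);
}
```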
Alexander Bayandin
cb5df3c627 github/actions: set missing VIP_VAP_ACCESS_TOKEN (#2045) 2022-07-07 10:47:03 +01:00
Heikki Linnakangas
0e3456351f Shrink thread pools used for WAL receivers and background tasks.
I noticed that the pageserver has a very large virtual memory size,
several GB, even though it doesn't actually use that much
memory. That's not much of a problem normally, but I hit it because I
wanted to run tests with a limited virtual memory size, by calling
setrlimit(RLIMIT_AS), but the highest limit you can set is 2 GB. I was
not able to start pageserver with a limit of 2 GB.

On Linux, each thread allocates 32 MB of virtual memory. I read this
on some random forum on the Internet, but unfortunately could not find
the source again now. Empirically, reducing the number of threads clearly
helps to bring down the virtual memory size.

Aside from the virtual memory usage, it seems excessive to launch 40
threads in both of those thread pools. The tokio default is to have as
many worker threads as there are CPU cores in the system. That seems
like a fine heuristic for us, too, so remove the explicit setting of
the pool size and rely on the default. Note that the GC and compaction
tasks are actually run with tokio spawn_blocking, so the threads that
are actually doing the work, and possibly waiting on I/O, are not
consuming threads from the thread pool. The WAL receiver work is done
in the tokio worker threads, but the WAL receivers are more CPU bound
so that seems OK.

Also remove the explicit maximum on blocking tasks. I'm not sure what
the right value for that would be, or whether the value we set (100)
would be better than the tokio default (512). Since the value was
arbitrary, let's just rely on the tokio default for that, too.
2022-07-06 22:36:38 +03:00
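For reference, a minimal tokio setup along the lines described above: no explicit `worker_threads`/`max_blocking_threads`, relying on tokio's defaults, with heavy work still pushed through `spawn_blocking`. This is a sketch, not the pageserver's actual runtime configuration.

```rust
// Assumes tokio with the "full" feature enabled.
use tokio::runtime;

fn main() -> std::io::Result<()> {
    // Before: pool sizes were pinned explicitly, roughly
    //   .worker_threads(40).max_blocking_threads(100)
    // After: rely on tokio's defaults (worker threads = number of CPU cores,
    // blocking pool capped at 512), which keeps the thread count, and with it
    // the per-thread virtual memory overhead, much smaller on typical hosts.
    let rt = runtime::Builder::new_multi_thread()
        .thread_name("background-worker")
        .enable_all()
        .build()?;

    rt.block_on(async {
        // GC/compaction-style work still goes through the blocking pool.
        let res = tokio::task::spawn_blocking(|| 2 + 2).await.unwrap();
        println!("blocking task result: {}", res);
    });
    Ok(())
}
```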
Alexander Bayandin
1faf49da0f github/actions: set PERF_TEST_RESULT_CONNSTR from secrets (#2040) 2022-07-06 19:24:06 +01:00
bojanserafimov
4a96259bdd Add export/import test (#2036) 2022-07-06 13:45:26 -04:00
bojanserafimov
242af75653 Fix signal file parsing (#2042) 2022-07-06 13:45:02 -04:00
Arthur Petukhovsky
8fabdc6708 Add tests with concurrent computes.
Removes test_restart_compute, as the added test_compute_restarts is stronger.
2022-07-06 18:07:29 +04:00
Alexander Bayandin
07df7c2edd github/actions: fix storing perf data for main (#2038) 2022-07-06 13:15:15 +01:00
Kirill Bulatov
50821c0a3c Return download stream directly from the remote storage API 2022-07-05 21:45:15 +03:00
Andrey Taranik
68adfe0fc8 inventory file fix for neon-stress env 2022-07-05 21:29:03 +04:00
Dmitry Rodionov
cfdf79aceb harden create_empty_timeline
Reorder the checks so that we verify whether the timeline exists
before writing anything to disk and possibly replacing valid content
2022-07-05 16:44:18 +03:00
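A minimal sketch of the reordered check (the paths and file names are hypothetical): the existence test happens before anything is written, so an existing timeline can no longer be clobbered.

```rust
use std::fs;
use std::io;
use std::path::Path;

/// Refuse to proceed if the timeline directory already exists, *before* any
/// metadata is written to disk.
fn create_empty_timeline(timelines_dir: &Path, timeline_id: &str) -> io::Result<()> {
    let timeline_path = timelines_dir.join(timeline_id);
    if timeline_path.exists() {
        // Previously the metadata write came first and could clobber
        // valid content of an existing timeline.
        return Err(io::Error::new(
            io::ErrorKind::AlreadyExists,
            format!("timeline {} already exists", timeline_id),
        ));
    }
    fs::create_dir_all(&timeline_path)?;
    fs::write(timeline_path.join("metadata"), b"...")?;
    Ok(())
}

fn main() -> io::Result<()> {
    create_empty_timeline(Path::new("/tmp/timelines"), "0123abcd")
}
```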
bojanserafimov
32560e75d2 Enable relocation test (#1974) 2022-07-05 08:27:57 -04:00
Heikki Linnakangas
bb69e0920c Do not overwrite an existing image layer.
See github issues #1594 and #1690

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2022-07-05 14:45:31 +03:00
Alexander Bayandin
05f6a1394d Add tests for different Postgres client libraries (#2008)
* Add tests for different postgres clients
* test/fixtures: sanitize test name for test_output_dir
* test/fixtures: do not look for etcd before runtime
* Add workflow for testing Postgres client libraries
2022-07-05 12:22:58 +01:00
Heikki Linnakangas
844832ffe4 Bump vendor/postgres
Contains changes from two PRs in vendor/postgres:
- https://github.com/neondatabase/postgres/pull/163
- https://github.com/neondatabase/postgres/pull/176
2022-07-05 10:55:03 +03:00
bojanserafimov
d29c545b5d Gc/compaction thread pool, take 2 (#1933)
Decrease the number of pageserver threads by running gc and compaction in a blocking tokio thread pool
2022-07-05 02:06:40 -04:00
Kirill Bulatov
6abdb12724 Fix 1.62 Clippy errors 2022-07-04 23:46:37 +03:00
Alexander Bayandin
7898e72990 Remove duplicated checks from LocalEnv 2022-07-04 22:35:00 +03:00
Dmitry Rodionov
65704708fa remove unused imports, make more use of pathlib.Path 2022-07-01 18:56:51 +03:00
Arseny Sher
6100a02d0f Prefix WAL files in s3 with environment name.
It wasn't merged to prod yet, so safe to enable.
2022-07-01 19:21:28 +04:00
Arseny Sher
97fed38213 Fix cadaca010c for older ssh clients. 2022-07-01 19:20:59 +04:00
Arseny Sher
cadaca010c Make ansible work with storage nodes through teleport from a local box. 2022-07-01 16:58:34 +03:00
Bojan Serafimov
f09c09438a Fix gc after import 2022-07-01 11:10:49 +03:00
Dmitry Rodionov
00fc696606 replace extra urlencode dependency with already present url library 2022-06-30 14:32:15 +03:00
Kirill Bulatov
1d0706cf25 Fix walreceiver connection selection mechanism
* Avoid reconnecting to a safekeeper immediately after its failure by limiting candidates to those with the fewest connection attempts. Thus we don't have to wait lagging_wal_timeout (10s by default) before a switch happens even if no new changes are generated, and the current test_restarts_under_load expects some commits to happen within 4s.
* Make the default max_lsn_wal_lag larger, otherwise constant reconnections happen during normal work.
* Fix wal_connection_attempts maintenance, preventing a busy loop of reconnections.
2022-06-30 00:40:12 +03:00
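A small sketch of the selection rule from the first bullet, with simplified stand-in types: candidates are first narrowed to those with the fewest connection attempts, and only then does the best commit LSN win, so a just-failed safekeeper is not immediately re-picked.

```rust
use std::collections::HashMap;

struct SafekeeperInfo {
    commit_lsn: u64,
}

/// Only safekeepers with the fewest connection attempts so far are considered;
/// among those, the one with the highest commit LSN wins. A node that just
/// failed has its attempt counter bumped, so it is not re-picked immediately.
fn pick_candidate(
    candidates: &HashMap<u64, SafekeeperInfo>,
    attempts: &HashMap<u64, u32>,
) -> Option<u64> {
    let fewest = candidates
        .keys()
        .map(|id| attempts.get(id).copied().unwrap_or(0))
        .min()?;
    candidates
        .iter()
        .filter(|(id, _)| attempts.get(*id).copied().unwrap_or(0) == fewest)
        .max_by_key(|(_, info)| info.commit_lsn)
        .map(|(id, _)| *id)
}

fn main() {
    let mut candidates = HashMap::new();
    candidates.insert(1, SafekeeperInfo { commit_lsn: 100 });
    candidates.insert(2, SafekeeperInfo { commit_lsn: 120 });

    let mut attempts = HashMap::new();
    attempts.insert(2, 3); // safekeeper 2 just failed several times

    // Safekeeper 1 has zero attempts, so it is picked despite the lower LSN.
    assert_eq!(pick_candidate(&candidates, &attempts), Some(1));
}
```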
Dmitry Ivanov
5ee19b0758 Fix bloated coverage uploads (#2005)
Move coverage data to a better directory, merge it better and don't publish it from CircleCI pipeline
2022-06-29 17:59:19 +03:00
Kirill Bulatov
cef90d9220 Disable cachepot for GH Actions builds (#2007) 2022-06-29 17:56:02 +03:00
Kirill Bulatov
4a05413a4c More code coverage fixes in GH Actions (#2002) 2022-06-27 22:40:20 +03:00
Kirill Bulatov
dd61f3558f Fix coverage upload credentials retrieval (#2001) 2022-06-27 20:41:09 +03:00
Kirill Bulatov
8a714f1ebf Add coverage to GH actions and rework part of them (#1987) 2022-06-27 19:15:56 +03:00
Arseny Sher
137291dc24 Push to etcd from safekeeper many timelines concurrently.
Mitigates the latency cost, making push throughput 1-1.5 orders of magnitude higher.

Also make leases per timeline, not per whole safekeeper, avoiding storing
garbage in etcd for deleted timelines while safekeeper is alive.
2022-06-27 16:30:21 +03:00
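A toy illustration of the concurrency change, assuming tokio (the broker round trip is simulated with a sleep): pushing per-timeline updates concurrently pays the etcd latency roughly once overall instead of once per timeline.

```rust
// Assumes tokio with the "full" feature enabled.
use std::time::Duration;
use tokio::time::sleep;

/// Stand-in for pushing one timeline's status to the broker (etcd).
async fn push_timeline_status(timeline_id: u32) {
    // Simulate one broker round trip; in reality this is an etcd PUT.
    sleep(Duration::from_millis(50)).await;
    println!("pushed status for timeline {}", timeline_id);
}

#[tokio::main]
async fn main() {
    // Sequential pushes would pay the round-trip latency once per timeline;
    // spawning them concurrently pays it roughly once overall.
    let handles: Vec<_> = (0..100u32)
        .map(|id| tokio::spawn(push_timeline_status(id)))
        .collect();
    for handle in handles {
        handle.await.unwrap();
    }
}
```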
Kirill Bulatov
eb8926083e Use the updated base build Docker image (#1972) 2022-06-27 13:12:58 +03:00
Johan Eliasson
26bca6ddba Add openssl to OSX dependencies (#1994) 2022-06-26 21:54:07 +03:00
Arthur Petukhovsky
55192384c3 Fix zero timeline_start_lsn (#1981)
* Fix zero timeline_start_lsn

* Log more info on control file upgrade

* Fix formatting

Co-authored-by: Anastasia Lubennikova <anastasia@neon.tech>
2022-06-24 13:59:37 +03:00
KlimentSerafimov
392cd8b1fc Refactored extracting project_name in console.rs. (#1982) 2022-06-24 05:57:33 -04:00
Alexey Kondratov
3cc531d093 Fix CREATE EXTENSION for non-db-owner users (#1408)
Previously, we were granting create only to the db owner, but now we have a
dedicated 'web_access' role to connect via the web UI and proxy link auth.

We grant read/write on all data to all roles anyway, so let's grant
create to everyone too. This creates some privilege objects in each db,
which we need to drop before deleting the role. So now we reassign all
owned objects to each db owner before deletion. This also fixes deletion
of roles that previously created some data in any db. Will be tested by
https://github.com/neondatabase/cloud/pull/1673

Later we should stop messing with Postgres ACL that much.
2022-06-23 21:36:53 +02:00
bojanserafimov
84b9fcbbd5 Increase a few test timeouts (#1977) 2022-06-23 11:51:56 -04:00
Bojan Serafimov
93e050afe3 Don't require project name for link auth 2022-06-23 15:38:05 +03:00
Anastasia Lubennikova
6d7dc384a5 Add zenith-us-stage-ps-3 to deploy 2022-06-23 14:52:32 +03:00
Anastasia Lubennikova
3c2b03cd87 Update timeline size on dropdb. Add the test (#1973)
In addition, fix the database size calculation:
count not only the main fork of the relation, but also the vm and fsm forks.
2022-06-23 12:28:12 +03:00
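A minimal sketch of the size accounting described above, with illustrative types: the database size sums the main, fsm, and vm forks of each relation instead of the main fork alone.

```rust
#[derive(Clone, Copy)]
enum ForkNumber {
    Main,
    Fsm,
    Vm,
}

struct RelationFork {
    db_oid: u32,
    fork: ForkNumber,
    size_bytes: u64,
}

/// Sum every counted fork of every relation in the database, not just the
/// main fork as before.
fn database_size(db_oid: u32, forks: &[RelationFork]) -> u64 {
    forks
        .iter()
        .filter(|f| f.db_oid == db_oid)
        // Previously only ForkNumber::Main was counted; now vm and fsm are too.
        .filter(|f| matches!(f.fork, ForkNumber::Main | ForkNumber::Fsm | ForkNumber::Vm))
        .map(|f| f.size_bytes)
        .sum()
}

fn main() {
    let forks = [
        RelationFork { db_oid: 5, fork: ForkNumber::Main, size_bytes: 8192 * 100 },
        RelationFork { db_oid: 5, fork: ForkNumber::Fsm, size_bytes: 8192 * 3 },
        RelationFork { db_oid: 5, fork: ForkNumber::Vm, size_bytes: 8192 },
    ];
    println!("db size: {} bytes", database_size(5, &forks));
}
```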
Kirill Bulatov
7c49abe7d1 Rework etcd timeline updates and their handling 2022-06-23 09:11:27 +03:00
KlimentSerafimov
d059e588a6 Added invariant check for project name. (#1921)
Summary: Added invariant checking for project name. Refactored ClientCredentials and TlsConfig.

* Added formatting invariant check for project name:
  * `\forall c \in project_name . c \in [alnum] U {'-'}`
  * `sni_data == <project_name>.<common_name>`
* Added exhaustive tests for get_project_name.
* Refactored TlsConfig to contain common_name : Option<String>.
* Refactored ClientCredentials construction to construct project_name directly.
* Merged ProjectNameError into ClientCredsParseError.
* Tweaked proxy tests to accommodate refactored ClientCredentials construction semantics. 
* [Pytests] Added project option argument to test_proxy_select_1.
* Removed project param from Api since now it's contained in creds.
* Refactored &Option<String> -> Option<&str>.

Co-authored-by: Dmitrii Ivanov <dima@neon.tech>.
2022-06-22 09:34:24 -04:00
Sergey Melnikov
6222a0012b Migrate from CircleCI to Github Actions: python codestyle, build and tests (#1647)
Duplicate postgres and neon build and test jobs from CircleCI to Github actions.
2022-06-22 11:40:59 +03:00
bojanserafimov
1ca28e6f3c Import basebackup into pageserver (#1925)
Allow importing a basebackup taken from vanilla postgres or another pageserver via the psql copy-in protocol.
2022-06-21 11:04:10 -04:00
Arthur Petukhovsky
6c4d6a2183 Remove timeline_start_lsn check temporary. (#1964) 2022-06-21 02:02:24 +03:00
Thang Pham
37465dafe3 Add wal backpressure tests (#1919)
Resolves #1889.

This PR adds new tests to measure the WAL backpressure's performance under different workloads.

## Changes
- add new performance tests in `test_wal_backpressure.py`
- allow safekeeper's fsync to be configurable when running tests
2022-06-20 11:40:55 -04:00
Joshua D. Drake
ec0064c442 Small README.md changes (#1957)
* Update make instructions for release and debug builds. Update the PostgreSQL glossary to the proper version (14)

* Continued cleanup of build instructions including removal of redundancies
2022-06-20 10:05:10 -04:00
Heikki Linnakangas
83c7e6ce52 Bump vendor/postgres.
This brings in the change to not use shared memory in the WAL redo
process, to avoid running out of sysv shmem segments in the page server.

Also, it removes the callmemaybe bits.
2022-06-20 15:28:43 +03:00
Arthur Petukhovsky
f862373ac0 Fix WAL timeout in test_s3_wal_replay (#1953) 2022-06-17 20:43:54 +03:00
Arthur Petukhovsky
699f46cd84 Download WAL from S3 if it's not available in safekeeper dir (#1932)
`send_wal.rs` and `WalReader` are now async. `test_s3_wal_replay` checks that WAL can be replayed after being offloaded.
2022-06-17 15:33:39 +03:00
Anastasia Lubennikova
36ee182d26 Implement page service 'fullbackup' endpoint (#1923)
* Implement page service 'fullbackup' endpoint that works like basebackup, but also sends relation files

* Add test_runner/batch_others/test_fullbackup.py

Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>
2022-06-16 14:07:11 +03:00
Anastasia Lubennikova
d11c9f9fcb Use random ports for the proxy and local pg in tests
Fixes #1931
Author: Dmitry Ivanov
2022-06-15 20:21:58 +03:00
Kirill Bulatov
d8a37452c8 Rename ZenithFeedback (#1912) 2022-06-11 00:44:05 +03:00
chaitanya sharma
e1336f451d renamed .zenith data-dir to .neon. 2022-06-09 18:19:18 +02:00
Arseny Sher
a4d8261390 Save Postgres log in test_find_end_of_wal_* tests. 2022-06-09 19:16:43 +04:00
Egor Suvorov
e2a5a31595 Safekeeper HTTP router: add comment about /v1/timeline 2022-06-09 17:14:46 +02:00
Egor Suvorov
0ac0fba77a test_runner: test Safekeeper HTTP API Auth
All endpoints except POST /v1/timeline are tested; that one is not tested in any way yet.
Three attempts are made for each endpoint: correctly authenticated, badly authenticated, and unauthenticated.
2022-06-09 17:14:46 +02:00
Egor Suvorov
a001052cdd test_runner: SafekeeperHttpClient: support auth 2022-06-09 17:14:46 +02:00
Egor Suvorov
1f1d852204 ZenithEnvBuilder: rename pageserver_auth_enabled --> auth_enabled 2022-06-09 17:14:46 +02:00
Egor Suvorov
f7b878611a Implement JWT authentication in Safekeeper HTTP API (#1753)
* `control_plane` crate (used by `neon_local`) now parses an `auth_enabled` bool for each Safekeeper
* If auth is enabled, a Safekeeper is passed a path to a public key via a new command line argument
* Added TODO comments to other places needing auth
2022-06-09 17:14:46 +02:00
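A sketch of what the token check can look like, assuming the `jsonwebtoken` crate; the claim structure and algorithm here are illustrative, not the safekeeper's actual ones.

```rust
// Assumes the `jsonwebtoken` crate and `serde` with the "derive" feature.
use jsonwebtoken::{decode, Algorithm, DecodingKey, TokenData, Validation};
use serde::Deserialize;

/// Illustrative claims; the real safekeeper/pageserver claims differ.
#[derive(Debug, Deserialize)]
struct Claims {
    scope: String,
    tenant_id: Option<String>,
}

/// Validate a bearer token against the public key the safekeeper was started
/// with (passed via a command line argument, as the commit describes).
fn check_permission(
    token: &str,
    public_key_pem: &[u8],
) -> Result<TokenData<Claims>, jsonwebtoken::errors::Error> {
    let key = DecodingKey::from_rsa_pem(public_key_pem)?;
    decode::<Claims>(token, &key, &Validation::new(Algorithm::RS256))
}

fn main() {
    // With a garbage key and token this simply returns an error.
    let result = check_permission("not-a-jwt", b"not-a-pem");
    println!("token accepted: {}", result.is_ok());
}
```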
Arseny Sher
a51b2dac9a Don't s3 offload from newly joined safekeeper not having required WAL.
I made the check at the launcher level with the prospect of generally moving
the election (the decision of who offloads) there.

Also log timeline 'active' changes.
2022-06-09 18:30:16 +04:00
Thang Pham
e22d9cee3a fix ZeroDivisionError in scripts/generate_perf_report_page (#1906)
Fixes the `ZeroDivisionError` by adding `EPS=1e-6` to the calculation.
2022-06-08 09:15:12 -04:00
Arthur Petukhovsky
a01999bc4a Replace most common remote logs with metrics (#1909) 2022-06-08 13:36:49 +03:00
chaitanya sharma
32e64afd54 Use better parallel build instructions in readme.md (#1908) 2022-06-08 11:25:37 +03:00
Kirill Bulatov
8a53472e4f Force etcd broker keys to not to intersect 2022-06-08 11:21:05 +03:00
Dmitry Rodionov
6e26588d17 Allow customizing the shutdown condition in PostgresBackend
Use it in PageServerHandler to check the per-thread shutdown condition
from thread_mgr, which takes tenants and timelines into account
2022-06-07 22:11:54 +03:00
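A minimal sketch of a pluggable shutdown condition (types and names are stand-ins, not the actual PostgresBackend API): the backend calls a caller-supplied closure, and PageServerHandler can supply one that consults per-tenant and per-timeline state.

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

/// The backend calls a caller-supplied closure instead of a hard-coded global
/// flag, so the handler can pass one that also consults per-tenant and
/// per-timeline state in thread_mgr.
struct PostgresBackend {
    shutdown_requested: Box<dyn Fn() -> bool + Send>,
}

impl PostgresBackend {
    fn run(&self) {
        loop {
            if (self.shutdown_requested)() {
                println!("shutdown requested, closing connection");
                return;
            }
            // ... read and handle one protocol message ...
            return; // keep the demo finite
        }
    }
}

fn main() {
    let tenant_is_shutting_down = Arc::new(AtomicBool::new(false));
    let flag = Arc::clone(&tenant_is_shutting_down);
    let backend = PostgresBackend {
        shutdown_requested: Box::new(move || flag.load(Ordering::Relaxed)),
    };
    backend.run();
}
```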
Arseny Sher
0b93253b3c Fix leaked keepalive task in s3 offloading leader election.
I still don't like the surroundings and feel we'd better get away without using
election API at all, but this is a quick fix to keep CI green.

ref #1815
2022-06-07 15:17:57 +04:00
Dmitry Rodionov
7dc6beacbd make it possible to associate thread with a tenant after thread start 2022-06-07 12:59:35 +03:00
Thang Pham
6cfebc096f Add read/write throughput performance tests (#1883)
Part of #1467 

This PR adds several performance tests that compare the [PG statistics](https://www.postgresql.org/docs/current/monitoring-stats.html) obtained when running PG benchmarks against Neon and vanilla PG to measure the read/write throughput of the DB.
2022-06-06 12:32:10 -04:00
KlimentSerafimov
fecad1ca34 Resolving issue #1745. Added cluster option for SNI data (#1813)
* Added project option in case SNI data is missing. Resolving issue #1745.

* Added invariant checking for project name: if both sni_data and project_name are available then they should match.
2022-06-06 08:14:41 -04:00
bojanserafimov
92de8423af Remove dead code (#1886) 2022-06-05 09:18:11 -04:00
Dmitry Rodionov
e442f5357b unify two identical failpoints in flush_frozen_layer
probably a merge artifact
2022-06-03 19:36:09 +03:00
Arseny Sher
5a723d44cd Parametrize test_normal_work.
I like to run a small test locally, but let's avoid duplication.
2022-06-03 20:32:53 +04:00
Kirill Bulatov
2623193876 Remove pageserver_connstr from WAL stream logic 2022-06-03 17:30:36 +03:00
Arseny Sher
70a53c4b03 Get back test_safekeeper_normal_work, but skip it by default.
It is handy for development.
2022-06-03 16:12:14 +04:00
Arseny Sher
9e108102b3 Silence etcd safekeeper info key parse errors.
When we subscribe to everything, it is ok to receive not only safekeeper
timeline updates.
2022-06-03 16:12:14 +04:00
huming
9c846a93e8 chore(doc) 2022-06-03 14:24:27 +03:00
189 changed files with 10192 additions and 4552 deletions

View File

@@ -1,18 +0,0 @@
[Unit]
Description=Zenith safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -5,10 +5,10 @@ executors:
resource_class: xlarge
docker:
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
- image: zimg/rust:1.58
- image: neondatabase/rust:1.58
neon-executor:
docker:
- image: zimg/rust:1.58
- image: neondatabase/rust:1.58
jobs:
# A job to build postgres
@@ -37,7 +37,7 @@ jobs:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
# Build postgres if the restore_cache didn't find a build.
# `make` can't figure out whether the cache is valid, since
@@ -54,7 +54,7 @@ jobs:
- save_cache:
name: Save postgres cache
key: v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
paths:
- tmp_install
@@ -85,7 +85,7 @@ jobs:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- restore_cache:
name: Restore rust cache
@@ -93,31 +93,29 @@ jobs:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
- v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
# Build the rust code, including test binaries
- run:
name: Rust build << parameters.build_type >>
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
export CARGO_INCREMENTAL=0
export CACHEPOT_BUCKET=zenith-rust-cachepot
export RUSTC_WRAPPER=cachepot
export RUSTC_WRAPPER=""
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
cachepot -s
- save_cache:
name: Save rust cache
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
paths:
- ~/.cargo/registry
- ~/.cargo/git
@@ -128,35 +126,22 @@ jobs:
name: cargo test
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
cargo test $CARGO_FLAGS
# Install the rust binaries, for use by test jobs
- run:
name: Install rust binaries
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/zenith/bin
mkdir -p /tmp/zenith/test_bin
mkdir -p /tmp/zenith/etc
@@ -166,34 +151,15 @@ jobs:
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/zenith/bin/$bin
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/zenith/test_bin/$(basename $bin)
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
fi
# Install the postgres binaries, for use by test jobs
- run:
name: Install postgres binaries
command: |
cp -a tmp_install /tmp/zenith/pg_install
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save the rust binaries and coverage data for other jobs in this workflow.
# Save rust binaries for other jobs in the workflow
- persist_to_workspace:
root: /tmp/zenith
paths:
@@ -286,7 +252,7 @@ jobs:
# no_output_timeout, specified here.
no_output_timeout: 10m
environment:
- ZENITH_BIN: /tmp/zenith/bin
- NEON_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
@@ -314,12 +280,6 @@ jobs:
export GITHUB_SHA=$CIRCLE_SHA1
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
@@ -330,7 +290,7 @@ jobs:
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
"${cov_prefix[@]}" ./scripts/pytest \
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
@@ -359,379 +319,12 @@ jobs:
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save coverage data (if any)
# Save data (if any)
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
coverage-report:
executor: neon-xlarge-executor
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
- run:
name: Build coverage report
command: |
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/coverage \
--dir=/tmp/zenith/coverage report \
--input-objects=/tmp/zenith/etc/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
- run:
name: Upload coverage report
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/git-upload \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"success\",
\"context\": \"zenith-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
# Build neondatabase/neon:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:latest
# Build neondatabase/compute-node:latest image and push it to Docker hub
docker-image-compute:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Build and push compute-tools Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/compute-tools:local \
--tag neondatabase/compute-tools:latest \
-f Dockerfile.compute-tools .
# Only push :latest image
docker push neondatabase/compute-tools:latest
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
--tag neondatabase/compute-node:latest vendor/postgres \
--build-arg COMPUTE_TOOLS_TAG=local
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:latest
# Build production neondatabase/neon:release image and push it to Docker hub
docker-image-release:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:release
# Build production neondatabase/compute-node:release image and push it to Docker hub
docker-image-compute-release:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Build and push compute-tools Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/compute-tools:release \
--tag neondatabase/compute-tools:local \
-f Dockerfile.compute-tools .
# Only push :release image
docker push neondatabase/compute-tools:release
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
--tag neondatabase/compute-node:release vendor/postgres \
--build-arg COMPUTE_TOOLS_TAG=local
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:release
deploy-staging:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i staging.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-staging-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${STAGING_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
deploy-neon-stress:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i neon-stress.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-neon-stress-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${NEON_STRESS_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
deploy-release:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
RELEASE=true ./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i production.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-release-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${PRODUCTION_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG="release-$(git log --oneline|wc -l)"
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\"
}
}"
workflows:
build_and_test:
jobs:
@@ -774,120 +367,3 @@ workflows:
save_perf_report: true
requires:
- build-neon-release
- coverage-report:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
requires:
# TODO: consider adding more
- other-tests-debug
- docker-image:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-staging:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-staging-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-neon-stress:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-neon-stress-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- docker-image-release:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- release
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute-release:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- release
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-release:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- release
requires:
- docker-image-release
- deploy-release-proxy:
# deploy only for commits to main
filters:
branches:
only:
- release
requires:
- docker-image-release
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "neondatabase/cloud"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-neon-release
# - pg_regress-tests-release
# - other-tests-release

View File

@@ -9,8 +9,8 @@ tmp_install
tmp_check_cli
test_output
.vscode
.zenith
integration_tests/.zenith
.neon
integration_tests/.neon
.mypy_cache
Dockerfile

View File

@@ -0,0 +1,140 @@
name: 'Run python test'
description: 'Runs a Neon python test set, performing all the required preparations before'
inputs:
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
required: true
rust_toolchain:
description: 'Rust toolchain version to fetch the caches'
required: true
test_selection:
description: 'A python test suite to run'
required: true
extra_params:
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
required: false
default: ''
needs_postgres_source:
description: 'Set to true if the test suite requires postgres source checked out'
required: false
default: 'false'
run_in_parallel:
description: 'Whether to run tests in parallel'
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
path: ./neon-artifact/
- name: Extract Neon artifact
shell: bash -ex {0}
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -ex {0}
run: ./scripts/pysync
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
shell: bash -ex {0}
run: |
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
TEST_SELECTION="test_runner/${{ inputs.test_selection }}"
EXTRA_PARAMS="${{ inputs.extra_params }}"
if [ -z "$TEST_SELECTION" ]; then
echo "test_selection must be set"
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "not remote_cluster" \
-rA $TEST_SELECTION $EXTRA_PARAMS
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO=local
scripts/generate_and_push_perf_report.sh
fi
fi
- name: Delete all data but logs
shell: bash -ex {0}
if: always()
run: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
du -sh /tmp/test_output/*
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
path: /tmp/test_output/

View File

@@ -0,0 +1,17 @@
name: 'Merge and upload coverage data'
description: 'Compresses and uploads the coverage data as an artifact'
runs:
using: "composite"
steps:
- name: Merge coverage data
shell: bash -ex {0}
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Upload coverage data
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: coverage-data-artifact
path: /tmp/coverage/

View File

@@ -6,5 +6,7 @@ timeout = 30
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
scp_if_ssh = True
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp neither worked for me
transfer_method = piped
pipelining = True

View File

@@ -1,3 +1,7 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use pre 8.5 option name to cope with old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023

View File

@@ -57,7 +57,7 @@
args:
creates: "/storage/pageserver/data/tenants"
environment:
ZENITH_REPO_DIR: "/storage/pageserver/data"
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:
@@ -131,7 +131,7 @@
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
ZENITH_REPO_DIR: "/storage/safekeeper/data"
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:

View File

@@ -12,6 +12,7 @@ pageservers
safekeepers
[storage:vars]
env_name = neon-stress
console_mgmt_base_url = http://neon-stress-console.local
bucket_name = neon-storage-ireland
bucket_region = eu-west-1

View File

@@ -1,6 +1,7 @@
[pageservers]
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1
zenith-1-ps-3 console_region_id=1
[safekeepers]
zenith-1-sk-1 console_region_id=1
@@ -12,6 +13,7 @@ pageservers
safekeepers
[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2

View File

@@ -1,6 +1,7 @@
[pageservers]
#zenith-us-stage-ps-1 console_region_id=27
zenith-us-stage-ps-2 console_region_id=27
zenith-us-stage-ps-3 console_region_id=27
[safekeepers]
zenith-us-stage-sk-4 console_region_id=27
@@ -12,6 +13,7 @@ pageservers
safekeepers
[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -0,0 +1,18 @@
[Unit]
Description=Zenith safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -26,11 +26,11 @@ jobs:
runs-on: [self-hosted, zenith-benchmarker]
env:
POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"
POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"
steps:
- name: Checkout zenith repo
uses: actions/checkout@v2
uses: actions/checkout@v3
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
@@ -88,7 +88,7 @@ jobs:
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "zenith-staging"
PLATFORM: "neon-staging"
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
@@ -96,7 +96,7 @@ jobs:
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
- name: Submit result
env:

.github/workflows/build_and_test.yml (new file, 642 lines)
View File

@@ -0,0 +1,642 @@
name: Test
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
build-postgres:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
env:
BUILD_TYPE: ${{ matrix.build_type }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v3
with:
path: tmp_install/
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
# actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
- name: Prepare postgres artifact
run: tar -C tmp_install/ -czf ./pg.tgz .
- name: Upload postgres artifact
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: ./pg.tgz
build-neon:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-postgres ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
env:
BUILD_TYPE: ${{ matrix.build_type }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Get postgres artifact for restoration
uses: actions/download-artifact@v3
with:
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: ./postgres-artifact/
- name: Extract postgres artifact
run: |
mkdir ./tmp_install/
tar -xf ./postgres-artifact/pg.tgz -C ./tmp_install/
rm -rf ./postgres-artifact/
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
~/.cargo/git/
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
- name: Run cargo build
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
- name: Run cargo test
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
- name: Install rust binaries
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/neon/bin/
mkdir -p /tmp/neon/test_bin/
mkdir -p /tmp/neon/etc/
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
# Install target binaries
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
done
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
cp "$SRC" "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
fi
- name: Install postgres binaries
run: cp -a tmp_install /tmp/neon/pg_install
- name: Prepare neon artifact
run: tar -C /tmp/neon/ -czf ./neon.tgz .
- name: Upload neon binaries
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon.tgz
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
pg_regress-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest regress tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_pg_regress
needs_postgres_source: true
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
other-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest other tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_others
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ release ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest benchmarks
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
coverage-report:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ other-tests, pg_regress-tests ]
strategy:
fail-fast: false
matrix:
build_type: [ debug ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Restore cargo deps cache
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
~/.cargo/git/
target/
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon-artifact/
- name: Extract Neon artifact
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Restore coverage data
uses: actions/download-artifact@v3
with:
name: coverage-data-artifact
path: /tmp/coverage/
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Build and upload coverage report
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
scripts/coverage \
--dir=/tmp/coverage report \
--input-objects=/tmp/coverage/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
scripts/git-upload \
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"success\",
\"context\": \"neon-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
trigger-e2e-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
REMOTE_REPO="${{ github.repository_owner }}/cloud"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\"
}
}"
docker-image:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build neon Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
pull: true
push: true
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
docker-image-compute:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: false
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:local
- name: Push compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: true
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
- name: Build compute-node Docker image
uses: docker/build-push-action@v2
with:
context: ./vendor/postgres/
build-args:
COMPUTE_TOOLS_TAG=local
push: true
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
matrix-include: ${{ steps.set-matrix.outputs.include }}
steps:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$PRODUCTION]"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# We need both storage **and** compute images for deploy, because control plane
# picks the compute version based on the storage version. If it notices a fresh
# storage it may bump the compute version. And if compute image failed to build
# it may break things badly.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Setup ansible
run: |
pip install --progress-bar off --user ansible boto3
- name: Redeploy
run: |
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
eval $(ssh-agent)
echo "${{ secrets.TELEPORT_SSH_KEY }}" | tr -d '\n'| base64 --decode >ssh-key
echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
# to run all deploy jobs consistently.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
env:
KUBECONFIG: .kubeconfig
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- name: Setup helm v3
run: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

View File

@@ -1,4 +1,4 @@
name: Build and Test
name: Check code style and build
on:
push:
@@ -6,15 +6,27 @@ on:
- main
pull_request:
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
jobs:
regression-check:
check-codestyle-rust:
strategy:
fail-fast: false
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 30
timeout-minutes: 50
name: run regression test suite
runs-on: ${{ matrix.os }}
@@ -92,5 +104,30 @@ jobs:
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Run cargo test
run: cargo test --all --all-targets
- name: Ensure all project builds
run: cargo build --all --all-targets
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .

71 .github/workflows/pg_clients.yml vendored Normal file
View File

@@ -0,0 +1,71 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
runs-on: [ ubuntu-latest ]
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -ex {0}
run: ./scripts/pysync
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -ex {0}
run: |
# The test framework expects a psql binary to be present;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Post to a Slack channel
if: failure()
id: slack
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

5 .gitignore vendored
View File

@@ -5,8 +5,9 @@
__pycache__/
test_output/
.vscode
/.zenith
/integration_tests/.zenith
.idea
/.neon
/integration_tests/.neon
# Coverage
*.profraw

View File

@@ -6,5 +6,5 @@ target/
tmp_install/
__pycache__/
test_output/
.zenith/
.neon/
.git/

127 Cargo.lock generated
View File

@@ -64,6 +64,45 @@ dependencies = [
"nodrop",
]
[[package]]
name = "asn1-rs"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30ff05a702273012438132f449575dbc804e27b2f3cbe3069aa237d26c98fa33"
dependencies = [
"asn1-rs-derive",
"asn1-rs-impl",
"displaydoc",
"nom",
"num-traits",
"rusticata-macros",
"thiserror",
"time 0.3.9",
]
[[package]]
name = "asn1-rs-derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8b7511298d5b7784b40b092d9e9dcd3a627a5707e4b5e507931ab0d44eeebf"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "asn1-rs-impl"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "async-stream"
version = "0.3.3"
@@ -422,6 +461,7 @@ dependencies = [
"tar",
"tokio",
"tokio-postgres",
"url",
"workspace_hack",
]
@@ -712,6 +752,12 @@ dependencies = [
"syn",
]
[[package]]
name = "data-encoding"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
[[package]]
name = "debugid"
version = "0.7.3"
@@ -721,6 +767,20 @@ dependencies = [
"uuid",
]
[[package]]
name = "der-parser"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe398ac75057914d7d07307bf67dc7f3f574a26783b4fc7805a20ffa9f506e82"
dependencies = [
"asn1-rs",
"displaydoc",
"nom",
"num-bigint",
"num-traits",
"rusticata-macros",
]
[[package]]
name = "digest"
version = "0.9.0"
@@ -762,6 +822,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "displaydoc"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.6.1"
@@ -1731,6 +1802,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "oid-registry"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38e20717fa0541f39bd146692035c37bedfa532b3e5071b35761082407546b2a"
dependencies = [
"asn1-rs",
]
[[package]]
name = "once_cell"
version = "1.10.0"
@@ -1842,6 +1922,7 @@ dependencies = [
"tracing",
"url",
"utils",
"walkdir",
"workspace_hack",
]
@@ -2070,7 +2151,7 @@ dependencies = [
"serde",
"thiserror",
"utils",
"wal_generate",
"wal_craft",
"workspace_hack",
]
@@ -2249,6 +2330,7 @@ dependencies = [
"url",
"utils",
"workspace_hack",
"x509-parser",
]
[[package]]
@@ -2620,6 +2702,15 @@ dependencies = [
"semver",
]
[[package]]
name = "rusticata-macros"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632"
dependencies = [
"nom",
]
[[package]]
name = "rustls"
version = "0.20.4"
@@ -3059,6 +3150,18 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"
[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn",
"unicode-xid",
]
[[package]]
name = "tar"
version = "0.4.38"
@@ -3650,14 +3753,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wal_generate"
name = "wal_craft"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 3.0.14",
"env_logger",
"log",
"once_cell",
"postgres",
"postgres_ffi",
"tempfile",
]
@@ -3921,6 +4026,24 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "x509-parser"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb9bace5b5589ffead1afb76e43e34cff39cd0f3ce7e170ae0c29e53b88eb1c"
dependencies = [
"asn1-rs",
"base64",
"data-encoding",
"der-parser",
"lazy_static",
"nom",
"oid-registry",
"rusticata-macros",
"thiserror",
"time 0.3.9",
]
[[package]]
name = "xattr"
version = "0.2.2"

View File

@@ -1,5 +1,5 @@
# Build Postgres
FROM zimg/rust:1.58 AS pg-build
FROM neondatabase/rust:1.58 AS pg-build
WORKDIR /pg
USER root
@@ -14,7 +14,7 @@ RUN set -e \
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
FROM zimg/rust:1.58 AS build
FROM neondatabase/rust:1.58 AS build
ARG GIT_VERSION=local
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
@@ -46,9 +46,9 @@ RUN set -e \
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
COPY --from=pg-build /pg/tmp_install/ /usr/local/
COPY --from=pg-build /postgres_install.tar.gz /data/

View File

@@ -1,6 +1,6 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .circle/config.yml
FROM zimg/rust:1.58 AS rust-build
FROM neondatabase/rust:1.58 AS rust-build
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
@@ -15,4 +15,4 @@ RUN set -e \
# Final image that only has one binary
FROM debian:buster-slim
COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl

View File

@@ -1,3 +1,8 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./tmp_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/tmp_install
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
@@ -55,55 +60,55 @@ zenith: postgres-headers
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts
tmp_install/build/config.status:
$(POSTGRES_INSTALL_DIR)/build/config.status:
+@echo "Configuring postgres build"
mkdir -p tmp_install/build
(cd tmp_install/build && \
../../vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
mkdir -p $(POSTGRES_INSTALL_DIR)/build
(cd $(POSTGRES_INSTALL_DIR)/build && \
$(ROOT_PROJECT_DIR)/vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
$(SECCOMP) \
--prefix=$(abspath tmp_install) > configure.log)
--prefix=$(abspath $(POSTGRES_INSTALL_DIR)) > configure.log)
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: tmp_install/build/config.status
postgres-configure: $(POSTGRES_INSTALL_DIR)/build/config.status
# Install the PostgreSQL header files into tmp_install/include
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/include
.PHONY: postgres-headers
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/neon
.PHONY: postgres
postgres: postgres-configure \
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+@echo "Compiling PostgreSQL"
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 install
+@echo "Compiling contrib/neon"
$(MAKE) -C tmp_install/build/contrib/neon install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon install
+@echo "Compiling contrib/neon_test_utils"
$(MAKE) -C tmp_install/build/contrib/neon_test_utils install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon_test_utils install
+@echo "Compiling pg_buffercache"
$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache install
+@echo "Compiling pageinspect"
$(MAKE) -C tmp_install/build/contrib/pageinspect install
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect install
.PHONY: postgres-clean
postgres-clean:
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean:
cd tmp_install/build && $(MAKE) clean
cd $(POSTGRES_INSTALL_DIR)/build && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
.PHONY: distclean
distclean:
rm -rf tmp_install
rm -rf $(POSTGRES_INSTALL_DIR)
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
@@ -112,8 +117,4 @@ fmt:
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f ../../pre-commit.py .git/hooks/pre-commit
# Rebuild when any makefile changes
# https://stackoverflow.com/questions/3871444/making-all-rules-depend-on-the-makefile-itself
.EXTRA_PREREQS+=$(foreach mk, ${MAKEFILE_LIST},$(abspath ${mk}))
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit

View File

@@ -29,7 +29,7 @@ Pageserver consists of:
## Running local installation
#### building on Linux
#### Installing dependencies on Linux
1. Install build dependencies and other useful packages
* On Ubuntu or Debian this set of packages should be sufficient to build the code:
@@ -49,18 +49,11 @@ dnf install flex bison readline-devel zlib-devel openssl-devel \
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
3. Build neon and patched postgres
```sh
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
make -j`nproc`
```
#### building on OSX (12.3.1)
#### Installing dependencies on OSX (12.3.1)
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd
brew install protobuf etcd openssl
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -76,11 +69,20 @@ brew install libpq
brew link --force libpq
```
4. Build neon and patched postgres
```sh
#### Building on Linux and OSX
1. Build neon and patched postgres
```
# Note: The path to the neon sources cannot contain a space.
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
make -j5
# The default is a debug build; it is noticeably slower than a release build.
# To make a release build instead, run: BUILD_TYPE=release make -j`nproc`
make -j`nproc`
```
#### dependency installation notes
@@ -93,7 +95,7 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
#### running neon database
1. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create repository in .zenith with proper paths to binaries and data
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
@@ -103,16 +105,16 @@ pageserver init succeeded
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting pageserver at '127.0.0.1:64000' in '.zenith'
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
initializing for sk 1 for 7676
Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# check list of running postgres instances
@@ -149,7 +151,7 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances
@@ -209,7 +211,7 @@ Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, wh
To get more familiar with this aspect, refer to:
- [Neon glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
## Join the development

View File

@@ -18,4 +18,5 @@ serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -33,7 +33,7 @@ use std::process::exit;
use std::sync::{Arc, RwLock};
use std::{thread, time::Duration};
use anyhow::Result;
use anyhow::{Context, Result};
use chrono::Utc;
use clap::Arg;
use log::{error, info};
@@ -45,6 +45,7 @@ use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> {
// TODO: re-use `utils::logging` later
@@ -131,7 +132,7 @@ fn main() -> Result<()> {
let compute_state = ComputeNode {
start_time: Utc::now(),
connstr: connstr.to_string(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
spec,

View File

@@ -1,5 +1,3 @@
use std::sync::Arc;
use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
@@ -23,9 +21,8 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
Ok(())
}
pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
let connstr = &compute.connstr;
let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}

View File

@@ -35,7 +35,8 @@ use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
pub connstr: String,
// Url type maintains proper escaping
pub connstr: url::Url,
pub pgdata: String,
pub pgbin: String,
pub spec: ComputeSpec,
@@ -268,28 +269,33 @@ impl ComputeNode {
// In this case we need to connect with the old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(&self.connstr, NoTls) {
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let zenith_admin_connstr = self.connstr.replacen("cloud_admin", "zenith_admin", 1);
let mut zenith_admin_connstr = self.connstr.clone();
let mut client = Client::connect(&zenith_admin_connstr, NoTls)?;
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with a connstring with the expected name
Client::connect(&self.connstr, NoTls)?
Client::connect(self.connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_grants(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(self, &mut client)?;
create_writablity_check_data(&mut client)?;
// 'Close' connection
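Since the hunk above swaps the raw `String` connection string for `url::Url`, here is a small standalone sketch (not part of the commit; the connection string and database name are made up) of the pattern it relies on: parse once, then adjust the user or database through the URL API instead of string replacement.
```rust
// Standalone sketch of the url::Url pattern used above; the connection
// string and names are illustrative, not taken from the commit.
use url::Url;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut connstr = Url::parse("postgresql://cloud_admin@127.0.0.1:55432/postgres")?;

    // Switching the user keeps the rest of the URL intact and properly escaped,
    // unlike replacen("cloud_admin", "zenith_admin", 1) on a raw string.
    connstr
        .set_username("zenith_admin")
        .map_err(|_| "this URL cannot carry a username")?;

    // The database name is the last (and only) component of the path.
    connstr.set_path("neondb");

    println!("{}", connstr.as_str());
    Ok(())
}
```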

View File

@@ -13,11 +13,11 @@ const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &Arc<ComputeNode>) {
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.clone();
let connstr = compute.connstr.as_str();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(&connstr, NoTls);
let mut client = Client::connect(connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
@@ -32,7 +32,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(&connstr, NoTls);
client = Client::connect(connstr, NoTls);
continue;
}
@@ -93,7 +93,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(&connstr, NoTls);
client = Client::connect(connstr, NoTls);
}
}
}

View File

@@ -1,3 +1,4 @@
use std::fmt::Write;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{SocketAddr, TcpStream};
@@ -138,9 +139,11 @@ impl Role {
// Now we also support SCRAM-SHA-256 and to preserve compatibility
// we treat all encrypted_password as md5 unless they start with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
params.push_str(&format!(" PASSWORD '{}'", pass));
write!(params, " PASSWORD '{pass}'")
.expect("String is documented to not to error during write operations");
} else {
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
write!(params, " PASSWORD 'md5{pass}'")
.expect("String is documented to not to error during write operations");
}
} else {
params.push_str(" PASSWORD NULL");
@@ -158,7 +161,8 @@ impl Database {
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
params.push_str(&format!(" OWNER {}", &self.owner.quote()));
write!(params, " OWNER {}", &self.owner.quote())
.expect("String is documented to not to error during write operations");
params
}
@@ -244,18 +248,20 @@ pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()
bail!("Postgres exited unexpectedly with code {}", code);
}
if pid_path.exists() {
let file = BufReader::new(File::open(&pid_path)?);
let status = file
.lines()
.last()
.unwrap()
.unwrap_or_else(|_| "unknown".to_string());
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
// Check that we can open pid file first.
if let Ok(file) = File::open(&pid_path) {
let file = BufReader::new(file);
let last_line = file.lines().last();
// Now Postgres is ready to accept connections
if status.trim() == "ready" && can_connect {
break;
// Pid file could be there and we could read it, but it could be empty, for example.
if let Some(Ok(line)) = last_line {
let status = line.trim();
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
// Now Postgres is ready to accept connections
if status == "ready" && can_connect {
break;
}
}
}
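A side note on the `wait_for_postgres` hunk above: the new code tolerates a pid file that is missing, unreadable, or empty before it looks for the `ready` marker. Below is a minimal standalone sketch of that defensive read; the helper name and the path are made up for illustration, not taken from the commit.
```rust
// Illustrative sketch of the defensive postmaster.pid read shown above.
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

fn last_pid_file_line(pid_path: &Path) -> Option<String> {
    // The file may not exist yet, may be unreadable, or may be empty.
    let file = File::open(pid_path).ok()?;
    let last_line = BufReader::new(file).lines().last()?;
    last_line.ok().map(|line| line.trim().to_string())
}

fn main() {
    let ready = last_pid_file_line(Path::new("pgdata/postmaster.pid"))
        .map_or(false, |status| status == "ready");
    println!("postgres reports ready: {ready}");
}
```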

View File

@@ -1,10 +1,12 @@
use std::path::Path;
use anyhow::Result;
use anyhow::{anyhow, Result};
use log::{info, log_enabled, warn, Level};
use postgres::Client;
use postgres::error::SqlState;
use postgres::{Client, NoTls};
use serde::Deserialize;
use crate::compute::ComputeNode;
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -97,18 +99,13 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing delta operations on roles");
info!("processing role renames");
for op in ops {
match op.action.as_ref() {
// We do not check whether the role exists or not;
// Postgres will take care of that for us
"delete_role" => {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
// no-op now, roles will be deleted at the end of configuration
}
// Renaming role drops its password, since tole name is
// Renaming role drops its password, since role name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
@@ -182,7 +179,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"grant pg_read_all_data, pg_write_all_data to {}",
"GRANT pg_read_all_data, pg_write_all_data TO {}",
name.quote()
);
xact.execute(grant_query.as_str(), &[])?;
@@ -197,6 +194,70 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations {
info!("reassigning dependent objects of to-be-deleted roles");
for op in ops {
if op.action == "delete_role" {
reassign_owned_objects(node, &op.name)?;
}
}
}
// Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions");
for op in ops {
// We do not check whether the role exists or not;
// Postgres will take care of that for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
}
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut connstr = node.connstr.clone();
// database name is always the last and the only component of the path
connstr.set_path(&db.name);
let mut client = Client::connect(connstr.as_str(), NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.quote(),
db.owner.quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name, &db.name, &db.owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
Ok(())
}
/// It follows mostly the same logic as `handle_roles()` excepting that we
/// does not use an explicit transactions block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
@@ -289,23 +350,66 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
// Grant CREATE ON DATABASE to the database owner
// to allow clients create trusted extensions.
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
// via the web interface and proxy link auth. We also grant the
// read / write all data privileges to every role, so grant CREATE
// to everyone as well.
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.quote(),
db.owner.quote()
roles.join(", ")
);
info!("grant query {}", &query);
client.execute(query.as_str(), &[])?;
}
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
let mut db_connstr = node.connstr.clone();
for db in &node.spec.cluster.databases {
// database name is always the last and the only component of the path
db_connstr.set_path(&db.name);
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it owner of the `public` schema will be `cloud_admin`
// and database owner cannot do anything with it.
let alter_query = format!("ALTER SCHEMA public OWNER TO {}", db.owner.quote());
let res = db_client.simple_query(&alter_query);
if let Err(e) = res {
if e.code() == Some(&SqlState::INVALID_SCHEMA_NAME) {
// This is OK, the db just doesn't have a `public` schema.
// Probably the user dropped it manually.
info!("no 'public' schema found in the database {}", db.name);
} else {
// Something different happened, propagate the error
return Err(anyhow!(e));
}
}
}
Ok(())
}

View File

@@ -21,9 +21,9 @@ use utils::{
use crate::safekeeper::SafekeeperNode;
//
// This data structures represents zenith CLI config
// This data structures represents neon_local CLI config
//
// It is deserialized from the .zenith/config file, or the config file passed
// It is deserialized from the .neon/config file, or the config file passed
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
@@ -34,8 +34,8 @@ pub struct LocalEnv {
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
// '.zenith' if not given.
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,
@@ -177,6 +177,7 @@ pub struct SafekeeperConf {
pub sync: bool,
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
pub auth_enabled: bool,
}
impl Default for SafekeeperConf {
@@ -188,6 +189,7 @@ impl Default for SafekeeperConf {
sync: true,
remote_storage: None,
backup_threads: None,
auth_enabled: false,
}
}
}
@@ -337,7 +339,7 @@ impl LocalEnv {
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a locale deployment of the page server
# and safekeeeper node. It is read by the 'zenith' command-line
@@ -401,16 +403,6 @@ impl LocalEnv {
self.pg_distrib_dir.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{}' in zenith distrib dir '{}'",
binary,
self.zenith_distrib_dir.display()
);
}
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
@@ -419,12 +411,6 @@ impl LocalEnv {
);
}
}
if !self.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_distrib_dir.display()
);
}
fs::create_dir(&base_path)?;
@@ -481,9 +467,9 @@ impl LocalEnv {
}
fn base_path() -> PathBuf {
match std::env::var_os("ZENITH_REPO_DIR") {
match std::env::var_os("NEON_REPO_DIR") {
Some(val) => PathBuf::from(val),
None => PathBuf::from(".zenith"),
None => PathBuf::from(".neon"),
}
}

View File

@@ -149,6 +149,11 @@ impl SafekeeperNode {
if let Some(ref remote_storage) = self.conf.remote_storage {
cmd.args(&["--remote-storage", remote_storage]);
}
if self.conf.auth_enabled {
cmd.arg("--auth-validation-public-key-path");
// A PathBuf is better passed as-is, not via a `String`.
cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
}
fill_aws_secrets_vars(&mut cmd);

View File

@@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::io::Write;
use std::fs::File;
use std::io::{BufReader, Write};
use std::net::TcpStream;
use std::num::NonZeroU64;
use std::path::PathBuf;
@@ -527,4 +528,54 @@ impl PageServerNode {
Ok(timeline_info_response)
}
/// Import a basebackup prepared using either:
/// a) `pg_basebackup -F tar`, or
/// b) The `fullbackup` pageserver endpoint
///
/// # Arguments
/// * `tenant_id` - tenant to import into. Created if not exists
/// * `timeline_id` - id to assign to imported timeline
/// * `base` - (start lsn of basebackup, path to `base.tar` file)
/// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
pub fn timeline_import(
&self,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
base: (Lsn, PathBuf),
pg_wal: Option<(Lsn, PathBuf)>,
) -> anyhow::Result<()> {
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
// Init base reader
let (start_lsn, base_tarfile_path) = base;
let base_tarfile = File::open(base_tarfile_path)?;
let mut base_reader = BufReader::new(base_tarfile);
// Init wal reader if necessary
let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
let wal_tarfile = File::open(wal_tarfile_path)?;
let wal_reader = BufReader::new(wal_tarfile);
(end_lsn, Some(wal_reader))
} else {
(start_lsn, None)
};
// Import base
let import_cmd =
format!("import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
let mut writer = client.copy_in(&import_cmd)?;
io::copy(&mut base_reader, &mut writer)?;
writer.finish()?;
// Import wal if necessary
if let Some(mut wal_reader) = wal_reader {
let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
let mut writer = client.copy_in(&import_cmd)?;
io::copy(&mut wal_reader, &mut writer)?;
writer.finish()?;
}
Ok(())
}
}
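For orientation, a hypothetical call site for the new `timeline_import` helper. Everything below is illustrative: the ID parsing, LSNs, and file names are assumptions, and the snippet presumes the surrounding `control_plane` crate types (`PageServerNode`, `ZTenantId`, `ZTimelineId`, `Lsn`) are in scope.
```rust
// Hypothetical usage of timeline_import; IDs, LSNs, and paths are made up.
use std::path::PathBuf;
use std::str::FromStr;

fn import_example(pageserver: &PageServerNode) -> anyhow::Result<()> {
    let tenant_id = ZTenantId::from_str("9ef87a5bf0d92544f6fafeeb3239695c")?;
    let timeline_id = ZTimelineId::from_str("de200bd42b49cc1814412c7e592dd6e9")?;

    // base.tar comes from `pg_basebackup -F tar` or the `fullbackup` endpoint.
    let base = (Lsn::from_str("0/16B5A50")?, PathBuf::from("base.tar"));
    // Optional WAL to apply on top of the base backup.
    let pg_wal = Some((Lsn::from_str("0/16D1B98")?, PathBuf::from("pg_wal.tar")));

    pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal)
}
```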

View File

@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
Alternatively, we could count only relation data. As in pg_database_size().
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
We will need to refactor the timeline_size counter or add another counter to implement it.
We will need to refactor the timeline_size counter or add another counter to implement it.
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
Then this size should be reported to compute node.
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
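To make the flow above concrete, here is a toy sketch of the consumer side: the compute node receives the reported size in the feedback message and compares it against a limit. The struct and thresholds are illustrative assumptions, not the real protocol types.
```rust
// Toy sketch only; the real feedback type lives in the Neon sources.
struct FeedbackSketch {
    // Bytes, as last reported by the pageserver at its last digested LSN.
    current_timeline_size: u64,
}

fn size_check(feedback: &FeedbackSketch, limit_bytes: u64, warn_ratio: f64) -> (bool, bool) {
    let warn = feedback.current_timeline_size as f64 >= limit_bytes as f64 * warn_ratio;
    let over = feedback.current_timeline_size >= limit_bytes;
    (warn, over)
}

fn main() {
    let fb = FeedbackSketch { current_timeline_size: 9 * 1024 * 1024 * 1024 };
    let (warn, over) = size_check(&fb, 10 * 1024 * 1024 * 1024, 0.9);
    println!("approaching limit: {warn}, over limit: {over}");
}
```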
@@ -64,11 +64,11 @@ We should warn users if the limit is soon to be reached.
### **Reliability, failure modes and corner cases**
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.

View File

@@ -154,7 +154,7 @@ The default distrib dir is `./tmp_install/`.
#### workdir (-D)
A directory in the file system, where pageserver will store its files.
The default is `./.zenith/`.
The default is `./.neon/`.
This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.

View File

@@ -1,392 +1,209 @@
//! A set of primitives to access shared data/updates propagated via the etcd broker (not persistent).
//! Intended to connect services to each other, not to store their data.
use std::{
collections::{hash_map, HashMap},
fmt::Display,
str::FromStr,
};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
/// All broker keys, that are used when dealing with etcd.
pub mod subscription_key;
/// All broker values, possible to use when dealing with etcd.
pub mod subscription_value;
pub use etcd_client::*;
use std::str::FromStr;
use serde::de::DeserializeOwned;
use subscription_key::SubscriptionKey;
use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use utils::{
lsn::Lsn,
zid::{NodeId, ZTenantId, ZTenantTimelineId},
};
use crate::subscription_key::SubscriptionFullKey;
pub use etcd_client::*;
/// Default value to use for prefixing all etcd keys.
/// This allows isolating safekeeper/pageserver groups in the same etcd cluster.
pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
#[derive(Debug, Deserialize, Serialize)]
struct SafekeeperTimeline {
safekeeper_id: NodeId,
info: SkTimelineInfo,
/// A way to control the data retrieval from a certain subscription.
pub struct BrokerSubscription<V> {
/// An unbounded channel to fetch the relevant etcd updates from.
pub value_updates: mpsc::UnboundedReceiver<BrokerUpdate<V>>,
key: SubscriptionKey,
/// A subscription task handle, to allow waiting on it for the task to complete.
/// Both the updates channel and the handle require `&mut`, so it's better to keep
/// both `pub` to allow using both in the same structures without borrow checker complaining.
pub watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
}
/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
#[serde(default)]
pub safekeeper_connstr: Option<String>,
#[serde(default)]
pub pageserver_connstr: Option<String>,
impl<V> BrokerSubscription<V> {
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
pub async fn cancel(mut self) -> Result<(), BrokerError> {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to cancel broker subscription, kind: {:?}", self.key),
)
})?;
match (&mut self.watcher_handle).await {
Ok(res) => res,
Err(e) => {
if e.is_cancelled() {
// don't error on the tasks that are cancelled already
Ok(())
} else {
Err(BrokerError::InternalError(format!(
"Panicked during broker subscription task, kind: {:?}, error: {e}",
self.key
)))
}
}
}
}
}
impl<V> Drop for BrokerSubscription<V> {
fn drop(&mut self) {
// we poll data from etcd into the channel in the same struct, so if the whole struct gets dropped,
// no more data is used by the receiver and it's safe to cancel and drop the whole etcd subscription task.
self.watcher_handle.abort();
}
}
/// An update from the etcd broker.
pub struct BrokerUpdate<V> {
/// Etcd version of the value; the bigger it is, the more recent the data.
pub etcd_version: i64,
/// Etcd key for the corresponding value, parsed from the broker KV.
pub key: SubscriptionFullKey,
/// Current etcd value, parsed from the broker KV.
pub value: V,
}
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd data: {0}")]
ParsingError(String),
#[error("Error during parsing etcd key: {0}")]
KeyNotParsed(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// A way to control the data retrieval from a certain subscription.
pub struct SkTimelineSubscription {
safekeeper_timeline_updates:
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
kind: SkTimelineSubscriptionKind,
watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
}
impl SkTimelineSubscription {
/// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
pub async fn fetch_data(
&mut self,
) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
self.safekeeper_timeline_updates.recv().await
}
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
pub async fn cancel(mut self) -> Result<(), BrokerError> {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!(
"Failed to cancel timeline subscription, kind: {:?}",
self.kind
),
)
})?;
self.watcher_handle.await.map_err(|e| {
BrokerError::InternalError(format!(
"Failed to join the timeline updates task, kind: {:?}, error: {e}",
self.kind
))
})?
}
}
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SkTimelineSubscriptionKind {
broker_etcd_prefix: String,
kind: SubscriptionKind,
}
impl SkTimelineSubscriptionKind {
pub fn all(broker_etcd_prefix: String) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::All,
}
}
pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Tenant(tenant),
}
}
pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Timeline(timeline),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
match self.kind {
SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
SubscriptionKind::Tenant(tenant_id) => {
format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
}
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => format!(
"{}/{tenant_id}/{timeline_id}/safekeeper",
self.broker_etcd_prefix
),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum SubscriptionKind {
/// Get every timeline update.
All,
/// Get certain tenant timelines' updates.
Tenant(ZTenantId),
/// Get certain timeline updates.
Timeline(ZTenantTimelineId),
}
/// Creates a background task to poll etcd for updates under the given subscription key.
/// Stops and returns `Err` on any error during etcd communication.
/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
/// exiting normally in such cases.
/// Etcd values are parsed as JSON into the type specified in the generic parameter.
pub async fn subscribe_for_json_values<V>(
    client: &mut Client,
    key: SubscriptionKey,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: DeserializeOwned + Send + 'static,
{
subscribe_for_values(client, key, |_, value_str| {
match serde_json::from_str::<V>(value_str) {
Ok(value) => Some(value),
Err(e) => {
error!("Failed to parse value str '{value_str}': {e}");
None
}
}
})
.await
}
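A usage sketch for the new generic API, not part of this diff. It assumes an already connected etcd client, and that the subscription's value_updates receiver (filled in by the constructor below) is reachable from the caller; the real crate may expose a fetch method instead. SkTimelineInfo is the value type added elsewhere in this change.
// Usage sketch, not part of the diff; field access on the subscription is an assumption.
async fn log_safekeeper_timeline_info(client: &mut Client) -> Result<(), BrokerError> {
    let key = SubscriptionKey::all("neon".to_string());
    let mut subscription = subscribe_for_json_values::<SkTimelineInfo>(client, key).await?;
    while let Some(update) = subscription.value_updates.recv().await {
        info!(
            "safekeeper {} reported commit_lsn {:?} (etcd version {})",
            update.key.node_id, update.value.commit_lsn, update.etcd_version
        );
    }
    subscription.cancel().await
}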
/// Same as [`subscribe_for_json_values`], but allows specifying a custom parser for the etcd value string.
pub async fn subscribe_for_values<P, V>(
client: &mut Client,
key: SubscriptionKey,
value_parser: P,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: Send + 'static,
P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
{
info!("Subscribing to broker value updates, key: {key:?}");
let subscription_key = key.clone();
let (watcher, mut stream) = client
.watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
.await
.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to init the watch for subscription {subscription:?}"),
format!("Failed to init the watch for subscription {key:?}"),
)
})?;
let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
let watcher_handle = tokio::spawn(async move {
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", subscription.kind
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
)))? {
if resp.canceled() {
info!("Watch for timeline updates subscription was canceled, exiting");
break;
}
let events = resp.events();
debug!("Processing {} events", events.len());

for event in events {
    if EventType::Put == event.event_type() {
        if let Some(new_etcd_kv) = event.kv() {
            match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
                Ok(Some((key, value))) => if let Err(e) = value_updates_sender.send(BrokerUpdate {
                    etcd_version: new_etcd_kv.version(),
                    key,
                    value,
                }) {
                    info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
                    break;
                },
                Ok(None) => debug!("Ignoring key {key:?}: no value was returned by the parser"),
                Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
                Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
            };
        }
    }
}
}
Ok(())
}.instrument(info_span!("etcd_broker")));
Ok(BrokerSubscription {
key: subscription_key,
value_updates: value_updates_receiver,
watcher_handle,
watcher,
})
}
fn parse_etcd_kv<P, V>(
    kv: &KeyValue,
    value_parser: &P,
    cluster_prefix: &str,
) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
where
    P: Fn(SubscriptionFullKey, &str) -> Option<V>,
{
    let key_str = kv.key_str().map_err(|e| {
        BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
    })?;
    let value_str = kv.value_str().map_err(|e| {
        BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
    })?;
    if !key_str.starts_with(cluster_prefix) {
        return Err(BrokerError::KeyNotParsed(format!(
            "KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
        )));
    }
    let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
        BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
    })?;
    Ok(value_parser(key, value_str).map(|value| (key, value)))
}

View File

@@ -0,0 +1,310 @@
//! Etcd broker keys, used in the project and shared between instances.
//! The keys are split into two categories:
//!
//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
//! Always returned from etcd in this form; it always starts with the user-provided key.
//!
//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
//! Full key always starts with the user input one, due to etcd subscription properties.
use std::{fmt::Display, str::FromStr};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SubscriptionKey {
/// Generic cluster prefix, allowing multiple logical groups to share the same etcd instance.
pub cluster_prefix: String,
/// The subscription kind.
pub kind: SubscriptionKind,
}
/// All currently possible key kinds of an etcd broker subscription.
/// Etcd treats every key that starts with the given subscription key as matching and
/// returns it as part of the subscription.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubscriptionKind {
/// Get every update in etcd.
All,
/// Get etcd updates for any timeline of a certain tenant, affected by any operation from any node kind.
TenantTimelines(ZTenantId),
/// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
Timeline(ZTenantTimelineId),
/// Get etcd timeline updates, specific to a certain node kind.
Node(ZTenantTimelineId, NodeKind),
/// Get etcd timeline updates for a certain operation on specific nodes.
Operation(ZTenantTimelineId, NodeKind, OperationKind),
}
/// All kinds of nodes, able to write into etcd.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
Safekeeper,
Pageserver,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperationKind {
Safekeeper(SkOperationKind),
}
/// Current operations, running inside the safekeeper node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SkOperationKind {
TimelineInfo,
WalBackup,
}
static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
.expect("wrong subscription full etcd key regex")
});
/// Full key, received from etcd during any of the component's work.
/// No other etcd keys are considered during the system's work.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SubscriptionFullKey {
pub id: ZTenantTimelineId,
pub node_kind: NodeKind,
pub operation: OperationKind,
pub node_id: NodeId,
}
impl SubscriptionKey {
/// Subscribes for all etcd updates.
pub fn all(cluster_prefix: String) -> Self {
SubscriptionKey {
cluster_prefix,
kind: SubscriptionKind::All,
}
}
/// Subscribes to a given timeline info updates from safekeepers.
pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(
timeline,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
),
}
}
/// Subscribes to all timeline updates during specific operations running on the corresponding nodes.
pub fn operation(
cluster_prefix: String,
timeline: ZTenantTimelineId,
node_kind: NodeKind,
operation: OperationKind,
) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(timeline, node_kind, operation),
}
}
/// Etcd key to use for watching updates that match this subscription.
pub fn watch_key(&self) -> String {
let cluster_prefix = &self.cluster_prefix;
match self.kind {
SubscriptionKind::All => cluster_prefix.to_string(),
SubscriptionKind::TenantTimelines(tenant_id) => {
format!("{cluster_prefix}/{tenant_id}")
}
SubscriptionKind::Timeline(id) => {
format!("{cluster_prefix}/{id}")
}
SubscriptionKind::Node(id, node_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}")
}
SubscriptionKind::Operation(id, node_kind, operation_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
}
}
}
}
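As an illustration of the key layout described in the module docs, the hypothetical test below (not part of the diff) spells out the watch key produced for a safekeeper timeline_info subscription. It relies on ZTenantTimelineId displaying as `<tenant_hex>/<timeline_hex>`, which the existing tests at the end of this file also assume.
// Illustrative sketch, not part of the diff: the watch key is the full key
// minus the trailing node id.
#[test]
fn watch_key_format_example() {
    use utils::zid::ZTimelineId;

    let id = ZTenantTimelineId::new(ZTenantId::generate(), ZTimelineId::generate());
    let key = SubscriptionKey::sk_timeline_info("neon".to_string(), id);
    // The watched prefix comes out as "neon/<tenant_hex>/<timeline_hex>/safekeeper/timeline_info".
    assert_eq!(
        key.watch_key(),
        format!("neon/{id}/safekeeper/timeline_info")
    );
}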
impl Display for OperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OperationKind::Safekeeper(o) => o.fmt(f),
}
}
}
impl FromStr for OperationKind {
type Err = String;
fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
match operation_kind_str {
"timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
"wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
_ => Err(format!("Unknown operation kind: {operation_kind_str}")),
}
}
}
impl Display for SubscriptionFullKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
id,
node_kind,
operation,
node_id,
} = self;
write!(f, "{id}/{node_kind}/{operation}/{node_id}")
}
}
impl FromStr for SubscriptionFullKey {
type Err = String;
fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
let key_captures = match SUBSCRIPTION_FULL_KEY_REGEX.captures(subscription_kind_str) {
Some(captures) => captures,
None => {
return Err(format!(
"Subscription kind str does not match a subscription full key regex {}",
SUBSCRIPTION_FULL_KEY_REGEX.as_str()
));
}
};
Ok(Self {
id: ZTenantTimelineId::new(
parse_capture(&key_captures, 1)?,
parse_capture(&key_captures, 2)?,
),
node_kind: parse_capture(&key_captures, 3)?,
operation: parse_capture(&key_captures, 4)?,
node_id: NodeId(parse_capture(&key_captures, 5)?),
})
}
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
where
T: FromStr,
<T as FromStr>::Err: Display,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
}
impl Display for NodeKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Safekeeper => write!(f, "safekeeper"),
Self::Pageserver => write!(f, "pageserver"),
}
}
}
impl FromStr for NodeKind {
type Err = String;
fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
match node_kind_str {
"safekeeper" => Ok(Self::Safekeeper),
"pageserver" => Ok(Self::Pageserver),
_ => Err(format!("Invalid node kind: {node_kind_str}")),
}
}
}
impl Display for SkOperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::TimelineInfo => write!(f, "timeline_info"),
Self::WalBackup => write!(f, "wal_backup"),
}
}
}
impl FromStr for SkOperationKind {
type Err = String;
fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
match operation_str {
"timeline_info" => Ok(Self::TimelineInfo),
"wal_backup" => Ok(Self::WalBackup),
_ => Err(format!("Invalid operation: {operation_str}")),
}
}
}
#[cfg(test)]
mod tests {
use utils::zid::ZTimelineId;
use super::*;
#[test]
fn full_cluster_key_parsing() {
let prefix = "neon";
let node_kind = NodeKind::Safekeeper;
let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
let tenant_id = ZTenantId::generate();
let timeline_id = ZTimelineId::generate();
let id = ZTenantTimelineId::new(tenant_id, timeline_id);
let node_id = NodeId(1);
let timeline_subscription_keys = [
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::All,
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::TenantTimelines(tenant_id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Timeline(id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Node(id, node_kind),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
},
];
let full_key_string = format!(
"{}/{node_id}",
timeline_subscription_keys.last().unwrap().watch_key()
);
for key in timeline_subscription_keys {
assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with each of the keys, but {key:?} did not match");
}
let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
});
assert_eq!(
full_key,
SubscriptionFullKey {
id,
node_kind,
operation: operation_kind,
node_id
}
)
}
}

View File

@@ -0,0 +1,35 @@
//! Module for the values to put into etcd.
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::lsn::Lsn;
/// Data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,
}
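A minimal deserialization sketch, not part of the diff, showing the wire format this struct expects: LSNs travel as their display strings and absent fields become None. It assumes serde_json is available as a (dev-)dependency of this crate.
// Deserialization sketch, not part of the diff.
#[test]
fn sk_timeline_info_wire_format() {
    let raw = r#"{"last_log_term":231,"flush_lsn":"0/241BB70","commit_lsn":"0/241BB70"}"#;
    let info: SkTimelineInfo = serde_json::from_str(raw).unwrap();
    assert_eq!(info.last_log_term, Some(231));
    assert_eq!(info.flush_lsn, Some("0/241BB70".parse().unwrap()));
    // Fields absent from the payload fall back to None thanks to #[serde(default)].
    assert_eq!(info.backup_lsn, None);
    assert!(info.safekeeper_connstr.is_none());
}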

View File

@@ -23,7 +23,7 @@ workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
env_logger = "0.9"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
wal_generate = { path = "wal_generate" }
wal_craft = { path = "wal_craft" }
[build-dependencies]
bindgen = "0.59.1"

View File

@@ -2,6 +2,7 @@ extern crate bindgen;
use std::env;
use std::path::PathBuf;
use std::process::Command;
use bindgen::callbacks::ParseCallbacks;
@@ -45,6 +46,43 @@ fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=pg_control_ffi.h");
// Finding the location of C headers for the Postgres server:
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/tmp_install/include/postgresql/server`
let mut pg_install_dir: PathBuf;
if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
pg_install_dir = postgres_install_dir.into();
} else {
pg_install_dir = PathBuf::from("tmp_install")
}
if pg_install_dir.is_relative() {
let cwd = env::current_dir().unwrap();
pg_install_dir = cwd.join("..").join("..").join(pg_install_dir);
}
let pg_config_bin = pg_install_dir.join("bin").join("pg_config");
let inc_server_path: String = if pg_config_bin.exists() {
let output = Command::new(pg_config_bin)
.arg("--includedir-server")
.output()
.expect("failed to execute `pg_config --includedir-server`");
if !output.status.success() {
panic!("`pg_config --includedir-server` failed")
}
String::from_utf8(output.stdout).unwrap().trim_end().into()
} else {
pg_install_dir
.join("include")
.join("postgresql")
.join("server")
.into_os_string()
.into_string()
.unwrap()
};
// The bindgen::Builder is the main entry point
// to bindgen, and lets you build up options for
// the resulting bindings.
@@ -81,15 +119,7 @@ fn main() {
// explicit padding fields.
.explicit_padding(true)
//
// Path the server include dir. It is in tmp_install/include/server, if you did
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
// and used DESTDIR to move it into tmp_install, then it's in
// tmp_install/include/postgres/server
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
// but this will do for now.
//
.clang_arg("-I../../tmp_install/include/server")
.clang_arg("-I../../tmp_install/include/postgresql/server")
.clang_arg(format!("-I{inc_server_path}"))
//
// Finish the builder and generate the bindings.
//

View File

@@ -82,7 +82,17 @@ impl WalStreamDecoder {
// that cross page boundaries.
loop {
// parse and verify page boundaries as we go
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
if self.padlen > 0 {
// We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
// parse long header
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
@@ -128,15 +138,6 @@ impl WalStreamDecoder {
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
continue;
} else if self.padlen > 0 {
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.contlen == 0 {
assert!(self.recordbuf.is_empty());
@@ -226,10 +227,10 @@ impl WalStreamDecoder {
self.padlen = self.lsn.calc_padding(8u32) as u32;
}
// Always align resulting LSN on 0x8 boundary -- that is important for getPage()
// and WalReceiver integration. Since this code is used both for WalReceiver and
// initial WAL import let's force alignment right here.
let result = (self.lsn.align(), recordbuf);
// We should return LSN of the next record, not the last byte of this record or
// the byte immediately after. Note that this handles both XLOG_SWITCH and usual
// records, the former "spans" until the next WAL segment (see test_xlog_switch).
let result = (self.lsn + self.padlen as u64, recordbuf);
Ok(Some(result))
}
}
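For an ordinary record (not XLOG_SWITCH), the padding applied above is plain MAXALIGN rounding to the next 8-byte boundary. Below is a small self-contained sketch of that arithmetic, not part of the diff, assuming Lsn::calc_padding(8) follows the usual round-up rule.
// Sketch of the 8-byte alignment rule applied by the decoder above.
fn maxalign_padding(end_lsn: u64) -> u64 {
    (8 - end_lsn % 8) % 8
}

#[test]
fn maxalign_padding_examples() {
    // A record ending at 0/16A2F3D needs 3 padding bytes, so the next record
    // (and the LSN reported by the decoder) starts at 0/16A2F40.
    assert_eq!(maxalign_padding(0x016A_2F3D), 3);
    // Already aligned LSNs get no padding.
    assert_eq!(maxalign_padding(0x016A_2F40), 0);
}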

View File

@@ -597,19 +597,18 @@ mod tests {
fn init_logging() {
let _ = env_logger::Builder::from_env(
env_logger::Env::default()
.default_filter_or("wal_generate=info,postgres_ffi::xlog_utils=trace"),
.default_filter_or("wal_craft=info,postgres_ffi::xlog_utils=trace"),
)
.is_test(true)
.try_init();
}
fn test_end_of_wal(
fn test_end_of_wal<C: wal_craft::Crafter>(
test_name: &str,
generate_wal: impl Fn(&mut postgres::Client) -> anyhow::Result<postgres::types::PgLsn>,
expected_end_of_wal_non_partial: Lsn,
last_segment: &str,
) {
use wal_generate::*;
use wal_craft::*;
// 1. Generate some WAL
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
@@ -622,9 +621,9 @@ mod tests {
fs::remove_dir_all(&cfg.datadir).unwrap();
}
cfg.initdb().unwrap();
let mut srv = cfg.start_server().unwrap();
let srv = cfg.start_server().unwrap();
let expected_wal_end: Lsn =
u64::from(generate_wal(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
u64::from(C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
srv.kill();
// 2. Pick WAL generated by initdb
@@ -681,9 +680,8 @@ mod tests {
#[test]
pub fn test_find_end_of_wal_simple() {
init_logging();
test_end_of_wal(
test_end_of_wal::<wal_craft::Simple>(
"test_find_end_of_wal_simple",
wal_generate::generate_simple,
"0/2000000".parse::<Lsn>().unwrap(),
"000000010000000000000001",
);
@@ -692,9 +690,8 @@ mod tests {
#[test]
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
init_logging();
test_end_of_wal(
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
wal_generate::generate_wal_record_crossing_segment_followed_by_small_one,
"0/3000000".parse::<Lsn>().unwrap(),
"000000010000000000000002",
);
@@ -704,9 +701,8 @@ mod tests {
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
pub fn test_find_end_of_wal_last_crossing_segment() {
init_logging();
test_end_of_wal(
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
"test_find_end_of_wal_last_crossing_segment",
wal_generate::generate_last_wal_record_crossing_segment,
"0/3000000".parse::<Lsn>().unwrap(),
"000000010000000000000002",
);

View File

@@ -1,5 +1,5 @@
[package]
name = "wal_generate"
name = "wal_craft"
version = "0.1.0"
edition = "2021"
@@ -10,5 +10,7 @@ anyhow = "1.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
once_cell = "1.8.0"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres_ffi = { path = "../" }
tempfile = "3.2"

View File

@@ -0,0 +1,100 @@
use anyhow::*;
use clap::{App, Arg, ArgMatches};
use std::str::FromStr;
use wal_craft::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
.init();
let type_arg = &Arg::new("type")
.takes_value(true)
.help("Type of WAL to craft")
.possible_values([
Simple::NAME,
LastWalRecordXlogSwitch::NAME,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
LastWalRecordCrossingSegment::NAME,
])
.required(true);
let arg_matches = App::new("Postgres WAL crafter")
.about("Crafts Postgres databases with specific WAL properties")
.subcommand(
App::new("print-postgres-config")
.about("Print the configuration required for PostgreSQL server before running this script")
)
.subcommand(
App::new("with-initdb")
.about("Craft WAL in a new data directory first initialized with initdb")
.arg(type_arg)
.arg(
Arg::new("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
.default_value("/usr/local")
)
)
.subcommand(
App::new("in-existing")
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
.arg(type_arg)
.arg(
Arg::new("connection")
.takes_value(true)
.help("Connection string to the Postgres database to populate")
.required(true)
)
)
.get_matches();
let wal_craft = |arg_matches: &ArgMatches, client| {
let lsn = match arg_matches.value_of("type").unwrap() {
Simple::NAME => Simple::craft(client)?,
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
LastWalRecordXlogSwitchEndsOnPageBoundary::craft(client)?
}
WalRecordCrossingSegmentFollowedBySmallOne::NAME => {
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
}
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
a => panic!("Unknown --type argument: {}", a),
};
println!("end_of_wal = {}", lsn);
Ok(())
};
match arg_matches.subcommand() {
None => panic!("No subcommand provided"),
Some(("print-postgres-config", _)) => {
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
println!("{}", cfg);
}
Ok(())
}
Some(("with-initdb", arg_matches)) => {
let cfg = Conf {
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let srv = cfg.start_server()?;
wal_craft(arg_matches, &mut srv.connect_with_timeout()?)?;
srv.kill();
Ok(())
}
Some(("in-existing", arg_matches)) => wal_craft(
arg_matches,
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
.connect(postgres::NoTls)?,
),
Some(_) => panic!("Unknown subcommand"),
}
}

View File

@@ -1,9 +1,14 @@
use anyhow::*;
use core::time::Duration;
use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::xlog_utils::{
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
};
use std::cmp::Ordering;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Instant;
@@ -21,6 +26,16 @@ pub struct PostgresServer {
client_config: postgres::Config,
}
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
vec![
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
]
});
impl Conf {
fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("bin")
@@ -69,6 +84,12 @@ impl Conf {
pub fn start_server(&self) -> Result<PostgresServer> {
info!("Starting Postgres server in {:?}", self.datadir);
let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
format!(
"Failed to create pg.log file in directory {}",
self.datadir.display()
)
})?;
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
let server_process = self
@@ -78,13 +99,9 @@ impl Conf {
.arg(unix_socket_dir_path.as_os_str())
.arg("-D")
.arg(self.datadir.as_os_str())
.args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
.args(&["-c", "shared_preload_libraries=neon"]) // can only be loaded at startup
// Disable background processes as much as possible
.args(&["-c", "wal_writer_delay=10s"])
.args(&["-c", "autovacuum=off"])
.stderr(Stdio::null())
.args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg]))
.stderr(Stdio::from(log_file))
.spawn()?;
let server = PostgresServer {
process: server_process,
@@ -137,7 +154,7 @@ impl PostgresServer {
bail!("Connection timed out");
}
pub fn kill(&mut self) {
pub fn kill(mut self) {
self.process.kill().unwrap();
self.process.wait().unwrap();
}
@@ -174,12 +191,16 @@ pub trait PostgresClientExt: postgres::GenericClient {
impl<C: postgres::GenericClient> PostgresClientExt for C {}
fn generate_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
) -> Result<PgLsn> {
pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result<()> {
client.execute("create extension if not exists neon_test_utils", &[])?;
let wal_keep_size: String = client.query_one("SHOW wal_keep_size", &[])?.get(0);
ensure!(wal_keep_size == "50MB");
let wal_writer_delay: String = client.query_one("SHOW wal_writer_delay", &[])?.get(0);
ensure!(wal_writer_delay == "10s");
let autovacuum: String = client.query_one("SHOW autovacuum", &[])?.get(0);
ensure!(autovacuum == "off");
let wal_segment_size = client.query_one(
"select cast(setting as bigint) as setting, unit \
from pg_settings where name = 'wal_segment_size'",
@@ -194,13 +215,29 @@ fn generate_internal<C: postgres::GenericClient>(
"Unexpected wal_segment_size in bytes"
);
Ok(())
}
pub trait Crafter {
const NAME: &'static str;
/// Generates WAL using the client `client`. Returns the expected end-of-wal LSN.
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn>;
}
fn craft_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
) -> Result<PgLsn> {
ensure_server_config(client)?;
let initial_lsn = client.pg_current_wal_insert_lsn()?;
info!("LSN initial = {}", initial_lsn);
let last_lsn = match f(client, initial_lsn)? {
None => client.pg_current_wal_insert_lsn()?,
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
Ordering::Less => bail!("Some records were inserted after the generated WAL"),
Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
Ordering::Equal => last_lsn,
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
},
@@ -209,25 +246,116 @@ fn generate_internal<C: postgres::GenericClient>(
// Some records may be not flushed, e.g. non-transactional logical messages.
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
Ordering::Less => bail!("Some records were flushed after the generated WAL"),
Ordering::Less => bail!("Some records were flushed after the crafted WAL"),
Ordering::Equal => {}
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
}
Ok(last_lsn)
}
pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
generate_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
Ok(None)
})
pub struct Simple;
impl Crafter for Simple {
const NAME: &'static str = "simple";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
craft_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
Ok(None)
})
}
}
fn generate_single_logical_message(
pub struct LastWalRecordXlogSwitch;
impl Crafter for LastWalRecordXlogSwitch {
const NAME: &'static str = "last_wal_record_xlog_switch";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
// Do not use craft_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
client.execute("CREATE table t(x int)", &[])?;
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
after_xlog_switch <= next_segment,
"XLOG_SWITCH message ended after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
Ok(next_segment)
}
}
pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
// Do not use craft_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
client.execute("CREATE table t(x int)", &[])?;
// Add padding so the XLOG_SWITCH record ends exactly on XLOG_BLCKSZ boundary.
// We will use a logical message as the padding. We start by measuring how much WAL
// one logical message takes, considering all alignments and headers.
let base_wal_advance = {
let before_lsn = client.pg_current_wal_insert_lsn()?;
// A small non-empty message of more than a few bytes is more likely than an empty
// message to have the same format as the big padding message.
client.execute(
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', 10))",
&[],
)?;
// The XLOG_SWITCH record has no data => its size is exactly XLOG_SIZE_OF_XLOG_RECORD.
(u64::from(client.pg_current_wal_insert_lsn()?) - u64::from(before_lsn)) as usize
+ XLOG_SIZE_OF_XLOG_RECORD
};
let mut remaining_lsn =
XLOG_BLCKSZ - u64::from(client.pg_current_wal_insert_lsn()?) as usize % XLOG_BLCKSZ;
if remaining_lsn < base_wal_advance {
remaining_lsn += XLOG_BLCKSZ;
}
let repeats = 10 + remaining_lsn - base_wal_advance;
info!(
"current_wal_insert_lsn={}, remaining_lsn={}, base_wal_advance={}, repeats={}",
client.pg_current_wal_insert_lsn()?,
remaining_lsn,
base_wal_advance,
repeats
);
client.execute(
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', $1))",
&[&(repeats as i32)],
)?;
info!(
"current_wal_insert_lsn={}, XLOG_SIZE_OF_XLOG_RECORD={}",
client.pg_current_wal_insert_lsn()?,
XLOG_SIZE_OF_XLOG_RECORD
);
// Emit the XLOG_SWITCH
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
after_xlog_switch < next_segment,
"XLOG_SWITCH message ended on or after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
ensure!(
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
"XLOG_SWITCH message ended not on page boundary: {}",
after_xlog_switch
);
Ok(next_segment)
}
}
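To make the repeat-count computation above concrete, here is the same arithmetic as a standalone sketch. The constants are assumptions (XLOG_BLCKSZ = 8192 and XLOG_SIZE_OF_XLOG_RECORD = 24 are the usual values), and the probe advance of 66 bytes is made up for illustration.
// Standalone sketch of LastWalRecordXlogSwitchEndsOnPageBoundary's padding math.
// `probe_advance` stands for the measured WAL delta of the 10-byte probe message.
fn padding_repeats(insert_lsn: usize, probe_advance: usize) -> usize {
    const XLOG_BLCKSZ: usize = 8192; // assumed standard WAL page size
    const XLOG_SIZE_OF_XLOG_RECORD: usize = 24; // assumed record header size
    let base_wal_advance = probe_advance + XLOG_SIZE_OF_XLOG_RECORD;
    let mut remaining_lsn = XLOG_BLCKSZ - insert_lsn % XLOG_BLCKSZ;
    if remaining_lsn < base_wal_advance {
        remaining_lsn += XLOG_BLCKSZ;
    }
    // Each extra repeat grows the padding message by one byte, so the
    // following XLOG_SWITCH record should end exactly on the page boundary.
    10 + remaining_lsn - base_wal_advance
}

#[test]
fn padding_repeats_example() {
    // 4096 bytes into a page, 66 bytes consumed by the probe message:
    // 10 + 4096 - (66 + 24) = 4016 repeats of 'a' are needed.
    assert_eq!(padding_repeats(4096, 66), 4016);
}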
fn craft_single_logical_message(
client: &mut impl postgres::GenericClient,
transactional: bool,
) -> Result<PgLsn> {
generate_internal(client, |client, initial_lsn| {
craft_internal(client, |client, initial_lsn| {
ensure!(
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
"Initial LSN is too far in the future"
@@ -265,14 +393,18 @@ fn generate_single_logical_message(
})
}
pub fn generate_wal_record_crossing_segment_followed_by_small_one(
client: &mut impl postgres::GenericClient,
) -> Result<PgLsn> {
generate_single_logical_message(client, true)
pub struct WalRecordCrossingSegmentFollowedBySmallOne;
impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
craft_single_logical_message(client, true)
}
}
pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
client: &mut C,
) -> Result<PgLsn> {
generate_single_logical_message(client, false)
pub struct LastWalRecordCrossingSegment;
impl Crafter for LastWalRecordCrossingSegment {
const NAME: &'static str = "last_wal_record_crossing_segment";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
craft_single_logical_message(client, false)
}
}

View File

@@ -1,58 +0,0 @@
use anyhow::*;
use clap::{App, Arg};
use wal_generate::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(
env_logger::Env::default().default_filter_or("wal_generate=info"),
)
.init();
let arg_matches = App::new("Postgres WAL generator")
.about("Generates Postgres databases with specific WAL properties")
.arg(
Arg::new("datadir")
.short('D')
.long("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
.default_value("/usr/local")
)
.arg(
Arg::new("type")
.long("type")
.takes_value(true)
.help("Type of WAL to generate")
.possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
.required(true)
)
.get_matches();
let cfg = Conf {
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let mut srv = cfg.start_server()?;
let lsn = match arg_matches.value_of("type").unwrap() {
"simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
"last_wal_record_crossing_segment" => {
generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
}
"wal_record_crossing_segment_followed_by_small_one" => {
generate_wal_record_crossing_segment_followed_by_small_one(
&mut srv.connect_with_timeout()?,
)?
}
a => panic!("Unknown --type argument: {}", a),
};
println!("end_of_wal = {}", lsn);
srv.kill();
Ok(())
}

View File

@@ -12,8 +12,10 @@ use std::{
borrow::Cow,
collections::HashMap,
ffi::OsStr,
fmt::Debug,
num::{NonZeroU32, NonZeroUsize},
path::{Path, PathBuf},
pin::Pin,
};
use anyhow::{bail, Context};
@@ -40,13 +42,19 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
pub trait RemoteObjectName {
// Needed to retrieve the last path component of a RemoteObjectId,
// in other words its file name.
fn object_name(&self) -> Option<&str>;
}
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
#[async_trait::async_trait]
pub trait RemoteStorage: Send + Sync {
/// A way to uniquely reference a file in the remote storage.
type RemoteObjectId;
type RemoteObjectId: RemoteObjectName;
/// Attempts to derive the storage path out of the local path, if the latter is correct.
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
@@ -57,6 +65,12 @@ pub trait RemoteStorage: Send + Sync {
/// Lists all items the storage has right now.
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Streams the local file contents into remote into the remote storage entry.
async fn upload(
&self,
@@ -70,11 +84,7 @@ pub trait RemoteStorage: Send + Sync {
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>>;
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
@@ -83,12 +93,49 @@ pub trait RemoteStorage: Send + Sync {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>>;
) -> Result<Download, DownloadError>;
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
}
pub struct Download {
pub download_stream: Pin<Box<dyn io::AsyncRead + Unpin + Send>>,
/// Extra key-value data, associated with the current remote file.
pub metadata: Option<StorageMetadata>,
}
impl Debug for Download {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Download")
.field("metadata", &self.metadata)
.finish()
}
}
#[derive(Debug)]
pub enum DownloadError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The file was found in the remote storage, but the download failed.
Other(anyhow::Error),
}
impl std::fmt::Display for DownloadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DownloadError::BadInput(e) => {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e}"),
}
}
}
impl std::error::Error for DownloadError {}
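With this change a caller receives a byte stream plus optional metadata instead of passing in a writer. Below is a minimal sketch of draining a Download into memory; it is not part of the diff and only relies on tokio's AsyncReadExt and the anyhow::Context import already present in this module.
// Hypothetical helper, not part of the diff: read a Download fully into memory.
async fn read_download(mut download: Download) -> anyhow::Result<(Vec<u8>, Option<StorageMetadata>)> {
    use tokio::io::AsyncReadExt;

    let mut bytes = Vec::new();
    download
        .download_stream
        .read_to_end(&mut bytes)
        .await
        .context("Failed to read the remote download stream")?;
    Ok((bytes, download.metadata))
}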
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
pub enum GenericRemoteStorage {
@@ -180,7 +227,7 @@ pub struct S3Config {
pub concurrency_limit: NonZeroUsize,
}
impl std::fmt::Debug for S3Config {
impl Debug for S3Config {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("S3Config")
.field("bucket_name", &self.bucket_name)

View File

@@ -5,6 +5,7 @@
//! volume is mounted to the local FS.
use std::{
borrow::Cow,
future::Future,
path::{Path, PathBuf},
pin::Pin,
@@ -17,10 +18,16 @@ use tokio::{
};
use tracing::*;
use crate::path_with_suffix_extension;
use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectName};
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
impl RemoteObjectName for PathBuf {
fn object_name(&self) -> Option<&str> {
self.file_stem().and_then(|n| n.to_str())
}
}
pub struct LocalFs {
working_directory: PathBuf,
storage_root: PathBuf,
@@ -101,7 +108,18 @@ impl RemoteStorage for LocalFs {
}
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
get_all_files(&self.storage_root).await
get_all_files(&self.storage_root, true).await
}
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
None => Cow::Borrowed(&self.storage_root),
};
get_all_files(path.as_ref(), false).await
}
async fn upload(
@@ -192,15 +210,12 @@ impl RemoteStorage for LocalFs {
Ok(())
}
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
let file_path = self.resolve_in_storage(from)?;
if file_path.exists() && file_path.is_file() {
let mut source = io::BufReader::new(
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&file_path)
@@ -210,22 +225,20 @@ impl RemoteStorage for LocalFs {
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})?,
})
.map_err(DownloadError::Other)?,
);
io::copy(&mut source, to).await.with_context(|| {
format!(
"Failed to download file '{}' from the local storage",
file_path.display()
)
})?;
source.flush().await?;
self.read_storage_metadata(&file_path).await
let metadata = self
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
Ok(Download {
metadata,
download_stream: Box::pin(source),
})
} else {
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
Err(DownloadError::NotFound)
}
}
@@ -234,22 +247,19 @@ impl RemoteStorage for LocalFs {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
ensure!(
end_exclusive > start_inclusive,
"Invalid range, start ({}) is bigger then end ({:?})",
start_inclusive,
end_exclusive
);
if end_exclusive <= start_inclusive {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
};
if start_inclusive == end_exclusive.saturating_sub(1) {
return Ok(None);
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let file_path = self.resolve_in_storage(from)?;
if file_path.exists() && file_path.is_file() {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
@@ -260,31 +270,31 @@ impl RemoteStorage for LocalFs {
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})?,
})
.map_err(DownloadError::Other)?,
);
source
.seek(io::SeekFrom::Start(start_inclusive))
.await
.context("Failed to seek to the range start in a local storage file")?;
match end_exclusive {
Some(end_exclusive) => {
io::copy(&mut source.take(end_exclusive - start_inclusive), to).await
}
None => io::copy(&mut source, to).await,
}
.with_context(|| {
format!(
"Failed to download file '{}' range from the local storage",
file_path.display()
)
})?;
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
self.read_storage_metadata(&file_path).await
Ok(match end_exclusive {
Some(end_exclusive) => Download {
metadata,
download_stream: Box::pin(source.take(end_exclusive - start_inclusive)),
},
None => Download {
metadata,
download_stream: Box::pin(source),
},
})
} else {
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
Err(DownloadError::NotFound)
}
}
@@ -307,6 +317,7 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
fn get_all_files<'a, P>(
directory_path: P,
recursive: bool,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
where
P: AsRef<Path> + Send + Sync + 'a,
@@ -323,7 +334,11 @@ where
if file_type.is_symlink() {
debug!("{:?} us a symlink, skipping", entry_path)
} else if file_type.is_dir() {
paths.extend(get_all_files(entry_path).await?.into_iter())
if recursive {
paths.extend(get_all_files(entry_path, true).await?.into_iter())
} else {
paths.push(dir_entry.path())
}
} else {
paths.push(dir_entry.path());
}
@@ -352,6 +367,19 @@ async fn create_target_directory(target_file_path: &Path) -> anyhow::Result<()>
Ok(())
}
fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
if file_path.exists() {
ensure!(
file_path.is_file(),
"file path '{}' is not a file",
file_path.display()
);
Ok(true)
} else {
Ok(false)
}
}
#[cfg(test)]
mod pure_tests {
use tempfile::tempdir;
@@ -518,6 +546,31 @@ mod fs_tests {
use std::{collections::HashMap, io::Write};
use tempfile::tempdir;
async fn read_and_assert_remote_file_contents(
storage: &LocalFs,
#[allow(clippy::ptr_arg)]
// have to use &PathBuf due to `storage.local_path` parameter requirements
remote_storage_path: &PathBuf,
expected_metadata: Option<&StorageMetadata>,
) -> anyhow::Result<String> {
let mut download = storage
.download(remote_storage_path)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
download.metadata.as_ref() == expected_metadata,
"Unexpected metadata returned for the downloaded file"
);
let mut contents = String::new();
download
.download_stream
.read_to_string(&mut contents)
.await
.context("Failed to read remote file contents into string")?;
Ok(contents)
}
#[tokio::test]
async fn upload_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
@@ -568,15 +621,7 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage.download(&upload_target, &mut content_bytes).await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
content_bytes.flush().await?;
let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
assert_eq!(
dummy_contents(upload_name),
contents,
@@ -584,13 +629,9 @@ mod fs_tests {
);
let non_existing_path = PathBuf::from("somewhere").join("else");
match storage.download(&non_existing_path, &mut io::sink()).await {
Ok(_) => panic!("Should not allow downloading non-existing storage files"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
assert!(error_string.contains(&non_existing_path.display().to_string()));
}
match storage.download(&non_existing_path).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
Ok(())
}
@@ -603,58 +644,31 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let mut full_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(&upload_target, 0, None, &mut full_range_bytes)
.await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
full_range_bytes.flush().await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
assert_eq!(
dummy_contents(upload_name),
String::from_utf8(full_range_bytes.into_inner().into_inner())?,
full_range_download_contents,
"Download full range should return the whole upload"
);
let mut zero_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let same_byte = 1_000_000_000;
let metadata = storage
.download_byte_range(
&upload_target,
same_byte,
Some(same_byte + 1), // exclusive end
&mut zero_range_bytes,
)
.await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
zero_range_bytes.flush().await?;
assert!(
zero_range_bytes.into_inner().into_inner().is_empty(),
"Zero byte range should not download any part of the file"
);
let uploaded_bytes = dummy_contents(upload_name).into_bytes();
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&mut first_part_remote,
)
let mut first_part_download = storage
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
.await?;
assert!(
metadata.is_none(),
first_part_download.metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut first_part_download.download_stream,
&mut first_part_remote,
)
.await?;
first_part_remote.flush().await?;
let first_part_remote = first_part_remote.into_inner().into_inner();
assert_eq!(
@@ -663,20 +677,24 @@ mod fs_tests {
"First part bytes should be returned when requested"
);
let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
let mut second_part_download = storage
.download_byte_range(
&upload_target,
first_part_local.len() as u64,
Some((first_part_local.len() + second_part_local.len()) as u64),
&mut second_part_remote,
)
.await?;
assert!(
metadata.is_none(),
second_part_download.metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut second_part_download.download_stream,
&mut second_part_remote,
)
.await?;
second_part_remote.flush().await?;
let second_part_remote = second_part_remote.into_inner().into_inner();
assert_eq!(
@@ -696,11 +714,30 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let start = 1_000_000_000;
let end = start + 1;
match storage
.download_byte_range(
&upload_target,
start,
Some(end), // exclusive end
)
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("zero bytes"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
let start = 10000;
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end), &mut io::sink())
.download_byte_range(&upload_target, start, Some(end))
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
@@ -712,18 +749,6 @@ mod fs_tests {
}
}
let non_existing_path = PathBuf::from("somewhere").join("else");
match storage
.download_byte_range(&non_existing_path, 1, Some(3), &mut io::sink())
.await
{
Ok(_) => panic!("Should not allow downloading non-existing storage file ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
assert!(error_string.contains(&non_existing_path.display().to_string()));
}
}
Ok(())
}
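A minimal sketch (not from the diff itself) of how callers consume the return shape exercised above: `download` and `download_byte_range` now hand back a `Download` value carrying a pinned, boxed async reader plus optional metadata, instead of writing into a caller-supplied writer. Assuming only the `Download { download_stream, metadata }` shape visible in this diff, draining one into memory looks like:

use tokio::io::{self, AsyncWriteExt};

// Drain a `Download` into an in-memory buffer, mirroring the pattern the tests above use.
async fn download_to_bytes(mut download: Download) -> anyhow::Result<Vec<u8>> {
    let mut out = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
    io::copy(&mut download.download_stream, &mut out).await?;
    out.flush().await?;
    Ok(out.into_inner().into_inner())
}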
@@ -762,35 +787,26 @@ mod fs_tests {
let upload_target =
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let full_download_metadata = storage.download(&upload_target, &mut content_bytes).await?;
content_bytes.flush().await?;
let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
assert_eq!(
dummy_contents(upload_name),
contents,
full_range_download_contents,
"We should upload and download the same contents"
);
assert_eq!(
full_download_metadata.as_ref(),
Some(&metadata),
"We should get the same metadata back for full download"
);
let uploaded_bytes = dummy_contents(upload_name).into_bytes();
let (first_part_local, _) = uploaded_bytes.split_at(3);
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let partial_download_metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&mut first_part_remote,
)
let mut partial_download_with_metadata = storage
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
.await?;
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut partial_download_with_metadata.download_stream,
&mut first_part_remote,
)
.await?;
first_part_remote.flush().await?;
let first_part_remote = first_part_remote.into_inner().into_inner();
assert_eq!(
@@ -800,8 +816,8 @@ mod fs_tests {
);
assert_eq!(
partial_download_metadata.as_ref(),
Some(&metadata),
partial_download_with_metadata.metadata,
Some(metadata),
"We should get the same metadata back for partial download"
);
@@ -843,7 +859,7 @@ mod fs_tests {
}
fn dummy_contents(name: &str) -> String {
format!("contents for {}", name)
format!("contents for {name}")
}
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<PathBuf>> {

View File

@@ -9,17 +9,19 @@ use std::path::{Path, PathBuf};
use anyhow::Context;
use rusoto_core::{
credential::{InstanceMetadataProvider, StaticProvider},
HttpClient, Region,
HttpClient, Region, RusotoError,
};
use rusoto_s3::{
DeleteObjectRequest, GetObjectRequest, ListObjectsV2Request, PutObjectRequest, S3Client,
StreamingBody, S3,
DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest,
S3Client, StreamingBody, S3,
};
use tokio::{io, sync::Semaphore};
use tokio_util::io::ReaderStream;
use tracing::debug;
use crate::{strip_path_prefix, RemoteStorage, S3Config};
use crate::{
strip_path_prefix, Download, DownloadError, RemoteObjectName, RemoteStorage, S3Config,
};
use super::StorageMetadata;
@@ -117,6 +119,25 @@ impl S3ObjectKey {
}
}
impl RemoteObjectName for S3ObjectKey {
/// Turn a/b/c or a/b/c/ into c
fn object_name(&self) -> Option<&str> {
// corner case: char::to_string is not const, that's why this is more verbose than it needs to be
// see https://github.com/rust-lang/rust/issues/88674
if self.0.len() == 1 && self.0.chars().next().unwrap() == S3_PREFIX_SEPARATOR {
return None;
}
if self.0.ends_with(S3_PREFIX_SEPARATOR) {
self.0.rsplit(S3_PREFIX_SEPARATOR).nth(1)
} else {
self.0
.rsplit_once(S3_PREFIX_SEPARATOR)
.map(|(_, last)| last)
}
}
}
/// AWS S3 storage.
pub struct S3Bucket {
workdir: PathBuf,
@@ -187,6 +208,39 @@ impl S3Bucket {
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
})
}
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 download")
.map_err(DownloadError::Other)?;
metrics::inc_get_object();
match self.client.get_object(request).await {
Ok(object_output) => match object_output.body {
None => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Got no body for the S3 object given"
)))
}
Some(body) => Ok(Download {
metadata: object_output.metadata.map(StorageMetadata),
download_stream: Box::pin(io::BufReader::new(body.into_async_read())),
}),
},
Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound),
Err(e) => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Failed to download S3 object: {e}"
)))
}
}
}
}
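A hedged example (not from the diff) of why the error split matters: `download_object` above maps a missing key to `DownloadError::NotFound` and wraps everything else in `DownloadError::Other`, so a caller can treat "object absent" as a normal outcome. The helper name is made up and the match assumes only the two variants constructed in this diff:

// Treat a missing object as `None`; propagate any other download failure.
async fn download_if_present(
    storage: &S3Bucket,
    key: &S3ObjectKey,
) -> anyhow::Result<Option<Download>> {
    match storage.download(key).await {
        Ok(download) => Ok(Some(download)),
        Err(DownloadError::NotFound) => Ok(None),
        Err(DownloadError::Other(e)) => Err(e),
    }
}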
#[async_trait::async_trait]
@@ -250,6 +304,77 @@ impl RemoteStorage for S3Bucket {
Ok(document_keys)
}
/// Note: it won't include empty "directories"
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let list_prefix = match prefix {
Some(prefix) => {
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
// if the default prefix has no trailing '/' and the supplied
// prefix does not start with '/', insert one between them
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
{
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
prefix_in_bucket.push_str(&prefix.0);
// the prefix is required to end with a separator,
// otherwise the request returns only the prefix entry itself
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
Some(prefix_in_bucket)
}
None => self.prefix_in_bucket.clone(),
};
let mut document_keys = Vec::new();
let mut continuation_token = None;
loop {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 list")?;
metrics::inc_list_objects();
let fetch_response = self
.client
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: list_prefix.clone(),
continuation_token,
delimiter: Some(S3_PREFIX_SEPARATOR.to_string()),
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
e
})?;
document_keys.extend(
fetch_response
.common_prefixes
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(S3ObjectKey(o.prefix?))),
);
match fetch_response.continuation_token {
Some(new_token) => continuation_token = Some(new_token),
None => break,
}
}
Ok(document_keys)
}
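A standalone rendering (not from the diff) of the prefix handling above, which has two corner cases: the supplied prefix is joined to the configured bucket prefix without doubling the separator, and the result must end with a separator or ListObjectsV2 returns only the prefix entry itself instead of the common prefixes beneath it. The separator is assumed to be '/', as the `object_name` tests further down imply, and the example values are made up:

const SEP: char = '/'; // stand-in for S3_PREFIX_SEPARATOR

fn normalize_list_prefix(prefix_in_bucket: Option<&str>, supplied: &str) -> String {
    let mut out = prefix_in_bucket.unwrap_or_default().to_owned();
    // join without doubling the separator
    if !(out.ends_with(SEP) || supplied.starts_with(SEP)) {
        out.push(SEP);
    }
    out.push_str(supplied);
    // force a trailing separator so the listing descends into the prefix
    if !out.ends_with(SEP) {
        out.push(SEP);
    }
    out
}

#[test]
fn normalize_list_prefix_examples() {
    assert_eq!(normalize_list_prefix(Some("pageserver"), "tenants"), "pageserver/tenants/");
    assert_eq!(normalize_list_prefix(Some("pageserver/"), "tenants"), "pageserver/tenants/");
}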
async fn upload(
&self,
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
@@ -283,38 +408,13 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 download")?;
metrics::inc_get_object();
let object_output = self
.client
.get_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
..GetObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_get_object_fail();
e
})?;
if let Some(body) = object_output.body {
let mut from = io::BufReader::new(body.into_async_read());
io::copy(&mut from, to).await?;
}
Ok(object_output.metadata.map(StorageMetadata))
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
..GetObjectRequest::default()
})
.await
}
async fn download_byte_range(
@@ -322,8 +422,7 @@ impl RemoteStorage for S3Bucket {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
) -> Result<Download, DownloadError> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be inclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
@@ -331,34 +430,14 @@ impl RemoteStorage for S3Bucket {
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
None => format!("bytes={}-", start_inclusive),
});
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 range download")?;
metrics::inc_get_object();
let object_output = self
.client
.get_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
range,
..GetObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_get_object_fail();
e
})?;
if let Some(body) = object_output.body {
let mut from = io::BufReader::new(body.into_async_read());
io::copy(&mut from, to).await?;
}
Ok(object_output.metadata.map(StorageMetadata))
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
range,
..GetObjectRequest::default()
})
.await
}
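The range header built above converts the crate's half-open [start, end) interval into the closed byte range that HTTP (and therefore S3) expects: `download_byte_range(key, 0, Some(10))` requests `bytes=0-9`, and an open-ended call becomes `bytes=10-`. A small sketch (not from the diff) of just that conversion:

fn http_range_header(start_inclusive: u64, end_exclusive: Option<u64>) -> String {
    match end_exclusive.map(|end| end.saturating_sub(1)) {
        Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
        None => format!("bytes={}-", start_inclusive),
    }
}

#[test]
fn http_range_header_examples() {
    assert_eq!(http_range_header(0, Some(10)), "bytes=0-9");
    assert_eq!(http_range_header(10, None), "bytes=10-");
}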
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
@@ -391,6 +470,25 @@ mod tests {
use super::*;
#[test]
fn object_name() {
let k = S3ObjectKey("a/b/c".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = S3ObjectKey("a/b/c/".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = S3ObjectKey("a/".to_owned());
assert_eq!(k.object_name(), Some("a"));
// XXX is it impossible to have an empty key?
let k = S3ObjectKey("".to_owned());
assert_eq!(k.object_name(), None);
let k = S3ObjectKey("/".to_owned());
assert_eq!(k.object_name(), None);
}
#[test]
fn download_destination() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();

View File

@@ -13,13 +13,10 @@ use std::fmt;
use std::io::{self, Write};
use std::net::{Shutdown, SocketAddr, TcpStream};
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tracing::*;
static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
pub trait Handler {
/// Handle single query.
/// postgres_backend will issue ReadyForQuery after calling this (this
@@ -45,6 +42,10 @@ pub trait Handler {
fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
bail!("JWT auth failed")
}
fn is_shutdown_requested(&self) -> bool {
false
}
}
/// PostgresBackend protocol state.
@@ -274,7 +275,7 @@ impl PostgresBackend {
let mut unnamed_query_string = Bytes::new();
while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
while !handler.is_shutdown_requested() {
match self.read_message() {
Ok(message) => {
if let Some(msg) = message {
@@ -493,8 +494,3 @@ impl PostgresBackend {
Ok(ProcessMsgResult::Continue)
}
}
// Set the flag to inform connections to cancel
pub fn set_pgbackend_shutdown_requested() {
PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
}

View File

@@ -269,7 +269,14 @@ impl FeStartupPacket {
.next()
.context("expected even number of params in StartupMessage")?;
if name == "options" {
// deprecated way of passing params as cmd line args
// parsing options arguments "...&options=<var0>%3D<val0>+<var1>=<val1>..."
// '%3D' is '=' and '+' is ' '
// Note: we allow users that don't have SNI capabilities,
// to pass a special keyword argument 'project'
// to be used to determine the cluster name by the proxy.
//TODO: write unit test for this and refactor in its own function.
for cmdopt in value.split(' ') {
let nameval: Vec<&str> = cmdopt.split('=').collect();
if nameval.len() == 2 {
@@ -919,10 +926,10 @@ impl<'a> BeMessage<'a> {
}
}
// Zenith extension of postgres replication protocol
// See ZENITH_STATUS_UPDATE_TAG_BYTE
// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ZenithFeedback {
pub struct ReplicationFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,
// Parts of StandbyStatusUpdate we resend to compute via safekeeper
@@ -932,13 +939,13 @@ pub struct ZenithFeedback {
pub ps_replytime: SystemTime,
}
// NOTE: Do not forget to increment this number when adding new fields to ZenithFeedback.
// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
// Do not remove previously available fields because this might be backwards incompatible.
pub const ZENITH_FEEDBACK_FIELDS_NUMBER: u8 = 5;
pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;
impl ZenithFeedback {
pub fn empty() -> ZenithFeedback {
ZenithFeedback {
impl ReplicationFeedback {
pub fn empty() -> ReplicationFeedback {
ReplicationFeedback {
current_timeline_size: 0,
ps_writelsn: 0,
ps_applylsn: 0,
@@ -947,7 +954,7 @@ impl ZenithFeedback {
}
}
// Serialize ZenithFeedback using custom format
// Serialize ReplicationFeedback using custom format
// to support protocol extensibility.
//
// Following layout is used:
@@ -958,7 +965,7 @@ impl ZenithFeedback {
// uint32 - value length in bytes
// value itself
pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
buf.put_u8(ZENITH_FEEDBACK_FIELDS_NUMBER); // # of keys
buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
write_cstr(&Bytes::from("current_timeline_size"), buf)?;
buf.put_i32(8);
buf.put_u64(self.current_timeline_size);
@@ -985,9 +992,9 @@ impl ZenithFeedback {
Ok(())
}
// Deserialize ZenithFeedback message
pub fn parse(mut buf: Bytes) -> ZenithFeedback {
let mut zf = ZenithFeedback::empty();
// Deserialize ReplicationFeedback message
pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
let mut zf = ReplicationFeedback::empty();
let nfields = buf.get_u8();
let mut i = 0;
while i < nfields {
@@ -1028,14 +1035,14 @@ impl ZenithFeedback {
_ => {
let len = buf.get_i32();
warn!(
"ZenithFeedback parse. unknown key {} of len {}. Skip it.",
"ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
key, len
);
buf.advance(len as usize);
}
}
}
trace!("ZenithFeedback parsed is {:?}", zf);
trace!("ReplicationFeedback parsed is {:?}", zf);
zf
}
}
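To make the custom format above concrete: a serialized message is a one-byte key count followed by (null-terminated key, i32 value length, value) triples, written in network byte order by the `bytes` crate. An illustrative sketch (not from the diff) that writes a single field the way `serialize` does:

use bytes::{BufMut, BytesMut};

fn main() {
    let mut buf = BytesMut::new();
    buf.put_u8(1); // number of keys in this toy message
    buf.put_slice(b"current_timeline_size\0"); // null-terminated key, as write_cstr() produces
    buf.put_i32(8); // value length in bytes
    buf.put_u64(12_345_678); // the value itself, big-endian
    // buf now holds: 01 | "current_timeline_size\0" | 00 00 00 08 | 00 00 00 00 00 bc 61 4e
    println!("{:02x?}", &buf[..]);
}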
@@ -1045,8 +1052,8 @@ mod tests {
use super::*;
#[test]
fn test_zenithfeedback_serialization() {
let mut zf = ZenithFeedback::empty();
fn test_replication_feedback_serialization() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
@@ -1055,13 +1062,13 @@ mod tests {
let mut data = BytesMut::new();
zf.serialize(&mut data).unwrap();
let zf_parsed = ZenithFeedback::parse(data.freeze());
let zf_parsed = ReplicationFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}
#[test]
fn test_zenithfeedback_unknown_key() {
let mut zf = ZenithFeedback::empty();
fn test_replication_feedback_unknown_key() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
@@ -1072,7 +1079,7 @@ mod tests {
// Add an extra field to the buffer and adjust number of keys
if let Some(first) = data.first_mut() {
*first = ZENITH_FEEDBACK_FIELDS_NUMBER + 1;
*first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
}
write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
@@ -1080,7 +1087,7 @@ mod tests {
data.put_u64(42);
// Parse serialized data and check that new field is not parsed
let zf_parsed = ZenithFeedback::parse(data.freeze());
let zf_parsed = ReplicationFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}

View File

@@ -14,7 +14,7 @@ use safekeeper::defaults::{
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
};
use std::collections::{BTreeSet, HashMap};
use std::path::Path;
use std::path::{Path, PathBuf};
use std::process::exit;
use std::str::FromStr;
use utils::{
@@ -159,6 +159,20 @@ fn main() -> Result<()> {
.about("Create a new blank timeline")
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone()))
.subcommand(App::new("import")
.about("Import timeline from basebackup directory")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone())
.arg(Arg::new("node-name").long("node-name").takes_value(true)
.help("Name to assign to the imported timeline"))
.arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
.help("Basebackup tarfile to import"))
.arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
.help("Lsn the basebackup starts at"))
.arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
.help("Wal to add after base"))
.arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
.help("Lsn the basebackup ends at")))
).subcommand(
App::new("tenant")
.setting(AppSettings::ArgRequiredElseHelp)
@@ -523,7 +537,13 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
match tenant_match.subcommand() {
Some(("list", _)) => {
for t in pageserver.tenant_list()? {
println!("{} {}", t.id, t.state);
println!(
"{} {}",
t.id,
t.state
.map(|s| s.to_string())
.unwrap_or_else(|| String::from(""))
);
}
}
Some(("create", create_match)) => {
@@ -613,6 +633,43 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
timeline.timeline_id, last_record_lsn, tenant_id,
);
}
Some(("import", import_match)) => {
let tenant_id = get_tenant_id(import_match, env)?;
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
let name = import_match
.value_of("node-name")
.ok_or_else(|| anyhow!("No node name provided"))?;
// Parse base inputs
let base_tarfile = import_match
.value_of("base-tarfile")
.map(|s| PathBuf::from_str(s).unwrap())
.ok_or_else(|| anyhow!("No base-tarfile provided"))?;
let base_lsn = Lsn::from_str(
import_match
.value_of("base-lsn")
.ok_or_else(|| anyhow!("No base-lsn provided"))?,
)?;
let base = (base_lsn, base_tarfile);
// Parse pg_wal inputs
let wal_tarfile = import_match
.value_of("wal-tarfile")
.map(|s| PathBuf::from_str(s).unwrap());
let end_lsn = import_match
.value_of("end-lsn")
.map(|s| Lsn::from_str(s).unwrap());
// TODO validate both or none are provided
let pg_wal = end_lsn.zip(wal_tarfile);
let mut cplane = ComputeControlPlane::load(env.clone())?;
println!("Importing timeline into pageserver ...");
pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal)?;
println!("Creating node for imported timeline ...");
env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
cplane.new_node(tenant_id, name, timeline_id, None, None)?;
println!("Done");
}
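One note on the import arm above: `end_lsn.zip(wal_tarfile)` silently skips the WAL step if only one of `--end-lsn`/`--wal-tarfile` is supplied, which is what the preceding TODO is about. A hedged sketch of the missing validation, reusing the variables already bound above:

let pg_wal = match (end_lsn, wal_tarfile) {
    (Some(end_lsn), Some(wal_tarfile)) => Some((end_lsn, wal_tarfile)),
    (None, None) => None,
    // reject the half-specified case instead of silently ignoring it
    _ => anyhow::bail!("--end-lsn and --wal-tarfile must be provided together"),
};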
Some(("branch", branch_match)) => {
let tenant_id = get_tenant_id(branch_match, env)?;
let new_branch_name = branch_match

View File

@@ -61,6 +61,7 @@ utils = { path = "../libs/utils" }
remote_storage = { path = "../libs/remote_storage" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
close_fds = "0.3.2"
walkdir = "2.3.2"
[dev-dependencies]
hex-literal = "0.3"

View File

@@ -69,7 +69,7 @@ Repository
The repository stores all the page versions, or WAL records needed to
reconstruct them. Each tenant has a separate Repository, which is
stored in the .zenith/tenants/<tenantid> directory.
stored in the .neon/tenants/<tenantid> directory.
Repository is an abstract trait, defined in `repository.rs`. It is
implemented by the LayeredRepository object in
@@ -92,7 +92,7 @@ Each repository also has a WAL redo manager associated with it, see
records, whenever we need to reconstruct a page version from WAL to
satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
for a page. The WAL redo manager uses a Postgres process running in
special zenith wal-redo mode to do the actual WAL redo, and
special Neon wal-redo mode to do the actual WAL redo, and
communicates with the process using a pipe.

View File

@@ -13,6 +13,7 @@
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::{BufMut, BytesMut};
use fail::fail_point;
use itertools::Itertools;
use std::fmt::Write as FmtWrite;
use std::io;
use std::io::Write;
@@ -21,7 +22,7 @@ use std::time::SystemTime;
use tar::{Builder, EntryType, Header};
use tracing::*;
use crate::reltag::SlruKind;
use crate::reltag::{RelTag, SlruKind};
use crate::repository::Timeline;
use crate::DatadirTimelineImpl;
use postgres_ffi::xlog_utils::*;
@@ -39,11 +40,12 @@ where
timeline: &'a Arc<DatadirTimelineImpl>,
pub lsn: Lsn,
prev_record_lsn: Lsn,
full_backup: bool,
finished: bool,
}
// Create basebackup with non-rel data in it. Omit relational data.
// Create basebackup with non-rel data in it.
// Only include relational data if 'full_backup' is true.
//
// Currently we use empty lsn in two cases:
// * During the basebackup right after timeline creation
@@ -58,6 +60,8 @@ where
write: W,
timeline: &'a Arc<DatadirTimelineImpl>,
req_lsn: Option<Lsn>,
prev_lsn: Option<Lsn>,
full_backup: bool,
) -> Result<Basebackup<'a, W>> {
// Compute postgres doesn't have any previous WAL files, but the first
// record that it's going to write needs to include the LSN of the
@@ -93,21 +97,34 @@ where
(end_of_timeline.prev, end_of_timeline.last)
};
// Consolidate the derived and the provided prev_lsn values
let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
if backup_prev != Lsn(0) {
ensure!(backup_prev == provided_prev_lsn)
}
provided_prev_lsn
} else {
backup_prev
};
info!(
"taking basebackup lsn={}, prev_lsn={}",
backup_lsn, backup_prev
"taking basebackup lsn={}, prev_lsn={} (full_backup={})",
backup_lsn, prev_lsn, full_backup
);
Ok(Basebackup {
ar: Builder::new(AbortableWrite::new(write)),
timeline,
lsn: backup_lsn,
prev_record_lsn: backup_prev,
prev_record_lsn: prev_lsn,
full_backup,
finished: false,
})
}
pub fn send_tarball(mut self) -> anyhow::Result<()> {
// TODO include checksum
// Create pgdata subdirs structure
for dir in pg_constants::PGDATA_SUBDIRS.iter() {
let header = new_tar_header_dir(*dir)?;
@@ -140,6 +157,13 @@ where
// Create tablespace directories
for ((spcnode, dbnode), has_relmap_file) in self.timeline.list_dbdirs(self.lsn)? {
self.add_dbdir(spcnode, dbnode, has_relmap_file)?;
// Gather and send relational files in each database if full backup is requested.
if self.full_backup {
for rel in self.timeline.list_rels(spcnode, dbnode, self.lsn)? {
self.add_rel(rel)?;
}
}
}
for xid in self.timeline.list_twophase_files(self.lsn)? {
self.add_twophase_file(xid)?;
@@ -157,6 +181,38 @@ where
Ok(())
}
fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
let nblocks = self.timeline.get_rel_size(tag, self.lsn)?;
// Function that adds relation segment data to archive
let mut add_file = |segment_index, data: &Vec<u8>| -> anyhow::Result<()> {
let file_name = tag.to_segfile_name(segment_index as u32);
let header = new_tar_header(&file_name, data.len() as u64)?;
self.ar.append(&header, data.as_slice())?;
Ok(())
};
// If the relation is empty, create an empty file
if nblocks == 0 {
add_file(0, &vec![])?;
return Ok(());
}
// Add a file for each chunk of blocks (aka segment)
let chunks = (0..nblocks).chunks(pg_constants::RELSEG_SIZE as usize);
for (seg, blocks) in chunks.into_iter().enumerate() {
let mut segment_data: Vec<u8> = vec![];
for blknum in blocks {
let img = self.timeline.get_rel_page_at_lsn(tag, blknum, self.lsn)?;
segment_data.extend_from_slice(&img[..]);
}
add_file(seg, &segment_data)?;
}
Ok(())
}
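For context on `add_rel` above: the relation's blocks are split into `RELSEG_SIZE`-sized chunks with `Itertools::chunks`, one archive file per Postgres segment (stock Postgres uses 1 GiB segments, i.e. 131072 blocks of 8 KiB). A shrunk-down illustration (not from the diff) of the chunking:

use itertools::Itertools;

fn main() {
    let relseg_size = 4; // stand-in for pg_constants::RELSEG_SIZE, shrunk for readability
    let nblocks = 10;
    let chunks = (0..nblocks).chunks(relseg_size);
    for (seg, blocks) in chunks.into_iter().enumerate() {
        // prints: segment 0: [0, 1, 2, 3], segment 1: [4, 5, 6, 7], segment 2: [8, 9]
        println!("segment {seg}: {:?}", blocks.collect::<Vec<_>>());
    }
}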
//
// Generate SLRU segment files from repository.
//

View File

@@ -104,7 +104,7 @@ fn main() -> anyhow::Result<()> {
return Ok(());
}
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
let workdir = workdir
.canonicalize()
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
@@ -263,6 +263,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
// start profiler (if enabled)
let profiler_guard = profiling::init_profiler(conf);
pageserver::tenant_tasks::init_tenant_task_pool()?;
// initialize authentication for incoming connections
let auth = match &conf.auth_type {
AuthType::Trust | AuthType::MD5 => None,

View File

@@ -22,6 +22,49 @@ paths:
properties:
id:
type: integer
/v1/tenant/{tenant_id}:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
get:
description: Get tenant status
responses:
"200":
description: Currently returns a flag indicating whether the tenant has in-progress timeline downloads
content:
application/json:
schema:
$ref: "#/components/schemas/TenantInfo"
"400":
description: Error when no tenant id is found in the path
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline:
parameters:
- name: tenant_id
@@ -70,6 +113,7 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}:
parameters:
- name: tenant_id
@@ -84,13 +128,14 @@ paths:
schema:
type: string
format: hex
- name: include-non-incremental-logical-size
in: query
schema:
type: string
description: Controls calculation of current_logical_size_non_incremental
get:
description: Get info about the timeline
parameters:
- name: include-non-incremental-logical-size
in: query
schema:
type: string
description: Controls calculation of current_logical_size_non_incremental
responses:
"200":
description: TimelineInfo
@@ -122,6 +167,35 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
delete:
description: "Attempts to delete specified timeline. On 500 errors should be retried"
responses:
"200":
description: Ok
"400":
description: Error when no tenant id found in path or no timeline id
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
parameters:
@@ -171,7 +245,7 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/attach:
/v1/tenant/{tenant_id}/attach:
parameters:
- name: tenant_id
in: path
@@ -179,19 +253,13 @@ paths:
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Attach remote timeline
description: Schedules attach operation to happen in the background for given tenant
responses:
"200":
description: Timeline attaching scheduled
"202":
description: Tenant attaching scheduled
"400":
description: Error when no tenant id found in path or no timeline id
description: Error when no tenant id found in path parameters
content:
application/json:
schema:
@@ -215,7 +283,7 @@ paths:
schema:
$ref: "#/components/schemas/NotFoundError"
"409":
description: Timeline download is already in progress
description: Tenant download is already in progress
content:
application/json:
schema:
@@ -227,7 +295,6 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach:
parameters:
- name: tenant_id
@@ -243,10 +310,11 @@ paths:
type: string
format: hex
post:
description: Detach local timeline
description: Deprecated, use DELETE /v1/tenant/{tenant_id}/timeline/{timeline_id} instead
deprecated: true
responses:
"200":
description: Timeline detached
description: Ok
"400":
description: Error when no tenant id found in path or no timeline id
content:
@@ -272,6 +340,43 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/detach:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Detach local tenant
responses:
"200":
description: Tenant detached
"400":
description: Error when no tenant id found in path parameters
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/:
parameters:
@@ -467,12 +572,13 @@ components:
type: object
required:
- id
- state
properties:
id:
type: string
state:
type: string
has_in_progress_downloads:
type: boolean
TenantCreateInfo:
type: object
properties:
@@ -567,6 +673,7 @@ components:
type: integer
current_logical_size_non_incremental:
type: integer
WalReceiverEntry:
type: object
required:

View File

@@ -14,6 +14,7 @@ use crate::repository::Repository;
use crate::storage_sync;
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
use crate::tenant_config::TenantConfOpt;
use crate::tenant_mgr::TenantInfo;
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
use crate::{config::PageServerConf, tenant_mgr, timelines};
use utils::{
@@ -209,9 +210,9 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
.await;
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
return Err(ApiError::NotFound(
"Timeline is not found neither locally nor remotely".to_string(),
));
return Err(ApiError::NotFound(format!(
"Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
)));
}
let timeline_info = TimelineInfo {
@@ -241,123 +242,157 @@ async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Bod
json_response(StatusCode::OK, &wal_receiver_entry)
}
async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
// TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
info!(
"Handling timeline {} attach for tenant: {}",
timeline_id, tenant_id,
);
info!("Handling tenant attach {}", tenant_id,);
tokio::task::spawn_blocking(move || {
if tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id).is_ok() {
// TODO: maybe answer with 309 Not Modified here?
anyhow::bail!("Timeline is already present locally")
if tenant_mgr::get_tenant_state(tenant_id).is_some() {
anyhow::bail!("Tenant is already present locally")
};
Ok(())
})
.await
.map_err(ApiError::from_err)??;
let sync_id = ZTenantTimelineId {
tenant_id,
timeline_id,
};
let state = get_state(&request);
let remote_index = &state.remote_index;
let mut index_accessor = remote_index.write().await;
if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
if remote_timeline.awaits_download {
if let Some(tenant_entry) = index_accessor.tenant_entry_mut(&tenant_id) {
if tenant_entry.has_in_progress_downloads() {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
"Tenant download is already in progress".to_string(),
));
}
remote_timeline.awaits_download = true;
storage_sync::schedule_layer_download(tenant_id, timeline_id);
return json_response(StatusCode::ACCEPTED, ());
} else {
// no timeline in the index, release the lock to make the potentially lengthy download opetation
drop(index_accessor);
}
let new_timeline = match try_download_index_part_data(state, sync_id).await {
Ok(Some(mut new_timeline)) => {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
new_timeline.awaits_download = true;
new_timeline
for (timeline_id, remote_timeline) in tenant_entry.iter_mut() {
storage_sync::schedule_layer_download(tenant_id, *timeline_id);
remote_timeline.awaits_download = true;
}
Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
return json_response(StatusCode::ACCEPTED, ());
}
// no tenant in the index; release the lock before the potentially lengthy download operation
drop(index_accessor);
// download index parts for every tenant timeline
let remote_timelines = match gather_tenant_timelines_index_parts(state, tenant_id).await {
Ok(Some(remote_timelines)) => remote_timelines,
Ok(None) => return Err(ApiError::NotFound("Unknown remote tenant".to_string())),
Err(e) => {
error!("Failed to retrieve remote timeline data: {:?}", e);
error!("Failed to retrieve remote tenant data: {:?}", e);
return Err(ApiError::NotFound(
"Failed to retrieve remote timeline".to_string(),
"Failed to retrieve remote tenant".to_string(),
));
}
};
// recheck that download is not in progress because
// we've released the lock to avoid holding it during the download
let mut index_accessor = remote_index.write().await;
match index_accessor.timeline_entry_mut(&sync_id) {
Some(remote_timeline) => {
if remote_timeline.awaits_download {
let tenant_entry = match index_accessor.tenant_entry_mut(&tenant_id) {
Some(tenant_entry) => {
if tenant_entry.has_in_progress_downloads() {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
"Tenant download is already in progress".to_string(),
));
}
remote_timeline.awaits_download = true;
tenant_entry
}
None => index_accessor.add_timeline_entry(sync_id, new_timeline),
None => index_accessor.add_tenant_entry(tenant_id),
};
// populate remote index with the data from index part and create directories on the local filesystem
for (timeline_id, mut remote_timeline) in remote_timelines {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
remote_timeline.awaits_download = true;
tenant_entry.insert(timeline_id, remote_timeline);
// schedule actual download
storage_sync::schedule_layer_download(tenant_id, timeline_id);
}
storage_sync::schedule_layer_download(tenant_id, timeline_id);
json_response(StatusCode::ACCEPTED, ())
}
async fn try_download_index_part_data(
/// Note: this is expensive from an S3 access perspective;
/// for details see the comment on `storage_sync::gather_tenant_timelines_index_parts`
async fn gather_tenant_timelines_index_parts(
state: &State,
sync_id: ZTenantTimelineId,
) -> anyhow::Result<Option<RemoteTimeline>> {
let index_part = match state.remote_storage.as_ref() {
tenant_id: ZTenantId,
) -> anyhow::Result<Option<Vec<(ZTimelineId, RemoteTimeline)>>> {
let index_parts = match state.remote_storage.as_ref() {
Some(GenericRemoteStorage::Local(local_storage)) => {
storage_sync::download_index_part(state.conf, local_storage, sync_id).await
storage_sync::gather_tenant_timelines_index_parts(state.conf, local_storage, tenant_id)
.await
}
// FIXME here s3 storage contains its own limits, that are separate from sync storage thread ones
// because it is a different instance. We can move this limit to some global static
// or use one instance everywhere.
Some(GenericRemoteStorage::S3(s3_storage)) => {
storage_sync::download_index_part(state.conf, s3_storage, sync_id).await
storage_sync::gather_tenant_timelines_index_parts(state.conf, s3_storage, tenant_id)
.await
}
None => return Ok(None),
}
.with_context(|| format!("Failed to download index part for timeline {sync_id}"))?;
.with_context(|| format!("Failed to download index parts for tenant {tenant_id}"))?;
let timeline_path = state
.conf
.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
RemoteTimeline::from_index_part(&timeline_path, index_part)
.map(Some)
.with_context(|| {
format!("Failed to convert index part into remote timeline for timeline {sync_id}")
})
let mut remote_timelines = Vec::with_capacity(index_parts.len());
for (timeline_id, index_part) in index_parts {
let timeline_path = state.conf.timeline_path(&timeline_id, &tenant_id);
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
.with_context(|| {
format!("Failed to convert index part into remote timeline for timeline {tenant_id}/{timeline_id}")
})?;
remote_timelines.push((timeline_id, remote_timeline));
}
Ok(Some(remote_timelines))
}
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
let state = get_state(&request);
tokio::task::spawn_blocking(move || {
let _enter =
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
.entered();
let state = get_state(&request);
tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
tenant_mgr::delete_timeline(tenant_id, timeline_id)
})
.await
.map_err(ApiError::from_err)??;
let mut remote_index = state.remote_index.write().await;
remote_index.remove_timeline_entry(ZTenantTimelineId {
tenant_id,
timeline_id,
});
json_response(StatusCode::OK, ())
}
async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
let conf = state.conf;
tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
tenant_mgr::detach_tenant(conf, tenant_id)
})
.await
.map_err(ApiError::from_err)??;
let mut remote_index = state.remote_index.write().await;
remote_index.remove_tenant_entry(&tenant_id);
json_response(StatusCode::OK, ())
}
@@ -365,9 +400,13 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
// check for management permission
check_permission(&request, None)?;
let state = get_state(&request);
// clone to avoid holding the lock while awaiting for blocking task
let remote_index = state.remote_index.read().await.clone();
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_list").entered();
crate::tenant_mgr::list_tenants()
crate::tenant_mgr::list_tenants(&remote_index)
})
.await
.map_err(ApiError::from_err)?;
@@ -375,6 +414,34 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
json_response(StatusCode::OK, response_data)
}
async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
// if tenant is in progress of downloading it can be absent in global tenant map
let tenant_state = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant_state(tenant_id))
.await
.map_err(ApiError::from_err)?;
let state = get_state(&request);
let remote_index = &state.remote_index;
let index_accessor = remote_index.read().await;
let has_in_progress_downloads = index_accessor
.tenant_entry(&tenant_id)
.ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))?
.has_in_progress_downloads();
json_response(
StatusCode::OK,
TenantInfo {
id: tenant_id,
state: tenant_state,
has_in_progress_downloads: Some(has_in_progress_downloads),
},
)
}
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
// check for management permission
check_permission(&request, None)?;
@@ -520,24 +587,28 @@ pub fn make_router(
.get("/v1/status", status_handler)
.get("/v1/tenant", tenant_list_handler)
.post("/v1/tenant", tenant_create_handler)
.get("/v1/tenant/:tenant_id", tenant_status)
.put("/v1/tenant/config", tenant_config_handler)
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_detail_handler,
)
.delete(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_delete_handler,
)
// for backward compatibility
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
timeline_delete_handler,
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
wal_receiver_get_handler,
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/attach",
timeline_attach_handler,
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
timeline_detach_handler,
)
.any(handler_404))
}

View File

@@ -2,7 +2,6 @@
//! Import data and WAL from a PostgreSQL data directory and WAL segments into
//! a zenith Timeline.
//!
use std::fs;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
@@ -10,16 +9,18 @@ use std::path::{Path, PathBuf};
use anyhow::{bail, ensure, Context, Result};
use bytes::Bytes;
use tracing::*;
use walkdir::WalkDir;
use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::repository::Repository;
use crate::repository::Timeline;
use crate::walingest::WalIngest;
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::*;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::Oid;
use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
use postgres_ffi::{Oid, TransactionId};
use utils::lsn::Lsn;
///
@@ -35,100 +36,30 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
) -> Result<()> {
let mut pg_control: Option<ControlFileData> = None;
// TODO this should be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
// Then fishing out pg_control would be unnecessary
let mut modification = tline.begin_modification(lsn);
modification.init_empty()?;
// Scan 'global'
let mut relfiles: Vec<PathBuf> = Vec::new();
for direntry in fs::read_dir(path.join("global"))? {
let direntry = direntry?;
match direntry.file_name().to_str() {
None => continue,
// Import all but pg_wal
let all_but_wal = WalkDir::new(path)
.into_iter()
.filter_entry(|entry| !entry.path().ends_with("pg_wal"));
for entry in all_but_wal {
let entry = entry?;
let metadata = entry.metadata().expect("error getting dir entry metadata");
if metadata.is_file() {
let absolute_path = entry.path();
let relative_path = absolute_path.strip_prefix(path)?;
Some("pg_control") => {
pg_control = Some(import_control_file(&mut modification, &direntry.path())?);
let file = File::open(absolute_path)?;
let len = metadata.len() as usize;
if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
pg_control = Some(control_file);
}
Some("pg_filenode.map") => {
import_relmap_file(
&mut modification,
pg_constants::GLOBALTABLESPACE_OID,
0,
&direntry.path(),
)?;
}
// Load any relation files into the page server (but only after the other files)
_ => relfiles.push(direntry.path()),
modification.flush()?;
}
}
for relfile in relfiles {
import_relfile(
&mut modification,
&relfile,
pg_constants::GLOBALTABLESPACE_OID,
0,
)?;
}
// Scan 'base'. It contains database dirs, the database OID is the filename.
// E.g. 'base/12345', where 12345 is the database OID.
for direntry in fs::read_dir(path.join("base"))? {
let direntry = direntry?;
//skip all temporary files
if direntry.file_name().to_string_lossy() == "pgsql_tmp" {
continue;
}
let dboid = direntry.file_name().to_string_lossy().parse::<u32>()?;
let mut relfiles: Vec<PathBuf> = Vec::new();
for direntry in fs::read_dir(direntry.path())? {
let direntry = direntry?;
match direntry.file_name().to_str() {
None => continue,
Some("PG_VERSION") => {
//modification.put_dbdir_creation(pg_constants::DEFAULTTABLESPACE_OID, dboid)?;
}
Some("pg_filenode.map") => import_relmap_file(
&mut modification,
pg_constants::DEFAULTTABLESPACE_OID,
dboid,
&direntry.path(),
)?,
// Load any relation files into the page server
_ => relfiles.push(direntry.path()),
}
}
for relfile in relfiles {
import_relfile(
&mut modification,
&relfile,
pg_constants::DEFAULTTABLESPACE_OID,
dboid,
)?;
}
}
for entry in fs::read_dir(path.join("pg_xact"))? {
let entry = entry?;
import_slru_file(&mut modification, SlruKind::Clog, &entry.path())?;
}
for entry in fs::read_dir(path.join("pg_multixact").join("members"))? {
let entry = entry?;
import_slru_file(&mut modification, SlruKind::MultiXactMembers, &entry.path())?;
}
for entry in fs::read_dir(path.join("pg_multixact").join("offsets"))? {
let entry = entry?;
import_slru_file(&mut modification, SlruKind::MultiXactOffsets, &entry.path())?;
}
for entry in fs::read_dir(path.join("pg_twophase"))? {
let entry = entry?;
let xid = u32::from_str_radix(&entry.path().to_string_lossy(), 16)?;
import_twophase_file(&mut modification, xid, &entry.path())?;
}
// TODO: Scan pg_tblspc
// We're done importing all the data files.
modification.commit()?;
@@ -158,31 +89,30 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
}
// subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
fn import_relfile<R: Repository>(
fn import_rel<R: Repository, Reader: Read>(
modification: &mut DatadirModification<R>,
path: &Path,
spcoid: Oid,
dboid: Oid,
mut reader: Reader,
len: usize,
) -> anyhow::Result<()> {
// Does it look like a relation file?
trace!("importing rel file {}", path.display());
let (relnode, forknum, segno) = parse_relfilename(&path.file_name().unwrap().to_string_lossy())
.map_err(|e| {
warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
e
})?;
let filename = &path
.file_name()
.expect("missing rel filename")
.to_string_lossy();
let (relnode, forknum, segno) = parse_relfilename(filename).map_err(|e| {
warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
e
})?;
let mut file = File::open(path)?;
let mut buf: [u8; 8192] = [0u8; 8192];
let len = file.metadata().unwrap().len();
ensure!(len % pg_constants::BLCKSZ as u64 == 0);
let nblocks = len / pg_constants::BLCKSZ as u64;
if segno != 0 {
todo!();
}
ensure!(len % pg_constants::BLCKSZ as usize == 0);
let nblocks = len / pg_constants::BLCKSZ as usize;
let rel = RelTag {
spcnode: spcoid,
@@ -190,11 +120,22 @@ fn import_relfile<R: Repository>(
relnode,
forknum,
};
modification.put_rel_creation(rel, nblocks as u32)?;
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
// Call put_rel_creation for every segment of the relation,
// because there is no guarantee about the order in which we are processing segments.
// ignore "relation already exists" error
if let Err(e) = modification.put_rel_creation(rel, nblocks as u32) {
if e.to_string().contains("already exists") {
debug!("relation {} already exists. we must be extending it", rel);
} else {
return Err(e);
}
}
loop {
let r = file.read_exact(&mut buf);
let r = reader.read_exact(&mut buf);
match r {
Ok(_) => {
modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?;
@@ -204,7 +145,9 @@ fn import_relfile<R: Repository>(
Err(err) => match err.kind() {
std::io::ErrorKind::UnexpectedEof => {
// reached EOF. That's expected.
ensure!(blknum == nblocks as u32, "unexpected EOF");
let relative_blknum =
blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
ensure!(relative_blknum == nblocks as u32, "unexpected EOF");
break;
}
_ => {
@@ -215,96 +158,43 @@ fn import_relfile<R: Repository>(
blknum += 1;
}
// Update relation size
//
// If we process rel segments out of order,
// put_rel_extend will skip the update.
modification.put_rel_extend(rel, blknum)?;
Ok(())
}
/// Import a relmapper (pg_filenode.map) file into the repository
fn import_relmap_file<R: Repository>(
modification: &mut DatadirModification<R>,
spcnode: Oid,
dbnode: Oid,
path: &Path,
) -> Result<()> {
let mut file = File::open(path)?;
let mut buffer = Vec::new();
// read the whole file
file.read_to_end(&mut buffer)?;
trace!("importing relmap file {}", path.display());
modification.put_relmap_file(spcnode, dbnode, Bytes::copy_from_slice(&buffer[..]))?;
Ok(())
}
/// Import a twophase state file (pg_twophase/<xid>) into the repository
fn import_twophase_file<R: Repository>(
modification: &mut DatadirModification<R>,
xid: TransactionId,
path: &Path,
) -> Result<()> {
let mut file = File::open(path)?;
let mut buffer = Vec::new();
// read the whole file
file.read_to_end(&mut buffer)?;
trace!("importing non-rel file {}", path.display());
modification.put_twophase_file(xid, Bytes::copy_from_slice(&buffer[..]))?;
Ok(())
}
///
/// Import pg_control file into the repository.
///
/// The control file is imported as is, but we also extract the checkpoint record
/// from it and store it separated.
fn import_control_file<R: Repository>(
modification: &mut DatadirModification<R>,
path: &Path,
) -> Result<ControlFileData> {
let mut file = File::open(path)?;
let mut buffer = Vec::new();
// read the whole file
file.read_to_end(&mut buffer)?;
trace!("importing control file {}", path.display());
// Import it as ControlFile
modification.put_control_file(Bytes::copy_from_slice(&buffer[..]))?;
// Extract the checkpoint record and import it separately.
let pg_control = ControlFileData::decode(&buffer)?;
let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
modification.put_checkpoint(checkpoint_bytes)?;
Ok(pg_control)
}
///
/// Import an SLRU segment file
///
fn import_slru_file<R: Repository>(
fn import_slru<R: Repository, Reader: Read>(
modification: &mut DatadirModification<R>,
slru: SlruKind,
path: &Path,
mut reader: Reader,
len: usize,
) -> Result<()> {
trace!("importing slru file {}", path.display());
let mut file = File::open(path)?;
let mut buf: [u8; 8192] = [0u8; 8192];
let segno = u32::from_str_radix(&path.file_name().unwrap().to_string_lossy(), 16)?;
let filename = &path
.file_name()
.expect("missing slru filename")
.to_string_lossy();
let segno = u32::from_str_radix(filename, 16)?;
let len = file.metadata().unwrap().len();
ensure!(len % pg_constants::BLCKSZ as u64 == 0); // we assume SLRU block size is the same as BLCKSZ
let nblocks = len / pg_constants::BLCKSZ as u64;
ensure!(len % pg_constants::BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
let nblocks = len / pg_constants::BLCKSZ as usize;
ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as u64);
ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);
modification.put_slru_segment_creation(slru, segno, nblocks as u32)?;
let mut rpageno = 0;
loop {
let r = file.read_exact(&mut buf);
let r = reader.read_exact(&mut buf);
match r {
Ok(_) => {
modification.put_slru_page_image(
@@ -396,10 +286,272 @@ fn import_wal<R: Repository>(
}
if last_lsn != startpoint {
debug!("reached end of WAL at {}", last_lsn);
info!("reached end of WAL at {}", last_lsn);
} else {
info!("no WAL to import at {}", last_lsn);
}
Ok(())
}
pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
tline: &mut DatadirTimeline<R>,
reader: Reader,
base_lsn: Lsn,
) -> Result<()> {
info!("importing base at {}", base_lsn);
let mut modification = tline.begin_modification(base_lsn);
modification.init_empty()?;
let mut pg_control: Option<ControlFileData> = None;
// Import base
for base_tar_entry in tar::Archive::new(reader).entries()? {
let entry = base_tar_entry?;
let header = entry.header();
let len = header.entry_size()? as usize;
let file_path = header.path()?.into_owned();
match header.entry_type() {
tar::EntryType::Regular => {
if let Some(res) = import_file(&mut modification, file_path.as_ref(), entry, len)? {
// We found the pg_control file.
pg_control = Some(res);
}
modification.flush()?;
}
tar::EntryType::Directory => {
debug!("directory {:?}", file_path);
}
_ => {
panic!("tar::EntryType::?? {}", file_path.display());
}
}
}
// sanity check: ensure that pg_control is loaded
let _pg_control = pg_control.context("pg_control file not found")?;
modification.commit()?;
Ok(())
}
pub fn import_wal_from_tar<R: Repository, Reader: Read>(
tline: &mut DatadirTimeline<R>,
reader: Reader,
start_lsn: Lsn,
end_lsn: Lsn,
) -> Result<()> {
// Set up walingest mutable state
let mut waldecoder = WalStreamDecoder::new(start_lsn);
let mut segno = start_lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
let mut offset = start_lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
let mut last_lsn = start_lsn;
let mut walingest = WalIngest::new(tline, start_lsn)?;
// Ingest wal until end_lsn
info!("importing wal until {}", end_lsn);
let mut pg_wal_tar = tar::Archive::new(reader);
let mut pg_wal_entries_iter = pg_wal_tar.entries()?;
while last_lsn <= end_lsn {
let bytes = {
let entry = pg_wal_entries_iter.next().expect("expected more wal")?;
let header = entry.header();
let file_path = header.path()?.into_owned();
match header.entry_type() {
tar::EntryType::Regular => {
// FIXME: assume postgresql tli 1 for now
let expected_filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
let file_name = file_path
.file_name()
.expect("missing wal filename")
.to_string_lossy();
ensure!(expected_filename == file_name);
debug!("processing wal file {:?}", file_path);
read_all_bytes(entry)?
}
tar::EntryType::Directory => {
debug!("directory {:?}", file_path);
continue;
}
_ => {
panic!("tar::EntryType::?? {}", file_path.display());
}
}
};
waldecoder.feed_bytes(&bytes[offset..]);
while last_lsn <= end_lsn {
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
walingest.ingest_record(tline, recdata, lsn)?;
last_lsn = lsn;
debug!("imported record at {} (end {})", lsn, end_lsn);
}
}
debug!("imported records up to {}", last_lsn);
segno += 1;
offset = 0;
}
if last_lsn != start_lsn {
info!("reached end of WAL at {}", last_lsn);
} else {
info!("there was no WAL to import at {}", last_lsn);
}
// Log any extra unused files
for e in &mut pg_wal_entries_iter {
let entry = e?;
let header = entry.header();
let file_path = header.path()?.into_owned();
info!("skipping {:?}", file_path);
}
Ok(())
}
pub fn import_file<R: Repository, Reader: Read>(
modification: &mut DatadirModification<R>,
file_path: &Path,
reader: Reader,
len: usize,
) -> Result<Option<ControlFileData>> {
debug!("looking at {:?}", file_path);
if file_path.starts_with("global") {
let spcnode = pg_constants::GLOBALTABLESPACE_OID;
let dbnode = 0;
match file_path
.file_name()
.expect("missing filename")
.to_string_lossy()
.as_ref()
{
"pg_control" => {
let bytes = read_all_bytes(reader)?;
// Extract the checkpoint record and import it separately.
let pg_control = ControlFileData::decode(&bytes[..])?;
let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
modification.put_checkpoint(checkpoint_bytes)?;
debug!("imported control file");
// Import it as ControlFile
modification.put_control_file(bytes)?;
return Ok(Some(pg_control));
}
"pg_filenode.map" => {
let bytes = read_all_bytes(reader)?;
modification.put_relmap_file(spcnode, dbnode, bytes)?;
debug!("imported relmap file")
}
"PG_VERSION" => {
debug!("ignored");
}
_ => {
import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
debug!("imported rel creation");
}
}
} else if file_path.starts_with("base") {
let spcnode = pg_constants::DEFAULTTABLESPACE_OID;
let dbnode: u32 = file_path
.iter()
.nth(1)
.expect("invalid file path, expected dbnode")
.to_string_lossy()
.parse()?;
match file_path
.file_name()
.expect("missing base filename")
.to_string_lossy()
.as_ref()
{
"pg_filenode.map" => {
let bytes = read_all_bytes(reader)?;
modification.put_relmap_file(spcnode, dbnode, bytes)?;
debug!("imported relmap file")
}
"PG_VERSION" => {
debug!("ignored");
}
_ => {
import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
debug!("imported rel creation");
}
}
} else if file_path.starts_with("pg_xact") {
let slru = SlruKind::Clog;
import_slru(modification, slru, file_path, reader, len)?;
debug!("imported clog slru");
} else if file_path.starts_with("pg_multixact/offsets") {
let slru = SlruKind::MultiXactOffsets;
import_slru(modification, slru, file_path, reader, len)?;
debug!("imported multixact offsets slru");
} else if file_path.starts_with("pg_multixact/members") {
let slru = SlruKind::MultiXactMembers;
import_slru(modification, slru, file_path, reader, len)?;
debug!("imported multixact members slru");
} else if file_path.starts_with("pg_twophase") {
let file_name = &file_path
.file_name()
.expect("missing twophase filename")
.to_string_lossy();
let xid = u32::from_str_radix(file_name, 16)?;
let bytes = read_all_bytes(reader)?;
modification.put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]))?;
debug!("imported twophase file");
} else if file_path.starts_with("pg_wal") {
debug!("found wal file in base section. ignore it");
} else if file_path.starts_with("zenith.signal") {
// Parse zenith signal file to set correct previous LSN
let bytes = read_all_bytes(reader)?;
// zenith.signal format is "PREV LSN: prev_lsn"
// TODO write serialization and deserialization in the same place.
let zenith_signal = std::str::from_utf8(&bytes)?.trim();
let prev_lsn = match zenith_signal {
"PREV LSN: none" => Lsn(0),
"PREV LSN: invalid" => Lsn(0),
other => {
let split = other.split(':').collect::<Vec<_>>();
split[1]
.trim()
.parse::<Lsn>()
.context("can't parse zenith.signal")?
}
};
// zenith.signal is not necessarily the last file that we handle,
// but it is ok to call `finish_write()`, because the final `modification.commit()`
// will update the lsn once more to the final one.
let writer = modification.tline.tline.writer();
writer.finish_write(prev_lsn);
debug!("imported zenith signal {}", prev_lsn);
} else if file_path.starts_with("pg_tblspc") {
// TODO Backups exported from neon won't have pg_tblspc, but we will need
// this to import arbitrary postgres databases.
bail!("Importing pg_tblspc is not implemented");
} else {
debug!("ignored");
}
Ok(None)
}
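
As an aside, the zenith.signal handling above boils down to a small text-parsing rule. A minimal standalone sketch, for illustration only: the real code parses the value into an `Lsn` in Postgres `X/Y` notation, while here the raw string is returned and `None` stands in for `Lsn(0)`.

```rust
// Sketch of the "PREV LSN: <value>" parsing rule used for zenith.signal.
fn parse_zenith_signal(contents: &str) -> Result<Option<&str>, String> {
    match contents.trim() {
        // Both "none" and "invalid" map to a zero previous LSN.
        "PREV LSN: none" | "PREV LSN: invalid" => Ok(None),
        other => {
            let value = other
                .split(':')
                .nth(1)
                .ok_or_else(|| "can't parse zenith.signal".to_string())?;
            Ok(Some(value.trim()))
        }
    }
}

// e.g. parse_zenith_signal("PREV LSN: 0/16B3720\n") returns Ok(Some("0/16B3720")).
```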
fn read_all_bytes<Reader: Read>(mut reader: Reader) -> Result<Bytes> {
let mut buf: Vec<u8> = vec![];
reader.read_to_end(&mut buf)?;
Ok(Bytes::copy_from_slice(&buf[..]))
}

View File

@@ -4,7 +4,7 @@
//! The functions here are responsible for locating the correct layer for the
//! get/put call, tracing timeline branching history as needed.
//!
//! The files are stored in the .zenith/tenants/<tenantid>/timelines/<timelineid>
//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
//! directory. See layered_repository/README for how the files are managed.
//! In addition to the layer files, there is a metadata file in the same
//! directory that contains information about the timeline, in particular its
@@ -34,13 +34,11 @@ use std::time::{Duration, Instant, SystemTime};
use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME};
use crate::config::PageServerConf;
use crate::keyspace::KeySpace;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::storage_sync::index::RemoteIndex;
use crate::tenant_config::{TenantConf, TenantConfOpt};
use crate::repository::{
GcResult, Repository, RepositoryTimeline, Timeline, TimelineSyncStatusUpdate, TimelineWriter,
};
use crate::repository::{GcResult, Repository, RepositoryTimeline, Timeline, TimelineWriter};
use crate::repository::{Key, Value};
use crate::tenant_mgr;
use crate::thread_mgr;
@@ -148,7 +146,7 @@ lazy_static! {
.expect("failed to define a metric");
}
/// Parts of the `.zenith/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
/// Parts of the `.neon/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
///
@@ -158,6 +156,18 @@ pub struct LayeredRepository {
// Global pageserver config parameters
pub conf: &'static PageServerConf,
// Allows us to gracefully cancel operations that edit the directory
// that backs this layered repository. Usage:
//
// Use `let _guard = file_lock.try_read()` while writing any files.
// Use `let _guard = file_lock.write().unwrap()` to wait for all writes to finish.
//
// TODO try_read this lock during checkpoint as well to prevent race
// between checkpoint and detach/delete.
// TODO try_read this lock for all gc/compaction operations, not just
// ones scheduled by the tenant task manager.
pub file_lock: RwLock<()>,
// Overridden tenant-specific config parameters.
// We keep the TenantConfOpt struct here to preserve the information
// about parameters that are not set.
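
The `file_lock` convention described in the comment above can be hard to picture from the field alone; here is a minimal sketch with simplified types (not the actual pageserver structs) showing how the shared and exclusive sides are meant to be used:

```rust
use std::sync::RwLock;

struct Repo {
    file_lock: RwLock<()>,
}

impl Repo {
    fn write_layer_file(&self) -> Result<(), &'static str> {
        // Writers take the shared side with try_read(): if a detach/delete
        // already holds the write lock, bail out instead of racing with
        // directory removal.
        let _guard = self
            .file_lock
            .try_read()
            .map_err(|_| "repository is being detached or deleted")?;
        // ... write files while holding the shared guard ...
        Ok(())
    }

    fn detach(&self) {
        // Blocks until every read guard (every in-flight file write) is
        // dropped, after which the directory can be removed safely.
        let _guard = self.file_lock.write().unwrap();
        // ... remove the timeline directory ...
    }
}
```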
@@ -220,43 +230,52 @@ impl Repository for LayeredRepository {
fn create_empty_timeline(
&self,
timelineid: ZTimelineId,
timeline_id: ZTimelineId,
initdb_lsn: Lsn,
) -> Result<Arc<LayeredTimeline>> {
let mut timelines = self.timelines.lock().unwrap();
let vacant_timeline_entry = match timelines.entry(timeline_id) {
Entry::Occupied(_) => bail!("Timeline already exists"),
Entry::Vacant(vacant_entry) => vacant_entry,
};
let timeline_path = self.conf.timeline_path(&timeline_id, &self.tenant_id);
if timeline_path.exists() {
bail!("Timeline directory already exists, but timeline is missing in repository map. This is a bug.")
}
// Create the timeline directory, and write initial metadata to file.
crashsafe_dir::create_dir_all(self.conf.timeline_path(&timelineid, &self.tenant_id))?;
crashsafe_dir::create_dir_all(timeline_path)?;
let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), initdb_lsn, initdb_lsn);
Self::save_metadata(self.conf, timelineid, self.tenant_id, &metadata, true)?;
Self::save_metadata(self.conf, timeline_id, self.tenant_id, &metadata, true)?;
let timeline = LayeredTimeline::new(
self.conf,
Arc::clone(&self.tenant_conf),
metadata,
None,
timelineid,
timeline_id,
self.tenant_id,
Arc::clone(&self.walredo_mgr),
self.upload_layers,
);
timeline.layers.write().unwrap().next_open_layer_at = Some(initdb_lsn);
// Insert if not exists
let timeline = Arc::new(timeline);
let r = timelines.insert(
timelineid,
LayeredTimelineEntry::Loaded(Arc::clone(&timeline)),
);
ensure!(
r.is_none(),
"assertion failure, inserted duplicate timeline"
);
vacant_timeline_entry.insert(LayeredTimelineEntry::Loaded(Arc::clone(&timeline)));
Ok(timeline)
}
/// Branch a timeline
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()> {
fn branch_timeline(
&self,
src: ZTimelineId,
dst: ZTimelineId,
start_lsn: Option<Lsn>,
) -> Result<()> {
// We need to hold this lock to prevent GC from starting at the same time. GC scans the directory to learn
// about timelines, so otherwise a race condition is possible, where we create new timeline and GC
// concurrently removes data that is needed by the new timeline.
@@ -269,6 +288,14 @@ impl Repository for LayeredRepository {
.context("failed to load timeline for branching")?
.ok_or_else(|| anyhow::anyhow!("unknown timeline id: {}", &src))?;
let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();
// If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN
let start_lsn = start_lsn.unwrap_or_else(|| {
let lsn = src_timeline.get_last_record_lsn();
info!("branching timeline {dst} from timeline {src} at last record LSN: {lsn}");
lsn
});
src_timeline
.check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
.context("invalid branch start lsn")?;
@@ -315,19 +342,19 @@ impl Repository for LayeredRepository {
/// metrics collection.
fn gc_iteration(
&self,
target_timelineid: Option<ZTimelineId>,
target_timeline_id: Option<ZTimelineId>,
horizon: u64,
pitr: Duration,
checkpoint_before_gc: bool,
) -> Result<GcResult> {
let timeline_str = target_timelineid
let timeline_str = target_timeline_id
.map(|x| x.to_string())
.unwrap_or_else(|| "-".to_string());
STORAGE_TIME
.with_label_values(&["gc", &self.tenant_id.to_string(), &timeline_str])
.observe_closure_duration(|| {
self.gc_iteration_internal(target_timelineid, horizon, pitr, checkpoint_before_gc)
self.gc_iteration_internal(target_timeline_id, horizon, pitr, checkpoint_before_gc)
})
}
@@ -394,50 +421,60 @@ impl Repository for LayeredRepository {
Ok(())
}
fn detach_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> {
fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> {
// In order to be retriable, detach needs to be idempotent
// (or at least idempotent enough that each call makes progress).
let mut timelines = self.timelines.lock().unwrap();
// check no child timelines, because detach will remove files, which will break child branches
// FIXME this can still be violated because we do not guarantee
// that all ancestors are downloaded/attached to the same pageserver
let num_children = timelines
// Ensure that there are no child timelines **attached to that pageserver**,
// because detach removes files, which will break child branches
let children_exist = timelines
.iter()
.filter(|(_, entry)| entry.ancestor_timeline_id() == Some(timeline_id))
.count();
.any(|(_, entry)| entry.ancestor_timeline_id() == Some(timeline_id));
ensure!(
num_children == 0,
!children_exist,
"Cannot detach timeline which has child timelines"
);
let timeline_entry = match timelines.entry(timeline_id) {
Entry::Occupied(e) => e,
Entry::Vacant(_) => bail!("timeline not found"),
};
// try to acquire gc and compaction locks to prevent errors from missing files
let _gc_guard = self
.gc_cs
.try_lock()
.map_err(|e| anyhow::anyhow!("cannot acquire gc lock {e}"))?;
let compaction_guard = timeline_entry.get().compaction_guard()?;
let local_timeline_directory = self.conf.timeline_path(&timeline_id, &self.tenant_id);
std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
format!(
"Failed to remove local timeline directory '{}'",
local_timeline_directory.display()
)
})?;
info!("detach removed files");
drop(compaction_guard);
timeline_entry.remove();
ensure!(
timelines.remove(&timeline_id).is_some(),
"Cannot detach timeline {timeline_id} that is not available locally"
);
Ok(())
}
fn apply_timeline_remote_sync_status_update(
&self,
timeline_id: ZTimelineId,
timeline_sync_status_update: TimelineSyncStatusUpdate,
) -> Result<()> {
debug!(
"apply_timeline_remote_sync_status_update timeline_id: {} update: {:?}",
timeline_id, timeline_sync_status_update
);
match timeline_sync_status_update {
TimelineSyncStatusUpdate::Downloaded => {
match self.timelines.lock().unwrap().entry(timeline_id) {
Entry::Occupied(_) => bail!("We completed a download for a timeline that already exists in repository. This is a bug."),
Entry::Vacant(entry) => {
// we need to get metadata of a timeline, another option is to pass it along with Downloaded status
let metadata = load_metadata(self.conf, timeline_id, self.tenant_id).context("failed to load local metadata")?;
// finally we make newly downloaded timeline visible to repository
entry.insert(LayeredTimelineEntry::Unloaded { id: timeline_id, metadata, })
},
};
}
}
fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()> {
debug!("attach timeline_id: {}", timeline_id,);
match self.timelines.lock().unwrap().entry(timeline_id) {
Entry::Occupied(_) => bail!("We completed a download for a timeline that already exists in repository. This is a bug."),
Entry::Vacant(entry) => {
// we need to get the metadata of the timeline; another option is to pass it along with the Downloaded status
let metadata = load_metadata(self.conf, timeline_id, self.tenant_id).context("failed to load local metadata")?;
// finally, make the newly downloaded timeline visible to the repository
entry.insert(LayeredTimelineEntry::Unloaded { id: timeline_id, metadata, })
},
};
Ok(())
}
@@ -487,6 +524,18 @@ impl LayeredTimelineEntry {
}
}
}
fn compaction_guard(&self) -> Result<Option<MutexGuard<()>>, anyhow::Error> {
match self {
LayeredTimelineEntry::Loaded(timeline) => timeline
.compaction_cs
.try_lock()
.map_err(|e| anyhow::anyhow!("cannot lock compaction critical section {e}"))
.map(Some),
LayeredTimelineEntry::Unloaded { .. } => Ok(None),
}
}
}
impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
@@ -685,6 +734,7 @@ impl LayeredRepository {
) -> LayeredRepository {
LayeredRepository {
tenant_id,
file_lock: RwLock::new(()),
conf,
tenant_conf: Arc::new(RwLock::new(tenant_conf)),
timelines: Mutex::new(HashMap::new()),
@@ -822,13 +872,13 @@ impl LayeredRepository {
// we do.
fn gc_iteration_internal(
&self,
target_timelineid: Option<ZTimelineId>,
target_timeline_id: Option<ZTimelineId>,
horizon: u64,
pitr: Duration,
checkpoint_before_gc: bool,
) -> Result<GcResult> {
let _span_guard =
info_span!("gc iteration", tenant = %self.tenant_id, timeline = ?target_timelineid)
info_span!("gc iteration", tenant = %self.tenant_id, timeline = ?target_timeline_id)
.entered();
let mut totals: GcResult = Default::default();
let now = Instant::now();
@@ -842,6 +892,12 @@ impl LayeredRepository {
let mut timeline_ids = Vec::new();
let mut timelines = self.timelines.lock().unwrap();
if let Some(target_timeline_id) = target_timeline_id.as_ref() {
if timelines.get(target_timeline_id).is_none() {
bail!("gc target timeline does not exist")
}
};
for (timeline_id, timeline_entry) in timelines.iter() {
timeline_ids.push(*timeline_id);
@@ -850,7 +906,7 @@ impl LayeredRepository {
// Somewhat related: https://github.com/zenithdb/zenith/issues/999
if let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id() {
// If target_timeline is specified, we only need to know branchpoints of its children
if let Some(timelineid) = target_timelineid {
if let Some(timelineid) = target_timeline_id {
if ancestor_timeline_id == &timelineid {
all_branchpoints
.insert((*ancestor_timeline_id, timeline_entry.ancestor_lsn()));
@@ -865,7 +921,7 @@ impl LayeredRepository {
// Ok, we now know all the branch points.
// Perform GC for each timeline.
for timelineid in timeline_ids.into_iter() {
for timeline_id in timeline_ids.into_iter() {
if thread_mgr::is_shutdown_requested() {
// We were requested to shut down. Stop and return with the progress we
// made.
@@ -874,12 +930,12 @@ impl LayeredRepository {
// Timeline is known to be local and loaded.
let timeline = self
.get_timeline_load_internal(timelineid, &mut *timelines)?
.get_timeline_load_internal(timeline_id, &mut *timelines)?
.expect("checked above that timeline is local and loaded");
// If target_timeline is specified, only GC it
if let Some(target_timelineid) = target_timelineid {
if timelineid != target_timelineid {
if let Some(target_timelineid) = target_timeline_id {
if timeline_id != target_timelineid {
continue;
}
}
@@ -888,8 +944,8 @@ impl LayeredRepository {
drop(timelines);
let branchpoints: Vec<Lsn> = all_branchpoints
.range((
Included((timelineid, Lsn(0))),
Included((timelineid, Lsn(u64::MAX))),
Included((timeline_id, Lsn(0))),
Included((timeline_id, Lsn(u64::MAX))),
))
.map(|&x| x.1)
.collect();
@@ -899,7 +955,7 @@ impl LayeredRepository {
// used in tests, so we want as deterministic results as possible.
if checkpoint_before_gc {
timeline.checkpoint(CheckpointConfig::Forced)?;
info!("timeline {} checkpoint_before_gc done", timelineid);
info!("timeline {} checkpoint_before_gc done", timeline_id);
}
timeline.update_gc_info(branchpoints, cutoff, pitr);
let result = timeline.gc()?;
@@ -1584,7 +1640,7 @@ impl LayeredTimeline {
Ok(layer)
}
fn put_value(&self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
//info!("PUT: key {} at {}", key, lsn);
let layer = self.get_layer_for_write(lsn)?;
layer.put_value(key, lsn, val)?;
@@ -1712,26 +1768,29 @@ impl LayeredTimeline {
/// Flush one frozen in-memory layer to disk, as a new delta layer.
fn flush_frozen_layer(&self, frozen_layer: Arc<InMemoryLayer>) -> Result<()> {
let new_delta = frozen_layer.write_to_disk()?;
let new_delta_path = new_delta.path();
let layer_paths_to_upload;
// Sync the new layer to disk.
//
// We must also fsync the timeline dir to ensure the directory entries for
// new layer files are durable
//
// TODO: If we're running inside 'flush_frozen_layers' and there are multiple
// files to flush, it might be better to first write them all, and then fsync
// them all in parallel.
par_fsync::par_fsync(&[
new_delta_path.clone(),
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
])?;
fail_point!("checkpoint-before-sync");
// As a special case, when we have just imported an image into the repository,
// we write out image layer files directly instead of an L0 delta layer.
// This is possible as long as *all* the data imported into the
// repository has the same LSN.
let lsn_range = frozen_layer.get_lsn_range();
if lsn_range.start == self.initdb_lsn && lsn_range.end == Lsn(self.initdb_lsn.0 + 1) {
let pgdir = tenant_mgr::get_local_timeline_with_load(self.tenant_id, self.timeline_id)?;
let (partitioning, _lsn) =
pgdir.repartition(self.initdb_lsn, self.get_compaction_target_size())?;
layer_paths_to_upload =
self.create_image_layers(&partitioning, self.initdb_lsn, true)?;
} else {
// normal case, write out a L0 delta layer file.
let delta_path = self.create_delta_layer(&frozen_layer)?;
layer_paths_to_upload = HashSet::from([delta_path]);
}
fail_point!("flush-frozen");
fail_point!("flush-frozen-before-sync");
// Finally, replace the frozen in-memory layer with the new on-disk layer
// The new on-disk layers are now in the layer map. We can remove the
// in-memory layer from the map now.
{
let mut layers = self.layers.write().unwrap();
let l = layers.frozen_layers.pop_front();
@@ -1741,19 +1800,27 @@ impl LayeredTimeline {
// layer to disk at the same time, that would not work.
assert!(Arc::ptr_eq(&l.unwrap(), &frozen_layer));
// Add the new delta layer to the LayerMap
layers.insert_historic(Arc::new(new_delta));
// release lock on 'layers'
}
fail_point!("checkpoint-after-sync");
// Update the metadata file, with new 'disk_consistent_lsn'
//
// TODO: This perhaps should be done in 'flush_frozen_layers', after flushing
// *all* the layers, to avoid fsyncing the file multiple times.
let disk_consistent_lsn = Lsn(frozen_layer.get_lsn_range().end.0 - 1);
fail_point!("checkpoint-after-sync");
let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);
self.update_disk_consistent_lsn(disk_consistent_lsn, layer_paths_to_upload)?;
Ok(())
}
/// Update metadata file
fn update_disk_consistent_lsn(
&self,
disk_consistent_lsn: Lsn,
layer_paths_to_upload: HashSet<PathBuf>,
) -> Result<()> {
// If we were able to advance 'disk_consistent_lsn', save it in the metadata file.
// After crash, we will restart WAL streaming and processing from that point.
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
@@ -1803,14 +1870,11 @@ impl LayeredTimeline {
false,
)?;
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
PERSISTENT_BYTES_WRITTEN.inc_by(new_delta_path.metadata()?.len());
if self.upload_layers.load(atomic::Ordering::Relaxed) {
storage_sync::schedule_layer_upload(
self.tenant_id,
self.timeline_id,
HashSet::from([new_delta_path]),
layer_paths_to_upload,
Some(metadata),
);
}
@@ -1822,6 +1886,37 @@ impl LayeredTimeline {
Ok(())
}
// Write out the given frozen in-memory layer as a new L0 delta file
fn create_delta_layer(&self, frozen_layer: &InMemoryLayer) -> Result<PathBuf> {
// Write it out
let new_delta = frozen_layer.write_to_disk()?;
let new_delta_path = new_delta.path();
// Sync it to disk.
//
// We must also fsync the timeline dir to ensure the directory entries for
// new layer files are durable
//
// TODO: If we're running inside 'flush_frozen_layers' and there are multiple
// files to flush, it might be better to first write them all, and then fsync
// them all in parallel.
par_fsync::par_fsync(&[
new_delta_path.clone(),
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
])?;
// Add it to the layer map
{
let mut layers = self.layers.write().unwrap();
layers.insert_historic(Arc::new(new_delta));
}
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
PERSISTENT_BYTES_WRITTEN.inc_by(new_delta_path.metadata()?.len());
Ok(new_delta_path)
}
pub fn compact(&self) -> Result<()> {
//
// High level strategy for compaction / image creation:
@@ -1865,29 +1960,23 @@ impl LayeredTimeline {
if let Ok(pgdir) =
tenant_mgr::get_local_timeline_with_load(self.tenant_id, self.timeline_id)
{
// 2. Create new image layers for partitions that have been modified
// "enough".
let (partitioning, lsn) = pgdir.repartition(
self.get_last_record_lsn(),
self.get_compaction_target_size(),
)?;
let timer = self.create_images_time_histo.start_timer();
// 2. Create new image layers for partitions that have been modified
// "enough".
let mut layer_paths_to_upload = HashSet::with_capacity(partitioning.parts.len());
for part in partitioning.parts.iter() {
if self.time_for_new_image_layer(part, lsn)? {
let new_path = self.create_image_layer(part, lsn)?;
layer_paths_to_upload.insert(new_path);
}
}
if self.upload_layers.load(atomic::Ordering::Relaxed) {
let layer_paths_to_upload = self.create_image_layers(&partitioning, lsn, false)?;
if !layer_paths_to_upload.is_empty()
&& self.upload_layers.load(atomic::Ordering::Relaxed)
{
storage_sync::schedule_layer_upload(
self.tenant_id,
self.timeline_id,
layer_paths_to_upload,
HashSet::from_iter(layer_paths_to_upload),
None,
);
}
timer.stop_and_record();
// 3. Compact
let timer = self.compact_time_histo.start_timer();
@@ -1912,15 +2001,28 @@ impl LayeredTimeline {
} else {
Lsn(0)
};
// Let's consider an example:
//
// delta layer with LSN range 71-81
// delta layer with LSN range 81-91
// delta layer with LSN range 91-101
// image layer at LSN 100
//
// If 'lsn' is still 100, i.e. no new WAL has been processed since the last image layer,
// there's no need to create a new one. We check this case explicitly, to avoid passing
// a bogus range to count_deltas below, with start > end. It's even possible that there
// are some delta layers *later* than current 'lsn', if more WAL was processed and flushed
// after we read last_record_lsn, which is passed here in the 'lsn' argument.
if img_lsn < lsn {
let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
debug!(
"range {}-{}, has {} deltas on this timeline",
img_range.start, img_range.end, num_deltas
);
if num_deltas >= self.get_image_creation_threshold() {
return Ok(true);
debug!(
"key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
img_range.start, img_range.end, num_deltas, img_lsn, lsn
);
if num_deltas >= self.get_image_creation_threshold() {
return Ok(true);
}
}
}
}
@@ -1928,21 +2030,40 @@ impl LayeredTimeline {
Ok(false)
}
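
Condensed, the decision explained in the comment inside `time_for_new_image_layer` above comes down to two conditions; a hedged sketch with plain integers standing in for `Lsn`:

```rust
// Only build a new image layer for a key range when new WAL exists since
// the last image (img_lsn < lsn) and enough delta layers cover that range.
fn time_for_new_image(img_lsn: u64, lsn: u64, num_deltas: usize, threshold: usize) -> bool {
    img_lsn < lsn && num_deltas >= threshold
}

// With the worked example above (image at LSN 100, lsn still 100):
// time_for_new_image(100, 100, 3, 3) == false, so no new image layer is created.
```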
fn create_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> anyhow::Result<PathBuf> {
let img_range =
partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
let mut image_layer_writer =
ImageLayerWriter::new(self.conf, self.timeline_id, self.tenant_id, &img_range, lsn)?;
fn create_image_layers(
&self,
partitioning: &KeyPartitioning,
lsn: Lsn,
force: bool,
) -> Result<HashSet<PathBuf>> {
let timer = self.create_images_time_histo.start_timer();
let mut image_layers: Vec<ImageLayer> = Vec::new();
let mut layer_paths_to_upload = HashSet::new();
for partition in partitioning.parts.iter() {
if force || self.time_for_new_image_layer(partition, lsn)? {
let img_range =
partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
let mut image_layer_writer = ImageLayerWriter::new(
self.conf,
self.timeline_id,
self.tenant_id,
&img_range,
lsn,
)?;
for range in &partition.ranges {
let mut key = range.start;
while key < range.end {
let img = self.get(key, lsn)?;
image_layer_writer.put_image(key, &img)?;
key = key.next();
for range in &partition.ranges {
let mut key = range.start;
while key < range.end {
let img = self.get(key, lsn)?;
image_layer_writer.put_image(key, &img)?;
key = key.next();
}
}
let image_layer = image_layer_writer.finish()?;
layer_paths_to_upload.insert(image_layer.path());
image_layers.push(image_layer);
}
}
let image_layer = image_layer_writer.finish()?;
// Sync the new layer to disk before adding it to the layer map, to make sure
// we don't garbage collect something based on the new layer, before it has
@@ -1953,19 +2074,18 @@ impl LayeredTimeline {
//
// Compaction creates multiple image layers. It would be better to create them all
// and fsync them all in parallel.
par_fsync::par_fsync(&[
image_layer.path(),
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
])?;
// FIXME: Do we need to do something to upload it to remote storage here?
let mut all_paths = Vec::from_iter(layer_paths_to_upload.clone());
all_paths.push(self.conf.timeline_path(&self.timeline_id, &self.tenant_id));
par_fsync::par_fsync(&all_paths)?;
let mut layers = self.layers.write().unwrap();
let new_path = image_layer.path();
layers.insert_historic(Arc::new(image_layer));
for l in image_layers {
layers.insert_historic(Arc::new(l));
}
drop(layers);
timer.stop_and_record();
Ok(new_path)
Ok(layer_paths_to_upload)
}
///
@@ -2212,6 +2332,9 @@ impl LayeredTimeline {
LsnForTimestamp::Past(lsn) => {
debug!("past({})", lsn);
}
LsnForTimestamp::NoData(lsn) => {
debug!("nodata({})", lsn);
}
}
debug!("pitr_cutoff_lsn = {:?}", pitr_cutoff_lsn)
}
@@ -2485,7 +2608,7 @@ impl Deref for LayeredTimelineWriter<'_> {
}
impl<'a> TimelineWriter<'_> for LayeredTimelineWriter<'a> {
fn put(&self, key: Key, lsn: Lsn, value: Value) -> Result<()> {
fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()> {
self.tl.put_value(key, lsn, value)
}
@@ -2627,7 +2750,7 @@ pub mod tests {
let TEST_KEY: Key = Key::from_hex("112222222233333333444444445500000001").unwrap();
let writer = tline.writer();
writer.put(TEST_KEY, Lsn(0x10), Value::Image(TEST_IMG("foo at 0x10")))?;
writer.put(TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
writer.finish_write(Lsn(0x10));
drop(writer);
@@ -2635,7 +2758,7 @@ pub mod tests {
tline.compact()?;
let writer = tline.writer();
writer.put(TEST_KEY, Lsn(0x20), Value::Image(TEST_IMG("foo at 0x20")))?;
writer.put(TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
writer.finish_write(Lsn(0x20));
drop(writer);
@@ -2643,7 +2766,7 @@ pub mod tests {
tline.compact()?;
let writer = tline.writer();
writer.put(TEST_KEY, Lsn(0x30), Value::Image(TEST_IMG("foo at 0x30")))?;
writer.put(TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))?;
writer.finish_write(Lsn(0x30));
drop(writer);
@@ -2651,7 +2774,7 @@ pub mod tests {
tline.compact()?;
let writer = tline.writer();
writer.put(TEST_KEY, Lsn(0x40), Value::Image(TEST_IMG("foo at 0x40")))?;
writer.put(TEST_KEY, Lsn(0x40), &Value::Image(TEST_IMG("foo at 0x40")))?;
writer.finish_write(Lsn(0x40));
drop(writer);
@@ -2689,7 +2812,7 @@ pub mod tests {
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
)?;
writer.finish_write(lsn);
drop(writer);
@@ -2735,7 +2858,7 @@ pub mod tests {
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
)?;
writer.finish_write(lsn);
updated[blknum] = lsn;
@@ -2753,7 +2876,7 @@ pub mod tests {
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
)?;
writer.finish_write(lsn);
drop(writer);
@@ -2805,7 +2928,7 @@ pub mod tests {
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
)?;
writer.finish_write(lsn);
updated[blknum] = lsn;
@@ -2817,7 +2940,7 @@ pub mod tests {
let mut tline_id = TIMELINE_ID;
for _ in 0..50 {
let new_tline_id = ZTimelineId::generate();
repo.branch_timeline(tline_id, new_tline_id, lsn)?;
repo.branch_timeline(tline_id, new_tline_id, Some(lsn))?;
tline = repo.get_timeline_load(new_tline_id)?;
tline_id = new_tline_id;
@@ -2829,7 +2952,7 @@ pub mod tests {
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
)?;
println!("updating {} at {}", blknum, lsn);
writer.finish_write(lsn);
@@ -2876,7 +2999,7 @@ pub mod tests {
#[allow(clippy::needless_range_loop)]
for idx in 0..NUM_TLINES {
let new_tline_id = ZTimelineId::generate();
repo.branch_timeline(tline_id, new_tline_id, lsn)?;
repo.branch_timeline(tline_id, new_tline_id, Some(lsn))?;
tline = repo.get_timeline_load(new_tline_id)?;
tline_id = new_tline_id;
@@ -2888,7 +3011,7 @@ pub mod tests {
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
&Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
)?;
println!("updating [{}][{}] at {}", idx, blknum, lsn);
writer.finish_write(lsn);

View File

@@ -123,7 +123,7 @@ The files are called "layer files". Each layer file covers a range of keys, and
a range of LSNs (or a single LSN, in case of image layers). You can think of it
as a rectangle in the two-dimensional key-LSN space. The layer files for each
timeline are stored in the timeline's subdirectory under
`.zenith/tenants/<tenantid>/timelines`.
`.neon/tenants/<tenantid>/timelines`.
There are two kind of layer files: images, and delta layers. An image file
contains a snapshot of all keys at a particular LSN, whereas a delta file
@@ -178,7 +178,7 @@ version, and how branching and GC works is still valid.
The full path of a delta file looks like this:
```
.zenith/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
.neon/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
```
For simplicity, the examples below use a simplified notation for the

View File

@@ -34,7 +34,7 @@ pub trait BlobCursor {
) -> Result<(), std::io::Error>;
}
impl<'a, R> BlobCursor for BlockCursor<R>
impl<R> BlobCursor for BlockCursor<R>
where
R: BlockReader,
{

View File

@@ -445,7 +445,10 @@ impl ImageLayerWriter {
},
);
info!("new image layer {}", path.display());
let mut file = VirtualFile::create(&path)?;
let mut file = VirtualFile::open_with_options(
&path,
std::fs::OpenOptions::new().write(true).create_new(true),
)?;
// make room for the header block
file.seek(SeekFrom::Start(PAGE_SZ as u64))?;
let blob_writer = WriteBlobWriter::new(file, PAGE_SZ as u64);

View File

@@ -267,13 +267,13 @@ impl InMemoryLayer {
/// Common subroutine of the public put_wal_record() and put_page_image() functions.
/// Adds the page version to the in-memory tree
pub fn put_value(&self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
pub fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
trace!("put_value key {} at {}/{}", key, self.timelineid, lsn);
let mut inner = self.inner.write().unwrap();
inner.assert_writeable();
let off = inner.file.write_blob(&Value::ser(&val)?)?;
let off = inner.file.write_blob(&Value::ser(val)?)?;
let vec_map = inner.index.entry(key).or_default();
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;

View File

@@ -13,7 +13,7 @@ pub mod repository;
pub mod storage_sync;
pub mod tenant_config;
pub mod tenant_mgr;
pub mod tenant_threads;
pub mod tenant_tasks;
pub mod thread_mgr;
pub mod timelines;
pub mod virtual_file;
@@ -24,7 +24,6 @@ pub mod walredo;
use lazy_static::lazy_static;
use tracing::info;
use utils::postgres_backend;
use crate::thread_mgr::ThreadKind;
use metrics::{register_int_gauge_vec, IntGaugeVec};
@@ -73,7 +72,6 @@ pub fn shutdown_pageserver(exit_code: i32) {
thread_mgr::shutdown_threads(Some(ThreadKind::LibpqEndpointListener), None, None);
// Shut down any page service threads.
postgres_backend::set_pgbackend_shutdown_requested();
thread_mgr::shutdown_threads(Some(ThreadKind::PageRequestHandler), None, None);
// Shut down all the tenants. This flushes everything to disk and kills

View File

@@ -20,7 +20,7 @@
//! assign a buffer for a page, you must hold the mapping lock and the lock on
//! the slot at the same time.
//!
//! Whenever you need to hold both locks simultenously, the slot lock must be
//! Whenever you need to hold both locks simultaneously, the slot lock must be
//! acquired first. This consistent ordering avoids deadlocks. To look up a page
//! in the cache, you would first look up the mapping, while holding the mapping
//! lock, and then lock the slot. You must release the mapping lock in between,

View File

@@ -13,7 +13,7 @@ use anyhow::{bail, ensure, Context, Result};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use lazy_static::lazy_static;
use regex::Regex;
use std::io;
use std::io::{self, Read};
use std::net::TcpListener;
use std::str;
use std::str::FromStr;
@@ -29,6 +29,8 @@ use utils::{
use crate::basebackup;
use crate::config::{PageServerConf, ProfilingConfig};
use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
use crate::layered_repository::LayeredRepository;
use crate::pgdatadir_mapping::{DatadirTimeline, LsnForTimestamp};
use crate::profiling::profpoint_start;
use crate::reltag::RelTag;
@@ -200,6 +202,96 @@ impl PagestreamBeMessage {
}
}
/// Implements Read for the server side of CopyIn
struct CopyInReader<'a> {
pgb: &'a mut PostgresBackend,
/// Overflow buffer for bytes sent in CopyData messages
/// that the reader (caller of read) hasn't asked for yet.
/// TODO use BytesMut?
buf: Vec<u8>,
/// Bytes before `buf_begin` are considered dropped.
/// This allows us to implement O(1) pop_front on Vec<u8>.
/// The Vec won't grow large because we only add to it
/// when it's empty.
buf_begin: usize,
}
impl<'a> CopyInReader<'a> {
// NOTE: pgb should be in copy in state already
fn new(pgb: &'a mut PostgresBackend) -> Self {
Self {
pgb,
buf: Vec::<_>::new(),
buf_begin: 0,
}
}
}
impl<'a> Drop for CopyInReader<'a> {
fn drop(&mut self) {
// Finalize copy protocol so that self.pgb can be reused
// TODO instead, maybe take ownership of pgb and give it back at the end
let mut buf: Vec<u8> = vec![];
let _ = self.read_to_end(&mut buf);
}
}
impl<'a> Read for CopyInReader<'a> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
while !thread_mgr::is_shutdown_requested() {
// Return from buffer if nonempty
if self.buf_begin < self.buf.len() {
let bytes_to_read = std::cmp::min(buf.len(), self.buf.len() - self.buf_begin);
buf[..bytes_to_read].copy_from_slice(&self.buf[self.buf_begin..][..bytes_to_read]);
self.buf_begin += bytes_to_read;
return Ok(bytes_to_read);
}
// Delete garbage
self.buf.clear();
self.buf_begin = 0;
// Wait for client to send CopyData bytes
match self.pgb.read_message() {
Ok(Some(message)) => {
let copy_data_bytes = match message {
FeMessage::CopyData(bytes) => bytes,
FeMessage::CopyDone => return Ok(0),
FeMessage::Sync => continue,
m => {
let msg = format!("unexpected message {:?}", m);
self.pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
return Err(io::Error::new(io::ErrorKind::Other, msg));
}
};
// Return as much as we can, saving the rest in self.buf
let mut reader = copy_data_bytes.reader();
let bytes_read = reader.read(buf)?;
reader.read_to_end(&mut self.buf)?;
return Ok(bytes_read);
}
Ok(None) => {
let msg = "client closed connection";
self.pgb.write_message(&BeMessage::ErrorResponse(msg))?;
return Err(io::Error::new(io::ErrorKind::Other, msg));
}
Err(e) => {
if !is_socket_read_timed_out(&e) {
return Err(io::Error::new(io::ErrorKind::Other, e));
}
}
}
}
// Shutting down
let msg = "Importer thread was shut down";
Err(io::Error::new(io::ErrorKind::Other, msg))
}
}
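
The overflow-buffer trick in `CopyInReader` (a `Vec<u8>` plus a `buf_begin` index instead of popping from the front) is worth isolating; a small self-contained sketch of the same pattern:

```rust
// "Vec plus begin index" buffering: consuming from the front only advances
// `begin` (O(1)); the Vec is cleared once fully drained, and bytes are only
// pushed when it is empty, so it never grows beyond one message.
struct FrontDrainBuf {
    buf: Vec<u8>,
    begin: usize,
}

impl FrontDrainBuf {
    fn new() -> Self {
        Self { buf: Vec::new(), begin: 0 }
    }

    /// Copy up to `out.len()` pending bytes into `out`; returns the count.
    fn pop_into(&mut self, out: &mut [u8]) -> usize {
        let pending = &self.buf[self.begin..];
        let n = pending.len().min(out.len());
        out[..n].copy_from_slice(&pending[..n]);
        self.begin += n;
        if self.begin == self.buf.len() {
            // Fully drained: reclaim the space.
            self.buf.clear();
            self.begin = 0;
        }
        n
    }

    /// Stash bytes the caller has not asked for yet (only when drained).
    fn push(&mut self, extra: &[u8]) {
        debug_assert_eq!(self.begin, self.buf.len());
        self.buf.extend_from_slice(extra);
    }
}
```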
///////////////////////////////////////////////////////////////////////////////
///
@@ -370,6 +462,10 @@ impl PageServerHandler {
) -> anyhow::Result<()> {
let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
// NOTE: pagerequests handler exits when connection is closed,
// so there is no need to reset the association
thread_mgr::associate_with(Some(tenantid), Some(timelineid));
// Check that the timeline exists
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
.context("Cannot load local timeline")?;
@@ -443,6 +539,98 @@ impl PageServerHandler {
Ok(())
}
fn handle_import_basebackup(
&self,
pgb: &mut PostgresBackend,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
base_lsn: Lsn,
_end_lsn: Lsn,
) -> anyhow::Result<()> {
thread_mgr::associate_with(Some(tenant_id), Some(timeline_id));
let _enter =
info_span!("import basebackup", timeline = %timeline_id, tenant = %tenant_id).entered();
// Create empty timeline
info!("creating new timeline");
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
let timeline = repo.create_empty_timeline(timeline_id, base_lsn)?;
let repartition_distance = repo.get_checkpoint_distance();
let mut datadir_timeline =
DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
// TODO mark timeline as not ready until it reaches end_lsn.
// We might have some wal to import as well, and we should prevent compute
// from connecting before that and writing conflicting wal.
//
// This is not relevant for pageserver->pageserver migrations, since there's
// no wal to import. But should be fixed if we want to import from postgres.
// TODO leave clean state on error. For now you can use detach to clean
// up broken state from a failed import.
// Import basebackup provided via CopyData
info!("importing basebackup");
pgb.write_message(&BeMessage::CopyInResponse)?;
let reader = CopyInReader::new(pgb);
import_basebackup_from_tar(&mut datadir_timeline, reader, base_lsn)?;
// TODO check checksum
// Meanwhile you can verify client-side by taking fullbackup
// and checking that it matches in size with what was imported.
// It wouldn't work if base came from vanilla postgres though,
// since we discard some log files.
// Flush data to disk, then upload to s3
info!("flushing layers");
datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
info!("done");
Ok(())
}
fn handle_import_wal(
&self,
pgb: &mut PostgresBackend,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
start_lsn: Lsn,
end_lsn: Lsn,
) -> anyhow::Result<()> {
thread_mgr::associate_with(Some(tenant_id), Some(timeline_id));
let _enter =
info_span!("import wal", timeline = %timeline_id, tenant = %tenant_id).entered();
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
let timeline = repo.get_timeline_load(timeline_id)?;
ensure!(timeline.get_last_record_lsn() == start_lsn);
let repartition_distance = repo.get_checkpoint_distance();
let mut datadir_timeline =
DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
// TODO leave clean state on error. For now you can use detach to clean
// up broken state from a failed import.
// Import wal provided via CopyData
info!("importing wal");
pgb.write_message(&BeMessage::CopyInResponse)?;
let reader = CopyInReader::new(pgb);
import_wal_from_tar(&mut datadir_timeline, reader, start_lsn, end_lsn)?;
// TODO Does it make sense to overshoot?
ensure!(datadir_timeline.tline.get_last_record_lsn() >= end_lsn);
// Flush data to disk, then upload to s3. No need for a forced checkpoint.
// We only want to persist the data, and it doesn't matter if it's in the
// shape of deltas or images.
info!("flushing layers");
datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
info!("done");
Ok(())
}
/// Helper function to handle the LSN from client request.
///
/// Each GetPage (and Exists and Nblocks) request includes information about
@@ -545,17 +733,10 @@ impl PageServerHandler {
let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
let all_rels = timeline.list_rels(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
let mut total_blocks: i64 = 0;
let total_blocks =
timeline.get_db_size(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
for rel in all_rels {
if rel.forknum == 0 {
let n_blocks = timeline.get_rel_size(rel, lsn).unwrap_or(0);
total_blocks += n_blocks as i64;
}
}
let db_size = total_blocks * pg_constants::BLCKSZ as i64;
let db_size = total_blocks as i64 * pg_constants::BLCKSZ as i64;
Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
db_size,
@@ -591,7 +772,9 @@ impl PageServerHandler {
pgb: &mut PostgresBackend,
timelineid: ZTimelineId,
lsn: Option<Lsn>,
prev_lsn: Option<Lsn>,
tenantid: ZTenantId,
full_backup: bool,
) -> anyhow::Result<()> {
let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
let _enter = span.enter();
@@ -614,7 +797,8 @@ impl PageServerHandler {
{
let mut writer = CopyDataSink { pgb };
let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
let basebackup =
basebackup::Basebackup::new(&mut writer, &timeline, lsn, prev_lsn, full_backup)?;
span.record("lsn", &basebackup.lsn.to_string().as_str());
basebackup.send_tarball()?;
}
@@ -672,6 +856,10 @@ impl postgres_backend::Handler for PageServerHandler {
Ok(())
}
fn is_shutdown_requested(&self) -> bool {
thread_mgr::is_shutdown_requested()
}
fn process_query(
&mut self,
pgb: &mut PostgresBackend,
@@ -713,8 +901,119 @@ impl postgres_backend::Handler for PageServerHandler {
};
// Check that the timeline exists
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
self.handle_basebackup_request(pgb, timelineid, lsn, None, tenantid, false)?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
// return pair of prev_lsn and last_lsn
else if query_string.starts_with("get_last_record_rlsn ") {
let (_, params_raw) = query_string.split_at("get_last_record_rlsn ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
ensure!(
params.len() == 2,
"invalid param number for get_last_record_rlsn command"
);
let tenantid = ZTenantId::from_str(params[0])?;
let timelineid = ZTimelineId::from_str(params[1])?;
self.check_permission(Some(tenantid))?;
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
.context("Cannot load local timeline")?;
let end_of_timeline = timeline.tline.get_last_record_rlsn();
pgb.write_message_noflush(&BeMessage::RowDescription(&[
RowDescriptor::text_col(b"prev_lsn"),
RowDescriptor::text_col(b"last_lsn"),
]))?
.write_message_noflush(&BeMessage::DataRow(&[
Some(end_of_timeline.prev.to_string().as_bytes()),
Some(end_of_timeline.last.to_string().as_bytes()),
]))?
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
// same as basebackup, but result includes relational data as well
else if query_string.starts_with("fullbackup ") {
let (_, params_raw) = query_string.split_at("fullbackup ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
ensure!(
params.len() >= 2,
"invalid param number for fullbackup command"
);
let tenantid = ZTenantId::from_str(params[0])?;
let timelineid = ZTimelineId::from_str(params[1])?;
// The caller is responsible for providing correct lsn and prev_lsn.
let lsn = if params.len() > 2 {
Some(Lsn::from_str(params[2])?)
} else {
None
};
let prev_lsn = if params.len() > 3 {
Some(Lsn::from_str(params[3])?)
} else {
None
};
self.check_permission(Some(tenantid))?;
// Check that the timeline exists
self.handle_basebackup_request(pgb, timelineid, lsn, prev_lsn, tenantid, true)?;
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
} else if query_string.starts_with("import basebackup ") {
// Import the `base` section (everything but the wal) of a basebackup.
// Assumes the tenant already exists on this pageserver.
//
// Files are scheduled to be persisted to remote storage, and the
// caller should poll the http api to check when that is done.
//
// Example import command:
// 1. Get start/end LSN from backup_manifest file
// 2. Run:
// cat my_backup/base.tar | psql -h $PAGESERVER \
// -c "import basebackup $TENANT $TIMELINE $START_LSN $END_LSN"
let (_, params_raw) = query_string.split_at("import basebackup ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
ensure!(params.len() == 4);
let tenant = ZTenantId::from_str(params[0])?;
let timeline = ZTimelineId::from_str(params[1])?;
let base_lsn = Lsn::from_str(params[2])?;
let end_lsn = Lsn::from_str(params[3])?;
self.check_permission(Some(tenant))?;
match self.handle_import_basebackup(pgb, tenant, timeline, base_lsn, end_lsn) {
Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
Err(e) => {
error!("error importing base backup between {base_lsn} and {end_lsn}: {e:?}");
pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?
}
};
} else if query_string.starts_with("import wal ") {
// Import the `pg_wal` section of a basebackup.
//
// Files are scheduled to be persisted to remote storage, and the
// caller should poll the http api to check when that is done.
let (_, params_raw) = query_string.split_at("import wal ".len());
let params = params_raw.split_whitespace().collect::<Vec<_>>();
ensure!(params.len() == 4);
let tenant = ZTenantId::from_str(params[0])?;
let timeline = ZTimelineId::from_str(params[1])?;
let start_lsn = Lsn::from_str(params[2])?;
let end_lsn = Lsn::from_str(params[3])?;
self.check_permission(Some(tenant))?;
match self.handle_import_wal(pgb, tenant, timeline, start_lsn, end_lsn) {
Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
Err(e) => {
error!("error importing WAL between {start_lsn} and {end_lsn}: {e:?}");
pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?
}
};
} else if query_string.to_ascii_lowercase().starts_with("set ") {
// important because psycopg2 executes "SET datestyle TO 'ISO'"
// on connect
@@ -802,7 +1101,6 @@ impl postgres_backend::Handler for PageServerHandler {
.map(|h| h.as_str().parse())
.unwrap_or_else(|| Ok(repo.get_gc_horizon()))?;
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// Use tenant's pitr setting
let pitr = repo.get_pitr_interval();
let result = repo.gc_iteration(Some(timelineid), gc_horizon, pitr, true)?;
@@ -895,6 +1193,7 @@ impl postgres_backend::Handler for PageServerHandler {
LsnForTimestamp::Present(lsn) => format!("{}", lsn),
LsnForTimestamp::Future(_lsn) => "future".into(),
LsnForTimestamp::Past(_lsn) => "past".into(),
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
};
pgb.write_message_noflush(&BeMessage::DataRow(&[Some(result.as_bytes())]))?;
pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;

View File

@@ -51,6 +51,7 @@ pub enum LsnForTimestamp {
Present(Lsn),
Future(Lsn),
Past(Lsn),
NoData(Lsn),
}
impl<R: Repository> DatadirTimeline<R> {
@@ -123,6 +124,19 @@ impl<R: Repository> DatadirTimeline<R> {
self.tline.get(key, lsn)
}
// Get size of a database in blocks
pub fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<usize> {
let mut total_blocks = 0;
let rels = self.list_rels(spcnode, dbnode, lsn)?;
for rel in rels {
let n_blocks = self.get_rel_size(rel, lsn)?;
total_blocks += n_blocks as usize;
}
Ok(total_blocks)
}
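
A quick worked example of how this block count becomes the byte size returned by the dbsize handler shown earlier (assuming the standard 8 KiB Postgres block size, `BLCKSZ == 8192`):

```rust
// 1_280 blocks * 8192 bytes/block = 10_485_760 bytes (10 MiB).
let total_blocks: usize = 1_280;
let db_size = total_blocks as i64 * 8192;
assert_eq!(db_size, 10 * 1024 * 1024);
```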
/// Get size of a relation file
pub fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
ensure!(tag.relnode != 0, "invalid relnode");
@@ -250,7 +264,7 @@ impl<R: Repository> DatadirTimeline<R> {
(false, false) => {
// This can happen if no commit records have been processed yet, e.g.
// just after importing a cluster.
bail!("no commit timestamps found");
Ok(LsnForTimestamp::NoData(max_lsn))
}
(true, false) => {
// Didn't find any commit timestamps larger than the request
@@ -667,6 +681,10 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
}
pub fn drop_dbdir(&mut self, spcnode: Oid, dbnode: Oid) -> Result<()> {
let req_lsn = self.tline.get_last_record_lsn();
let total_blocks = self.tline.get_db_size(spcnode, dbnode, req_lsn)?;
// Remove entry from dbdir
let buf = self.get(DBDIR_KEY)?;
let mut dir = DbDirectory::des(&buf)?;
@@ -680,7 +698,8 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
);
}
// FIXME: update pending_nblocks
// Update logical database size.
self.pending_nblocks -= total_blocks as isize;
// Delete all relations and metadata files for the spcnode/dnode
self.delete(dbdir_key_range(spcnode, dbnode));
@@ -749,6 +768,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
}
/// Extend relation
/// If new size is smaller, do nothing.
pub fn put_rel_extend(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> {
ensure!(rel.relnode != 0, "invalid relnode");
@@ -756,10 +776,13 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
let size_key = rel_size_to_key(rel);
let old_size = self.get(size_key)?.get_u32_le();
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
// only extend relation here. never decrease the size
if nblocks > old_size {
let buf = nblocks.to_le_bytes();
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
self.pending_nblocks += nblocks as isize - old_size as isize;
self.pending_nblocks += nblocks as isize - old_size as isize;
}
Ok(())
}
@@ -879,6 +902,57 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
Ok(())
}
///
/// Flush changes accumulated so far to the underlying repository.
///
/// Usually, changes made in DatadirModification are atomic, but this allows
/// you to flush them to the underlying repository before the final `commit`.
/// That frees up the memory used to hold the pending changes.
///
/// Currently only used during bulk import of a data directory. In that
/// context, breaking the atomicity is OK. If the import is interrupted, the
/// whole import fails and the timeline will be deleted anyway.
/// (Or to be precise, it will be left behind for debugging purposes and
/// ignored, see https://github.com/neondatabase/neon/pull/1809)
///
/// Note: A consequence of flushing the pending operations is that they
/// won't be visible to subsequent operations until `commit`. The function
/// retains all the metadata, but data pages are flushed. That's again OK
/// for bulk import, where you are just loading data pages and won't try to
/// modify the same pages twice.
pub fn flush(&mut self) -> Result<()> {
// Unless we have accumulated a decent amount of changes, it's not worth it
// to scan through the pending_updates list.
let pending_nblocks = self.pending_nblocks;
if pending_nblocks < 10000 {
return Ok(());
}
let writer = self.tline.tline.writer();
// Flush relation and SLRU data blocks, keep metadata.
let mut result: Result<()> = Ok(());
self.pending_updates.retain(|&key, value| {
if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
result = writer.put(key, self.lsn, value);
false
} else {
true
}
});
result?;
if pending_nblocks != 0 {
self.tline.current_logical_size.fetch_add(
pending_nblocks * pg_constants::BLCKSZ as isize,
Ordering::SeqCst,
);
self.pending_nblocks = 0;
}
Ok(())
}
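
The core of `flush()` is the `retain`-based partial flush: write out the bulky data entries and keep only the metadata entries in memory. A self-contained sketch of that pattern with simplified key/value types:

```rust
use std::collections::HashMap;

type Key = u64;       // stand-in for the pageserver Key
type Value = Vec<u8>; // stand-in for the pageserver Value

fn is_data_key(key: Key) -> bool {
    // Stand-in for is_rel_block_key() || is_slru_block_key().
    key % 2 == 0
}

fn partial_flush(
    pending: &mut HashMap<Key, Value>,
    mut write: impl FnMut(Key, &Value) -> Result<(), String>,
) -> Result<(), String> {
    let mut result = Ok(());
    pending.retain(|&key, value| {
        if result.is_ok() && is_data_key(key) {
            result = write(key, value);
            false // data entry flushed: drop it from memory
        } else {
            true // keep metadata entries (and everything after an error)
        }
    });
    result
}
```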
///
/// Finish this atomic update, writing all the updated keys to the
/// underlying timeline.
@@ -889,7 +963,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
let pending_nblocks = self.pending_nblocks;
for (key, value) in self.pending_updates {
writer.put(key, self.lsn, value)?;
writer.put(key, self.lsn, &value)?;
}
for key_range in self.pending_deletions {
writer.delete(key_range.clone(), self.lsn)?;
@@ -1294,6 +1368,10 @@ pub fn key_to_rel_block(key: Key) -> Result<(RelTag, BlockNumber)> {
})
}
fn is_rel_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0
}
pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
@@ -1312,6 +1390,12 @@ pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
})
}
fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
//
//-- Tests that should work the same with any Repository/Timeline implementation.
//

View File

@@ -81,6 +81,12 @@ mod profiling_impl {
pub struct DummyProfilerGuard;
impl Drop for DummyProfilerGuard {
fn drop(&mut self) {
// do nothing, this exists to calm Clippy down
}
}
pub fn profpoint_start(
_conf: &PageServerConf,
_point: ProfilingConfig,

View File

@@ -3,7 +3,7 @@ use std::cmp::Ordering;
use std::fmt;
use postgres_ffi::relfile_utils::forknumber_to_name;
use postgres_ffi::Oid;
use postgres_ffi::{pg_constants, Oid};
///
/// Relation data file segment id throughout the Postgres cluster.
@@ -75,6 +75,30 @@ impl fmt::Display for RelTag {
}
}
impl RelTag {
pub fn to_segfile_name(&self, segno: u32) -> String {
let mut name = if self.spcnode == pg_constants::GLOBALTABLESPACE_OID {
"global/".to_string()
} else {
format!("base/{}/", self.dbnode)
};
name += &self.relnode.to_string();
if let Some(fork_name) = forknumber_to_name(self.forknum) {
name += "_";
name += fork_name;
}
if segno != 0 {
name += ".";
name += &segno.to_string();
}
name
}
}
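
A hedged usage sketch of `to_segfile_name` (the field values are illustrative; the mapping follows the standard Postgres data-directory layout):

```rust
// Illustrative only: assumes RelTag and pg_constants are in scope.
let rel = RelTag {
    spcnode: pg_constants::DEFAULTTABLESPACE_OID,
    dbnode: 13990,
    relnode: 16384,
    forknum: 0, // main fork: forknumber_to_name() yields no suffix
};
assert_eq!(rel.to_segfile_name(0), "base/13990/16384");
assert_eq!(rel.to_segfile_name(2), "base/13990/16384.2");
// A global relation (spcnode == GLOBALTABLESPACE_OID) maps to "global/<relnode>...",
// and non-main forks get their name appended, e.g. "base/13990/16384_vm".
```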
///
/// Non-relation transaction status files (clog (a.k.a. pg_xact) and
/// pg_multixact) in Postgres are handled by SLRU (Simple LRU) buffer,

View File

@@ -1,223 +0,0 @@
//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use tracing::{debug, error, info};
use utils::zid::ZTenantTimelineId;
use crate::remote_storage::{
storage_sync::{SyncQueue, SyncTask},
RemoteStorage,
};
use super::{LayersDeletion, SyncData};
/// Attempts to remove the timeline layers from the remote storage.
/// If the task has not adjusted the metadata beforehand, the deletion will fail.
pub(super) async fn delete_timeline_layers<'a, P, S>(
storage: &'a S,
sync_queue: &SyncQueue,
sync_id: ZTenantTimelineId,
mut delete_data: SyncData<LayersDeletion>,
) -> bool
where
P: std::fmt::Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
if !delete_data.data.deletion_registered {
error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
return false;
}
if delete_data.data.layers_to_delete.is_empty() {
info!("No layers to delete, skipping");
return true;
}
let layers_to_delete = delete_data
.data
.layers_to_delete
.drain()
.collect::<Vec<_>>();
debug!("Layers to delete: {layers_to_delete:?}");
info!("Deleting {} timeline layers", layers_to_delete.len());
let mut delete_tasks = layers_to_delete
.into_iter()
.map(|local_layer_path| async {
let storage_path = match storage.storage_path(&local_layer_path).with_context(|| {
format!(
"Failed to get the layer storage path for local path '{}'",
local_layer_path.display()
)
}) {
Ok(path) => path,
Err(e) => return Err((e, local_layer_path)),
};
match storage.delete(&storage_path).await.with_context(|| {
format!(
"Failed to delete remote layer from storage at '{:?}'",
storage_path
)
}) {
Ok(()) => Ok(local_layer_path),
Err(e) => Err((e, local_layer_path)),
}
})
.collect::<FuturesUnordered<_>>();
let mut errored = false;
while let Some(deletion_result) = delete_tasks.next().await {
match deletion_result {
Ok(local_layer_path) => {
debug!(
"Successfully deleted layer {} for timeline {sync_id}",
local_layer_path.display()
);
delete_data.data.deleted_layers.insert(local_layer_path);
}
Err((e, local_layer_path)) => {
errored = true;
error!(
"Failed to delete layer {} for timeline {sync_id}: {e:?}",
local_layer_path.display()
);
delete_data.data.layers_to_delete.insert(local_layer_path);
}
}
}
if errored {
debug!("Reenqueuing failed delete task for timeline {sync_id}");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
}
errored
}
#[cfg(test)]
mod tests {
use std::{collections::HashSet, num::NonZeroUsize};
use itertools::Itertools;
use tempfile::tempdir;
use tokio::fs;
use utils::lsn::Lsn;
use crate::{
remote_storage::{
storage_sync::test_utils::{create_local_timeline, dummy_metadata},
LocalFs,
},
repository::repo_harness::{RepoHarness, TIMELINE_ID},
};
use super::*;
#[tokio::test]
async fn delete_timeline_negative() -> anyhow::Result<()> {
let harness = RepoHarness::create("delete_timeline_negative")?;
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
let deleted = delete_timeline_layers(
&storage,
&sync_queue,
sync_id,
SyncData {
retries: 1,
data: LayersDeletion {
deleted_layers: HashSet::new(),
layers_to_delete: HashSet::new(),
deletion_registered: false,
},
},
)
.await;
assert!(
!deleted,
"Should not start the deletion for task with delete metadata unregistered"
);
Ok(())
}
#[tokio::test]
async fn delete_timeline() -> anyhow::Result<()> {
let harness = RepoHarness::create("delete_timeline")?;
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
let layer_files = ["a", "b", "c", "d"];
let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
let current_retries = 3;
let metadata = dummy_metadata(Lsn(0x30));
let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
let timeline_upload =
create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
for local_path in timeline_upload.layers_to_upload {
let remote_path = storage.storage_path(&local_path)?;
let remote_parent_dir = remote_path.parent().unwrap();
if !remote_parent_dir.exists() {
fs::create_dir_all(&remote_parent_dir).await?;
}
fs::copy(&local_path, &remote_path).await?;
}
assert_eq!(
storage
.list()
.await?
.into_iter()
.map(|remote_path| storage.local_path(&remote_path).unwrap())
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
.sorted()
.collect::<Vec<_>>(),
layer_files
.iter()
.map(|layer_str| layer_str.to_string())
.sorted()
.collect::<Vec<_>>(),
"Expect to have all layer files remotely before deletion"
);
let deleted = delete_timeline_layers(
&storage,
&sync_queue,
sync_id,
SyncData {
retries: current_retries,
data: LayersDeletion {
deleted_layers: HashSet::new(),
layers_to_delete: HashSet::from([
local_timeline_path.join("a"),
local_timeline_path.join("c"),
local_timeline_path.join("something_different"),
]),
deletion_registered: true,
},
},
)
.await;
assert!(deleted, "Should be able to delete timeline files");
assert_eq!(
storage
.list()
.await?
.into_iter()
.map(|remote_path| storage.local_path(&remote_path).unwrap())
.filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
.sorted()
.collect::<Vec<_>>(),
vec!["b".to_string(), "d".to_string()],
"Expect to have only non-deleted files remotely"
);
Ok(())
}
}

View File

@@ -7,7 +7,6 @@ use byteorder::{ByteOrder, BE};
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::Display;
use std::ops::{AddAssign, Range};
use std::sync::{Arc, RwLockReadGuard};
use std::time::Duration;
@@ -182,32 +181,15 @@ impl Value {
}
}
#[derive(Clone, Copy, Debug)]
pub enum TimelineSyncStatusUpdate {
Downloaded,
}
impl Display for TimelineSyncStatusUpdate {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
TimelineSyncStatusUpdate::Downloaded => "Downloaded",
};
f.write_str(s)
}
}
///
/// A repository corresponds to one .zenith directory. One repository holds multiple
/// A repository corresponds to one .neon directory. One repository holds multiple
/// timelines, forked off from the same initial call to 'initdb'.
pub trait Repository: Send + Sync {
type Timeline: Timeline;
/// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization.
/// See [`crate::remote_storage`] for more details about the synchronization.
fn apply_timeline_remote_sync_status_update(
&self,
timeline_id: ZTimelineId,
timeline_sync_status_update: TimelineSyncStatusUpdate,
) -> Result<()>;
fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
/// Get Timeline handle for given zenith timeline ID.
/// This function is idempotent. It doesn't change internal state in any way.
@@ -224,12 +206,17 @@ pub trait Repository: Send + Sync {
/// Initdb lsn is provided for timeline impl to be able to perform checks for some operations against it.
fn create_empty_timeline(
&self,
timelineid: ZTimelineId,
timeline_id: ZTimelineId,
initdb_lsn: Lsn,
) -> Result<Arc<Self::Timeline>>;
/// Branch a timeline
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()>;
fn branch_timeline(
&self,
src: ZTimelineId,
dst: ZTimelineId,
start_lsn: Option<Lsn>,
) -> Result<()>;
/// Flush all data to disk.
///
@@ -242,7 +229,7 @@ pub trait Repository: Send + Sync {
///
/// 'timelineid' specifies the timeline to GC, or None for all.
/// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
/// `checkpoint_before_gc` parameter is used to force compaction of storage before CG
/// `checkpoint_before_gc` parameter is used to force compaction of storage before GC
/// to make tests more deterministic.
/// TODO Do we still need it or we can call checkpoint explicitly in tests where needed?
fn gc_iteration(
@@ -259,10 +246,10 @@ pub trait Repository: Send + Sync {
/// api's 'compact' command.
fn compaction_iteration(&self) -> Result<()>;
/// detaches timeline-related in-memory data.
fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
/// removes timeline-related in-memory data
fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()>;
// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn.
/// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn.
fn get_remote_index(&self) -> &RemoteIndex;
}
@@ -406,7 +393,7 @@ pub trait TimelineWriter<'a> {
///
/// This will implicitly extend the relation, if the page is beyond the
/// current end-of-file.
fn put(&self, key: Key, lsn: Lsn, value: Value) -> Result<()>;
fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()>;
fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> Result<()>;
@@ -536,7 +523,7 @@ pub mod repo_harness {
TenantConfOpt::from(self.tenant_conf),
walredo_mgr,
self.tenant_id,
RemoteIndex::empty(),
RemoteIndex::default(),
false,
);
// populate repo with locally available timelines
@@ -552,10 +539,7 @@ pub mod repo_harness {
.parse()
.unwrap();
repo.apply_timeline_remote_sync_status_update(
timeline_id,
TimelineSyncStatusUpdate::Downloaded,
)?;
repo.attach_timeline(timeline_id)?;
}
Ok(repo)
@@ -619,12 +603,12 @@ mod tests {
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
let writer = tline.writer();
writer.put(*TEST_KEY, Lsn(0x10), Value::Image(TEST_IMG("foo at 0x10")))?;
writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
writer.finish_write(Lsn(0x10));
drop(writer);
let writer = tline.writer();
writer.put(*TEST_KEY, Lsn(0x20), Value::Image(TEST_IMG("foo at 0x20")))?;
writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
writer.finish_write(Lsn(0x20));
drop(writer);
@@ -635,6 +619,19 @@ mod tests {
Ok(())
}
#[test]
fn no_duplicate_timelines() -> Result<()> {
let repo = RepoHarness::create("no_duplicate_timelines")?.load();
let _ = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
match repo.create_empty_timeline(TIMELINE_ID, Lsn(0)) {
Ok(_) => panic!("duplicate timeline creation should fail"),
Err(e) => assert_eq!(e.to_string(), "Timeline already exists"),
}
Ok(())
}
/// Convenience function to create a page image with given string as the only content
pub fn test_value(s: &str) -> Value {
let mut buf = BytesMut::new();
@@ -658,24 +655,24 @@ mod tests {
let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap();
// Insert a value on the timeline
writer.put(TEST_KEY_A, Lsn(0x20), test_value("foo at 0x20"))?;
writer.put(TEST_KEY_B, Lsn(0x20), test_value("foobar at 0x20"))?;
writer.put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))?;
writer.put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))?;
writer.finish_write(Lsn(0x20));
writer.put(TEST_KEY_A, Lsn(0x30), test_value("foo at 0x30"))?;
writer.put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))?;
writer.finish_write(Lsn(0x30));
writer.put(TEST_KEY_A, Lsn(0x40), test_value("foo at 0x40"))?;
writer.put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))?;
writer.finish_write(Lsn(0x40));
//assert_current_logical_size(&tline, Lsn(0x40));
// Branch the history, modify relation differently on the new timeline
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)
.expect("Should have a local timeline");
let new_writer = newtline.writer();
new_writer.put(TEST_KEY_A, Lsn(0x40), test_value("bar at 0x40"))?;
new_writer.put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))?;
new_writer.finish_write(Lsn(0x40));
// Check page contents on both branches
@@ -706,14 +703,14 @@ mod tests {
writer.put(
*TEST_KEY,
lsn,
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
lsn += 0x10;
writer.put(
*TEST_KEY,
lsn,
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
lsn += 0x10;
@@ -724,14 +721,14 @@ mod tests {
writer.put(
*TEST_KEY,
lsn,
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
lsn += 0x10;
writer.put(
*TEST_KEY,
lsn,
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
)?;
writer.finish_write(lsn);
}
@@ -752,7 +749,7 @@ mod tests {
repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?;
// try to branch at lsn 25, should fail because we already garbage collected the data
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
Ok(_) => panic!("branching should have failed"),
Err(err) => {
assert!(err.to_string().contains("invalid branch start lsn"));
@@ -773,7 +770,7 @@ mod tests {
repo.create_empty_timeline(TIMELINE_ID, Lsn(0x50))?;
// try to branch at lsn 0x25, should fail because initdb lsn is 0x50
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
Ok(_) => panic!("branching should have failed"),
Err(err) => {
assert!(&err.to_string().contains("invalid branch start lsn"));
@@ -818,7 +815,7 @@ mod tests {
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
make_some_layers(tline.as_ref(), Lsn(0x20))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)
.expect("Should have a local timeline");
@@ -834,7 +831,7 @@ mod tests {
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
make_some_layers(tline.as_ref(), Lsn(0x20))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)
.expect("Should have a local timeline");
@@ -892,7 +889,7 @@ mod tests {
make_some_layers(tline.as_ref(), Lsn(0x20))?;
tline.checkpoint(CheckpointConfig::Forced)?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
let newtline = repo
.get_timeline_load(NEW_TIMELINE_ID)

View File

@@ -178,20 +178,20 @@ use crate::{
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
LayeredRepository,
},
repository::TimelineSyncStatusUpdate,
storage_sync::{self, index::RemoteIndex},
tenant_mgr::apply_timeline_sync_status_updates,
tenant_mgr::attach_downloaded_tenants,
thread_mgr,
thread_mgr::ThreadKind,
};
use metrics::{
register_histogram_vec, register_int_counter, register_int_gauge, HistogramVec, IntCounter,
IntGauge,
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
HistogramVec, IntCounter, IntCounterVec, IntGauge,
};
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
pub use self::download::download_index_part;
use self::download::download_index_parts;
pub use self::download::gather_tenant_timelines_index_parts;
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
lazy_static! {
@@ -208,14 +208,17 @@ lazy_static! {
static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
"pageserver_remote_storage_image_sync_seconds",
"Time took to synchronize (download or upload) a whole pageserver image. \
Grouped by `operation_kind` (upload|download) and `status` (success|failure)",
&["operation_kind", "status"],
vec![
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0,
8.0, 9.0, 10.0, 12.5, 15.0, 17.5, 20.0
]
Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
&["tenant_id", "timeline_id", "operation_kind", "status"],
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
)
.expect("failed to register pageserver image sync time histogram vec");
static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_remote_index_uploads_total",
"Number of remote index uploads",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver remote index upload vec");
}
static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
@@ -298,7 +301,7 @@ pub fn start_local_timeline_sync(
}
Ok(SyncStartupData {
local_timeline_init_statuses,
remote_index: RemoteIndex::empty(),
remote_index: RemoteIndex::default(),
})
}
}
@@ -832,7 +835,7 @@ where
.build()
.context("Failed to create storage sync runtime")?;
let applicable_index_parts = runtime.block_on(try_fetch_index_parts(
let applicable_index_parts = runtime.block_on(download_index_parts(
conf,
&storage,
local_timeline_files.keys().copied().collect(),
@@ -915,16 +918,48 @@ fn storage_sync_loop<P, S>(
});
match loop_step {
ControlFlow::Continue(new_timeline_states) => {
if new_timeline_states.is_empty() {
debug!("Sync loop step completed, no new timeline states");
ControlFlow::Continue(updated_tenants) => {
if updated_tenants.is_empty() {
debug!("Sync loop step completed, no new tenant states");
} else {
info!(
"Sync loop step completed, {} new timeline state update(s)",
new_timeline_states.len()
"Sync loop step completed, {} new tenant state update(s)",
updated_tenants.len()
);
let mut sync_status_updates: HashMap<ZTenantId, HashSet<ZTimelineId>> =
HashMap::new();
let index_accessor = runtime.block_on(index.write());
for tenant_id in updated_tenants {
let tenant_entry = match index_accessor.tenant_entry(&tenant_id) {
Some(tenant_entry) => tenant_entry,
None => {
error!(
"cannot find tenant in remote index for timeline sync update"
);
continue;
}
};
if tenant_entry.has_in_progress_downloads() {
info!("Tenant {tenant_id} has pending timeline downloads, skipping repository registration");
continue;
} else {
info!(
"Tenant {tenant_id} download completed. Registering its timelines in the repository"
);
// We assume that if the tenant has no in-progress downloads, the last completed
// timeline download is the one that triggered this sync status update. In that case
// we look up all of the tenant's timelines in the index and register them in the
// repository as a single batch, so the repository can apply them to its internal
// timeline map in one operation.
sync_status_updates
.insert(tenant_id, tenant_entry.keys().copied().collect());
}
}
drop(index_accessor);
// Batch the timeline download registrations so that the external registration code does not block any running sync tasks.
apply_timeline_sync_status_updates(conf, &index, new_timeline_states);
attach_downloaded_tenants(conf, &index, sync_status_updates);
}
}
ControlFlow::Break(()) => {
@@ -935,6 +970,14 @@ fn storage_sync_loop<P, S>(
}
}
// needed to check whether the download happened
// more informative than just a bool
#[derive(Debug)]
enum DownloadMarker {
Downloaded,
Nothing,
}
async fn process_batches<P, S>(
conf: &'static PageServerConf,
max_sync_errors: NonZeroU32,
@@ -942,7 +985,7 @@ async fn process_batches<P, S>(
index: &RemoteIndex,
batched_tasks: HashMap<ZTenantTimelineId, SyncTaskBatch>,
sync_queue: &SyncQueue,
) -> HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncStatusUpdate>>
) -> HashSet<ZTenantId>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -967,22 +1010,19 @@ where
})
.collect::<FuturesUnordered<_>>();
let mut new_timeline_states: HashMap<
ZTenantId,
HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
> = HashMap::new();
let mut downloaded_timelines = HashSet::new();
while let Some((sync_id, state_update)) = sync_results.next().await {
debug!("Finished storage sync task for sync id {sync_id}");
if let Some(state_update) = state_update {
new_timeline_states
.entry(sync_id.tenant_id)
.or_default()
.insert(sync_id.timeline_id, state_update);
while let Some((sync_id, download_marker)) = sync_results.next().await {
debug!(
"Finished storage sync task for sync id {sync_id} download marker {:?}",
download_marker
);
if matches!(download_marker, DownloadMarker::Downloaded) {
downloaded_timelines.insert(sync_id.tenant_id);
}
}
new_timeline_states
downloaded_timelines
}
async fn process_sync_task_batch<P, S>(
@@ -991,7 +1031,7 @@ async fn process_sync_task_batch<P, S>(
max_sync_errors: NonZeroU32,
sync_id: ZTenantTimelineId,
batch: SyncTaskBatch,
) -> Option<TimelineSyncStatusUpdate>
) -> DownloadMarker
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1076,7 +1116,7 @@ where
}
}
}
None
DownloadMarker::Nothing
}
.instrument(info_span!("download_timeline_data")),
);
@@ -1130,7 +1170,7 @@ async fn download_timeline_data<P, S>(
new_download_data: SyncData<LayersDownload>,
sync_start: Instant,
task_name: &str,
) -> Option<TimelineSyncStatusUpdate>
) -> DownloadMarker
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1146,20 +1186,20 @@ where
.await
{
DownloadedTimeline::Abort => {
register_sync_status(sync_start, task_name, None);
register_sync_status(sync_id, sync_start, task_name, None);
if let Err(e) = index.write().await.set_awaits_download(&sync_id, false) {
error!("Timeline {sync_id} was expected to be in the remote index after a download attempt, but it's absent: {e:?}");
}
}
DownloadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
DownloadedTimeline::Successful(mut download_data) => {
match update_local_metadata(conf, sync_id, current_remote_timeline).await {
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
Ok(()) => {
register_sync_status(sync_start, task_name, Some(true));
return Some(TimelineSyncStatusUpdate::Downloaded);
register_sync_status(sync_id, sync_start, task_name, Some(true));
return DownloadMarker::Downloaded;
}
Err(e) => {
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
@@ -1169,13 +1209,13 @@ where
error!("Failed to update local timeline metadata: {e:?}");
download_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Download(download_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
}
}
}
None
DownloadMarker::Nothing
}
async fn update_local_metadata(
@@ -1265,14 +1305,14 @@ async fn delete_timeline_data<P, S>(
error!("Failed to update remote timeline {sync_id}: {e:?}");
new_delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(new_delete_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
return;
}
}
timeline_delete.deletion_registered = true;
let sync_status = delete_timeline_layers(storage, sync_queue, sync_id, new_delete_data).await;
register_sync_status(sync_start, task_name, Some(sync_status));
register_sync_status(sync_id, sync_start, task_name, Some(sync_status));
}
async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result<TimelineMetadata> {
@@ -1306,7 +1346,7 @@ async fn upload_timeline_data<P, S>(
.await
{
UploadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
return;
}
UploadedTimeline::Successful(upload_data) => upload_data,
@@ -1325,13 +1365,13 @@ async fn upload_timeline_data<P, S>(
.await
{
Ok(()) => {
register_sync_status(sync_start, task_name, Some(true));
register_sync_status(sync_id, sync_start, task_name, Some(true));
}
Err(e) => {
error!("Failed to update remote timeline {sync_id}: {e:?}");
uploaded_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
register_sync_status(sync_start, task_name, Some(false));
register_sync_status(sync_id, sync_start, task_name, Some(false));
}
}
}
@@ -1421,7 +1461,14 @@ where
IndexPart::from_remote_timeline(&timeline_path, updated_remote_timeline)
.context("Failed to create an index part from the updated remote timeline")?;
info!("Uploading remote index for the timeline");
debug!("Uploading remote index for the timeline");
REMOTE_INDEX_UPLOAD
.with_label_values(&[
&sync_id.tenant_id.to_string(),
&sync_id.timeline_id.to_string(),
])
.inc();
upload_index_part(conf, storage, sync_id, new_index_part)
.await
.context("Failed to upload new index part")
@@ -1448,35 +1495,6 @@ async fn validate_task_retries<T>(
ControlFlow::Continue(sync_data)
}
async fn try_fetch_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
keys: HashSet<ZTenantTimelineId>,
) -> HashMap<ZTenantTimelineId, IndexPart>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let mut index_parts = HashMap::with_capacity(keys.len());
let mut part_downloads = keys
.into_iter()
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
.collect::<FuturesUnordered<_>>();
while let Some((id, part_upload_result)) = part_downloads.next().await {
match part_upload_result {
Ok(index_part) => {
debug!("Successfully fetched index part for {id}");
index_parts.insert(id, index_part);
}
Err(e) => warn!("Failed to fetch index part for {id}: {e}"),
}
}
index_parts
}
fn schedule_first_sync_tasks(
index: &mut RemoteTimelineIndex,
sync_queue: &SyncQueue,
@@ -1590,12 +1608,24 @@ fn compare_local_and_remote_timeline(
(initial_timeline_status, awaits_download)
}
fn register_sync_status(sync_start: Instant, sync_name: &str, sync_status: Option<bool>) {
fn register_sync_status(
sync_id: ZTenantTimelineId,
sync_start: Instant,
sync_name: &str,
sync_status: Option<bool>,
) {
let secs_elapsed = sync_start.elapsed().as_secs_f64();
info!("Processed a sync task in {secs_elapsed:.2} seconds");
debug!("Processed a sync task in {secs_elapsed:.2} seconds");
let tenant_id = sync_id.tenant_id.to_string();
let timeline_id = sync_id.timeline_id.to_string();
match sync_status {
Some(true) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "success"]),
Some(false) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "failure"]),
Some(true) => {
IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "success"])
}
Some(false) => {
IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "failure"])
}
None => return,
}
.observe(secs_elapsed)

View File

@@ -1,10 +1,14 @@
//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.
use std::{collections::HashSet, fmt::Debug, path::Path};
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
path::Path,
};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use remote_storage::{path_with_suffix_extension, RemoteStorage};
use remote_storage::{path_with_suffix_extension, RemoteObjectName, RemoteStorage};
use tokio::{
fs,
io::{self, AsyncWriteExt},
@@ -14,7 +18,7 @@ use tracing::{debug, error, info, warn};
use crate::{
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
};
use utils::zid::ZTenantTimelineId;
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
use super::{
index::{IndexPart, RemoteTimeline},
@@ -23,8 +27,108 @@ use super::{
pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
/// FIXME: Needs cleanup. Currently it swallows errors. We need to ensure that
/// all metadata parts for one tenant were downloaded successfully.
/// "Successfully" includes the absence of an index_part in remote storage, because that is a
/// valid situation when a timeline was just created and the pageserver restarted before the
/// index part upload completed. The current RemoteStorage interface cannot express this,
/// because it uses anyhow::Error as its error type, so this needs a refactoring.
///
/// In other words, we must yield only complete sets of tenant timelines:
/// a failure for one timeline of a tenant should exclude the whole tenant from the returned hashmap.
/// So there are two requirements: keep everything in one FuturesUnordered
/// to allow higher concurrency, and mark tenants as failed independently.
/// That requires some bookkeeping.
pub async fn download_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
keys: HashSet<ZTenantTimelineId>,
) -> HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>>
where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let mut index_parts: HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>> = HashMap::new();
let mut part_downloads = keys
.into_iter()
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
.collect::<FuturesUnordered<_>>();
while let Some((id, part_upload_result)) = part_downloads.next().await {
match part_upload_result {
Ok(index_part) => {
debug!("Successfully fetched index part for {id}");
index_parts
.entry(id.tenant_id)
.or_default()
.insert(id.timeline_id, index_part);
}
Err(e) => error!("Failed to fetch index part for {id}: {e}"),
}
}
index_parts
}
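A minimal sketch of the bookkeeping the FIXME above asks for, assuming simplified stand-in types (the `TenantId`, `TimelineId`, and `IndexPart` below are plain placeholders, not the pageserver's real `ZTenantId`/`ZTimelineId`/index part types): a single failed timeline download excludes the whole tenant from the returned map.

use std::collections::{HashMap, HashSet};

type TenantId = u64; // placeholder for ZTenantId
type TimelineId = u64; // placeholder for ZTimelineId
struct IndexPart; // placeholder for the real index part payload

/// Keep only tenants for which every timeline index part was fetched successfully.
fn complete_tenants_only(
    results: Vec<((TenantId, TimelineId), Result<IndexPart, String>)>,
) -> HashMap<TenantId, HashMap<TimelineId, IndexPart>> {
    let mut parts: HashMap<TenantId, HashMap<TimelineId, IndexPart>> = HashMap::new();
    let mut failed_tenants: HashSet<TenantId> = HashSet::new();
    for ((tenant_id, timeline_id), result) in results {
        match result {
            Ok(part) => {
                parts.entry(tenant_id).or_default().insert(timeline_id, part);
            }
            Err(e) => {
                eprintln!("failed to fetch index part for {tenant_id}/{timeline_id}: {e}");
                failed_tenants.insert(tenant_id);
            }
        }
    }
    // A single failure marks the tenant as failed and drops it as a whole.
    for tenant_id in failed_tenants {
        parts.remove(&tenant_id);
    }
    parts
}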
/// Note: The function is rather expensive from an S3 access point of view: it executes ceil(N/1000) + N requests.
/// At least one request is needed to obtain the list of tenant timelines (more requests if there are more than 1000 timelines),
/// and then it attempts to download every index file that belongs to these timelines.
pub async fn gather_tenant_timelines_index_parts<P, S>(
conf: &'static PageServerConf,
storage: &S,
tenant_id: ZTenantId,
) -> anyhow::Result<HashMap<ZTimelineId, IndexPart>>
where
P: RemoteObjectName + Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
let tenant_path = conf.timelines_path(&tenant_id);
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
format!(
"Failed to get tenant storage path for local path '{}'",
tenant_path.display()
)
})?;
let timelines = storage
.list_prefixes(Some(tenant_storage_path))
.await
.with_context(|| {
format!(
"Failed to list tenant storage path to get remote timelines to download: {}",
tenant_id
)
})?;
let mut sync_ids = HashSet::new();
for timeline_remote_storage_key in timelines {
let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| {
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}")
})?;
let timeline_id: ZTimelineId = object_name
.parse()
.with_context(|| {
format!("failed to parse object name into timeline id for tenant {tenant_id} '{object_name}'")
})?;
sync_ids.insert(ZTenantTimelineId {
tenant_id,
timeline_id,
});
}
download_index_parts(conf, storage, sync_ids)
.await
.remove(&tenant_id)
.ok_or(anyhow::anyhow!(
"Missing tenant index parts. This is a bug."
))
}
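To make the cost note above concrete with a hypothetical figure: attaching a tenant with 2,500 timelines would take ceil(2500/1000) = 3 list requests plus 2,500 index-part downloads, roughly 2,503 S3 requests for that single tenant.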
/// Retrieves index data from the remote storage for a given timeline.
pub async fn download_index_part<P, S>(
async fn download_index_part<P, S>(
conf: &'static PageServerConf,
storage: &S,
sync_id: ZTenantTimelineId,
@@ -44,13 +148,23 @@ where
index_part_path.display()
)
})?;
let mut index_part_download =
storage
.download(&part_storage_path)
.await
.with_context(|| {
format!("Failed to open download stream for for storage path {part_storage_path:?}")
})?;
let mut index_part_bytes = Vec::new();
storage
.download(&part_storage_path, &mut index_part_bytes)
.await
.with_context(|| {
format!("Failed to download an index part from storage path {part_storage_path:?}")
})?;
io::copy(
&mut index_part_download.download_stream,
&mut index_part_bytes,
)
.await
.with_context(|| {
format!("Failed to download an index part from storage path {part_storage_path:?}")
})?;
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes).with_context(|| {
format!("Failed to deserialize index part file from storage path '{part_storage_path:?}'")
@@ -162,15 +276,19 @@ where
temp_file_path.display()
)
})?;
storage
.download(&layer_storage_path, &mut destination_file)
let mut download = storage
.download(&layer_storage_path)
.await
.with_context(|| {
format!(
"Failed to download a layer from storage path '{layer_storage_path:?}'"
"Failed to open a download stream for layer with remote storage path '{layer_storage_path:?}'"
)
})?;
io::copy(&mut download.download_stream, &mut destination_file).await.with_context(|| {
format!(
"Failed to download layer with remote storage path '{layer_storage_path:?}' into file '{}'", temp_file_path.display()
)
})?;
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
// A file will not be closed immediately when it goes out of scope if there are any IO operations

View File

@@ -2,6 +2,7 @@
//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
//! remote timeline layers and its metadata.
use std::ops::{Deref, DerefMut};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
@@ -14,7 +15,10 @@ use serde_with::{serde_as, DisplayFromStr};
use tokio::sync::RwLock;
use crate::{config::PageServerConf, layered_repository::metadata::TimelineMetadata};
use utils::{lsn::Lsn, zid::ZTenantTimelineId};
use utils::{
lsn::Lsn,
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
};
/// A part of the filesystem path, that needs a root to become a path again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
@@ -41,38 +45,68 @@ impl RelativePath {
}
}
#[derive(Debug, Clone, Default)]
pub struct TenantEntry(HashMap<ZTimelineId, RemoteTimeline>);
impl TenantEntry {
pub fn has_in_progress_downloads(&self) -> bool {
self.values()
.any(|remote_timeline| remote_timeline.awaits_download)
}
}
impl Deref for TenantEntry {
type Target = HashMap<ZTimelineId, RemoteTimeline>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for TenantEntry {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl From<HashMap<ZTimelineId, RemoteTimeline>> for TenantEntry {
fn from(inner: HashMap<ZTimelineId, RemoteTimeline>) -> Self {
Self(inner)
}
}
/// An index to track tenant files that exist on the remote storage.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Default)]
pub struct RemoteTimelineIndex {
timeline_entries: HashMap<ZTenantTimelineId, RemoteTimeline>,
entries: HashMap<ZTenantId, TenantEntry>,
}
/// A wrapper to synchronize the access to the index, should be created and used before dealing with any [`RemoteTimelineIndex`].
#[derive(Default)]
pub struct RemoteIndex(Arc<RwLock<RemoteTimelineIndex>>);
impl RemoteIndex {
pub fn empty() -> Self {
Self(Arc::new(RwLock::new(RemoteTimelineIndex {
timeline_entries: HashMap::new(),
})))
}
pub fn from_parts(
conf: &'static PageServerConf,
index_parts: HashMap<ZTenantTimelineId, IndexPart>,
index_parts: HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>>,
) -> anyhow::Result<Self> {
let mut timeline_entries = HashMap::new();
let mut entries: HashMap<ZTenantId, TenantEntry> = HashMap::new();
for (sync_id, index_part) in index_parts {
let timeline_path = conf.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
.context("Failed to restore remote timeline data from index part")?;
timeline_entries.insert(sync_id, remote_timeline);
for (tenant_id, timelines) in index_parts {
for (timeline_id, index_part) in timelines {
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
let remote_timeline =
RemoteTimeline::from_index_part(&timeline_path, index_part)
.context("Failed to restore remote timeline data from index part")?;
entries
.entry(tenant_id)
.or_default()
.insert(timeline_id, remote_timeline);
}
}
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex {
timeline_entries,
}))))
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex { entries }))))
}
pub async fn read(&self) -> tokio::sync::RwLockReadGuard<'_, RemoteTimelineIndex> {
@@ -91,20 +125,67 @@ impl Clone for RemoteIndex {
}
impl RemoteTimelineIndex {
pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&RemoteTimeline> {
self.timeline_entries.get(id)
pub fn timeline_entry(
&self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: &ZTenantTimelineId,
) -> Option<&RemoteTimeline> {
self.entries.get(tenant_id)?.get(timeline_id)
}
pub fn timeline_entry_mut(&mut self, id: &ZTenantTimelineId) -> Option<&mut RemoteTimeline> {
self.timeline_entries.get_mut(id)
pub fn timeline_entry_mut(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: &ZTenantTimelineId,
) -> Option<&mut RemoteTimeline> {
self.entries.get_mut(tenant_id)?.get_mut(timeline_id)
}
pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: RemoteTimeline) {
self.timeline_entries.insert(id, entry);
pub fn add_timeline_entry(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: ZTenantTimelineId,
entry: RemoteTimeline,
) {
self.entries
.entry(tenant_id)
.or_default()
.insert(timeline_id, entry);
}
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
self.timeline_entries.keys().copied()
pub fn remove_timeline_entry(
&mut self,
ZTenantTimelineId {
tenant_id,
timeline_id,
}: ZTenantTimelineId,
) -> Option<RemoteTimeline> {
self.entries
.entry(tenant_id)
.or_default()
.remove(&timeline_id)
}
pub fn tenant_entry(&self, tenant_id: &ZTenantId) -> Option<&TenantEntry> {
self.entries.get(tenant_id)
}
pub fn tenant_entry_mut(&mut self, tenant_id: &ZTenantId) -> Option<&mut TenantEntry> {
self.entries.get_mut(tenant_id)
}
pub fn add_tenant_entry(&mut self, tenant_id: ZTenantId) -> &mut TenantEntry {
self.entries.entry(tenant_id).or_default()
}
pub fn remove_tenant_entry(&mut self, tenant_id: &ZTenantId) -> Option<TenantEntry> {
self.entries.remove(tenant_id)
}
pub fn set_awaits_download(

View File

@@ -4,6 +4,7 @@ use std::{fmt::Debug, path::PathBuf};
use anyhow::Context;
use futures::stream::{FuturesUnordered, StreamExt};
use lazy_static::lazy_static;
use remote_storage::RemoteStorage;
use tokio::fs;
use tracing::{debug, error, info, warn};
@@ -17,6 +18,16 @@ use super::{
use crate::{
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
};
use metrics::{register_int_counter_vec, IntCounterVec};
lazy_static! {
static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
"pageserver_remote_storage_no_layers_uploads_total",
"Number of skipped uploads due to no layers",
&["tenant_id", "timeline_id"],
)
.expect("failed to register pageserver no layers upload vec");
}
/// Serializes and uploads the given index part data to the remote storage.
pub(super) async fn upload_index_part<P, S>(
@@ -102,7 +113,13 @@ where
.collect::<Vec<_>>();
if layers_to_upload.is_empty() {
info!("No layers to upload after filtering, aborting");
debug!("No layers to upload after filtering, aborting");
NO_LAYERS_UPLOAD
.with_label_values(&[
&sync_id.tenant_id.to_string(),
&sync_id.timeline_id.to_string(),
])
.inc();
return UploadedTimeline::Successful(upload_data);
}

View File

@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
}
/// Per-tenant configuration options

View File

@@ -4,8 +4,8 @@
use crate::config::PageServerConf;
use crate::layered_repository::{load_metadata, LayeredRepository};
use crate::pgdatadir_mapping::DatadirTimeline;
use crate::repository::{Repository, TimelineSyncStatusUpdate};
use crate::storage_sync::index::RemoteIndex;
use crate::repository::Repository;
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
use crate::tenant_config::TenantConfOpt;
use crate::thread_mgr::ThreadKind;
@@ -13,11 +13,11 @@ use crate::timelines::CreateRepo;
use crate::walredo::PostgresRedoManager;
use crate::{thread_mgr, timelines, walreceiver};
use crate::{DatadirTimelineImpl, RepositoryImpl};
use anyhow::{bail, Context};
use anyhow::Context;
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::sync::Arc;
use tokio::sync::mpsc;
@@ -157,7 +157,13 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
// loading a tenant is serious, but it's better to complete the startup and
// serve other tenants, than fail completely.
error!("Failed to initialize local tenant {tenant_id}: {:?}", err);
set_tenant_state(tenant_id, TenantState::Broken)?;
if let Err(err) = set_tenant_state(tenant_id, TenantState::Broken) {
error!(
"Failed to set tenant state to broken {tenant_id}: {:?}",
err
);
}
}
}
@@ -165,44 +171,51 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
}
pub enum LocalTimelineUpdate {
Detach(ZTenantTimelineId),
Attach(ZTenantTimelineId, Arc<DatadirTimelineImpl>),
Detach {
id: ZTenantTimelineId,
// used to signal to the detach caller that walreceiver successfully terminated for specified id
join_confirmation_sender: std::sync::mpsc::Sender<()>,
},
Attach {
id: ZTenantTimelineId,
datadir: Arc<DatadirTimelineImpl>,
},
}
impl std::fmt::Debug for LocalTimelineUpdate {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Detach(ttid) => f.debug_tuple("Remove").field(ttid).finish(),
Self::Attach(ttid, _) => f.debug_tuple("Add").field(ttid).finish(),
Self::Detach { id, .. } => f.debug_tuple("Remove").field(id).finish(),
Self::Attach { id, .. } => f.debug_tuple("Add").field(id).finish(),
}
}
}
/// Updates tenants' repositories, changing their timelines state in memory.
pub fn apply_timeline_sync_status_updates(
pub fn attach_downloaded_tenants(
conf: &'static PageServerConf,
remote_index: &RemoteIndex,
sync_status_updates: HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncStatusUpdate>>,
sync_status_updates: HashMap<ZTenantId, HashSet<ZTimelineId>>,
) {
if sync_status_updates.is_empty() {
debug!("no sync status updates to apply");
debug!("No sync status updates to apply");
return;
}
info!(
"Applying sync status updates for {} timelines",
sync_status_updates.len()
);
debug!("Sync status updates: {sync_status_updates:?}");
for (tenant_id, downloaded_timelines) in sync_status_updates {
info!(
"Registering downlloaded timelines for {tenant_id} {} timelines",
downloaded_timelines.len()
);
debug!("Downloaded timelines: {downloaded_timelines:?}");
for (tenant_id, status_updates) in sync_status_updates {
let repo = match load_local_repo(conf, tenant_id, remote_index) {
Ok(repo) => repo,
Err(e) => {
error!("Failed to load repo for tenant {tenant_id} Error: {e:?}",);
error!("Failed to load repo for tenant {tenant_id} Error: {e:?}");
continue;
}
};
match apply_timeline_remote_sync_status_updates(&repo, status_updates) {
match attach_downloaded_tenant(&repo, downloaded_timelines) {
Ok(()) => info!("successfully applied sync status updates for tenant {tenant_id}"),
Err(e) => error!(
"Failed to apply timeline sync timeline status updates for tenant {tenant_id}: {e:?}"
@@ -230,8 +243,6 @@ pub fn shutdown_all_tenants() {
drop(m);
thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiverManager), None, None);
thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), None, None);
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), None, None);
// Ok, no background threads running anymore. Flush any remaining data in
// memory to disk.
@@ -330,44 +341,12 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
}
(TenantState::Idle, TenantState::Active) => {
info!("activating tenant {tenant_id}");
let compactor_spawn_result = thread_mgr::spawn(
ThreadKind::Compactor,
Some(tenant_id),
None,
"Compactor thread",
false,
move || crate::tenant_threads::compact_loop(tenant_id),
);
if compactor_spawn_result.is_err() {
let mut m = tenants_state::write_tenants();
m.get_mut(&tenant_id)
.with_context(|| format!("Tenant not found for id {tenant_id}"))?
.state = old_state;
drop(m);
}
compactor_spawn_result?;
let gc_spawn_result = thread_mgr::spawn(
ThreadKind::GarbageCollector,
Some(tenant_id),
None,
"GC thread",
false,
move || crate::tenant_threads::gc_loop(tenant_id),
)
.map(|_thread_id| ()) // update the `Result::Ok` type to match the outer function's return signature
.with_context(|| format!("Failed to launch GC thread for tenant {tenant_id}"));
if let Err(e) = &gc_spawn_result {
let mut m = tenants_state::write_tenants();
m.get_mut(&tenant_id)
.with_context(|| format!("Tenant not found for id {tenant_id}"))?
.state = old_state;
drop(m);
error!("Failed to start GC thread for tenant {tenant_id}, stopping its checkpointer thread: {e:?}");
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
return gc_spawn_result;
}
// Spawn gc and compaction loops. The loops will shut themselves
// down when they notice that the tenant is inactive.
// TODO maybe use tokio::sync::watch instead?
crate::tenant_tasks::start_compaction_loop(tenant_id)?;
crate::tenant_tasks::start_gc_loop(tenant_id)?;
}
(TenantState::Idle, TenantState::Stopping) => {
info!("stopping idle tenant {tenant_id}");
@@ -379,8 +358,10 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
Some(tenant_id),
None,
);
thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), Some(tenant_id), None);
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
// Wait until all gc/compaction tasks finish
let repo = get_repository_for_tenant(tenant_id)?;
let _guard = repo.file_lock.write().unwrap();
}
}
@@ -419,33 +400,86 @@ pub fn get_local_timeline_with_load(
}
}
pub fn detach_timeline(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<()> {
// shutdown the timeline threads (this shuts down the walreceiver)
thread_mgr::shutdown_threads(None, Some(tenant_id), Some(timeline_id));
pub fn delete_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow::Result<()> {
// Start with the shutdown of timeline tasks (this shuts down the walreceiver).
// It is important that we do not take locks here and do not check whether the timeline exists,
// because if we hold tenants_state::write_tenants() while waiting for the threads to join,
// we cannot create new timelines and tenants; that can take quite some time and can even get
// stuck due to a bug, making the whole pageserver unavailable for some operations.
// This is how we deal with concurrent delete requests: shut everything down, wait for confirmation,
// and only then try to remove the timeline from the in-memory state. That is the point where
// concurrent requests synchronize and either fail with a not-found error or succeed.
let (sender, receiver) = std::sync::mpsc::channel::<()>();
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
id: ZTenantTimelineId::new(tenant_id, timeline_id),
join_confirmation_sender: sender,
});
debug!("waiting for wal receiver to shutdown");
let _ = receiver.recv();
debug!("wal receiver shutdown confirmed");
debug!("waiting for threads to shutdown");
thread_mgr::shutdown_threads(None, None, Some(timeline_id));
debug!("thread shutdown completed");
match tenants_state::write_tenants().get_mut(&tenant_id) {
Some(tenant) => {
tenant
.repo
.detach_timeline(timeline_id)
.context("Failed to detach inmem tenant timeline")?;
tenant.repo.delete_timeline(timeline_id)?;
tenant.local_timelines.remove(&timeline_id);
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach(
ZTenantTimelineId::new(tenant_id, timeline_id),
));
}
None => bail!("Tenant {tenant_id} not found in local tenant state"),
None => anyhow::bail!("Tenant {tenant_id} not found in local tenant state"),
}
let local_timeline_directory = conf.timeline_path(&timeline_id, &tenant_id);
std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
Ok(())
}
pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> anyhow::Result<()> {
set_tenant_state(tenant_id, TenantState::Stopping)?;
// shutdown the tenant and timeline threads: gc, compaction, page service threads)
thread_mgr::shutdown_threads(None, Some(tenant_id), None);
// FIXME should we protect somehow from starting new threads/walreceivers when tenant is in stopping state?
// send stop signal to wal receiver and collect join handles while holding the lock
let walreceiver_join_handles = {
let tenants = tenants_state::write_tenants();
let tenant = tenants.get(&tenant_id).context("tenant not found")?;
let mut walreceiver_join_handles = Vec::with_capacity(tenant.local_timelines.len());
for timeline_id in tenant.local_timelines.keys() {
let (sender, receiver) = std::sync::mpsc::channel::<()>();
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
id: ZTenantTimelineId::new(tenant_id, *timeline_id),
join_confirmation_sender: sender,
});
walreceiver_join_handles.push((*timeline_id, receiver));
}
// drop the tenants lock
walreceiver_join_handles
};
// wait for wal receivers to stop without holding the lock, because walreceiver
// will attempt to change tenant state which is protected by the same global tenants lock.
// TODO do we need a timeout here? how to handle it?
// recv_timeout is broken: https://github.com/rust-lang/rust/issues/94518#issuecomment-1057440631
// need to use crossbeam-channel
for (timeline_id, join_handle) in walreceiver_join_handles {
info!("waiting for wal receiver to shutdown timeline_id {timeline_id}");
join_handle.recv().context("failed to join walreceiver")?;
info!("wal receiver shutdown confirmed timeline_id {timeline_id}");
}
tenants_state::write_tenants().remove(&tenant_id);
// If removal fails there will be no way to successfully retry the detach,
// because the tenant no longer exists in the in-memory map. It has to be removed from the map
// before we remove the files, because it holds references to the repository, which in turn
// references ephemeral files that are deleted on drop. If we kept those references, the code
// would attempt to remove files that no longer exist. This could be fixed by adding a shutdown
// mechanism to the repository that cleans up temporary data, so nothing references ephemeral files.
let local_tenant_directory = conf.tenant_path(&tenant_id);
std::fs::remove_dir_all(&local_tenant_directory).with_context(|| {
format!(
"Failed to remove local timeline directory '{}'",
local_timeline_directory.display()
local_tenant_directory.display()
)
})?;
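A hedged sketch of the crossbeam-channel alternative mentioned in the TODO inside detach_tenant above; this is an assumption about one possible fix, not the current implementation (which uses std::sync::mpsc and waits without a timeout). crossbeam's recv_timeout bounds how long the caller waits for the walreceiver's shutdown confirmation.

use std::time::Duration;
use crossbeam_channel::{bounded, RecvTimeoutError};

fn wait_for_walreceiver_confirmation() {
    // The sender side would be handed to the walreceiver via LocalTimelineUpdate::Detach;
    // dropping it here stands in for "walreceiver exited and dropped its sender".
    let (sender, receiver) = bounded::<()>(1);
    drop(sender);

    match receiver.recv_timeout(Duration::from_secs(30)) {
        Ok(()) | Err(RecvTimeoutError::Disconnected) => {
            // Shutdown confirmed: either an explicit send or the sender was dropped on exit.
        }
        Err(RecvTimeoutError::Timeout) => {
            // Give up waiting; log and continue the detach on a best-effort basis.
        }
    }
}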
@@ -466,10 +500,10 @@ fn load_local_timeline(
));
page_tline.init_logical_size()?;
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach(
ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
Arc::clone(&page_tline),
));
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach {
id: ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
datadir: Arc::clone(&page_tline),
});
Ok(page_tline)
}
@@ -479,15 +513,27 @@ fn load_local_timeline(
pub struct TenantInfo {
#[serde_as(as = "DisplayFromStr")]
pub id: ZTenantId,
pub state: TenantState,
pub state: Option<TenantState>,
pub has_in_progress_downloads: Option<bool>,
}
pub fn list_tenants() -> Vec<TenantInfo> {
pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
tenants_state::read_tenants()
.iter()
.map(|(id, tenant)| TenantInfo {
id: *id,
state: tenant.state,
.map(|(id, tenant)| {
let has_in_progress_downloads = remote_index
.tenant_entry(id)
.map(|entry| entry.has_in_progress_downloads());
if has_in_progress_downloads.is_none() {
error!("timeline is not found in remote index while it is present in the tenants registry")
}
TenantInfo {
id: *id,
state: Some(tenant.state),
has_in_progress_downloads,
}
})
.collect()
}
@@ -499,74 +545,73 @@ pub fn list_tenants() -> Vec<TenantInfo> {
/// A timeline is categorized as broken when any of following conditions is true:
/// - failed to load the timeline's metadata
/// - the timeline's disk consistent LSN is zero
fn check_broken_timeline(repo: &LayeredRepository, timeline_id: ZTimelineId) -> anyhow::Result<()> {
let metadata = load_metadata(repo.conf, timeline_id, repo.tenant_id())
.context("failed to load metadata")?;
fn check_broken_timeline(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<()> {
let metadata =
load_metadata(conf, timeline_id, tenant_id).context("failed to load metadata")?;
// A timeline with zero disk consistent LSN can happen when the page server
// failed to checkpoint the timeline import data when creating that timeline.
if metadata.disk_consistent_lsn() == Lsn::INVALID {
bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
anyhow::bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
}
Ok(())
}
/// Note: all timelines are attached at once if and only if all of them are locally complete
fn init_local_repository(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
local_timeline_init_statuses: HashMap<ZTimelineId, LocalTimelineInitStatus>,
remote_index: &RemoteIndex,
) -> anyhow::Result<(), anyhow::Error> {
// initialize local tenant
let repo = load_local_repo(conf, tenant_id, remote_index)
.with_context(|| format!("Failed to load repo for tenant {tenant_id}"))?;
let mut status_updates = HashMap::with_capacity(local_timeline_init_statuses.len());
let mut timelines_to_attach = HashSet::new();
for (timeline_id, init_status) in local_timeline_init_statuses {
match init_status {
LocalTimelineInitStatus::LocallyComplete => {
debug!("timeline {timeline_id} for tenant {tenant_id} is locally complete, registering it in repository");
if let Err(err) = check_broken_timeline(&repo, timeline_id) {
info!(
"Found a broken timeline {timeline_id} (err={err:?}), skip registering it in repository"
);
} else {
status_updates.insert(timeline_id, TimelineSyncStatusUpdate::Downloaded);
}
check_broken_timeline(conf, tenant_id, timeline_id)
.context("found broken timeline")?;
timelines_to_attach.insert(timeline_id);
}
LocalTimelineInitStatus::NeedsSync => {
debug!(
"timeline {tenant_id} for tenant {timeline_id} needs sync, \
so skipped for adding into repository until sync is finished"
);
return Ok(());
}
}
}
// initialize local tenant
let repo = load_local_repo(conf, tenant_id, remote_index)
.with_context(|| format!("Failed to load repo for tenant {tenant_id}"))?;
// Let's fail loudly here to be on the safe side.
// XXX: It may be a better API to actually distinguish between repository startup
// and processing of newly downloaded timelines.
apply_timeline_remote_sync_status_updates(&repo, status_updates)
attach_downloaded_tenant(&repo, timelines_to_attach)
.with_context(|| format!("Failed to bootstrap timelines for tenant {tenant_id}"))?;
Ok(())
}
fn apply_timeline_remote_sync_status_updates(
fn attach_downloaded_tenant(
repo: &LayeredRepository,
status_updates: HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
downloaded_timelines: HashSet<ZTimelineId>,
) -> anyhow::Result<()> {
let mut registration_queue = Vec::with_capacity(status_updates.len());
let mut registration_queue = Vec::with_capacity(downloaded_timelines.len());
// first need to register the in-mem representations, to avoid missing ancestors during the local disk data registration
for (timeline_id, status_update) in status_updates {
repo.apply_timeline_remote_sync_status_update(timeline_id, status_update)
.with_context(|| {
format!("Failed to load timeline {timeline_id} into in-memory repository")
})?;
match status_update {
TimelineSyncStatusUpdate::Downloaded => registration_queue.push(timeline_id),
}
for timeline_id in downloaded_timelines {
repo.attach_timeline(timeline_id).with_context(|| {
format!("Failed to load timeline {timeline_id} into in-memory repository")
})?;
registration_queue.push(timeline_id);
}
for timeline_id in registration_queue {
@@ -574,7 +619,7 @@ fn apply_timeline_remote_sync_status_updates(
match tenants_state::write_tenants().get_mut(&tenant_id) {
Some(tenant) => match tenant.local_timelines.entry(timeline_id) {
Entry::Occupied(_) => {
bail!("Local timeline {timeline_id} already registered")
anyhow::bail!("Local timeline {timeline_id} already registered")
}
Entry::Vacant(v) => {
v.insert(load_local_timeline(repo, timeline_id).with_context(|| {
@@ -582,7 +627,7 @@ fn apply_timeline_remote_sync_status_updates(
})?);
}
},
None => bail!(
None => anyhow::bail!(
"Tenant {} not found in local tenant state",
repo.tenant_id()
),

View File

@@ -0,0 +1,286 @@
//! This module contains functions to serve per-tenant background processes,
//! such as compaction and GC
use std::collections::HashMap;
use std::ops::ControlFlow;
use std::time::Duration;
use crate::repository::Repository;
use crate::tenant_mgr::TenantState;
use crate::thread_mgr::ThreadKind;
use crate::{tenant_mgr, thread_mgr};
use anyhow::{self, Context};
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use metrics::{register_int_counter_vec, IntCounterVec};
use once_cell::sync::{Lazy, OnceCell};
use tokio::sync::mpsc;
use tokio::sync::watch;
use tracing::*;
use utils::zid::ZTenantId;
static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_task_events",
"Number of task start/stop/fail events.",
&["event"],
)
.expect("Failed to register tenant_task_events metric")
});
///
/// Compaction task's main loop
///
async fn compaction_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
loop {
trace!("waking up");
// Run blocking part of the task
let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
// Break if tenant is not active
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
return Ok(ControlFlow::Break(()));
}
// Break if we're not allowed to write to disk
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// TODO do this inside repo.compaction_iteration instead.
let _guard = match repo.file_lock.try_read() {
Ok(g) => g,
Err(_) => return Ok(ControlFlow::Break(())),
};
// Run compaction
let compaction_period = repo.get_compaction_period();
repo.compaction_iteration()?;
Ok(ControlFlow::Continue(compaction_period))
})
.await;
// Decide whether to sleep or break
let sleep_duration = match period {
Ok(Ok(ControlFlow::Continue(period))) => period,
Ok(Ok(ControlFlow::Break(()))) => break,
Ok(Err(e)) => {
error!("Compaction failed, retrying: {}", e);
Duration::from_secs(2)
}
Err(e) => {
error!("Compaction join error, retrying: {}", e);
Duration::from_secs(2)
}
};
// Sleep
tokio::select! {
_ = cancel.changed() => {
trace!("received cancellation request");
break;
},
_ = tokio::time::sleep(sleep_duration) => {},
}
}
trace!(
"compaction loop stopped. State is {:?}",
tenant_mgr::get_tenant_state(tenantid)
);
}
static START_GC_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
static START_COMPACTION_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
/// Spawn a task that will periodically schedule garbage collection until
/// the tenant becomes inactive. This should be called on tenant
/// activation.
pub fn start_gc_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
START_GC_LOOP
.get()
.context("Failed to get START_GC_LOOP")?
.blocking_send(tenantid)
.context("Failed to send to START_GC_LOOP channel")?;
Ok(())
}
/// Spawn a task that will periodically schedule compaction until
/// the tenant becomes inactive. This should be called on tenant
/// activation.
pub fn start_compaction_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
START_COMPACTION_LOOP
.get()
.context("failed to get START_COMPACTION_LOOP")?
.blocking_send(tenantid)
.context("failed to send to START_COMPACTION_LOOP")?;
Ok(())
}
/// Spawn the TenantTaskManager
/// This needs to be called before start_gc_loop or start_compaction_loop
pub fn init_tenant_task_pool() -> anyhow::Result<()> {
let runtime = tokio::runtime::Builder::new_multi_thread()
.thread_name("tenant-task-worker")
.enable_all()
.build()?;
let (gc_send, mut gc_recv) = mpsc::channel::<ZTenantId>(100);
START_GC_LOOP
.set(gc_send)
.expect("Failed to set START_GC_LOOP");
let (compaction_send, mut compaction_recv) = mpsc::channel::<ZTenantId>(100);
START_COMPACTION_LOOP
.set(compaction_send)
.expect("Failed to set START_COMPACTION_LOOP");
// TODO this is getting repetitive
let mut gc_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
let mut compaction_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
thread_mgr::spawn(
ThreadKind::TenantTaskManager,
None,
None,
"Tenant task manager main thread",
true,
move || {
runtime.block_on(async move {
let mut futures = FuturesUnordered::new();
loop {
tokio::select! {
_ = thread_mgr::shutdown_watcher() => {
// Send cancellation to all tasks
for (_, cancel) in gc_loops.drain() {
cancel.send(()).ok();
}
for (_, cancel) in compaction_loops.drain() {
cancel.send(()).ok();
}
// Exit after all tasks finish
while let Some(result) = futures.next().await {
match result {
Ok(()) => {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
},
Err(e) => {
TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
error!("loop join error {}", e)
},
}
}
break;
},
tenantid = gc_recv.recv() => {
let tenantid = tenantid.expect("Gc task channel closed unexpectedly");
// Spawn new task, request cancellation of the old one if exists
let (cancel_send, cancel_recv) = watch::channel(());
let handle = tokio::spawn(gc_loop(tenantid, cancel_recv)
.instrument(info_span!("gc loop", tenant = %tenantid)));
if let Some(old_cancel_send) = gc_loops.insert(tenantid, cancel_send) {
old_cancel_send.send(()).ok();
}
// Update metrics, remember handle
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
futures.push(handle);
},
tenantid = compaction_recv.recv() => {
let tenantid = tenantid.expect("Compaction task channel closed unexpectedly");
// Spawn new task, request cancellation of the old one if exists
let (cancel_send, cancel_recv) = watch::channel(());
let handle = tokio::spawn(compaction_loop(tenantid, cancel_recv)
.instrument(info_span!("compaction loop", tenant = %tenantid)));
if let Some(old_cancel_send) = compaction_loops.insert(tenantid, cancel_send) {
old_cancel_send.send(()).ok();
}
// Update metrics, remember handle
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
futures.push(handle);
},
result = futures.next() => {
// Log and count any unhandled panics
match result {
Some(Ok(())) => {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
},
Some(Err(e)) => {
TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
error!("loop join error {}", e)
},
None => {},
};
},
}
}
});
Ok(())
},
)?;
Ok(())
}
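// Hedged usage sketch (illustration only, not part of this diff): the doc comments
// above imply this call order: init_tenant_task_pool() once at pageserver startup,
// then start_gc_loop()/start_compaction_loop() whenever a tenant becomes Active.
// The wrapper below is hypothetical; note that the start_* functions use
// blocking_send(), so they are meant to be called from synchronous (non-async) code.
fn activate_tenant_tasks(tenantid: ZTenantId) -> anyhow::Result<()> {
    // Assumes init_tenant_task_pool() has already run; otherwise the
    // START_GC_LOOP / START_COMPACTION_LOOP channels are unset and these calls fail.
    start_gc_loop(tenantid)?;
    start_compaction_loop(tenantid)?;
    Ok(())
}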
///
/// GC task's main loop
///
async fn gc_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
loop {
trace!("waking up");
// Run blocking part of the task
let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
// Break if tenant is not active
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
return Ok(ControlFlow::Break(()));
}
// Break if we're not allowed to write to disk
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// TODO do this inside repo.gc_iteration instead.
let _guard = match repo.file_lock.try_read() {
Ok(g) => g,
Err(_) => return Ok(ControlFlow::Break(())),
};
// Run gc
let gc_period = repo.get_gc_period();
let gc_horizon = repo.get_gc_horizon();
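// A gc_horizon of 0 disables GC for this tenant; otherwise garbage collect old
// files that are no longer needed for PITR (bounded by repo.get_pitr_interval()).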
if gc_horizon > 0 {
repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
}
Ok(ControlFlow::Continue(gc_period))
})
.await;
// Decide whether to sleep or break
let sleep_duration = match period {
Ok(Ok(ControlFlow::Continue(period))) => period,
Ok(Ok(ControlFlow::Break(()))) => break,
Ok(Err(e)) => {
error!("Gc failed, retrying: {}", e);
Duration::from_secs(2)
}
Err(e) => {
error!("Gc join error, retrying: {}", e);
Duration::from_secs(2)
}
};
// Sleep
tokio::select! {
_ = cancel.changed() => {
trace!("received cancellation request");
break;
},
_ = tokio::time::sleep(sleep_duration) => {},
}
}
trace!(
"GC loop stopped. State is {:?}",
tenant_mgr::get_tenant_state(tenantid)
);
}


@@ -1,79 +0,0 @@
//! This module contains functions to serve per-tenant background processes,
//! such as compaction and GC
use crate::repository::Repository;
use crate::tenant_mgr;
use crate::tenant_mgr::TenantState;
use anyhow::Result;
use std::time::Duration;
use tracing::*;
use utils::zid::ZTenantId;
///
/// Compaction thread's main loop
///
pub fn compact_loop(tenantid: ZTenantId) -> Result<()> {
if let Err(err) = compact_loop_ext(tenantid) {
error!("compact loop terminated with error: {:?}", err);
Err(err)
} else {
Ok(())
}
}
fn compact_loop_ext(tenantid: ZTenantId) -> Result<()> {
loop {
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
break;
}
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
let compaction_period = repo.get_compaction_period();
std::thread::sleep(compaction_period);
trace!("compaction thread for tenant {} waking up", tenantid);
// Compact timelines
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
repo.compaction_iteration()?;
}
trace!(
"compaction thread stopped for tenant {} state is {:?}",
tenantid,
tenant_mgr::get_tenant_state(tenantid)
);
Ok(())
}
///
/// GC thread's main loop
///
pub fn gc_loop(tenantid: ZTenantId) -> Result<()> {
loop {
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
break;
}
trace!("gc thread for tenant {} waking up", tenantid);
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
let gc_horizon = repo.get_gc_horizon();
// Garbage collect old files that are not needed for PITR anymore
if gc_horizon > 0 {
repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
}
// TODO Write this in a more adequate way using
// condvar.wait_timeout() or something
let mut sleep_time = repo.get_gc_period().as_secs();
while sleep_time > 0 && tenant_mgr::get_tenant_state(tenantid) == Some(TenantState::Active)
{
sleep_time -= 1;
std::thread::sleep(Duration::from_secs(1));
}
}
trace!(
"GC thread stopped for tenant {} state is {:?}",
tenantid,
tenant_mgr::get_tenant_state(tenantid)
);
Ok(())
}


@@ -94,11 +94,8 @@ pub enum ThreadKind {
// Main walreceiver manager thread that ensures that every timeline spawns a connection to safekeeper, to fetch WAL.
WalReceiverManager,
// Thread that handles compaction of all timelines for a tenant.
Compactor,
// Thread that handles GC of a tenant
GarbageCollector,
// Thread that schedules new compaction and gc jobs
TenantTaskManager,
// Thread that flushes frozen in-memory layers to disk
LayerFlushThread,
@@ -108,15 +105,21 @@ pub enum ThreadKind {
StorageSync,
}
struct MutableThreadState {
/// Tenant and timeline that this thread is associated with.
tenant_id: Option<ZTenantId>,
timeline_id: Option<ZTimelineId>,
/// Handle for waiting for the thread to exit. It can be None if the
/// thread has already exited.
join_handle: Option<JoinHandle<()>>,
}
struct PageServerThread {
_thread_id: u64,
kind: ThreadKind,
/// Tenant and timeline that this thread is associated with.
tenant_id: Option<ZTenantId>,
timeline_id: Option<ZTimelineId>,
name: String,
// To request thread shutdown, set the flag, and send a dummy message to the
@@ -124,9 +127,7 @@ struct PageServerThread {
shutdown_requested: AtomicBool,
shutdown_tx: watch::Sender<()>,
/// Handle for waiting for the thread to exit. It can be None if the
/// thread has already exited.
join_handle: Mutex<Option<JoinHandle<()>>>,
mutable: Mutex<MutableThreadState>,
}
/// Launch a new thread
@@ -145,29 +146,27 @@ where
{
let (shutdown_tx, shutdown_rx) = watch::channel(());
let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
let thread = PageServerThread {
let thread = Arc::new(PageServerThread {
_thread_id: thread_id,
kind,
tenant_id,
timeline_id,
name: name.to_string(),
shutdown_requested: AtomicBool::new(false),
shutdown_tx,
join_handle: Mutex::new(None),
};
let thread_rc = Arc::new(thread);
let mut jh_guard = thread_rc.join_handle.lock().unwrap();
mutable: Mutex::new(MutableThreadState {
tenant_id,
timeline_id,
join_handle: None,
}),
});
THREADS
.lock()
.unwrap()
.insert(thread_id, Arc::clone(&thread_rc));
.insert(thread_id, Arc::clone(&thread));
let thread_rc2 = Arc::clone(&thread_rc);
let mut thread_mut = thread.mutable.lock().unwrap();
let thread_cloned = Arc::clone(&thread);
let thread_name = name.to_string();
let join_handle = match thread::Builder::new()
.name(name.to_string())
@@ -175,7 +174,7 @@ where
thread_wrapper(
thread_name,
thread_id,
thread_rc2,
thread_cloned,
shutdown_rx,
shutdown_process_on_error,
f,
@@ -189,8 +188,8 @@ where
return Err(err);
}
};
*jh_guard = Some(join_handle);
drop(jh_guard);
thread_mut.join_handle = Some(join_handle);
drop(thread_mut);
// The thread is now running. Nothing more to do here
Ok(thread_id)
@@ -229,19 +228,20 @@ fn thread_wrapper<F>(
.remove(&thread_id)
.expect("no thread in registry");
let thread_mut = thread.mutable.lock().unwrap();
match result {
Ok(Ok(())) => debug!("Thread '{}' exited normally", thread_name),
Ok(Err(err)) => {
if shutdown_process_on_error {
error!(
"Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
shutdown_pageserver(1);
} else {
error!(
"Thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
}
}
@@ -249,19 +249,29 @@ fn thread_wrapper<F>(
if shutdown_process_on_error {
error!(
"Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
shutdown_pageserver(1);
} else {
error!(
"Thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
thread_name, thread.tenant_id, thread.timeline_id, err
thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
);
}
}
}
}
// Expected to be called from within the thread itself (i.e. a thread spawned via
// thread_mgr::spawn()), to update which tenant/timeline the current thread is associated with.
pub fn associate_with(tenant_id: Option<ZTenantId>, timeline_id: Option<ZTimelineId>) {
CURRENT_THREAD.with(|ct| {
let borrowed = ct.borrow();
let mut thread_mut = borrowed.as_ref().unwrap().mutable.lock().unwrap();
thread_mut.tenant_id = tenant_id;
thread_mut.timeline_id = timeline_id;
});
}
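// Hedged usage sketch (illustration only, not part of this diff): associate_with()
// updates the tenant/timeline recorded for the current thread, so it is useful when
// a thread is spawned via thread_mgr::spawn() with tenant_id/timeline_id set to None
// and only learns them later. The helper below is hypothetical.
fn bind_current_thread(tenant_id: ZTenantId, timeline_id: ZTimelineId) {
    // Must run on a thread spawned through thread_mgr::spawn(), since associate_with()
    // relies on the CURRENT_THREAD thread-local being populated.
    associate_with(Some(tenant_id), Some(timeline_id));
}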
/// Is there a thread running that matches the criteria
/// Signal and wait for threads to shut down.
@@ -285,9 +295,10 @@ pub fn shutdown_threads(
let threads = THREADS.lock().unwrap();
for thread in threads.values() {
let thread_mut = thread.mutable.lock().unwrap();
if (kind.is_none() || Some(thread.kind) == kind)
&& (tenant_id.is_none() || thread.tenant_id == tenant_id)
&& (timeline_id.is_none() || thread.timeline_id == timeline_id)
&& (tenant_id.is_none() || thread_mut.tenant_id == tenant_id)
&& (timeline_id.is_none() || thread_mut.timeline_id == timeline_id)
{
thread.shutdown_requested.store(true, Ordering::Relaxed);
// FIXME: handle error?
@@ -298,8 +309,10 @@ pub fn shutdown_threads(
drop(threads);
for thread in victim_threads {
let mut thread_mut = thread.mutable.lock().unwrap();
info!("waiting for {} to shut down", thread.name);
if let Some(join_handle) = thread.join_handle.lock().unwrap().take() {
if let Some(join_handle) = thread_mut.join_handle.take() {
drop(thread_mut);
let _ = join_handle.join();
} else {
// The thread had not even fully started yet. Or it was shut down


@@ -202,7 +202,7 @@ pub fn create_repo(
// anymore, but I think that could still happen.
let wal_redo_manager = Arc::new(crate::walredo::DummyRedoManager {});
(wal_redo_manager as _, RemoteIndex::empty())
(wal_redo_manager as _, RemoteIndex::default())
}
};
@@ -347,7 +347,7 @@ pub(crate) fn create_timeline(
tenant_id: ZTenantId,
new_timeline_id: Option<ZTimelineId>,
ancestor_timeline_id: Option<ZTimelineId>,
ancestor_start_lsn: Option<Lsn>,
mut ancestor_start_lsn: Option<Lsn>,
) -> Result<Option<TimelineInfo>> {
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
@@ -357,41 +357,35 @@ pub(crate) fn create_timeline(
return Ok(None);
}
let mut start_lsn = ancestor_start_lsn.unwrap_or(Lsn(0));
let new_timeline_info = match ancestor_timeline_id {
Some(ancestor_timeline_id) => {
let ancestor_timeline = repo
.get_timeline_load(ancestor_timeline_id)
.context("Cannot branch off the timeline that's not present locally")?;
if start_lsn == Lsn(0) {
// Find end of WAL on the old timeline
let end_of_wal = ancestor_timeline.get_last_record_lsn();
info!("branching at end of WAL: {}", end_of_wal);
start_lsn = end_of_wal;
} else {
if let Some(lsn) = ancestor_start_lsn.as_mut() {
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
ancestor_timeline.wait_lsn(start_lsn)?;
}
start_lsn = start_lsn.align();
*lsn = lsn.align();
ancestor_timeline.wait_lsn(*lsn)?;
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > start_lsn {
// can we safely just branch from the ancestor instead?
anyhow::bail!(
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > *lsn {
// can we safely just branch from the ancestor instead?
anyhow::bail!(
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
start_lsn,
lsn,
ancestor_timeline_id,
ancestor_ancestor_lsn,
);
}
}
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, start_lsn)?;
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?;
// load the timeline into memory
let loaded_timeline =
tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,5 +1,5 @@
//! Actual Postgres connection handler to stream WAL to the server.
//! Runs as a separate, cancellable Tokio task.
use std::{
str::FromStr,
sync::Arc,
@@ -10,113 +10,29 @@ use anyhow::{bail, ensure, Context};
use bytes::BytesMut;
use fail::fail_point;
use postgres::{SimpleQueryMessage, SimpleQueryRow};
use postgres_ffi::waldecoder::WalStreamDecoder;
use postgres_protocol::message::backend::ReplicationMessage;
use postgres_types::PgLsn;
use tokio::{pin, select, sync::watch, time};
use tokio_postgres::{replication::ReplicationStream, Client};
use tokio_stream::StreamExt;
use tracing::{debug, error, info, info_span, trace, warn, Instrument};
use utils::{
lsn::Lsn,
pq_proto::ZenithFeedback,
zid::{NodeId, ZTenantTimelineId},
};
use super::TaskEvent;
use crate::{
http::models::WalReceiverEntry,
repository::{Repository, Timeline},
tenant_mgr,
walingest::WalIngest,
};
use postgres_ffi::waldecoder::WalStreamDecoder;
use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};
#[derive(Debug, Clone)]
pub enum WalConnectionEvent {
Started,
NewWal(ZenithFeedback),
End(Result<(), String>),
}
/// A wrapper around a standalone Tokio task, to poll its updates or cancel the task.
#[derive(Debug)]
pub struct WalReceiverConnection {
handle: tokio::task::JoinHandle<()>,
cancellation: watch::Sender<()>,
events_receiver: watch::Receiver<WalConnectionEvent>,
}
impl WalReceiverConnection {
/// Initializes the connection task, returning a set of handles on top of it.
/// The task is started immediately after creation and fails if no connection is established within the given timeout.
pub fn open(
id: ZTenantTimelineId,
safekeeper_id: NodeId,
wal_producer_connstr: String,
connect_timeout: Duration,
) -> Self {
let (cancellation, mut cancellation_receiver) = watch::channel(());
let (events_sender, events_receiver) = watch::channel(WalConnectionEvent::Started);
let handle = tokio::spawn(
async move {
let connection_result = handle_walreceiver_connection(
id,
&wal_producer_connstr,
&events_sender,
&mut cancellation_receiver,
connect_timeout,
)
.await
.map_err(|e| {
format!("Walreceiver connection for id {id} failed with error: {e:#}")
});
match &connection_result {
Ok(()) => {
debug!("Walreceiver connection for id {id} ended successfully")
}
Err(e) => warn!("{e}"),
}
events_sender
.send(WalConnectionEvent::End(connection_result))
.ok();
}
.instrument(info_span!("safekeeper_handle", sk = %safekeeper_id)),
);
Self {
handle,
cancellation,
events_receiver,
}
}
/// Polls for the next WAL receiver event, if there's any available since the last check.
/// Blocks if there's no new event available, returns `None` if no new events will ever occur.
/// Only the last event is returned, all events received between observatins are lost.
pub async fn next_event(&mut self) -> Option<WalConnectionEvent> {
match self.events_receiver.changed().await {
Ok(()) => Some(self.events_receiver.borrow().clone()),
Err(_cancellation_error) => None,
}
}
/// Gracefully aborts the current WAL streaming task, waiting for the WAL currently being streamed to finish.
pub async fn shutdown(&mut self) -> anyhow::Result<()> {
self.cancellation.send(()).ok();
let handle = &mut self.handle;
handle
.await
.context("Failed to join on a walreceiver connection task")?;
Ok(())
}
}
async fn handle_walreceiver_connection(
/// Opens a connection to the given WAL producer and streams the WAL, sending progress messages during streaming.
pub async fn handle_walreceiver_connection(
id: ZTenantTimelineId,
wal_producer_connstr: &str,
events_sender: &watch::Sender<WalConnectionEvent>,
cancellation: &mut watch::Receiver<()>,
events_sender: &watch::Sender<TaskEvent<ReplicationFeedback>>,
mut cancellation: watch::Receiver<()>,
connect_timeout: Duration,
) -> anyhow::Result<()> {
// Connect to the database in replication mode.
@@ -214,8 +130,6 @@ async fn handle_walreceiver_connection(
while let Some(replication_message) = {
select! {
// check for shutdown first
biased;
_ = cancellation.changed() => {
info!("walreceiver interrupted");
None
@@ -328,7 +242,7 @@ async fn handle_walreceiver_connection(
// Send zenith feedback message.
// Regular standby_status_update fields are put into this message.
let zenith_status_update = ZenithFeedback {
let zenith_status_update = ReplicationFeedback {
current_timeline_size: timeline.get_current_logical_size() as u64,
ps_writelsn: write_lsn,
ps_flushlsn: flush_lsn,
@@ -344,7 +258,7 @@ async fn handle_walreceiver_connection(
.as_mut()
.zenith_status_update(data.len() as u64, &data)
.await?;
if let Err(e) = events_sender.send(WalConnectionEvent::NewWal(zenith_status_update)) {
if let Err(e) = events_sender.send(TaskEvent::NewEvent(zenith_status_update)) {
warn!("Wal connection event listener dropped, aborting the connection: {e}");
return Ok(());
}


@@ -623,6 +623,7 @@ impl PostgresRedoProcess {
.env_clear()
.env("LD_LIBRARY_PATH", conf.pg_lib_dir())
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir())
.close_fds()
.output()
.map_err(|e| Error::new(e.kind(), format!("failed to execute initdb: {}", e)))?;


@@ -39,6 +39,8 @@ utils = { path = "../libs/utils" }
metrics = { path = "../libs/metrics" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
x509-parser = "0.13.2"
[dev-dependencies]
rcgen = "0.8.14"
rstest = "0.12"


@@ -19,7 +19,7 @@ pub type Result<T> = std::result::Result<T, ConsoleAuthError>;
#[derive(Debug, Error)]
pub enum ConsoleAuthError {
#[error(transparent)]
BadProjectName(#[from] auth::credentials::ProjectNameError),
BadProjectName(#[from] auth::credentials::ClientCredsParseError),
// We shouldn't include the actual secret here.
#[error("Bad authentication secret")]
@@ -49,6 +49,12 @@ impl UserFacingError for ConsoleAuthError {
}
}
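// This conversion lets the console API code below write `self.creds.project_name.as_ref()?`:
// Result::as_ref() yields a Result<&String, &ClientCredsParseError>, and the `?` operator
// needs a From impl for the borrowed error to turn it into a ConsoleAuthError.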
impl From<&auth::credentials::ClientCredsParseError> for ConsoleAuthError {
fn from(e: &auth::credentials::ClientCredsParseError) -> Self {
ConsoleAuthError::BadProjectName(e.clone())
}
}
// TODO: convert into an enum with "error"
#[derive(Serialize, Deserialize, Debug)]
struct GetRoleSecretResponse {
@@ -74,18 +80,12 @@ pub enum AuthInfo {
pub(super) struct Api<'a> {
endpoint: &'a ApiUrl,
creds: &'a ClientCredentials,
/// Cache project name, since we'll need it several times.
project: &'a str,
}
impl<'a> Api<'a> {
/// Construct an API object containing the auth parameters.
pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Result<Self> {
Ok(Self {
endpoint,
creds,
project: creds.project_name()?,
})
Ok(Self { endpoint, creds })
}
/// Authenticate the existing user or throw an error.
@@ -100,7 +100,7 @@ impl<'a> Api<'a> {
let mut url = self.endpoint.clone();
url.path_segments_mut().push("proxy_get_role_secret");
url.query_pairs_mut()
.append_pair("project", self.project)
.append_pair("project", self.creds.project_name.as_ref()?)
.append_pair("role", &self.creds.user);
// TODO: use a proper logger
@@ -123,7 +123,8 @@ impl<'a> Api<'a> {
async fn wake_compute(&self) -> Result<DatabaseInfo> {
let mut url = self.endpoint.clone();
url.path_segments_mut().push("proxy_wake_compute");
url.query_pairs_mut().append_pair("project", self.project);
let project_name = self.creds.project_name.as_ref()?;
url.query_pairs_mut().append_pair("project", project_name);
// TODO: use a proper logger
println!("cplane request: {url}");


@@ -8,10 +8,32 @@ use std::collections::HashMap;
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
#[derive(Debug, Error)]
#[derive(Debug, Error, PartialEq, Eq, Clone)]
pub enum ClientCredsParseError {
#[error("Parameter `{0}` is missing in startup packet")]
#[error("Parameter `{0}` is missing in startup packet.")]
MissingKey(&'static str),
#[error(
"Project name is not specified. \
EITHER please upgrade the postgres client library (libpq) for SNI support \
OR pass the project name as a parameter: '&options=project%3D<project-name>'."
)]
MissingSNIAndProjectName,
#[error("Inconsistent project name inferred from SNI ('{0}') and project option ('{1}').")]
InconsistentProjectNameAndSNI(String, String),
#[error("Common name is not set.")]
CommonNameNotSet,
#[error(
"SNI ('{1}') inconsistently formatted with respect to common name ('{0}'). \
SNI should be formatted as '<project-name>.<common-name>'."
)]
InconsistentCommonNameAndSNI(String, String),
#[error("Project name ('{0}') must contain only alphanumeric characters and hyphens ('-').")]
ProjectNameContainsIllegalChars(String),
}
impl UserFacingError for ClientCredsParseError {}
@@ -22,10 +44,7 @@ impl UserFacingError for ClientCredsParseError {}
pub struct ClientCredentials {
pub user: String,
pub dbname: String,
// New console API requires SNI info to determine the cluster name.
// Other Auth backends don't need it.
pub sni_data: Option<String>,
pub project_name: Result<String, ClientCredsParseError>,
}
impl ClientCredentials {
@@ -33,51 +52,30 @@ impl ClientCredentials {
// This logic will likely change in the future.
self.user.ends_with("@zenith")
}
}
#[derive(Debug, Error)]
pub enum ProjectNameError {
#[error("SNI is missing, please upgrade the postgres client library")]
Missing,
#[error("SNI is malformed")]
Bad,
}
impl UserFacingError for ProjectNameError {}
impl ClientCredentials {
/// Determine project name from SNI.
pub fn project_name(&self) -> Result<&str, ProjectNameError> {
// Currently project name is passed as a top level domain
let sni = self.sni_data.as_ref().ok_or(ProjectNameError::Missing)?;
let (first, _) = sni.split_once('.').ok_or(ProjectNameError::Bad)?;
Ok(first)
}
}
impl TryFrom<HashMap<String, String>> for ClientCredentials {
type Error = ClientCredsParseError;
fn try_from(mut value: HashMap<String, String>) -> Result<Self, Self::Error> {
pub fn parse(
mut options: HashMap<String, String>,
sni_data: Option<&str>,
common_name: Option<&str>,
) -> Result<Self, ClientCredsParseError> {
let mut get_param = |key| {
value
options
.remove(key)
.ok_or(ClientCredsParseError::MissingKey(key))
};
let user = get_param("user")?;
let dbname = get_param("database")?;
let project_name = get_param("project").ok();
let project_name = get_project_name(sni_data, common_name, project_name.as_deref());
Ok(Self {
user,
dbname,
sni_data: None,
project_name,
})
}
}
impl ClientCredentials {
/// Use credentials to authenticate the user.
pub async fn authenticate(
self,
@@ -88,3 +86,244 @@ impl ClientCredentials {
super::backend::handle_user(config, client, self).await
}
}
/// Infer the project name from sni_data.
fn project_name_from_sni_data(
sni_data: &str,
common_name: &str,
) -> Result<String, ClientCredsParseError> {
let common_name_with_dot = format!(".{common_name}");
// check that common_name, preceded by a dot, is the actual suffix of sni_data
if !sni_data.ends_with(&common_name_with_dot) {
return Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
common_name.to_string(),
sni_data.to_string(),
));
}
// return sni_data without the common name suffix.
Ok(sni_data
.strip_suffix(&common_name_with_dot)
.unwrap()
.to_string())
}
#[cfg(test)]
mod tests_for_project_name_from_sni_data {
use super::*;
#[test]
fn passing() {
let target_project_name = "my-project-123";
let common_name = "localtest.me";
let sni_data = format!("{target_project_name}.{common_name}");
assert_eq!(
project_name_from_sni_data(&sni_data, common_name),
Ok(target_project_name.to_string())
);
}
#[test]
fn throws_inconsistent_common_name_and_sni_data() {
let target_project_name = "my-project-123";
let common_name = "localtest.me";
let wrong_suffix = "wrongtest.me";
assert_eq!(common_name.len(), wrong_suffix.len());
let wrong_common_name = format!("wrong{wrong_suffix}");
let sni_data = format!("{target_project_name}.{wrong_common_name}");
assert_eq!(
project_name_from_sni_data(&sni_data, common_name),
Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
common_name.to_string(),
sni_data
))
);
}
}
/// Determine the project name from SNI or from the project name passed via the startup options.
fn get_project_name(
sni_data: Option<&str>,
common_name: Option<&str>,
project_name: Option<&str>,
) -> Result<String, ClientCredsParseError> {
// determine the project name from sni_data if it exists, otherwise from project_name.
let ret = match sni_data {
Some(sni_data) => {
let common_name = common_name.ok_or(ClientCredsParseError::CommonNameNotSet)?;
let project_name_from_sni = project_name_from_sni_data(sni_data, common_name)?;
// check invariant: project name from options and from sni should match
if let Some(project_name) = &project_name {
if !project_name_from_sni.eq(project_name) {
return Err(ClientCredsParseError::InconsistentProjectNameAndSNI(
project_name_from_sni,
project_name.to_string(),
));
}
}
project_name_from_sni
}
None => project_name
.ok_or(ClientCredsParseError::MissingSNIAndProjectName)?
.to_string(),
};
// check formatting invariant: project name must contain only alphanumeric characters and hyphens.
if !ret.chars().all(|x: char| x.is_alphanumeric() || x == '-') {
return Err(ClientCredsParseError::ProjectNameContainsIllegalChars(ret));
}
Ok(ret)
}
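// Hedged usage sketch (illustration only, not part of this diff): exercises the new
// ClientCredentials::parse() entry point with the same precedence the tests below
// check for get_project_name(): SNI wins when present, otherwise the "project"
// startup option is used. The literal values are made up.
#[test]
fn parse_project_name_from_option_only_example() {
    let mut options = HashMap::new();
    options.insert("user".to_owned(), "alice".to_owned());
    options.insert("database".to_owned(), "main".to_owned());
    options.insert("project".to_owned(), "my-project-123".to_owned());
    // No SNI supplied: the project name must come from the "project" option.
    let creds = ClientCredentials::parse(options, None, Some("localtest.me")).unwrap();
    assert_eq!(creds.user, "alice");
    assert_eq!(creds.project_name, Ok("my-project-123".to_owned()));
}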
#[cfg(test)]
mod tests_for_project_name_only {
use super::*;
#[test]
fn passing_from_sni_data_only() {
let target_project_name = "my-project-123";
let common_name = "localtest.me";
let sni_data = format!("{target_project_name}.{common_name}");
assert_eq!(
get_project_name(Some(&sni_data), Some(common_name), None),
Ok(target_project_name.to_string())
);
}
#[test]
fn throws_project_name_contains_illegal_chars_from_sni_data_only() {
let project_name_prefix = "my-project";
let project_name_suffix = "123";
let common_name = "localtest.me";
for illegal_char_id in 0..256 {
let illegal_char = char::from_u32(illegal_char_id).unwrap();
if !(illegal_char.is_alphanumeric() || illegal_char == '-')
&& illegal_char.to_string().len() == 1
{
let target_project_name =
format!("{project_name_prefix}{illegal_char}{project_name_suffix}");
let sni_data = format!("{target_project_name}.{common_name}");
assert_eq!(
get_project_name(Some(&sni_data), Some(common_name), None),
Err(ClientCredsParseError::ProjectNameContainsIllegalChars(
target_project_name
))
);
}
}
}
#[test]
fn passing_from_project_name_only() {
let target_project_name = "my-project-123";
let common_names = [Some("localtest.me"), None];
for common_name in common_names {
assert_eq!(
get_project_name(None, common_name, Some(target_project_name)),
Ok(target_project_name.to_string())
);
}
}
#[test]
fn throws_project_name_contains_illegal_chars_from_project_name_only() {
let project_name_prefix = "my-project";
let project_name_suffix = "123";
let common_names = [Some("localtest.me"), None];
for common_name in common_names {
for illegal_char_id in 0..256 {
let illegal_char: char = char::from_u32(illegal_char_id).unwrap();
if !(illegal_char.is_alphanumeric() || illegal_char == '-')
&& illegal_char.to_string().len() == 1
{
let target_project_name =
format!("{project_name_prefix}{illegal_char}{project_name_suffix}");
assert_eq!(
get_project_name(None, common_name, Some(&target_project_name)),
Err(ClientCredsParseError::ProjectNameContainsIllegalChars(
target_project_name
))
);
}
}
}
}
#[test]
fn passing_from_sni_data_and_project_name() {
let target_project_name = "my-project-123";
let common_name = "localtest.me";
let sni_data = format!("{target_project_name}.{common_name}");
assert_eq!(
get_project_name(
Some(&sni_data),
Some(common_name),
Some(target_project_name)
),
Ok(target_project_name.to_string())
);
}
#[test]
fn throws_inconsistent_project_name_and_sni() {
let project_name_param = "my-project-123";
let wrong_project_name = "not-my-project-123";
let common_name = "localtest.me";
let sni_data = format!("{wrong_project_name}.{common_name}");
assert_eq!(
get_project_name(Some(&sni_data), Some(common_name), Some(project_name_param)),
Err(ClientCredsParseError::InconsistentProjectNameAndSNI(
wrong_project_name.to_string(),
project_name_param.to_string()
))
);
}
#[test]
fn throws_common_name_not_set() {
let target_project_name = "my-project-123";
let wrong_project_name = "not-my-project-123";
let common_name = "localtest.me";
let sni_datas = [
Some(format!("{wrong_project_name}.{common_name}")),
Some(format!("{target_project_name}.{common_name}")),
];
let project_names = [None, Some(target_project_name)];
for sni_data in sni_datas {
for project_name_param in project_names {
assert_eq!(
get_project_name(sni_data.as_deref(), None, project_name_param),
Err(ClientCredsParseError::CommonNameNotSet)
);
}
}
}
#[test]
fn throws_inconsistent_common_name_and_sni_data() {
let target_project_name = "my-project-123";
let wrong_project_name = "not-my-project-123";
let common_name = "localtest.me";
let wrong_suffix = "wrongtest.me";
assert_eq!(common_name.len(), wrong_suffix.len());
let wrong_common_name = format!("wrong{wrong_suffix}");
let sni_datas = [
Some(format!("{wrong_project_name}.{wrong_common_name}")),
Some(format!("{target_project_name}.{wrong_common_name}")),
];
let project_names = [None, Some(target_project_name)];
for project_name_param in project_names {
for sni_data in &sni_datas {
assert_eq!(
get_project_name(sni_data.as_deref(), Some(common_name), project_name_param),
Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
common_name.to_string(),
sni_data.clone().unwrap().to_string()
))
);
}
}
}
}

Some files were not shown because too many files have changed in this diff