Add script export_import_between_pageservers.py to migrate projects between pageservers

Do not overwrite an existing image layer.
See GitHub issues #1594 and #1690

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2026-03-05 09:20:38 +00:00 · 2022-07-05 15:27:31 +03:00 · 2022-07-05 14:45:31 +03:00 · 2022-07-05 12:22:58 +01:00 · 2022-07-05 10:55:03 +03:00 · 2022-07-05 02:06:40 -04:00
149 changed files with 7179 additions and 2987 deletions
--- a/.circleci/ansible/ansible.cfg
+++ b/.circleci/ansible/ansible.cfg
@@ -6,5 +6,7 @@ timeout = 30

 [ssh_connection]
 ssh_args   = -F ./ansible.ssh.cfg
-scp_if_ssh = True
+# Teleport doesn't support sftp yet (https://github.com/gravitational/teleport/issues/7127),
+# and scp did not work for me either
+transfer_method = piped
 pipelining = True
--- a/.circleci/ansible/ansible.ssh.cfg
+++ b/.circleci/ansible/ansible.ssh.cfg
@@ -1,3 +1,7 @@
+# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
+# (use pre 8.5 option name to cope with old ssh in CI)
+PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
+
 Host tele.zenith.tech
    User admin
    Port 3023
--- a/.circleci/ansible/deploy.yaml
+++ b/.circleci/ansible/deploy.yaml
@@ -57,7 +57,7 @@
      args:
        creates: "/storage/pageserver/data/tenants"
      environment:
-        ZENITH_REPO_DIR: "/storage/pageserver/data"
+        NEON_REPO_DIR: "/storage/pageserver/data"
        LD_LIBRARY_PATH: "/usr/local/lib"
      become: true
      tags:
@@ -131,7 +131,7 @@
      args:
        creates: "/storage/safekeeper/data/safekeeper.id"
      environment:
-        ZENITH_REPO_DIR: "/storage/safekeeper/data"
+        NEON_REPO_DIR: "/storage/safekeeper/data"
        LD_LIBRARY_PATH: "/usr/local/lib"
      become: true
      tags:
--- a/.circleci/ansible/production.hosts
+++ b/.circleci/ansible/production.hosts
@@ -12,6 +12,7 @@ pageservers
 safekeepers

 [storage:vars]
+env_name = prod-1
 console_mgmt_base_url = http://console-release.local
 bucket_name           = zenith-storage-oregon
 bucket_region         = us-west-2
--- a/.circleci/ansible/staging.hosts
+++ b/.circleci/ansible/staging.hosts
@@ -1,6 +1,7 @@
 [pageservers]
 #zenith-us-stage-ps-1 console_region_id=27
 zenith-us-stage-ps-2 console_region_id=27
+zenith-us-stage-ps-3 console_region_id=27

 [safekeepers]
 zenith-us-stage-sk-4 console_region_id=27
@@ -12,6 +13,7 @@ pageservers
 safekeepers

 [storage:vars]
+env_name = us-stage
 console_mgmt_base_url = http://console-staging.local
 bucket_name           = zenith-staging-storage-us-east-1
 bucket_region         = us-east-1
--- a/.circleci/ansible/systemd/pageserver.service
+++ b/.circleci/ansible/systemd/pageserver.service
@@ -5,7 +5,7 @@ After=network.target auditd.service
 [Service]
 Type=simple
 User=pageserver
-Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
 ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
--- a/.circleci/ansible/systemd/safekeeper.service
+++ b/.circleci/ansible/systemd/safekeeper.service
@@ -5,8 +5,8 @@ After=network.target auditd.service
 [Service]
 Type=simple
 User=safekeeper
-Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
+ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
 KillSignal=SIGINT
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -100,10 +100,8 @@ jobs:
          name: Rust build << parameters.build_type >>
          command: |
            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
              CARGO_FLAGS=
            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
              CARGO_FLAGS="--release --features profiling"
            fi

@@ -112,7 +110,7 @@ jobs:
            export RUSTC_WRAPPER=cachepot
            export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
            export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
-            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
+            mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
            cachepot -s

      - save_cache:
@@ -128,32 +126,24 @@ jobs:
          name: cargo test
          command: |
            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
              CARGO_FLAGS=
            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
              CARGO_FLAGS=--release
            fi

-            "${cov_prefix[@]}" cargo test $CARGO_FLAGS
+            cargo test $CARGO_FLAGS

        # Install the rust binaries, for use by test jobs
      - run:
          name: Install rust binaries
          command: |
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
-            fi
-
            binaries=$(
-              "${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
+              cargo metadata --format-version=1 --no-deps |
              jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
            )

            test_exe_paths=$(
-              "${cov_prefix[@]}" cargo test --message-format=json --no-run |
+              cargo test --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
            )

@@ -166,34 +156,15 @@ jobs:
              SRC=target/$BUILD_TYPE/$bin
              DST=/tmp/zenith/bin/$bin
              cp $SRC $DST
-              echo $DST >> /tmp/zenith/etc/binaries.list
            done

-            # Install test executables (for code coverage)
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              for bin in $test_exe_paths; do
-                SRC=$bin
-                DST=/tmp/zenith/test_bin/$(basename $bin)
-                cp $SRC $DST
-                echo $DST >> /tmp/zenith/etc/binaries.list
-              done
-            fi
-
        # Install the postgres binaries, for use by test jobs
      - run:
          name: Install postgres binaries
          command: |
            cp -a tmp_install /tmp/zenith/pg_install

-      - run:
-          name: Merge coverage data
-          command: |
-            # This will speed up workspace uploads
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
-            fi
-
-        # Save the rust binaries and coverage data for other jobs in this workflow.
+      # Save rust binaries for other jobs in the workflow
      - persist_to_workspace:
          root: /tmp/zenith
          paths:
@@ -286,7 +257,7 @@ jobs:
          # no_output_timeout, specified here.
          no_output_timeout: 10m
          environment:
-            - ZENITH_BIN: /tmp/zenith/bin
+            - NEON_BIN: /tmp/zenith/bin
            - POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
            - TEST_OUTPUT: /tmp/test_output
            # this variable will be embedded in perf test report
@@ -314,12 +285,6 @@ jobs:

            export GITHUB_SHA=$CIRCLE_SHA1

-            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
-            fi
-
            # Run the tests.
            #
            # The junit.xml file allows CircleCI to display more fine-grained test information
@@ -330,7 +295,7 @@ jobs:
            # -n4 uses four processes to run tests via pytest-xdist
            # -s is not used to prevent pytest from capturing output, because tests are running
            # in parallel and logs are mixed between different tests
-            "${cov_prefix[@]}" ./scripts/pytest \
+            ./scripts/pytest \
              --junitxml=$TEST_OUTPUT/junit.xml \
              --tb=short \
              --verbose \
@@ -359,67 +324,12 @@ jobs:
      # The store_test_results step tells CircleCI where to find the junit.xml file.
      - store_test_results:
          path: /tmp/test_output
-      - run:
-          name: Merge coverage data
-          command: |
-            # This will speed up workspace uploads
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
-            fi
-      # Save coverage data (if any)
+      # Save data (if any)
      - persist_to_workspace:
          root: /tmp/zenith
          paths:
            - "*"

-  coverage-report:
-    executor: neon-xlarge-executor
-    steps:
-      - attach_workspace:
-          at: /tmp/zenith
-      - checkout
-      - restore_cache:
-          name: Restore rust cache
-          keys:
-            # Require an exact match. While an out of date cache might speed up the build,
-            # there's no way to clean out old packages, so the cache grows every time something
-            # changes.
-            - v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
-      - run:
-          name: Build coverage report
-          command: |
-            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
-
-            scripts/coverage \
-              --dir=/tmp/zenith/coverage report \
-              --input-objects=/tmp/zenith/etc/binaries.list \
-              --commit-url=$COMMIT_URL \
-              --format=github
-      - run:
-          name: Upload coverage report
-          command: |
-            LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
-            REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
-            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
-
-            scripts/git-upload \
-              --repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
-              --message="Add code coverage for $COMMIT_URL" \
-              copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
-
-            # Add link to the coverage report to the commit
-            curl -f -X POST \
-            https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-            -H "Accept: application/vnd.github.v3+json" \
-            --user "$CI_ACCESS_TOKEN" \
-            --data \
-              "{
-                \"state\": \"success\",
-                \"context\": \"zenith-coverage\",
-                \"description\": \"Coverage report is ready\",
-                \"target_url\": \"$REPORT_URL\"
-              }"
-
  # Build neondatabase/neon:latest image and push it to Docker hub
  docker-image:
    docker:
@@ -688,50 +598,6 @@ jobs:
            helm upgrade neon-proxy       neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
            helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait

-  # Trigger a new remote CI job
-  remote-ci-trigger:
-    docker:
-      - image: cimg/base:2021.04
-    parameters:
-      remote_repo:
-        type: string
-    environment:
-      REMOTE_REPO: << parameters.remote_repo >>
-    steps:
-      - run:
-          name: Set PR's status to pending
-          command: |
-            LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
-
-            curl -f -X POST \
-            https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-            -H "Accept: application/vnd.github.v3+json" \
-            --user "$CI_ACCESS_TOKEN" \
-            --data \
-              "{
-                \"state\": \"pending\",
-                \"context\": \"neon-cloud-e2e\",
-                \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
-              }"
-      - run:
-          name: Request a remote CI test
-          command: |
-            LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
-
-            curl -f -X POST \
-            https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-            -H "Accept: application/vnd.github.v3+json" \
-            --user "$CI_ACCESS_TOKEN" \
-            --data \
-              "{
-                \"ref\": \"main\",
-                \"inputs\": {
-                  \"ci_job_name\": \"neon-cloud-e2e\",
-                  \"commit_hash\": \"$CIRCLE_SHA1\",
-                  \"remote_repo\": \"$LOCAL_REPO\"
-                }
-              }"
-
 workflows:
  build_and_test:
    jobs:
@@ -774,12 +640,6 @@ workflows:
          save_perf_report: true
          requires:
            - build-neon-release
-      - coverage-report:
-          # Context passes credentials for gh api
-          context: CI_ACCESS_TOKEN
-          requires:
-            # TODO: consider adding more
-            - other-tests-debug
      - docker-image:
          # Context gives an ability to login
          context: Docker Hub
@@ -880,14 +740,3 @@ workflows:
                - release
          requires:
            - docker-image-release
-      - remote-ci-trigger:
-          # Context passes credentials for gh api
-          context: CI_ACCESS_TOKEN
-          remote_repo: "neondatabase/cloud"
-          requires:
-            # XXX: Successful build doesn't mean everything is OK, but
-            # the job to be triggered takes so much time to complete (~22 min)
-            # that it's better not to wait for the commented-out steps
-            - build-neon-release
-            # - pg_regress-tests-release
-            # - other-tests-release
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,8 +9,8 @@ tmp_install
 tmp_check_cli
 test_output
 .vscode
-.zenith
-integration_tests/.zenith
+.neon
+integration_tests/.neon
 .mypy_cache

 Dockerfile
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -0,0 +1,140 @@
+name: 'Run python test'
+description: 'Runs a Neon python test set, performing all the required preparations before'
+
+inputs:
+  build_type:
+    description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
+    required: true
+  rust_toolchain:
+    description: 'Rust toolchain version to fetch the caches'
+    required: true
+  test_selection:
+    description: 'A python test suite to run'
+    required: true
+  extra_params:
+    description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
+    required: false
+    default: ''
+  needs_postgres_source:
+    description: 'Set to true if the test suite requires postgres source checked out'
+    required: false
+    default: 'false'
+  run_in_parallel:
+    description: 'Whether to run tests in parallel'
+    required: false
+    default: 'true'
+  save_perf_report:
+    description: 'Whether to upload the performance report'
+    required: false
+    default: 'false'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Get Neon artifact for restoration
+      uses: actions/download-artifact@v3
+      with:
+        name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
+        path: ./neon-artifact/
+
+    - name: Extract Neon artifact
+      shell: bash -ex {0}
+      run: |
+        mkdir -p /tmp/neon/
+        tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
+        rm -rf ./neon-artifact/
+
+    - name: Checkout
+      if: inputs.needs_postgres_source == 'true'
+      uses: actions/checkout@v3
+      with:
+        submodules: true
+        fetch-depth: 1
+
+    - name: Cache poetry deps
+      id: cache_poetry
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/pypoetry/virtualenvs
+        key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
+
+    - name: Install Python deps
+      shell: bash -ex {0}
+      run: ./scripts/pysync
+
+    - name: Run pytest
+      env:
+        NEON_BIN: /tmp/neon/bin
+        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+        TEST_OUTPUT: /tmp/test_output
+        # this variable will be embedded in perf test report
+        # and is needed to distinguish different environments
+        PLATFORM: github-actions-selfhosted
+      shell: bash -ex {0}
+      run: |
+        PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
+        rm -rf "$PERF_REPORT_DIR"
+
+        # Validate the raw input: once prefixed with "test_runner/" the value is never empty
+        if [ -z "${{ inputs.test_selection }}" ]; then
+          echo "test_selection must be set"
+          exit 1
+        fi
+        TEST_SELECTION="test_runner/${{ inputs.test_selection }}"
+        EXTRA_PARAMS="${{ inputs.extra_params }}"
+        if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
+          EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
+        fi
+        # Perf reports are only collected for the main branch. GITHUB_REF holds the
+        # fully qualified ref (refs/heads/<branch>), not the bare branch name.
+        if [[ "${{ inputs.save_perf_report }}" == "true" && "$GITHUB_REF" == "refs/heads/main" ]]; then
+          mkdir -p "$PERF_REPORT_DIR"
+          EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
+        fi
+
+        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
+          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
+          cov_prefix=()
+        fi
+
+        # Run the tests.
+        #
+        # The junit.xml file carries fine-grained, per-test results; it is kept in
+        # $TEST_OUTPUT and uploaded together with the test logs.
+        # --verbose prints name of each test (helpful when there are
+        # multiple tests in one file)
+        # -rA prints summary in the end
+        # -n4 uses four processes to run tests via pytest-xdist
+        # -s (disable pytest output capturing) is deliberately not passed: tests run
+        # in parallel, so uncaptured logs of different tests would be mixed together
+        "${cov_prefix[@]}" ./scripts/pytest \
+          --junitxml=$TEST_OUTPUT/junit.xml \
+          --tb=short \
+          --verbose \
+          -m "not remote_cluster" \
+          -rA $TEST_SELECTION $EXTRA_PARAMS
+
+        # Push the perf report gathered above (same main-branch-only condition)
+        if [[ "${{ inputs.save_perf_report }}" == "true" && "$GITHUB_REF" == "refs/heads/main" ]]; then
+          export REPORT_FROM="$PERF_REPORT_DIR"
+          export REPORT_TO=local
+          scripts/generate_and_push_perf_report.sh
+        fi
+
+    - name: Delete all data but logs
+      shell: bash -ex {0}
+      if: always()
+      run: |
+        du -sh /tmp/test_output/*
+        find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
+        du -sh /tmp/test_output/*
+
+    - name: Upload python test logs
+      if: always()
+      uses: actions/upload-artifact@v3
+      with:
+        retention-days: 7
+        if-no-files-found: error
+        name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
+        path: /tmp/test_output/
--- a/.github/actions/save-coverage-data/action.yml
+++ b/.github/actions/save-coverage-data/action.yml
@@ -0,0 +1,17 @@
+name: 'Merge and upload coverage data'
+description: 'Compresses and uploads the coverage data as an artifact'
+
+runs:
+  using: "composite"
+  steps:
+    - name: Merge coverage data
+      shell: bash -ex {0}
+      run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
+
+    - name: Upload coverage data
+      uses: actions/upload-artifact@v3
+      with:
+        retention-days: 7
+        if-no-files-found: error
+        name: coverage-data-artifact
+        path: /tmp/coverage/
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -0,0 +1,389 @@
+name: Test
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+
+defaults:
+  run:
+    shell: bash -ex {0}
+
+concurrency:
+   group: ${{ github.workflow }}-${{ github.ref }}
+   cancel-in-progress: true
+
+env:
+  RUST_BACKTRACE: 1
+  COPT: '-Werror'
+
+jobs:
+  build-postgres:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
+
+    env:
+      BUILD_TYPE: ${{ matrix.build_type }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Set pg revision for caching
+        id: pg_ver
+        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
+
+      - name: Cache postgres build
+        id: cache_pg
+        uses: actions/cache@v3
+        with:
+          path: tmp_install/
+          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Build postgres
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: mold -run make postgres -j$(nproc)
+
+      # actions/cache@v3 does not allow concurrently using the same cache across job steps, so pass the build on as a separate artifact
+      - name: Prepare postgres artifact
+        run: tar -C tmp_install/ -czf ./pg.tgz .
+      - name: Upload postgres artifact
+        uses: actions/upload-artifact@v3
+        with:
+          retention-days: 7
+          if-no-files-found: error
+          name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
+          path: ./pg.tgz
+
+
+  build-neon:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-postgres ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
+
+    env:
+      BUILD_TYPE: ${{ matrix.build_type }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Get postgres artifact for restoration
+        uses: actions/download-artifact@v3
+        with:
+          name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
+          path: ./postgres-artifact/
+      - name: Extract postgres artifact
+        run: |
+          mkdir ./tmp_install/
+          tar -xf ./postgres-artifact/pg.tgz -C ./tmp_install/
+          rm -rf ./postgres-artifact/
+
+      - name: Cache cargo deps
+        id: cache_cargo
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cargo/registry/
+            ~/.cargo/git/
+            target/
+          key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
+          # Fall back to older versions of the cache, if none for the current Cargo.lock was found
+          restore-keys: |
+            v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
+
+      - name: Run cargo build
+        run: |
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+            CARGO_FLAGS=
+          elif [[ $BUILD_TYPE == "release" ]]; then
+            cov_prefix=()
+            CARGO_FLAGS="--release --features profiling"
+          fi
+
+          "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
+
+      - name: Run cargo test
+        run: |
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+            CARGO_FLAGS=
+          elif [[ $BUILD_TYPE == "release" ]]; then
+            cov_prefix=()
+            CARGO_FLAGS=--release
+          fi
+
+          "${cov_prefix[@]}" cargo test $CARGO_FLAGS
+
+      - name: Install rust binaries
+        run: |
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+          elif [[ $BUILD_TYPE == "release" ]]; then
+            cov_prefix=()
+          fi
+
+          binaries=$(
+            "${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
+            jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
+          )
+
+          test_exe_paths=$(
+            "${cov_prefix[@]}" cargo test --message-format=json --no-run |
+            jq -r '.executable | select(. != null)'
+          )
+
+          mkdir -p /tmp/neon/bin/
+          mkdir -p /tmp/neon/test_bin/
+          mkdir -p /tmp/neon/etc/
+
+          # Keep bloated coverage data files away from the rest of the artifact
+          mkdir -p /tmp/coverage/
+
+          # Install target binaries
+          for bin in $binaries; do
+            SRC=target/$BUILD_TYPE/$bin
+            DST=/tmp/neon/bin/$bin
+            cp "$SRC" "$DST"
+          done
+
+          # Install test executables and write list of all binaries (for code coverage)
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            for bin in $binaries; do
+              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
+            done
+            for bin in $test_exe_paths; do
+              SRC=$bin
+              DST=/tmp/neon/test_bin/$(basename $bin)
+              cp "$SRC" "$DST"
+              echo "$DST" >> /tmp/coverage/binaries.list
+            done
+          fi
+
+      - name: Install postgres binaries
+        run: cp -a tmp_install /tmp/neon/pg_install
+
+      - name: Prepare neon artifact
+        run: tar -C /tmp/neon/ -czf ./neon.tgz .
+
+      - name: Upload neon binaries
+        uses: actions/upload-artifact@v3
+        with:
+          retention-days: 7
+          if-no-files-found: error
+          name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
+          path: ./neon.tgz
+
+      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
+      - name: Merge and upload coverage data
+        if: matrix.build_type == 'debug'
+        uses: ./.github/actions/save-coverage-data
+
+
+  pg_regress-tests:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-neon ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 2
+
+      - name: Pytest regress tests
+        uses: ./.github/actions/run-python-test-set
+        with:
+          build_type: ${{ matrix.build_type }}
+          rust_toolchain: ${{ matrix.rust_toolchain }}
+          test_selection: batch_pg_regress
+          needs_postgres_source: true
+
+      - name: Merge and upload coverage data
+        if: matrix.build_type == 'debug'
+        uses: ./.github/actions/save-coverage-data
+
+  other-tests:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-neon ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 2
+
+      - name: Pytest other tests
+        uses: ./.github/actions/run-python-test-set
+        with:
+          build_type: ${{ matrix.build_type }}
+          rust_toolchain: ${{ matrix.rust_toolchain }}
+          test_selection: batch_others
+
+      - name: Merge and upload coverage data
+        if: matrix.build_type == 'debug'
+        uses: ./.github/actions/save-coverage-data
+
+  benchmarks:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-neon ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ release ]
+        rust_toolchain: [ 1.58 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 2
+
+      - name: Pytest benchmarks
+        uses: ./.github/actions/run-python-test-set
+        with:
+          build_type: ${{ matrix.build_type }}
+          rust_toolchain: ${{ matrix.rust_toolchain }}
+          test_selection: performance
+          run_in_parallel: false
+          save_perf_report: true
+      # XXX: no coverage data handling here, since benchmarks are run on release builds,
+      # while coverage is currently collected for the debug ones
+
+  coverage-report:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ other-tests, pg_regress-tests ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug ]
+        rust_toolchain: [ 1.58 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Restore cargo deps cache
+        id: cache_cargo
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cargo/registry/
+            ~/.cargo/git/
+            target/
+          key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
+
+      - name: Get Neon artifact for restoration
+        uses: actions/download-artifact@v3
+        with:
+          name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
+          path: ./neon-artifact/
+
+      - name: Extract Neon artifact
+        run: |
+          mkdir -p /tmp/neon/
+          tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
+          rm -rf ./neon-artifact/
+
+      - name: Restore coverage data
+        uses: actions/download-artifact@v3
+        with:
+          name: coverage-data-artifact
+          path: /tmp/coverage/
+
+      - name: Merge coverage data
+        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
+
+      - name: Build and upload coverage report
+        run: |
+          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
+          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
+          COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
+
+          scripts/coverage \
+            --dir=/tmp/coverage report \
+            --input-objects=/tmp/coverage/binaries.list \
+            --commit-url=$COMMIT_URL \
+            --format=github
+
+          REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
+
+          scripts/git-upload \
+            --repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
+            --message="Add code coverage for $COMMIT_URL" \
+            copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
+
+          # Add link to the coverage report to the commit
+          curl -f -X POST \
+          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
+          -H "Accept: application/vnd.github.v3+json" \
+          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+          --data \
+            "{
+              \"state\": \"success\",
+              \"context\": \"neon-coverage\",
+              \"description\": \"Coverage report is ready\",
+              \"target_url\": \"$REPORT_URL\"
+            }"
+
+  trigger-e2e-tests:
+   runs-on: [ self-hosted, Linux, k8s-runner ]
+   needs: [ build-neon ]
+   steps:
+     - name: Set PR's status to pending and request a remote CI test
+       run: |
+         COMMIT_SHA=${{ github.event.pull_request.head.sha }}
+         COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
+
+         REMOTE_REPO="${{ github.repository_owner }}/cloud"
+
+         curl -f -X POST \
+         https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
+         -H "Accept: application/vnd.github.v3+json" \
+         --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+         --data \
+           "{
+             \"state\": \"pending\",
+             \"context\": \"neon-cloud-e2e\",
+             \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
+           }"
+
+         curl -f -X POST \
+         https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
+         -H "Accept: application/vnd.github.v3+json" \
+         --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+         --data \
+           "{
+             \"ref\": \"main\",
+             \"inputs\": {
+               \"ci_job_name\": \"neon-cloud-e2e\",
+               \"commit_hash\": \"$COMMIT_SHA\",
+               \"remote_repo\": \"${{ github.repository }}\"
+             }
+           }"
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -1,4 +1,4 @@
-name: Build and Test
+name: Check code style and build

 on:
  push:
@@ -6,15 +6,27 @@ on:
    - main
  pull_request:

+defaults:
+  run:
+    shell: bash -ex {0}
+
+concurrency:
+   group: ${{ github.workflow }}-${{ github.ref }}
+   cancel-in-progress: true
+
+env:
+  RUST_BACKTRACE: 1
+
 jobs:
-  regression-check:
+  check-codestyle-rust:
    strategy:
+      fail-fast: false
      matrix:
        # If we want to duplicate this job for different
        # Rust toolchains (e.g. nightly or 1.37.0), add them here.
        rust_toolchain: [1.58]
        os: [ubuntu-latest, macos-latest]
-    timeout-minutes: 30
+    timeout-minutes: 50
    name: run regression test suite
    runs-on: ${{ matrix.os }}

@@ -92,5 +104,30 @@ jobs:
      - name: Run cargo clippy
        run: ./run_clippy.sh

-      - name: Run cargo test
-        run: cargo test --all --all-targets
+      - name: Ensure all project builds
+        run: cargo build --all --all-targets
+
+  check-codestyle-python:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: false
+          fetch-depth: 1
+
+      - name: Cache poetry deps
+        id: cache_poetry
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry/virtualenvs
+          key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
+
+      - name: Install Python deps
+        run: ./scripts/pysync
+
+      - name: Run yapf to ensure code format
+        run: poetry run yapf --recursive --diff .
+
+      - name: Run mypy to check types
+        run: poetry run mypy .
--- a/.github/workflows/pg_clients.yml
+++ b/.github/workflows/pg_clients.yml
@@ -0,0 +1,74 @@
+name: Test Postgres client libraries
+
+on:
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    #          ┌───────────── minute (0 - 59)
+    #          │ ┌───────────── hour (0 - 23)
+    #          │ │ ┌───────────── day of the month (1 - 31)
+    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron:  '23 02 * * *' # run once a day, timezone is utc
+
+  workflow_dispatch:
+
+concurrency:
+   group: ${{ github.workflow }}-${{ github.ref }}
+   cancel-in-progress: true
+
+jobs:
+  test-postgres-client-libs:
+    runs-on: [ ubuntu-latest ]
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+
+    - uses: actions/setup-python@v4
+      with:
+        python-version: 3.9
+
+    - name: Install Poetry
+      uses: snok/install-poetry@v1
+
+    - name: Cache poetry deps
+      id: cache_poetry
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/pypoetry/virtualenvs
+        key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
+
+    - name: Install Python deps
+      shell: bash -ex {0}
+      run: ./scripts/pysync
+
+    - name: Run pytest
+      env:
+        REMOTE_ENV: 1
+        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
+        TEST_OUTPUT: /tmp/test_output
+        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+        # this variable will be embedded in perf test report
+        # and is needed to distinguish different environments
+        PLATFORM: github-actions-selfhosted
+      shell: bash -ex {0}
+      run: |
+        # Test framework expects we have psql binary;
+        # but since we don't really need it in this test, let's mock it
+        mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
+        ./scripts/pytest \
+          --junitxml=$TEST_OUTPUT/junit.xml \
+          --tb=short \
+          --verbose \
+          -m "remote_cluster" \
+          -rA "test_runner/pg_clients"
+
+    - name: Post to a Slack channel
+      if: failure()
+      id: slack
+      uses: slackapi/slack-github-action@v1
+      with:
+        channel-id: "C033QLM5P7D" # dev-staging-stream
+        slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+      env:
+        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -5,8 +5,9 @@
 __pycache__/
 test_output/
 .vscode
-/.zenith
-/integration_tests/.zenith
+.idea
+/.neon
+/integration_tests/.neon

 # Coverage
 *.profraw
--- a/.yapfignore
+++ b/.yapfignore
@@ -6,5 +6,5 @@ target/
 tmp_install/
 __pycache__/
 test_output/
-.zenith/
+.neon/
 .git/
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -64,6 +64,45 @@ dependencies = [
 "nodrop",
 ]

+[[package]]
+name = "asn1-rs"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30ff05a702273012438132f449575dbc804e27b2f3cbe3069aa237d26c98fa33"
+dependencies = [
+ "asn1-rs-derive",
+ "asn1-rs-impl",
+ "displaydoc",
+ "nom",
+ "num-traits",
+ "rusticata-macros",
+ "thiserror",
+ "time 0.3.9",
+]
+
+[[package]]
+name = "asn1-rs-derive"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db8b7511298d5b7784b40b092d9e9dcd3a627a5707e4b5e507931ab0d44eeebf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "asn1-rs-impl"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "async-stream"
 version = "0.3.3"
@@ -422,6 +461,7 @@ dependencies = [
 "tar",
 "tokio",
 "tokio-postgres",
+ "url",
 "workspace_hack",
 ]

@@ -712,6 +752,12 @@ dependencies = [
 "syn",
 ]

+[[package]]
+name = "data-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
+
 [[package]]
 name = "debugid"
 version = "0.7.3"
@@ -721,6 +767,20 @@ dependencies = [
 "uuid",
 ]

+[[package]]
+name = "der-parser"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe398ac75057914d7d07307bf67dc7f3f574a26783b4fc7805a20ffa9f506e82"
+dependencies = [
+ "asn1-rs",
+ "displaydoc",
+ "nom",
+ "num-bigint",
+ "num-traits",
+ "rusticata-macros",
+]
+
 [[package]]
 name = "digest"
 version = "0.9.0"
@@ -762,6 +822,17 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "displaydoc"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "either"
 version = "1.6.1"
@@ -1731,6 +1802,15 @@ dependencies = [
 "memchr",
 ]

+[[package]]
+name = "oid-registry"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38e20717fa0541f39bd146692035c37bedfa532b3e5071b35761082407546b2a"
+dependencies = [
+ "asn1-rs",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.10.0"
@@ -1842,6 +1922,7 @@ dependencies = [
 "tracing",
 "url",
 "utils",
+ "walkdir",
 "workspace_hack",
 ]

@@ -2249,6 +2330,7 @@ dependencies = [
 "url",
 "utils",
 "workspace_hack",
+ "x509-parser",
 ]

 [[package]]
@@ -2620,6 +2702,15 @@ dependencies = [
 "semver",
 ]

+[[package]]
+name = "rusticata-macros"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632"
+dependencies = [
+ "nom",
+]
+
 [[package]]
 name = "rustls"
 version = "0.20.4"
@@ -3059,6 +3150,18 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"

+[[package]]
+name = "synstructure"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "unicode-xid",
+]
+
 [[package]]
 name = "tar"
 version = "0.4.38"
@@ -3921,6 +4024,24 @@ dependencies = [
 "tracing-core",
 ]

+[[package]]
+name = "x509-parser"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fb9bace5b5589ffead1afb76e43e34cff39cd0f3ce7e170ae0c29e53b88eb1c"
+dependencies = [
+ "asn1-rs",
+ "base64",
+ "data-encoding",
+ "der-parser",
+ "lazy_static",
+ "nom",
+ "oid-registry",
+ "rusticata-macros",
+ "thiserror",
+ "time 0.3.9",
+]
+
 [[package]]
 name = "xattr"
 version = "0.2.2"
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Build Postgres
-FROM zimg/rust:1.58 AS pg-build
+FROM neondatabase/rust:1.58 AS pg-build
 WORKDIR /pg

 USER root
@@ -14,7 +14,7 @@ RUN set -e \
    && tar -C tmp_install -czf /postgres_install.tar.gz .

 # Build zenith binaries
-FROM zimg/rust:1.58 AS build
+FROM neondatabase/rust:1.58 AS build
 ARG GIT_VERSION=local

 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
@@ -46,9 +46,9 @@ RUN set -e \
    && useradd -d /data zenith \
    && chown -R zenith:zenith /data

-COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
-COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
-COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy      /usr/local/bin
+COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
+COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
+COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy      /usr/local/bin

 COPY --from=pg-build /pg/tmp_install/         /usr/local/
 COPY --from=pg-build /postgres_install.tar.gz /data/
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,6 +1,6 @@
 # First transient image to build compute_tools binaries
 # NB: keep in sync with rust image version in .circle/config.yml
-FROM zimg/rust:1.58 AS rust-build
+FROM neondatabase/rust:1.58 AS rust-build

 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
@@ -15,4 +15,4 @@ RUN set -e \
 # Final image that only has one binary
 FROM debian:buster-slim

-COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl
+COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
--- a/Makefile
+++ b/Makefile
@@ -113,7 +113,3 @@ fmt:
 .PHONY: setup-pre-commit-hook
 setup-pre-commit-hook:
 	ln -s -f ../../pre-commit.py .git/hooks/pre-commit
-
-# Rebuild when any makefile changes
-# https://stackoverflow.com/questions/3871444/making-all-rules-depend-on-the-makefile-itself
-.EXTRA_PREREQS+=$(foreach mk, ${MAKEFILE_LIST},$(abspath ${mk}))
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ Pageserver consists of:
 ## Running local installation


-#### building on Linux
+#### Installing dependencies on Linux
 1. Install build dependencies and other useful packages

 * On Ubuntu or Debian this set of packages should be sufficient to build the code:
@@ -49,18 +49,11 @@ dnf install flex bison readline-devel zlib-devel openssl-devel \
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 ```

-3. Build neon and patched postgres
-```sh
-git clone --recursive https://github.com/neondatabase/neon.git
-cd neon
-make -j`nproc`
-```
-
-#### building on OSX (12.3.1)
+#### Installing dependencies on OSX (12.3.1)
 1. Install XCode and dependencies
 ```
 xcode-select --install
-brew install protobuf etcd
+brew install protobuf etcd openssl
 ```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -76,11 +69,20 @@ brew install libpq
 brew link --force libpq
 ```

-4. Build neon and patched postgres
-```sh
+#### Building on Linux and OSX
+
+1. Build neon and patched postgres
+```
+# Note: The path to the neon sources cannot contain spaces.
+
 git clone --recursive https://github.com/neondatabase/neon.git
 cd neon
-make -j5
+
+# The default (and preferred) build type is debug, which produces noticeably
+# slower binaries than a release build. If you want a release build, run
+# "BUILD_TYPE=release make -j`nproc`" instead.
+
+make -j`nproc`
 ```

 #### dependency installation notes
@@ -93,7 +95,7 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
 #### running neon database
 1. Start pageserver and postgres on top of it (should be called from repo root):
 ```sh
-# Create repository in .zenith with proper paths to binaries and data
+# Create repository in .neon with proper paths to binaries and data
 # Later that would be responsibility of a package install script
 > ./target/debug/neon_local init
 initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
@@ -103,16 +105,16 @@ pageserver init succeeded

 # start pageserver and safekeeper
 > ./target/debug/neon_local start
-Starting pageserver at '127.0.0.1:64000' in '.zenith'
+Starting pageserver at '127.0.0.1:64000' in '.neon'
 Pageserver started
 initializing for sk 1 for 7676
-Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
+Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
 Safekeeper started

 # start postgres compute node
 > ./target/debug/neon_local pg start main
 Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
+Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
 Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

 # check list of running postgres instances
@@ -149,7 +151,7 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
 # start postgres on that branch
 > ./target/debug/neon_local pg start migration_check --branch-name migration_check
 Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
-Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
+Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
 Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'

 # check the new list of running postgres instances
@@ -209,7 +211,7 @@ Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, wh
 To get more familiar with this aspect, refer to:

 - [Neon glossary](/docs/glossary.md)
-- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
+- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)
 - Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))

 ## Join the development
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -18,4 +18,5 @@ serde_json = "1"
 tar = "0.4"
 tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
 tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+url = "2.2.2"
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -33,7 +33,7 @@ use std::process::exit;
 use std::sync::{Arc, RwLock};
 use std::{thread, time::Duration};

-use anyhow::Result;
+use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Arg;
 use log::{error, info};
@@ -45,6 +45,7 @@ use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::pg_helpers::*;
 use compute_tools::spec::*;
+use url::Url;

 fn main() -> Result<()> {
    // TODO: re-use `utils::logging` later
@@ -131,7 +132,7 @@ fn main() -> Result<()> {

    let compute_state = ComputeNode {
        start_time: Utc::now(),
-        connstr: connstr.to_string(),
+        connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
        pgdata: pgdata.to_string(),
        pgbin: pgbin.to_string(),
        spec,
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -1,5 +1,3 @@
-use std::sync::Arc;
-
 use anyhow::{anyhow, Result};
 use log::error;
 use postgres::Client;
@@ -23,9 +21,8 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
    Ok(())
 }

-pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
-    let connstr = &compute.connstr;
-    let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
+pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
+    let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
    if client.is_closed() {
        return Err(anyhow!("connection to postgres closed"));
    }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -35,7 +35,8 @@ use crate::spec::*;
 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
    pub start_time: DateTime<Utc>,
-    pub connstr: String,
+    // Url type maintains proper escaping
+    pub connstr: url::Url,
    pub pgdata: String,
    pub pgbin: String,
    pub spec: ComputeSpec,
@@ -268,27 +269,32 @@ impl ComputeNode {
        // In this case we need to connect with old `zenith_admin`name
        // and create new user. We cannot simply rename connected user,
        // but we can create a new one and grant it all privileges.
-        let mut client = match Client::connect(&self.connstr, NoTls) {
+        let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
            Err(e) => {
                info!(
                    "cannot connect to postgres: {}, retrying with `zenith_admin` username",
                    e
                );
-                let zenith_admin_connstr = self.connstr.replacen("cloud_admin", "zenith_admin", 1);
+                let mut zenith_admin_connstr = self.connstr.clone();

-                let mut client = Client::connect(&zenith_admin_connstr, NoTls)?;
+                zenith_admin_connstr
+                    .set_username("zenith_admin")
+                    .map_err(|_| anyhow::anyhow!("invalid connstr"))?;
+
+                let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
                client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
                client.simple_query("GRANT zenith_admin TO cloud_admin")?;
                drop(client);

                // reconnect with connsting with expected name
-                Client::connect(&self.connstr, NoTls)?
+                Client::connect(self.connstr.as_str(), NoTls)?
            }
            Ok(client) => client,
        };

        handle_roles(&self.spec, &mut client)?;
        handle_databases(&self.spec, &mut client)?;
+        handle_role_deletions(self, &mut client)?;
        handle_grants(&self.spec, &mut client)?;
        create_writablity_check_data(&mut client)?;

--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -13,11 +13,11 @@ const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
 // Spin in a loop and figure out the last activity time in the Postgres.
 // Then update it in the shared state. This function never errors out.
 // XXX: the only expected panic is at `RwLock` unwrap().
-fn watch_compute_activity(compute: &Arc<ComputeNode>) {
+fn watch_compute_activity(compute: &ComputeNode) {
    // Suppose that `connstr` doesn't change
-    let connstr = compute.connstr.clone();
+    let connstr = compute.connstr.as_str();
    // Define `client` outside of the loop to reuse existing connection if it's active.
-    let mut client = Client::connect(&connstr, NoTls);
+    let mut client = Client::connect(connstr, NoTls);
    let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);

    info!("watching Postgres activity at {}", connstr);
@@ -32,7 +32,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
                    info!("connection to postgres closed, trying to reconnect");

                    // Connection is closed, reconnect and try again.
-                    client = Client::connect(&connstr, NoTls);
+                    client = Client::connect(connstr, NoTls);
                    continue;
                }

@@ -93,7 +93,7 @@ fn watch_compute_activity(compute: &Arc<ComputeNode>) {
                debug!("cannot connect to postgres: {}, retrying", e);

                // Establish a new connection and try again.
-                client = Client::connect(&connstr, NoTls);
+                client = Client::connect(connstr, NoTls);
            }
        }
    }
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -1,3 +1,4 @@
+use std::fmt::Write;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
 use std::net::{SocketAddr, TcpStream};
@@ -138,9 +139,11 @@ impl Role {
            // Now we also support SCRAM-SHA-256 and to preserve compatibility
            // we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
            if pass.starts_with("SCRAM-SHA-256") {
-                params.push_str(&format!(" PASSWORD '{}'", pass));
+                write!(params, " PASSWORD '{pass}'")
+                    .expect("String is documented to not to error during write operations");
            } else {
-                params.push_str(&format!(" PASSWORD 'md5{}'", pass));
+                write!(params, " PASSWORD 'md5{pass}'")
+                    .expect("String is documented to not to error during write operations");
            }
        } else {
            params.push_str(" PASSWORD NULL");
@@ -158,7 +161,8 @@ impl Database {
    /// it may require a proper quoting too.
    pub fn to_pg_options(&self) -> String {
        let mut params: String = self.options.as_pg_options();
-        params.push_str(&format!(" OWNER {}", &self.owner.quote()));
+        write!(params, " OWNER {}", &self.owner.quote())
+            .expect("String is documented to not to error during write operations");

        params
    }
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -2,9 +2,10 @@ use std::path::Path;

 use anyhow::Result;
 use log::{info, log_enabled, warn, Level};
-use postgres::Client;
+use postgres::{Client, NoTls};
 use serde::Deserialize;

+use crate::compute::ComputeNode;
 use crate::config;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;
@@ -97,18 +98,13 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {

    // Process delta operations first
    if let Some(ops) = &spec.delta_operations {
-        info!("processing delta operations on roles");
+        info!("processing role renames");
        for op in ops {
            match op.action.as_ref() {
-                // We do not check either role exists or not,
-                // Postgres will take care of it for us
                "delete_role" => {
-                    let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
-
-                    warn!("deleting role '{}'", &op.name);
-                    xact.execute(query.as_str(), &[])?;
+                    // no-op now, roles will be deleted at the end of configuration
                }
-                // Renaming role drops its password, since tole name is
+                // Renaming role drops its password, since role name is
                // used as a salt there.  It is important that this role
                // is recorded with a new `name` in the `roles` list.
                // Follow up roles update will set the new password.
@@ -182,7 +178,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
            xact.execute(query.as_str(), &[])?;

            let grant_query = format!(
-                "grant pg_read_all_data, pg_write_all_data to {}",
+                "GRANT pg_read_all_data, pg_write_all_data TO {}",
                name.quote()
            );
            xact.execute(grant_query.as_str(), &[])?;
@@ -197,6 +193,70 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    Ok(())
 }

+/// Reassign all dependent objects and delete requested roles.
+///
+/// Ownership is reassigned first via `reassign_owned_objects`, then every
+/// `delete_role` operation is executed in a single transaction on `client`.
+/// Returns an error if a reassignment, a `DROP ROLE`, or the commit fails.
+pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
+    let spec = &node.spec;
+
+    // First, reassign all dependent objects to db owners.
+    if let Some(ops) = &spec.delta_operations {
+        info!("reassigning dependent objects of to-be-deleted roles");
+        for op in ops.iter().filter(|op| op.action == "delete_role") {
+            reassign_owned_objects(node, &op.name)?;
+        }
+    }
+
+    // Second, proceed with role deletions.
+    let mut xact = client.transaction()?;
+    if let Some(ops) = &spec.delta_operations {
+        info!("processing role deletions");
+        for op in ops.iter().filter(|op| op.action == "delete_role") {
+            // No existence check is needed: `IF EXISTS` lets Postgres handle it.
+            let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
+            warn!("deleting role '{}'", &op.name);
+            xact.execute(query.as_str(), &[])?;
+        }
+    }
+    // A `postgres::Transaction` rolls back when dropped, so commit explicitly.
+    xact.commit()?;
+
+    Ok(())
+}
+
+/// Hands over everything `role_name` owns to the owners of the databases it lives in.
+/// Databases owned by `role_name` itself are skipped. For every other database a new
+/// connection is opened, ownership is reassigned and the role's privileges are dropped.
+fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
+    let databases = node.spec.cluster.databases.iter();
+    for db in databases.filter(|db| db.owner != *role_name) {
+        // REASSIGN OWNED / DROP OWNED only act on the current database, so connect to
+        // each one in turn; the database name is the sole path component of the URL.
+        let mut db_url = node.connstr.clone();
+        db_url.set_path(&db.name);
+        let mut db_client = Client::connect(db_url.as_str(), NoTls)?;
+
+        // Move ownership of all dependent objects to the db owner.
+        let quoted_role = role_name.quote();
+        let quoted_owner = db.owner.quote();
+        let reassign_sql = format!("REASSIGN OWNED BY {} TO {}", quoted_role, quoted_owner);
+        info!(
+            "reassigning objects owned by '{}' in db '{}' to '{}'",
+            role_name, &db.name, &db.owner
+        );
+        db_client.simple_query(&reassign_sql)?;
+
+        // With ownership already reassigned, this now only drops
+        // the privileges granted to the role.
+        let drop_sql = format!("DROP OWNED BY {}", quoted_role);
+        db_client.simple_query(&drop_sql)?;
+    }
+
+    Ok(())
+}
+
 /// It follows mostly the same logic as `handle_roles()` excepting that we
 /// does not use an explicit transactions block, since major database operations
 /// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
@@ -294,13 +354,26 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
 pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    info!("cluster spec grants:");

+    // We now have a separate `web_access` role to connect to the database
+    // via the web interface and proxy link auth. And also we grant a
+    // read / write all data privilege to every role. So also grant
+    // create to everyone.
+    // XXX: later we should stop messing with Postgres ACL in such horrible
+    // ways.
+    let roles = spec
+        .cluster
+        .roles
+        .iter()
+        .map(|r| r.name.quote())
+        .collect::<Vec<_>>();
+
    for db in &spec.cluster.databases {
        let dbname = &db.name;

        let query: String = format!(
            "GRANT CREATE ON DATABASE {} TO {}",
            dbname.quote(),
-            db.owner.quote()
+            roles.join(", ")
        );
        info!("grant query {}", &query);

--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -21,9 +21,9 @@ use utils::{
 use crate::safekeeper::SafekeeperNode;

 //
-// This data structures represents zenith CLI config
+// This data structures represents neon_local CLI config
 //
-// It is deserialized from the .zenith/config file, or the config file passed
+// It is deserialized from the .neon/config file, or the config file passed
 // to 'zenith init --config=<path>' option. See control_plane/simple.conf for
 // an example.
 //
@@ -34,8 +34,8 @@ pub struct LocalEnv {
    // compute nodes).
    //
    // This is not stored in the config file. Rather, this is the path where the
-    // config file itself is. It is read from the ZENITH_REPO_DIR env variable or
-    // '.zenith' if not given.
+    // config file itself is. It is read from the NEON_REPO_DIR env variable or
+    // '.neon' if not given.
    #[serde(skip)]
    pub base_data_dir: PathBuf,

@@ -177,6 +177,7 @@ pub struct SafekeeperConf {
    pub sync: bool,
    pub remote_storage: Option<String>,
    pub backup_threads: Option<u32>,
+    pub auth_enabled: bool,
 }

 impl Default for SafekeeperConf {
@@ -188,6 +189,7 @@ impl Default for SafekeeperConf {
            sync: true,
            remote_storage: None,
            backup_threads: None,
+            auth_enabled: false,
        }
    }
 }
@@ -337,7 +339,7 @@ impl LocalEnv {
    pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
        // Currently, the user first passes a config file with 'zenith init --config=<path>'
        // We read that in, in `create_config`, and fill any missing defaults. Then it's saved
-        // to .zenith/config. TODO: We lose any formatting and comments along the way, which is
+        // to .neon/config. TODO: We lose any formatting and comments along the way, which is
        // a bit sad.
        let mut conf_content = r#"# This file describes a locale deployment of the page server
 # and safekeeeper node. It is read by the 'zenith' command-line
@@ -401,16 +403,6 @@ impl LocalEnv {
                self.pg_distrib_dir.display()
            );
        }
-        for binary in ["pageserver", "safekeeper"] {
-            if !self.zenith_distrib_dir.join(binary).exists() {
-                bail!(
-                    "Can't find binary '{}' in zenith distrib dir '{}'",
-                    binary,
-                    self.zenith_distrib_dir.display()
-                );
-            }
-        }
-
        for binary in ["pageserver", "safekeeper"] {
            if !self.zenith_distrib_dir.join(binary).exists() {
                bail!(
@@ -419,12 +411,6 @@ impl LocalEnv {
                );
            }
        }
-        if !self.pg_distrib_dir.join("bin/postgres").exists() {
-            bail!(
-                "Can't find postgres binary at {}",
-                self.pg_distrib_dir.display()
-            );
-        }

        fs::create_dir(&base_path)?;

@@ -481,9 +467,9 @@ impl LocalEnv {
 }

 fn base_path() -> PathBuf {
-    match std::env::var_os("ZENITH_REPO_DIR") {
+    match std::env::var_os("NEON_REPO_DIR") {
        Some(val) => PathBuf::from(val),
-        None => PathBuf::from(".zenith"),
+        None => PathBuf::from(".neon"),
    }
 }

--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -149,6 +149,11 @@ impl SafekeeperNode {
        if let Some(ref remote_storage) = self.conf.remote_storage {
            cmd.args(&["--remote-storage", remote_storage]);
        }
+        if self.conf.auth_enabled {
+            cmd.arg("--auth-validation-public-key-path");
+            // A `PathBuf` is better passed as is, not via `String`.
+            cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
+        }

        fill_aws_secrets_vars(&mut cmd);

--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,5 +1,6 @@
 use std::collections::HashMap;
-use std::io::Write;
+use std::fs::File;
+use std::io::{BufReader, Write};
 use std::net::TcpStream;
 use std::num::NonZeroU64;
 use std::path::PathBuf;
@@ -527,4 +528,54 @@ impl PageServerNode {

        Ok(timeline_info_response)
    }
+
+    /// Import a basebackup prepared using either:
+    /// a) `pg_basebackup -F tar`, or
+    /// b) The `fullbackup` pageserver endpoint
+    ///
+    /// # Arguments
+    /// * `tenant_id` - tenant to import into. Created if not exists
+    /// * `timeline_id` - id to assign to imported timeline
+    /// * `base` - (start lsn of basebackup, path to `base.tar` file)
+    /// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
+    pub fn timeline_import(
+        &self,
+        tenant_id: ZTenantId,
+        timeline_id: ZTimelineId,
+        base: (Lsn, PathBuf),
+        pg_wal: Option<(Lsn, PathBuf)>,
+    ) -> anyhow::Result<()> {
+        let mut client = self.pg_connection_config.connect(NoTls).unwrap();
+
+        // Init base reader
+        let (start_lsn, base_tarfile_path) = base;
+        let base_tarfile = File::open(base_tarfile_path)?;
+        let mut base_reader = BufReader::new(base_tarfile);
+
+        // Init wal reader if necessary
+        let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
+            let wal_tarfile = File::open(wal_tarfile_path)?;
+            let wal_reader = BufReader::new(wal_tarfile);
+            (end_lsn, Some(wal_reader))
+        } else {
+            (start_lsn, None)
+        };
+
+        // Import base
+        let import_cmd =
+            format!("import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
+        let mut writer = client.copy_in(&import_cmd)?;
+        io::copy(&mut base_reader, &mut writer)?;
+        writer.finish()?;
+
+        // Import wal if necessary
+        if let Some(mut wal_reader) = wal_reader {
+            let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
+            let mut writer = client.copy_in(&import_cmd)?;
+            io::copy(&mut wal_reader, &mut writer)?;
+            writer.finish()?;
+        }
+
+        Ok(())
+    }
 }
--- a/docs/rfcs/cluster-size-limits.md
+++ b/docs/rfcs/cluster-size-limits.md
@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
 Alternatively, we could count only relation data. As in pg_database_size().
 This approach is somewhat more user-friendly because it is the data that is really affected by the user.
 On the other hand, it puts us in a weaker position than other services, i.e., RDS.
-We will need to refactor the timeline_size counter or add another counter to implement it. 
+We will need to refactor the timeline_size counter or add another counter to implement it.

 Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
 Then this size should be reported to compute node.

-`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
+`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback`.

 (PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).

@@ -64,11 +64,11 @@ We should warn users if the limit is soon to be reached.
 ### **Reliability, failure modes and corner cases**

 1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
-    
+
    If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
-    
+
    So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
-    
+
    Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.


--- a/docs/settings.md
+++ b/docs/settings.md
@@ -154,7 +154,7 @@ The default distrib dir is `./tmp_install/`.
 #### workdir (-D)

 A directory in the file system, where pageserver will store its files.
-The default is `./.zenith/`.
+The default is `./.neon/`.

 This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.

--- a/libs/etcd_broker/src/lib.rs
+++ b/libs/etcd_broker/src/lib.rs
@@ -1,392 +1,209 @@
 //! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
 //! Intended to connect services to each other, not to store their data.
-use std::{
-    collections::{hash_map, HashMap},
-    fmt::Display,
-    str::FromStr,
-};

-use once_cell::sync::Lazy;
-use regex::{Captures, Regex};
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
+/// All broker keys, that are used when dealing with etcd.
+pub mod subscription_key;
+/// All broker values, possible to use when dealing with etcd.
+pub mod subscription_value;

-pub use etcd_client::*;
+use std::str::FromStr;

+use serde::de::DeserializeOwned;
+
+use subscription_key::SubscriptionKey;
 use tokio::{sync::mpsc, task::JoinHandle};
 use tracing::*;
-use utils::{
-    lsn::Lsn,
-    zid::{NodeId, ZTenantId, ZTenantTimelineId},
-};
+
+use crate::subscription_key::SubscriptionFullKey;
+
+pub use etcd_client::*;

 /// Default value to use for prefixing to all etcd keys with.
 /// This way allows isolating safekeeper/pageserver groups in the same etcd cluster.
 pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";

-#[derive(Debug, Deserialize, Serialize)]
-struct SafekeeperTimeline {
-    safekeeper_id: NodeId,
-    info: SkTimelineInfo,
+/// A way to control the data retrieval from a certain subscription.
+pub struct BrokerSubscription<V> {
+    /// An unbounded channel to fetch the relevant etcd updates from.
+    pub value_updates: mpsc::UnboundedReceiver<BrokerUpdate<V>>,
+    key: SubscriptionKey,
+    /// A subscription task handle, to allow waiting on it for the task to complete.
+    /// Both the updates channel and the handle require `&mut`, so it's better to keep
+    /// both `pub` to allow using both in the same structures without borrow checker complaining.
+    pub watcher_handle: JoinHandle<Result<(), BrokerError>>,
+    watcher: Watcher,
 }

-/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
-#[serde_as]
-#[derive(Debug, Clone, Deserialize, Serialize)]
-pub struct SkTimelineInfo {
-    /// Term of the last entry.
-    pub last_log_term: Option<u64>,
-    /// LSN of the last record.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub flush_lsn: Option<Lsn>,
-    /// Up to which LSN safekeeper regards its WAL as committed.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub commit_lsn: Option<Lsn>,
-    /// LSN up to which safekeeper has backed WAL.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub backup_lsn: Option<Lsn>,
-    /// LSN of last checkpoint uploaded by pageserver.
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub remote_consistent_lsn: Option<Lsn>,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    #[serde(default)]
-    pub peer_horizon_lsn: Option<Lsn>,
-    #[serde(default)]
-    pub safekeeper_connstr: Option<String>,
-    #[serde(default)]
-    pub pageserver_connstr: Option<String>,
+impl<V> BrokerSubscription<V> {
+    /// Cancels the subscription, stopping the data poller and waiting for it to shut down.
+    pub async fn cancel(mut self) -> Result<(), BrokerError> {
+        self.watcher.cancel().await.map_err(|e| {
+            BrokerError::EtcdClient(
+                e,
+                format!("Failed to cancel broker subscription, kind: {:?}", self.key),
+            )
+        })?;
+        match (&mut self.watcher_handle).await {
+            Ok(res) => res,
+            Err(e) => {
+                if e.is_cancelled() {
+                    // don't error on the tasks that are cancelled already
+                    Ok(())
+                } else {
+                    Err(BrokerError::InternalError(format!(
+                        "Panicked during broker subscription task, kind: {:?}, error: {e}",
+                        self.key
+                    )))
+                }
+            }
+        }
+    }
+}
+
+impl<V> Drop for BrokerSubscription<V> {
+    fn drop(&mut self) {
+        // we poll data from etcd into the channel in the same struct, so if the whole struct gets dropped,
+        // no more data is used by the receiver and it's safe to cancel and drop the whole etcd subscription task.
+        self.watcher_handle.abort();
+    }
+}
+
+/// An update from the etcd broker.
+pub struct BrokerUpdate<V> {
+    /// Etcd generation version: the bigger it is, the more up to date the data is.
+    pub etcd_version: i64,
+    /// Etcd key for the corresponding value, parsed from the broker KV.
+    pub key: SubscriptionFullKey,
+    /// Current etcd value, parsed from the broker KV.
+    pub value: V,
 }

 #[derive(Debug, thiserror::Error)]
 pub enum BrokerError {
    #[error("Etcd client error: {0}. Context: {1}")]
    EtcdClient(etcd_client::Error, String),
-    #[error("Error during parsing etcd data: {0}")]
-    ParsingError(String),
+    #[error("Error during parsing etcd key: {0}")]
+    KeyNotParsed(String),
    #[error("Internal error: {0}")]
    InternalError(String),
 }

-/// A way to control the data retrieval from a certain subscription.
-pub struct SkTimelineSubscription {
-    safekeeper_timeline_updates:
-        mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>>,
-    kind: SkTimelineSubscriptionKind,
-    watcher_handle: JoinHandle<Result<(), BrokerError>>,
-    watcher: Watcher,
-}
-
-impl SkTimelineSubscription {
-    /// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
-    pub async fn fetch_data(
-        &mut self,
-    ) -> Option<HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>>> {
-        self.safekeeper_timeline_updates.recv().await
-    }
-
-    /// Cancels the subscription, stopping the data poller and waiting for it to shut down.
-    pub async fn cancel(mut self) -> Result<(), BrokerError> {
-        self.watcher.cancel().await.map_err(|e| {
-            BrokerError::EtcdClient(
-                e,
-                format!(
-                    "Failed to cancel timeline subscription, kind: {:?}",
-                    self.kind
-                ),
-            )
-        })?;
-        self.watcher_handle.await.map_err(|e| {
-            BrokerError::InternalError(format!(
-                "Failed to join the timeline updates task, kind: {:?}, error: {e}",
-                self.kind
-            ))
-        })?
-    }
-}
-
-/// The subscription kind to the timeline updates from safekeeper.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct SkTimelineSubscriptionKind {
-    broker_etcd_prefix: String,
-    kind: SubscriptionKind,
-}
-
-impl SkTimelineSubscriptionKind {
-    pub fn all(broker_etcd_prefix: String) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::All,
-        }
-    }
-
-    pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::Tenant(tenant),
-        }
-    }
-
-    pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
-        Self {
-            broker_etcd_prefix,
-            kind: SubscriptionKind::Timeline(timeline),
-        }
-    }
-
-    /// Etcd key to use for watching a certain timeline updates from safekeepers.
-    pub fn watch_key(&self) -> String {
-        match self.kind {
-            SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
-            SubscriptionKind::Tenant(tenant_id) => {
-                format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
-            }
-            SubscriptionKind::Timeline(ZTenantTimelineId {
-                tenant_id,
-                timeline_id,
-            }) => format!(
-                "{}/{tenant_id}/{timeline_id}/safekeeper",
-                self.broker_etcd_prefix
-            ),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-enum SubscriptionKind {
-    /// Get every timeline update.
-    All,
-    /// Get certain tenant timelines' updates.
-    Tenant(ZTenantId),
-    /// Get certain timeline updates.
-    Timeline(ZTenantTimelineId),
-}
-
 /// Creates a background task to poll etcd for timeline updates from safekeepers.
 /// Stops and returns `Err` on any error during etcd communication.
 /// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
 /// exiting normally in such cases.
-pub async fn subscribe_to_safekeeper_timeline_updates(
+/// Etcd values are parsed as JSON into the type specified by the generic parameter.
+pub async fn subscribe_for_json_values<V>(
    client: &mut Client,
-    subscription: SkTimelineSubscriptionKind,
-) -> Result<SkTimelineSubscription, BrokerError> {
-    info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
-    let kind = subscription.clone();
+    key: SubscriptionKey,
+) -> Result<BrokerSubscription<V>, BrokerError>
+where
+    V: DeserializeOwned + Send + 'static,
+{
+    subscribe_for_values(client, key, |_, value_str| {
+        match serde_json::from_str::<V>(value_str) {
+            Ok(value) => Some(value),
+            Err(e) => {
+                error!("Failed to parse value str '{value_str}': {e}");
+                None
+            }
+        }
+    })
+    .await
+}
+
+/// Same as [`subscribe_for_json_values`], but allows specifying a custom parser for an etcd value string.
+pub async fn subscribe_for_values<P, V>(
+    client: &mut Client,
+    key: SubscriptionKey,
+    value_parser: P,
+) -> Result<BrokerSubscription<V>, BrokerError>
+where
+    V: Send + 'static,
+    P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
+{
+    info!("Subscribing to broker value updates, key: {key:?}");
+    let subscription_key = key.clone();

    let (watcher, mut stream) = client
-        .watch(
-            subscription.watch_key(),
-            Some(WatchOptions::new().with_prefix()),
-        )
+        .watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
        .await
        .map_err(|e| {
            BrokerError::EtcdClient(
                e,
-                format!("Failed to init the watch for subscription {subscription:?}"),
+                format!("Failed to init the watch for subscription {key:?}"),
            )
        })?;

-    let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
+    let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
    let watcher_handle = tokio::spawn(async move {
        while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
-            "Failed to get messages from the subscription stream, kind: {:?}, error: {e}", subscription.kind
+            "Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
        )))? {
            if resp.canceled() {
                info!("Watch for timeline updates subscription was canceled, exiting");
                break;
            }

-            let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<NodeId, SkTimelineInfo>> = HashMap::new();
-            // Keep track that the timeline data updates from etcd arrive in the right order.
-            // https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
-            // > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
-            let mut timeline_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
-
-
            let events = resp.events();
            debug!("Processing {} events", events.len());

            for event in events {
                if EventType::Put == event.event_type() {
                    if let Some(new_etcd_kv) = event.kv() {
-                        let new_kv_version = new_etcd_kv.version();
-                        let (key_str, value_str) = match extract_key_value_str(new_etcd_kv) {
-                            Ok(strs) => strs,
-                            Err(e) => {
-                                error!("Failed to represent etcd KV {new_etcd_kv:?} as pair of str: {e}");
-                                continue;
+                        match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
+                            Ok(Some((key, value))) => if let Err(e) = value_updates_sender.send(BrokerUpdate {
+                                etcd_version: new_etcd_kv.version(),
+                                key,
+                                value,
+                            }) {
+                                info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
+                                break;
                            },
-                        };
-
-                        match parse_etcd_key_value(&subscription,  key_str, value_str) {
-                            Ok((zttid, timeline)) => {
-                                match timeline_updates
-                                    .entry(zttid)
-                                    .or_default()
-                                    .entry(timeline.safekeeper_id)
-                                {
-                                    hash_map::Entry::Occupied(mut o) => {
-                                        let old_etcd_kv_version = timeline_etcd_versions.get(&zttid).copied().unwrap_or(i64::MIN);
-                                        if old_etcd_kv_version < new_kv_version {
-                                            o.insert(timeline.info);
-                                            timeline_etcd_versions.insert(zttid,new_kv_version);
-                                        } else {
-                                            debug!("Skipping etcd timeline update due to older version compared to one that's already stored");
-                                        }
-                                    }
-                                    hash_map::Entry::Vacant(v) => {
-                                        v.insert(timeline.info);
-                                        timeline_etcd_versions.insert(zttid,new_kv_version);
-                                    }
-                                }
-                            }
-                            Err(e) => error!("Failed to parse timeline update: {e}"),
+                            Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
+                            Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
+                            Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
                        };
                    }
                }
            }
-
-            if let Err(e) = timeline_updates_sender.send(timeline_updates) {
-                info!("Timeline updates sender got dropped, exiting: {e}");
-                break;
-            }
        }

        Ok(())
    }.instrument(info_span!("etcd_broker")));

-    Ok(SkTimelineSubscription {
-        kind,
-        safekeeper_timeline_updates,
+    Ok(BrokerSubscription {
+        key: subscription_key,
+        value_updates: value_updates_receiver,
        watcher_handle,
        watcher,
    })
 }

-fn extract_key_value_str(kv: &KeyValue) -> Result<(&str, &str), BrokerError> {
-    let key = kv.key_str().map_err(|e| {
+fn parse_etcd_kv<P, V>(
+    kv: &KeyValue,
+    value_parser: &P,
+    cluster_prefix: &str,
+) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
+where
+    P: Fn(SubscriptionFullKey, &str) -> Option<V>,
+{
+    let key_str = kv.key_str().map_err(|e| {
        BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
    })?;
-    let value = kv.value_str().map_err(|e| {
+    let value_str = kv.value_str().map_err(|e| {
        BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
    })?;
-    Ok((key, value))
-}

-static SK_TIMELINE_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
-    Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]]+)$")
-        .expect("wrong regex for safekeeper timeline etcd key")
-});
-
-fn parse_etcd_key_value(
-    subscription: &SkTimelineSubscriptionKind,
-    key_str: &str,
-    value_str: &str,
-) -> Result<(ZTenantTimelineId, SafekeeperTimeline), BrokerError> {
-    let broker_prefix = subscription.broker_etcd_prefix.as_str();
-    if !key_str.starts_with(broker_prefix) {
-        return Err(BrokerError::ParsingError(format!(
-            "KV has unexpected key '{key_str}' that does not start with broker prefix {broker_prefix}"
+    if !key_str.starts_with(cluster_prefix) {
+        return Err(BrokerError::KeyNotParsed(format!(
+            "KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
        )));
    }

-    let key_part = &key_str[broker_prefix.len()..];
-    let key_captures = match SK_TIMELINE_KEY_REGEX.captures(key_part) {
-        Some(captures) => captures,
-        None => {
-            return Err(BrokerError::ParsingError(format!(
-                "KV has unexpected key part '{key_part}' that does not match required regex {}",
-                SK_TIMELINE_KEY_REGEX.as_str()
-            )));
-        }
-    };
-    let info = serde_json::from_str(value_str).map_err(|e| {
-        BrokerError::ParsingError(format!(
-            "Failed to parse '{value_str}' as safekeeper timeline info: {e}"
-        ))
+    let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
+        BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
    })?;

-    let zttid = ZTenantTimelineId::new(
-        parse_capture(&key_captures, 1).map_err(BrokerError::ParsingError)?,
-        parse_capture(&key_captures, 2).map_err(BrokerError::ParsingError)?,
-    );
-    let safekeeper_id = NodeId(parse_capture(&key_captures, 3).map_err(BrokerError::ParsingError)?);
-
-    Ok((
-        zttid,
-        SafekeeperTimeline {
-            safekeeper_id,
-            info,
-        },
-    ))
-}
-
-fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
-where
-    T: FromStr,
-    <T as FromStr>::Err: Display,
-{
-    let capture_match = caps
-        .get(index)
-        .ok_or_else(|| format!("Failed to get capture match at index {index}"))?
-        .as_str();
-    capture_match.parse().map_err(|e| {
-        format!(
-            "Failed to parse {} from {capture_match}: {e}",
-            std::any::type_name::<T>()
-        )
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use utils::zid::ZTimelineId;
-
-    use super::*;
-
-    #[test]
-    fn typical_etcd_prefix_should_be_parsed() {
-        let prefix = "neon";
-        let tenant_id = ZTenantId::generate();
-        let timeline_id = ZTimelineId::generate();
-        let all_subscription = SkTimelineSubscriptionKind {
-            broker_etcd_prefix: prefix.to_string(),
-            kind: SubscriptionKind::All,
-        };
-        let tenant_subscription = SkTimelineSubscriptionKind {
-            broker_etcd_prefix: prefix.to_string(),
-            kind: SubscriptionKind::Tenant(tenant_id),
-        };
-        let timeline_subscription = SkTimelineSubscriptionKind {
-            broker_etcd_prefix: prefix.to_string(),
-            kind: SubscriptionKind::Timeline(ZTenantTimelineId::new(tenant_id, timeline_id)),
-        };
-
-        let typical_etcd_kv_strs = [
-            (
-                format!("{prefix}/{tenant_id}/{timeline_id}/safekeeper/1"),
-                r#"{"last_log_term":231,"flush_lsn":"0/241BB70","commit_lsn":"0/241BB70","backup_lsn":"0/2000000","remote_consistent_lsn":"0/0","peer_horizon_lsn":"0/16960E8","safekeeper_connstr":"something.local:1234","pageserver_connstr":"postgresql://(null):@somethine.else.local:3456"}"#,
-            ),
-            (
-                format!("{prefix}/{tenant_id}/{timeline_id}/safekeeper/13"),
-                r#"{"last_log_term":231,"flush_lsn":"0/241BB70","commit_lsn":"0/241BB70","backup_lsn":"0/2000000","remote_consistent_lsn":"0/0","peer_horizon_lsn":"0/16960E8","safekeeper_connstr":"something.local:1234","pageserver_connstr":"postgresql://(null):@somethine.else.local:3456"}"#,
-            ),
-        ];
-
-        for (key_string, value_str) in typical_etcd_kv_strs {
-            for subscription in [
-                &all_subscription,
-                &tenant_subscription,
-                &timeline_subscription,
-            ] {
-                let (id, _timeline) =
-                    parse_etcd_key_value(subscription, &key_string, value_str)
-                        .unwrap_or_else(|e| panic!("Should be able to parse etcd key string '{key_string}' and etcd value string '{value_str}' for subscription {subscription:?}, but got: {e}"));
-                assert_eq!(id, ZTenantTimelineId::new(tenant_id, timeline_id));
-            }
-        }
-    }
+    Ok(value_parser(key, value_str).map(|value| (key, value)))
 }
--- a/libs/etcd_broker/src/subscription_key.rs
+++ b/libs/etcd_broker/src/subscription_key.rs
@@ -0,0 +1,310 @@
+//! Etcd broker keys, used in the project and shared between instances.
+//! The keys are split into two categories:
+//!
+//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
+//! Always returned from etcd in this form, always start with the user key provided.
+//!
+//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
+//! Full key always starts with the user input one, due to etcd subscription properties.
+
+use std::{fmt::Display, str::FromStr};
+
+use once_cell::sync::Lazy;
+use regex::{Captures, Regex};
+use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
+
+/// The subscription kind to the timeline updates from safekeeper.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct SubscriptionKey {
+    /// Generic cluster prefix, allowing to use the same etcd instance by multiple logic groups.
+    pub cluster_prefix: String,
+    /// The subscription kind.
+    pub kind: SubscriptionKind,
+}
+
+/// All currently possible key kinds of an etcd broker subscription.
+/// Etcd works in such a way that every key starting with the given subscription key is considered matching and
+/// returned as part of the subscription.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SubscriptionKind {
+    /// Get every update in etcd.
+    All,
+    /// Get etcd updates for any timeline of a certain tenant, affected by any operation from any node kind.
+    TenantTimelines(ZTenantId),
+    /// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
+    Timeline(ZTenantTimelineId),
+    /// Get etcd timeline updates, specific to a certain node kind.
+    Node(ZTenantTimelineId, NodeKind),
+    /// Get etcd timeline updates for a certain operation on specific nodes.
+    Operation(ZTenantTimelineId, NodeKind, OperationKind),
+}
+
+/// All kinds of nodes, able to write into etcd.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum NodeKind {
+    Safekeeper,
+    Pageserver,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum OperationKind {
+    Safekeeper(SkOperationKind),
+}
+
+/// Current operations, running inside the safekeeper node.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SkOperationKind {
+    TimelineInfo,
+    WalBackup,
+}
+
+static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
+    Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
+        .expect("wrong subscription full etcd key regex")
+});
+
+/// Full key, received from etcd during any of the component's work.
+/// No other etcd keys are considered during system's work.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct SubscriptionFullKey {
+    pub id: ZTenantTimelineId,
+    pub node_kind: NodeKind,
+    pub operation: OperationKind,
+    pub node_id: NodeId,
+}
+
+impl SubscriptionKey {
+    /// Subscribes for all etcd updates.
+    pub fn all(cluster_prefix: String) -> Self {
+        SubscriptionKey {
+            cluster_prefix,
+            kind: SubscriptionKind::All,
+        }
+    }
+
+    /// Subscribes to a given timeline info updates from safekeepers.
+    pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
+        Self {
+            cluster_prefix,
+            kind: SubscriptionKind::Operation(
+                timeline,
+                NodeKind::Safekeeper,
+                OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
+            ),
+        }
+    }
+
+    /// Subscribes to all timeline updates during specific operations, running on the corresponding nodes.
+    pub fn operation(
+        cluster_prefix: String,
+        timeline: ZTenantTimelineId,
+        node_kind: NodeKind,
+        operation: OperationKind,
+    ) -> Self {
+        Self {
+            cluster_prefix,
+            kind: SubscriptionKind::Operation(timeline, node_kind, operation),
+        }
+    }
+
+    /// Etcd key to watch (as a prefix) in order to receive the updates matching this subscription kind.
+    pub fn watch_key(&self) -> String {
+        let cluster_prefix = &self.cluster_prefix;
+        match self.kind {
+            SubscriptionKind::All => cluster_prefix.to_string(),
+            SubscriptionKind::TenantTimelines(tenant_id) => {
+                format!("{cluster_prefix}/{tenant_id}")
+            }
+            SubscriptionKind::Timeline(id) => {
+                format!("{cluster_prefix}/{id}")
+            }
+            SubscriptionKind::Node(id, node_kind) => {
+                format!("{cluster_prefix}/{id}/{node_kind}")
+            }
+            SubscriptionKind::Operation(id, node_kind, operation_kind) => {
+                format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
+            }
+        }
+    }
+}
+
+impl Display for OperationKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            OperationKind::Safekeeper(o) => o.fmt(f),
+        }
+    }
+}
+
+impl FromStr for OperationKind {
+    type Err = String;
+
+    fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
+        match operation_kind_str {
+            "timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
+            "wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
+            _ => Err(format!("Unknown operation kind: {operation_kind_str}")),
+        }
+    }
+}
+
+impl Display for SubscriptionFullKey {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self {
+            id,
+            node_kind,
+            operation,
+            node_id,
+        } = self;
+        write!(f, "{id}/{node_kind}/{operation}/{node_id}")
+    }
+}
+
+impl FromStr for SubscriptionFullKey {
+    type Err = String;
+
+    /// Parses a full key by matching it against `SUBSCRIPTION_FULL_KEY_REGEX` and
+    /// converting capture groups 1-5 into the key's fields.
+    ///
+    /// Returns a `String` error if the regex does not match or if any capture
+    /// group cannot be parsed into its target type.
+    fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
+        let groups = SUBSCRIPTION_FULL_KEY_REGEX
+            .captures(subscription_kind_str)
+            .ok_or_else(|| {
+                format!(
+                    "Subscription kind str does not match a subscription full key regex {}",
+                    SUBSCRIPTION_FULL_KEY_REGEX.as_str()
+                )
+            })?;
+
+        let id = ZTenantTimelineId::new(parse_capture(&groups, 1)?, parse_capture(&groups, 2)?);
+        let node_kind = parse_capture(&groups, 3)?;
+        let operation = parse_capture(&groups, 4)?;
+        let node_id = NodeId(parse_capture(&groups, 5)?);
+        Ok(Self { id, node_kind, operation, node_id })
+    }
+}
+
+/// Parses capture group `index` of `caps` into `T` through `T`'s `FromStr` impl.
+/// Returns a `String` error if the group is absent or its text fails to parse.
+fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
+where
+    T: FromStr,
+    <T as FromStr>::Err: Display,
+{
+    let capture_match = match caps.get(index) {
+        Some(group) => group.as_str(),
+        None => return Err(format!("Failed to get capture match at index {index}")),
+    };
+    let type_name = std::any::type_name::<T>();
+    capture_match
+        .parse::<T>()
+        .map_err(|e| format!("Failed to parse {} from {capture_match}: {e}", type_name))
+}
+
+impl Display for NodeKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Safekeeper => "safekeeper",
+            Self::Pageserver => "pageserver",
+        })
+    }
+}
+
+impl FromStr for NodeKind {
+    type Err = String;
+    /// Accepts exactly `safekeeper` or `pageserver`; any other input is an `Err` naming it.
+    fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
+        match node_kind_str {
+            "safekeeper" => Ok(Self::Safekeeper),
+            "pageserver" => Ok(Self::Pageserver),
+            _ => Err(format!("Invalid node kind: {node_kind_str}")),
+        }
+    }
+}
+
+impl Display for SkOperationKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::TimelineInfo => "timeline_info",
+            Self::WalBackup => "wal_backup",
+        })
+    }
+}
+
+impl FromStr for SkOperationKind {
+    type Err = String;
+    /// Accepts exactly `timeline_info` or `wal_backup`; any other input is an `Err` naming it.
+    fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
+        match operation_str {
+            "timeline_info" => Ok(Self::TimelineInfo),
+            "wal_backup" => Ok(Self::WalBackup),
+            _ => Err(format!("Invalid operation: {operation_str}")),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use utils::zid::ZTimelineId;
+
+    use super::*;
+
+    #[test]
+    fn full_cluster_key_parsing() {
+        let prefix = "neon";
+        let node_kind = NodeKind::Safekeeper;
+        let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
+        let tenant_id = ZTenantId::generate();
+        let timeline_id = ZTimelineId::generate();
+        let id = ZTenantTimelineId::new(tenant_id, timeline_id);
+        let node_id = NodeId(1);
+        // One key per subscription kind, ordered from the broadest scope to the narrowest.
+        let timeline_subscription_keys = [
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::All,
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::TenantTimelines(tenant_id),
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::Timeline(id),
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::Node(id, node_kind),
+            },
+            SubscriptionKey {
+                cluster_prefix: prefix.to_string(),
+                kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
+            },
+        ];
+        // The full key is the narrowest (last) watch key with the node id appended.
+        let full_key_string = format!(
+            "{}/{node_id}",
+            timeline_subscription_keys.last().unwrap().watch_key()
+        );
+        // Every watch key, not just one of them, must be a prefix of the full key.
+        for key in timeline_subscription_keys {
+            assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with each of the watch keys, but {key:?} did not match");
+        }
+
+        let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
+            panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
+        });
+
+        assert_eq!(
+            full_key,
+            SubscriptionFullKey {
+                id,
+                node_kind,
+                operation: operation_kind,
+                node_id
+            }
+        )
+    }
+}
--- a/libs/etcd_broker/src/subscription_value.rs
+++ b/libs/etcd_broker/src/subscription_value.rs
@@ -0,0 +1,35 @@
+//! Module for the values to put into etcd.
+
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+use utils::lsn::Lsn;
+
+/// Safekeeper's data about one timeline, as put into etcd. All fields are optional to ease migrations.
+#[serde_as]
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct SkTimelineInfo {
+    /// Term of the last entry.
+    pub last_log_term: Option<u64>,
+    /// LSN of the last record.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub flush_lsn: Option<Lsn>,
+    /// Up to which LSN safekeeper regards its WAL as committed.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub commit_lsn: Option<Lsn>,
+    /// LSN up to which safekeeper has backed up WAL.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub backup_lsn: Option<Lsn>,
+    /// LSN of last checkpoint uploaded by pageserver.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub remote_consistent_lsn: Option<Lsn>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(default)]
+    pub peer_horizon_lsn: Option<Lsn>, // NOTE(review): undocumented here; confirm its meaning against the safekeeper code
+    /// A connection string to use for WAL receiving.
+    #[serde(default)]
+    pub safekeeper_connstr: Option<String>,
+}
--- a/libs/postgres_ffi/wal_generate/src/lib.rs
+++ b/libs/postgres_ffi/wal_generate/src/lib.rs
@@ -4,6 +4,7 @@ use log::*;
 use postgres::types::PgLsn;
 use postgres::Client;
 use std::cmp::Ordering;
+use std::fs;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 use std::time::Instant;
@@ -69,6 +70,12 @@ impl Conf {

    pub fn start_server(&self) -> Result<PostgresServer> {
        info!("Starting Postgres server in {:?}", self.datadir);
+        let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
+            format!(
+                "Failed to create pg.log file in directory {}",
+                self.datadir.display()
+            )
+        })?;
        let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
        let unix_socket_dir_path = unix_socket_dir.path().to_owned();
        let server_process = self
@@ -84,7 +91,7 @@ impl Conf {
            // Disable background processes as much as possible
            .args(&["-c", "wal_writer_delay=10s"])
            .args(&["-c", "autovacuum=off"])
-            .stderr(Stdio::null())
+            .stderr(Stdio::from(log_file))
            .spawn()?;
        let server = PostgresServer {
            process: server_process,
--- a/libs/utils/src/postgres_backend.rs
+++ b/libs/utils/src/postgres_backend.rs
@@ -13,13 +13,10 @@ use std::fmt;
 use std::io::{self, Write};
 use std::net::{Shutdown, SocketAddr, TcpStream};
 use std::str::FromStr;
-use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 use std::time::Duration;
 use tracing::*;

-static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
-
 pub trait Handler {
    /// Handle single query.
    /// postgres_backend will issue ReadyForQuery after calling this (this
@@ -45,6 +42,10 @@ pub trait Handler {
    fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
        bail!("JWT auth failed")
    }
+
+    fn is_shutdown_requested(&self) -> bool {
+        false // Default: no shutdown requested; the message loop keeps running while this is false.
+    }
 }

 /// PostgresBackend protocol state.
@@ -274,7 +275,7 @@ impl PostgresBackend {

        let mut unnamed_query_string = Bytes::new();

-        while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
+        while !handler.is_shutdown_requested() {
            match self.read_message() {
                Ok(message) => {
                    if let Some(msg) = message {
@@ -493,8 +494,3 @@ impl PostgresBackend {
        Ok(ProcessMsgResult::Continue)
    }
 }
-
-// Set the flag to inform connections to cancel
-pub fn set_pgbackend_shutdown_requested() {
-    PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
-}
--- a/libs/utils/src/pq_proto.rs
+++ b/libs/utils/src/pq_proto.rs
@@ -269,7 +269,14 @@ impl FeStartupPacket {
                            .next()
                            .context("expected even number of params in StartupMessage")?;
                        if name == "options" {
-                            // deprecated way of passing params as cmd line args
+                            // parsing options arguments "...&options=<var0>%3D<val0>+<var1>%3D<val1>..."
+                            // '%3D' is '=' and '+' is ' '
+
+                            // Note: we allow users that don't have SNI capabilities,
+                            // to pass a special keyword argument 'project'
+                            // to be used to determine the cluster name by the proxy.
+
+                            // TODO: write a unit test for this and refactor it into its own function.
                            for cmdopt in value.split(' ') {
                                let nameval: Vec<&str> = cmdopt.split('=').collect();
                                if nameval.len() == 2 {
@@ -919,10 +926,10 @@ impl<'a> BeMessage<'a> {
    }
 }

-// Zenith extension of postgres replication protocol
-// See ZENITH_STATUS_UPDATE_TAG_BYTE
+// Neon extension of postgres replication protocol
+// See NEON_STATUS_UPDATE_TAG_BYTE
 #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
-pub struct ZenithFeedback {
+pub struct ReplicationFeedback {
    // Last known size of the timeline. Used to enforce timeline size limit.
    pub current_timeline_size: u64,
    // Parts of StandbyStatusUpdate we resend to compute via safekeeper
@@ -932,13 +939,13 @@ pub struct ZenithFeedback {
    pub ps_replytime: SystemTime,
 }

-// NOTE: Do not forget to increment this number when adding new fields to ZenithFeedback.
+// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
 // Do not remove previously available fields because this might be backwards incompatible.
-pub const ZENITH_FEEDBACK_FIELDS_NUMBER: u8 = 5;
+pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;

-impl ZenithFeedback {
-    pub fn empty() -> ZenithFeedback {
-        ZenithFeedback {
+impl ReplicationFeedback {
+    pub fn empty() -> ReplicationFeedback {
+        ReplicationFeedback {
            current_timeline_size: 0,
            ps_writelsn: 0,
            ps_applylsn: 0,
@@ -947,7 +954,7 @@ impl ZenithFeedback {
        }
    }

-    // Serialize ZenithFeedback using custom format
+    // Serialize ReplicationFeedback using custom format
    // to support protocol extensibility.
    //
    // Following layout is used:
@@ -958,7 +965,7 @@ impl ZenithFeedback {
    // uint32 - value length in bytes
    // value itself
    pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
-        buf.put_u8(ZENITH_FEEDBACK_FIELDS_NUMBER); // # of keys
+        buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
        write_cstr(&Bytes::from("current_timeline_size"), buf)?;
        buf.put_i32(8);
        buf.put_u64(self.current_timeline_size);
@@ -985,9 +992,9 @@ impl ZenithFeedback {
        Ok(())
    }

-    // Deserialize ZenithFeedback message
-    pub fn parse(mut buf: Bytes) -> ZenithFeedback {
-        let mut zf = ZenithFeedback::empty();
+    // Deserialize ReplicationFeedback message
+    pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
+        let mut zf = ReplicationFeedback::empty();
        let nfields = buf.get_u8();
        let mut i = 0;
        while i < nfields {
@@ -1028,14 +1035,14 @@ impl ZenithFeedback {
                _ => {
                    let len = buf.get_i32();
                    warn!(
-                        "ZenithFeedback parse. unknown key {} of len {}. Skip it.",
+                        "ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
                        key, len
                    );
                    buf.advance(len as usize);
                }
            }
        }
-        trace!("ZenithFeedback parsed is {:?}", zf);
+        trace!("ReplicationFeedback parsed is {:?}", zf);
        zf
    }
 }
@@ -1045,8 +1052,8 @@ mod tests {
    use super::*;

    #[test]
-    fn test_zenithfeedback_serialization() {
-        let mut zf = ZenithFeedback::empty();
+    fn test_replication_feedback_serialization() {
+        let mut zf = ReplicationFeedback::empty();
        // Fill zf with some values
        zf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
@@ -1055,13 +1062,13 @@ mod tests {
        let mut data = BytesMut::new();
        zf.serialize(&mut data).unwrap();

-        let zf_parsed = ZenithFeedback::parse(data.freeze());
+        let zf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(zf, zf_parsed);
    }

    #[test]
-    fn test_zenithfeedback_unknown_key() {
-        let mut zf = ZenithFeedback::empty();
+    fn test_replication_feedback_unknown_key() {
+        let mut zf = ReplicationFeedback::empty();
        // Fill zf with some values
        zf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
@@ -1072,7 +1079,7 @@ mod tests {

        // Add an extra field to the buffer and adjust number of keys
        if let Some(first) = data.first_mut() {
-            *first = ZENITH_FEEDBACK_FIELDS_NUMBER + 1;
+            *first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
        }

        write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
@@ -1080,7 +1087,7 @@ mod tests {
        data.put_u64(42);

        // Parse serialized data and check that new field is not parsed
-        let zf_parsed = ZenithFeedback::parse(data.freeze());
+        let zf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(zf, zf_parsed);
    }

--- a/neon_local/src/main.rs
+++ b/neon_local/src/main.rs
@@ -14,7 +14,7 @@ use safekeeper::defaults::{
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
 use std::collections::{BTreeSet, HashMap};
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::process::exit;
 use std::str::FromStr;
 use utils::{
@@ -159,6 +159,20 @@ fn main() -> Result<()> {
                .about("Create a new blank timeline")
                .arg(tenant_id_arg.clone())
                .arg(branch_name_arg.clone()))
+            .subcommand(App::new("import")
+                .about("Import timeline from basebackup directory")
+                .arg(tenant_id_arg.clone())
+                .arg(timeline_id_arg.clone())
+                .arg(Arg::new("node-name").long("node-name").takes_value(true)
+                    .help("Name to assign to the imported timeline"))
+                .arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
+                    .help("Basebackup tarfile to import"))
+                .arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
+                    .help("Lsn the basebackup starts at"))
+                .arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
+                    .help("Wal to add after base"))
+                .arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
+                    .help("Lsn the basebackup ends at")))
        ).subcommand(
            App::new("tenant")
            .setting(AppSettings::ArgRequiredElseHelp)
@@ -613,6 +627,43 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                timeline.timeline_id, last_record_lsn, tenant_id,
            );
        }
+        Some(("import", import_match)) => {
+            let tenant_id = get_tenant_id(import_match, env)?;
+            let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
+            let name = import_match
+                .value_of("node-name")
+                .ok_or_else(|| anyhow!("No node name provided"))?;
+
+            // Parse base inputs
+            let base_tarfile = import_match
+                .value_of("base-tarfile")
+                .map(|s| PathBuf::from_str(s).unwrap())
+                .ok_or_else(|| anyhow!("No base-tarfile provided"))?;
+            let base_lsn = Lsn::from_str(
+                import_match
+                    .value_of("base-lsn")
+                    .ok_or_else(|| anyhow!("No base-lsn provided"))?,
+            )?;
+            let base = (base_lsn, base_tarfile);
+
+            // Parse pg_wal inputs
+            let wal_tarfile = import_match
+                .value_of("wal-tarfile")
+                .map(|s| PathBuf::from_str(s).unwrap());
+            let end_lsn = import_match
+                .value_of("end-lsn")
+                .map(|s| Lsn::from_str(s).unwrap());
+            // TODO validate both or none are provided
+            let pg_wal = end_lsn.zip(wal_tarfile);
+
+            let mut cplane = ComputeControlPlane::load(env.clone())?;
+            println!("Importing timeline into pageserver ...");
+            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal)?;
+            println!("Creating node for imported timeline ...");
+            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
+            cplane.new_node(tenant_id, name, timeline_id, None, None)?;
+            println!("Done");
+        }
        Some(("branch", branch_match)) => {
            let tenant_id = get_tenant_id(branch_match, env)?;
            let new_branch_name = branch_match
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -61,6 +61,7 @@ utils = { path = "../libs/utils" }
 remote_storage = { path = "../libs/remote_storage" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
 close_fds = "0.3.2"
+walkdir = "2.3.2"

 [dev-dependencies]
 hex-literal = "0.3"
--- a/pageserver/README.md
+++ b/pageserver/README.md
@@ -69,7 +69,7 @@ Repository

 The repository stores all the page versions, or WAL records needed to
 reconstruct them. Each tenant has a separate Repository, which is
-stored in the .zenith/tenants/<tenantid> directory.
+stored in the .neon/tenants/<tenantid> directory.

 Repository is an abstract trait, defined in `repository.rs`. It is
 implemented by the LayeredRepository object in
@@ -92,7 +92,7 @@ Each repository also has a WAL redo manager associated with it, see
 records, whenever we need to reconstruct a page version from WAL to
 satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
 for a page. The WAL redo manager uses a Postgres process running in
-special zenith wal-redo mode to do the actual WAL redo, and
+special Neon wal-redo mode to do the actual WAL redo, and
 communicates with the process using a pipe.


--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -13,6 +13,7 @@
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use bytes::{BufMut, BytesMut};
 use fail::fail_point;
+use itertools::Itertools;
 use std::fmt::Write as FmtWrite;
 use std::io;
 use std::io::Write;
@@ -21,7 +22,7 @@ use std::time::SystemTime;
 use tar::{Builder, EntryType, Header};
 use tracing::*;

-use crate::reltag::SlruKind;
+use crate::reltag::{RelTag, SlruKind};
 use crate::repository::Timeline;
 use crate::DatadirTimelineImpl;
 use postgres_ffi::xlog_utils::*;
@@ -39,11 +40,12 @@ where
    timeline: &'a Arc<DatadirTimelineImpl>,
    pub lsn: Lsn,
    prev_record_lsn: Lsn,
-
+    full_backup: bool,
    finished: bool,
 }

-// Create basebackup with non-rel data in it. Omit relational data.
+// Create basebackup with non-rel data in it.
+// Only include relational data if 'full_backup' is true.
 //
 // Currently we use empty lsn in two cases:
 //  * During the basebackup right after timeline creation
@@ -58,6 +60,7 @@ where
        write: W,
        timeline: &'a Arc<DatadirTimelineImpl>,
        req_lsn: Option<Lsn>,
+        full_backup: bool,
    ) -> Result<Basebackup<'a, W>> {
        // Compute postgres doesn't have any previous WAL files, but the first
        // record that it's going to write needs to include the LSN of the
@@ -94,8 +97,8 @@ where
        };

        info!(
-            "taking basebackup lsn={}, prev_lsn={}",
-            backup_lsn, backup_prev
+            "taking basebackup lsn={}, prev_lsn={} (full_backup={})",
+            backup_lsn, backup_prev, full_backup
        );

        Ok(Basebackup {
@@ -103,11 +106,14 @@ where
            timeline,
            lsn: backup_lsn,
            prev_record_lsn: backup_prev,
+            full_backup,
            finished: false,
        })
    }

    pub fn send_tarball(mut self) -> anyhow::Result<()> {
+        // TODO include checksum
+
        // Create pgdata subdirs structure
        for dir in pg_constants::PGDATA_SUBDIRS.iter() {
            let header = new_tar_header_dir(*dir)?;
@@ -140,6 +146,13 @@ where
        // Create tablespace directories
        for ((spcnode, dbnode), has_relmap_file) in self.timeline.list_dbdirs(self.lsn)? {
            self.add_dbdir(spcnode, dbnode, has_relmap_file)?;
+
+            // Gather and send relational files in each database if full backup is requested.
+            if self.full_backup {
+                for rel in self.timeline.list_rels(spcnode, dbnode, self.lsn)? {
+                    self.add_rel(rel)?;
+                }
+            }
        }
        for xid in self.timeline.list_twophase_files(self.lsn)? {
            self.add_twophase_file(xid)?;
@@ -157,6 +170,38 @@ where
        Ok(())
    }

+    /// Adds relation `tag`, read at `self.lsn`, to the archive as one tar entry per
+    /// segment file. A relation with zero blocks is added as a single empty file.
+    fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
+        let nblocks = self.timeline.get_rel_size(tag, self.lsn)?;
+
+        // Appends one segment file (tar header followed by its contents) to the archive.
+        let mut append_segment = |segno: usize, contents: &[u8]| -> anyhow::Result<()> {
+            let segfile_name = tag.to_segfile_name(segno as u32);
+            let header = new_tar_header(&segfile_name, contents.len() as u64)?;
+            self.ar.append(&header, contents)?;
+            Ok(())
+        };
+
+        if nblocks == 0 {
+            // Nothing to read: emit segment 0 with no contents.
+            return append_segment(0, &[]);
+        }
+
+        // Each run of RELSEG_SIZE consecutive blocks becomes one segment file.
+        let block_runs = (0..nblocks).chunks(pg_constants::RELSEG_SIZE as usize);
+        for (segno, block_run) in block_runs.into_iter().enumerate() {
+            let mut contents: Vec<u8> = Vec::new();
+            for blknum in block_run {
+                let page = self.timeline.get_rel_page_at_lsn(tag, blknum, self.lsn)?;
+                contents.extend_from_slice(&page[..]);
+            }
+            append_segment(segno, &contents)?;
+        }
+
+        Ok(())
+    }
+
    //
    // Generate SLRU segment files from repository.
    //
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -104,7 +104,7 @@ fn main() -> anyhow::Result<()> {
        return Ok(());
    }

-    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
+    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
    let workdir = workdir
        .canonicalize()
        .with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
@@ -263,6 +263,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
    // start profiler (if enabled)
    let profiler_guard = profiling::init_profiler(conf);

+    pageserver::tenant_tasks::init_tenant_task_pool()?;
+
    // initialize authentication for incoming connections
    let auth = match &conf.auth_type {
        AuthType::Trust | AuthType::MD5 => None,
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -2,7 +2,6 @@
 //! Import data and WAL from a PostgreSQL data directory and WAL segments into
 //! a zenith Timeline.
 //!
-use std::fs;
 use std::fs::File;
 use std::io::{Read, Seek, SeekFrom};
 use std::path::{Path, PathBuf};
@@ -10,16 +9,18 @@ use std::path::{Path, PathBuf};
 use anyhow::{bail, ensure, Context, Result};
 use bytes::Bytes;
 use tracing::*;
+use walkdir::WalkDir;

 use crate::pgdatadir_mapping::*;
 use crate::reltag::{RelTag, SlruKind};
 use crate::repository::Repository;
+use crate::repository::Timeline;
 use crate::walingest::WalIngest;
 use postgres_ffi::relfile_utils::*;
 use postgres_ffi::waldecoder::*;
 use postgres_ffi::xlog_utils::*;
+use postgres_ffi::Oid;
 use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
-use postgres_ffi::{Oid, TransactionId};
 use utils::lsn::Lsn;

 ///
@@ -35,100 +36,29 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
 ) -> Result<()> {
    let mut pg_control: Option<ControlFileData> = None;

+    // TODO: this should be start_lsn, which is not necessarily equal to end_lsn (aka lsn).
+    // Then fishing out pg_control would be unnecessary
    let mut modification = tline.begin_modification(lsn);
    modification.init_empty()?;

-    // Scan 'global'
-    let mut relfiles: Vec<PathBuf> = Vec::new();
-    for direntry in fs::read_dir(path.join("global"))? {
-        let direntry = direntry?;
-        match direntry.file_name().to_str() {
-            None => continue,
+    // Import all but pg_wal
+    let all_but_wal = WalkDir::new(path)
+        .into_iter()
+        .filter_entry(|entry| !entry.path().ends_with("pg_wal"));
+    for entry in all_but_wal {
+        let entry = entry?;
+        let metadata = entry.metadata().expect("error getting dir entry metadata");
+        if metadata.is_file() {
+            let absolute_path = entry.path();
+            let relative_path = absolute_path.strip_prefix(path)?;

-            Some("pg_control") => {
-                pg_control = Some(import_control_file(&mut modification, &direntry.path())?);
-            }
-            Some("pg_filenode.map") => {
-                import_relmap_file(
-                    &mut modification,
-                    pg_constants::GLOBALTABLESPACE_OID,
-                    0,
-                    &direntry.path(),
-                )?;
-            }
-
-            // Load any relation files into the page server (but only after the other files)
-            _ => relfiles.push(direntry.path()),
-        }
-    }
-    for relfile in relfiles {
-        import_relfile(
-            &mut modification,
-            &relfile,
-            pg_constants::GLOBALTABLESPACE_OID,
-            0,
-        )?;
-    }
-
-    // Scan 'base'. It contains database dirs, the database OID is the filename.
-    // E.g. 'base/12345', where 12345 is the database OID.
-    for direntry in fs::read_dir(path.join("base"))? {
-        let direntry = direntry?;
-
-        //skip all temporary files
-        if direntry.file_name().to_string_lossy() == "pgsql_tmp" {
-            continue;
-        }
-
-        let dboid = direntry.file_name().to_string_lossy().parse::<u32>()?;
-
-        let mut relfiles: Vec<PathBuf> = Vec::new();
-        for direntry in fs::read_dir(direntry.path())? {
-            let direntry = direntry?;
-            match direntry.file_name().to_str() {
-                None => continue,
-
-                Some("PG_VERSION") => {
-                    //modification.put_dbdir_creation(pg_constants::DEFAULTTABLESPACE_OID, dboid)?;
-                }
-                Some("pg_filenode.map") => import_relmap_file(
-                    &mut modification,
-                    pg_constants::DEFAULTTABLESPACE_OID,
-                    dboid,
-                    &direntry.path(),
-                )?,
-
-                // Load any relation files into the page server
-                _ => relfiles.push(direntry.path()),
+            let file = File::open(absolute_path)?;
+            let len = metadata.len() as usize;
+            if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
+                pg_control = Some(control_file);
            }
        }
-        for relfile in relfiles {
-            import_relfile(
-                &mut modification,
-                &relfile,
-                pg_constants::DEFAULTTABLESPACE_OID,
-                dboid,
-            )?;
-        }
    }
-    for entry in fs::read_dir(path.join("pg_xact"))? {
-        let entry = entry?;
-        import_slru_file(&mut modification, SlruKind::Clog, &entry.path())?;
-    }
-    for entry in fs::read_dir(path.join("pg_multixact").join("members"))? {
-        let entry = entry?;
-        import_slru_file(&mut modification, SlruKind::MultiXactMembers, &entry.path())?;
-    }
-    for entry in fs::read_dir(path.join("pg_multixact").join("offsets"))? {
-        let entry = entry?;
-        import_slru_file(&mut modification, SlruKind::MultiXactOffsets, &entry.path())?;
-    }
-    for entry in fs::read_dir(path.join("pg_twophase"))? {
-        let entry = entry?;
-        let xid = u32::from_str_radix(&entry.path().to_string_lossy(), 16)?;
-        import_twophase_file(&mut modification, xid, &entry.path())?;
-    }
-    // TODO: Scan pg_tblspc

    // We're done importing all the data files.
    modification.commit()?;
@@ -158,31 +88,30 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
 }

 // subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
-fn import_relfile<R: Repository>(
+fn import_rel<R: Repository, Reader: Read>(
    modification: &mut DatadirModification<R>,
    path: &Path,
    spcoid: Oid,
    dboid: Oid,
+    mut reader: Reader,
+    len: usize,
 ) -> anyhow::Result<()> {
    // Does it look like a relation file?
    trace!("importing rel file {}", path.display());

-    let (relnode, forknum, segno) = parse_relfilename(&path.file_name().unwrap().to_string_lossy())
-        .map_err(|e| {
-            warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
-            e
-        })?;
+    let filename = &path
+        .file_name()
+        .expect("missing rel filename")
+        .to_string_lossy();
+    let (relnode, forknum, segno) = parse_relfilename(filename).map_err(|e| {
+        warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
+        e
+    })?;

-    let mut file = File::open(path)?;
    let mut buf: [u8; 8192] = [0u8; 8192];

-    let len = file.metadata().unwrap().len();
-    ensure!(len % pg_constants::BLCKSZ as u64 == 0);
-    let nblocks = len / pg_constants::BLCKSZ as u64;
-
-    if segno != 0 {
-        todo!();
-    }
+    ensure!(len % pg_constants::BLCKSZ as usize == 0);
+    let nblocks = len / pg_constants::BLCKSZ as usize;

    let rel = RelTag {
        spcnode: spcoid,
@@ -190,11 +119,22 @@ fn import_relfile<R: Repository>(
        relnode,
        forknum,
    };
-    modification.put_rel_creation(rel, nblocks as u32)?;

    let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
+
+    // Call put_rel_creation for every segment of the relation,
+    // because there is no guarantee about the order in which we are processing segments.
+    // ignore "relation already exists" error
+    if let Err(e) = modification.put_rel_creation(rel, nblocks as u32) {
+        if e.to_string().contains("already exists") {
+            debug!("relation {} already exists. we must be extending it", rel);
+        } else {
+            return Err(e);
+        }
+    }
+
    loop {
-        let r = file.read_exact(&mut buf);
+        let r = reader.read_exact(&mut buf);
        match r {
            Ok(_) => {
                modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?;
@@ -204,7 +144,9 @@ fn import_relfile<R: Repository>(
            Err(err) => match err.kind() {
                std::io::ErrorKind::UnexpectedEof => {
                    // reached EOF. That's expected.
-                    ensure!(blknum == nblocks as u32, "unexpected EOF");
+                    let relative_blknum =
+                        blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
+                    ensure!(relative_blknum == nblocks as u32, "unexpected EOF");
                    break;
                }
                _ => {
@@ -215,96 +157,43 @@ fn import_relfile<R: Repository>(
        blknum += 1;
    }

+    // Update relation size
+    //
+    // If we process rel segments out of order,
+    // put_rel_extend will skip the update.
+    modification.put_rel_extend(rel, blknum)?;
+
    Ok(())
 }

-/// Import a relmapper (pg_filenode.map) file into the repository
-fn import_relmap_file<R: Repository>(
-    modification: &mut DatadirModification<R>,
-    spcnode: Oid,
-    dbnode: Oid,
-    path: &Path,
-) -> Result<()> {
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    trace!("importing relmap file {}", path.display());
-
-    modification.put_relmap_file(spcnode, dbnode, Bytes::copy_from_slice(&buffer[..]))?;
-    Ok(())
-}
-
-/// Import a twophase state file (pg_twophase/<xid>) into the repository
-fn import_twophase_file<R: Repository>(
-    modification: &mut DatadirModification<R>,
-    xid: TransactionId,
-    path: &Path,
-) -> Result<()> {
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    trace!("importing non-rel file {}", path.display());
-
-    modification.put_twophase_file(xid, Bytes::copy_from_slice(&buffer[..]))?;
-    Ok(())
-}
-
-///
-/// Import pg_control file into the repository.
-///
-/// The control file is imported as is, but we also extract the checkpoint record
-/// from it and store it separated.
-fn import_control_file<R: Repository>(
-    modification: &mut DatadirModification<R>,
-    path: &Path,
-) -> Result<ControlFileData> {
-    let mut file = File::open(path)?;
-    let mut buffer = Vec::new();
-    // read the whole file
-    file.read_to_end(&mut buffer)?;
-
-    trace!("importing control file {}", path.display());
-
-    // Import it as ControlFile
-    modification.put_control_file(Bytes::copy_from_slice(&buffer[..]))?;
-
-    // Extract the checkpoint record and import it separately.
-    let pg_control = ControlFileData::decode(&buffer)?;
-    let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
-    modification.put_checkpoint(checkpoint_bytes)?;
-
-    Ok(pg_control)
-}
-
-///
 /// Import an SLRU segment file
 ///
-fn import_slru_file<R: Repository>(
+fn import_slru<R: Repository, Reader: Read>(
    modification: &mut DatadirModification<R>,
    slru: SlruKind,
    path: &Path,
+    mut reader: Reader,
+    len: usize,
 ) -> Result<()> {
    trace!("importing slru file {}", path.display());

-    let mut file = File::open(path)?;
    let mut buf: [u8; 8192] = [0u8; 8192];
-    let segno = u32::from_str_radix(&path.file_name().unwrap().to_string_lossy(), 16)?;
+    let filename = &path
+        .file_name()
+        .expect("missing slru filename")
+        .to_string_lossy();
+    let segno = u32::from_str_radix(filename, 16)?;

-    let len = file.metadata().unwrap().len();
-    ensure!(len % pg_constants::BLCKSZ as u64 == 0); // we assume SLRU block size is the same as BLCKSZ
-    let nblocks = len / pg_constants::BLCKSZ as u64;
+    ensure!(len % pg_constants::BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
+    let nblocks = len / pg_constants::BLCKSZ as usize;

-    ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as u64);
+    ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);

    modification.put_slru_segment_creation(slru, segno, nblocks as u32)?;

    let mut rpageno = 0;
    loop {
-        let r = file.read_exact(&mut buf);
+        let r = reader.read_exact(&mut buf);
        match r {
            Ok(_) => {
                modification.put_slru_page_image(
@@ -396,10 +285,258 @@ fn import_wal<R: Repository>(
    }

    if last_lsn != startpoint {
-        debug!("reached end of WAL at {}", last_lsn);
+        info!("reached end of WAL at {}", last_lsn);
    } else {
        info!("no WAL to import at {}", last_lsn);
    }

    Ok(())
 }
+
+/// Import a base backup, supplied as a tar archive, into `tline` at `base_lsn`.
+///
+/// Fails if an entry has an unsupported type or no pg_control file is found.
+pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
+    tline: &mut DatadirTimeline<R>,
+    reader: Reader,
+    base_lsn: Lsn,
+) -> Result<()> {
+    info!("importing base at {}", base_lsn);
+    let mut modification = tline.begin_modification(base_lsn);
+    modification.init_empty()?;
+
+    let mut pg_control: Option<ControlFileData> = None;
+
+    // Import base
+    for base_tar_entry in tar::Archive::new(reader).entries()? {
+        let entry = base_tar_entry?;
+        let header = entry.header();
+        let len = header.entry_size()? as usize;
+        let file_path = header.path()?.into_owned();
+
+        match header.entry_type() {
+            tar::EntryType::Regular => {
+                if let Some(res) = import_file(&mut modification, file_path.as_ref(), entry, len)? {
+                    // We found the pg_control file.
+                    pg_control = Some(res);
+                }
+            }
+            tar::EntryType::Directory => debug!("directory {:?}", file_path),
+            // The archive is external input: report an error rather than panic.
+            other => bail!("unsupported tar entry type {:?}: {}", other, file_path.display()),
+        }
+    }
+
+    // sanity check: ensure that pg_control is loaded
+    let _pg_control = pg_control.context("pg_control file not found")?;
+
+    modification.commit()?;
+    Ok(())
+}
+
+/// Ingest WAL into `tline` from a tar archive of WAL segment files, which must be in
+/// order starting with the segment that contains `start_lsn`.
+pub fn import_wal_from_tar<R: Repository, Reader: Read>(
+    tline: &mut DatadirTimeline<R>,
+    reader: Reader,
+    start_lsn: Lsn,
+    end_lsn: Lsn,
+) -> Result<()> {
+    // Set up walingest mutable state
+    let mut waldecoder = WalStreamDecoder::new(start_lsn);
+    let mut segno = start_lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
+    let mut offset = start_lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
+    let mut last_lsn = start_lsn;
+    let mut walingest = WalIngest::new(tline, start_lsn)?;
+
+    // Ingest wal until end_lsn
+    info!("importing wal until {}", end_lsn);
+    let mut pg_wal_tar = tar::Archive::new(reader);
+    let mut pg_wal_entries_iter = pg_wal_tar.entries()?;
+    while last_lsn <= end_lsn {
+        let bytes = {
+            let entry = pg_wal_entries_iter.next().context("expected more wal")??;
+            let header = entry.header();
+            let file_path = header.path()?.into_owned();
+
+            match header.entry_type() {
+                tar::EntryType::Regular => {
+                    // FIXME: assume postgresql tli 1 for now
+                    let expected_filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
+                    let file_name = file_path
+                        .file_name()
+                        .expect("missing wal filename")
+                        .to_string_lossy();
+                    ensure!(expected_filename == file_name);
+                    debug!("processing wal file {:?}", file_path);
+                    read_all_bytes(entry)?
+                }
+                tar::EntryType::Directory => {
+                    debug!("directory {:?}", file_path);
+                    continue;
+                }
+                other => bail!("unsupported tar entry type {:?}: {}", other, file_path.display()),
+            }
+        };
+        ensure!(offset <= bytes.len(), "wal segment is shorter than the start offset");
+        waldecoder.feed_bytes(&bytes[offset..]);
+
+        while last_lsn <= end_lsn {
+            match waldecoder.poll_decode()? {
+                Some((lsn, recdata)) => {
+                    walingest.ingest_record(tline, recdata, lsn)?;
+                    last_lsn = lsn;
+                    debug!("imported record at {} (end {})", lsn, end_lsn);
+                }
+                // No record from the bytes fed so far: go feed the next segment, don't spin.
+                None => break,
+            }
+        }
+
+        debug!("imported records up to {}", last_lsn);
+        segno += 1;
+        offset = 0;
+    }
+
+    if last_lsn != start_lsn {
+        info!("reached end of WAL at {}", last_lsn);
+    } else {
+        info!("there was no WAL to import at {}", last_lsn);
+    }
+
+    // Log any extra unused files
+    for e in &mut pg_wal_entries_iter {
+        let file_path = e?.header().path()?.into_owned();
+        info!("skipping {:?}", file_path);
+    }
+
+    Ok(())
+}
+
+/// Import a single file of a base backup, dispatching on its path.
+///
+/// `reader` yields the file's contents and `len` is its size in bytes.
+/// Returns `Ok(Some(..))` with the decoded control file data for the
+/// `pg_control` file under `global`, and `Ok(None)` for every other path.
+pub fn import_file<R: Repository, Reader: Read>(
+    modification: &mut DatadirModification<R>,
+    file_path: &Path,
+    reader: Reader,
+    len: usize,
+) -> Result<Option<ControlFileData>> {
+    debug!("looking at {:?}", file_path);
+
+    if file_path.starts_with("global") {
+        let spcnode = pg_constants::GLOBALTABLESPACE_OID;
+        let dbnode = 0;
+
+        match file_path
+            .file_name()
+            .expect("missing filename")
+            .to_string_lossy()
+            .as_ref()
+        {
+            "pg_control" => {
+                let bytes = read_all_bytes(reader)?;
+
+                // Extract the checkpoint record and import it separately.
+                let pg_control = ControlFileData::decode(&bytes[..])?;
+                let checkpoint_bytes = pg_control.checkPointCopy.encode()?;
+                modification.put_checkpoint(checkpoint_bytes)?;
+                debug!("imported control file");
+
+                // Import it as ControlFile
+                modification.put_control_file(bytes)?;
+                return Ok(Some(pg_control));
+            }
+            "pg_filenode.map" => {
+                let bytes = read_all_bytes(reader)?;
+                modification.put_relmap_file(spcnode, dbnode, bytes)?;
+                debug!("imported relmap file")
+            }
+            "PG_VERSION" => {
+                debug!("ignored");
+            }
+            _ => {
+                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
+                debug!("imported rel creation");
+            }
+        }
+    } else if file_path.starts_with("base") {
+        let spcnode = pg_constants::DEFAULTTABLESPACE_OID;
+        let dbnode: u32 = file_path
+            .iter()
+            .nth(1)
+            .expect("invalid file path, expected dbnode")
+            .to_string_lossy()
+            .parse()?;
+
+        match file_path
+            .file_name()
+            .expect("missing base filename")
+            .to_string_lossy()
+            .as_ref()
+        {
+            "pg_filenode.map" => {
+                let bytes = read_all_bytes(reader)?;
+                modification.put_relmap_file(spcnode, dbnode, bytes)?;
+                debug!("imported relmap file")
+            }
+            "PG_VERSION" => {
+                debug!("ignored");
+            }
+            _ => {
+                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
+                debug!("imported rel creation");
+            }
+        }
+    } else if file_path.starts_with("pg_xact") {
+        import_slru(modification, SlruKind::Clog, file_path, reader, len)?;
+        debug!("imported clog slru");
+    } else if file_path.starts_with("pg_multixact/offsets") {
+        import_slru(modification, SlruKind::MultiXactOffsets, file_path, reader, len)?;
+        debug!("imported multixact offsets slru");
+    } else if file_path.starts_with("pg_multixact/members") {
+        import_slru(modification, SlruKind::MultiXactMembers, file_path, reader, len)?;
+        debug!("imported multixact members slru");
+    } else if file_path.starts_with("pg_twophase") {
+        let file_name = &file_path
+            .file_name()
+            .expect("missing twophase filename")
+            .to_string_lossy();
+        let xid = u32::from_str_radix(file_name, 16)?;
+
+        let bytes = read_all_bytes(reader)?;
+        modification.put_twophase_file(xid, bytes)?;
+        debug!("imported twophase file");
+    } else if file_path.starts_with("pg_wal") {
+        debug!("found wal file in base section. ignore it");
+    } else if file_path.starts_with("zenith.signal") {
+        // Parse zenith signal file to set correct previous LSN
+        let bytes = read_all_bytes(reader)?;
+        // zenith.signal format is "PREV LSN: prev_lsn"
+        let zenith_signal = std::str::from_utf8(&bytes)?;
+        // Without a ':' there is no second field; report that instead of panicking.
+        let prev_lsn = zenith_signal.split(':').nth(1).context("malformed zenith.signal")?;
+        let prev_lsn = prev_lsn.trim().parse::<Lsn>()?;
+
+        let writer = modification.tline.tline.writer();
+        writer.finish_write(prev_lsn);
+
+        debug!("imported zenith signal {}", prev_lsn);
+    } else if file_path.starts_with("pg_tblspc") {
+        // TODO Backups exported from neon won't have pg_tblspc, but we will need
+        // this to import arbitrary postgres databases.
+        bail!("Importing pg_tblspc is not implemented");
+    } else {
+        debug!("ignored");
+    }
+
+    Ok(None)
+}
+
+fn read_all_bytes<Reader: Read>(mut reader: Reader) -> Result<Bytes> {
+    let mut buf: Vec<u8> = vec![]; // receives the whole contents of `reader`, up to EOF
+    reader.read_to_end(&mut buf)?;
+    Ok(Bytes::from(buf)) // hand the Vec over as-is instead of copying it a second time
+}
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -4,7 +4,7 @@
 //! The functions here are responsible for locating the correct layer for the
 //! get/put call, tracing timeline branching history as needed.
 //!
-//! The files are stored in the .zenith/tenants/<tenantid>/timelines/<timelineid>
+//! The files are stored in the .neon/tenants/<tenantid>/timelines/<timelineid>
 //! directory. See layered_repository/README for how the files are managed.
 //! In addition to the layer files, there is a metadata file in the same
 //! directory that contains information about the timeline, in particular its
@@ -148,7 +148,7 @@ lazy_static! {
    .expect("failed to define a metric");
 }

-/// Parts of the `.zenith/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
+/// Parts of the `.neon/tenants/<tenantid>/timelines/<timelineid>` directory prefix.
 pub const TIMELINES_SEGMENT_NAME: &str = "timelines";

 ///
@@ -158,6 +158,18 @@ pub struct LayeredRepository {
    // Global pageserver config parameters
    pub conf: &'static PageServerConf,

+    // Allows us to gracefully cancel operations that edit the directory
+    // that backs this layered repository. Usage:
+    //
+    // Use `let _guard = file_lock.try_read()` while writing any files.
+    // Use `let _guard = file_lock.write().unwrap()` to wait for all writes to finish.
+    //
+    // TODO try_read this lock during checkpoint as well to prevent race
+    //      between checkpoint and detach/delete.
+    // TODO try_read this lock for all gc/compaction operations, not just
+    //      ones scheduled by the tenant task manager.
+    pub file_lock: RwLock<()>,
+
    // Overridden tenant-specific config parameters.
    // We keep TenantConfOpt sturct here to preserve the information
    // about parameters that are not set.
@@ -243,15 +255,15 @@ impl Repository for LayeredRepository {
        );
        timeline.layers.write().unwrap().next_open_layer_at = Some(initdb_lsn);

+        // Insert if not exists
        let timeline = Arc::new(timeline);
-        let r = timelines.insert(
-            timelineid,
-            LayeredTimelineEntry::Loaded(Arc::clone(&timeline)),
-        );
-        ensure!(
-            r.is_none(),
-            "assertion failure, inserted duplicate timeline"
-        );
+        match timelines.entry(timelineid) {
+            Entry::Occupied(_) => bail!("Timeline already exists"),
+            Entry::Vacant(vacant) => {
+                vacant.insert(LayeredTimelineEntry::Loaded(Arc::clone(&timeline)))
+            }
+        };
+
        Ok(timeline)
    }

@@ -685,6 +697,7 @@ impl LayeredRepository {
    ) -> LayeredRepository {
        LayeredRepository {
            tenant_id,
+            file_lock: RwLock::new(()),
            conf,
            tenant_conf: Arc::new(RwLock::new(tenant_conf)),
            timelines: Mutex::new(HashMap::new()),
@@ -1727,9 +1740,7 @@ impl LayeredTimeline {
            new_delta_path.clone(),
            self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
        ])?;
-        fail_point!("checkpoint-before-sync");
-
-        fail_point!("flush-frozen");
+        fail_point!("flush-frozen-before-sync");

        // Finally, replace the frozen in-memory layer with the new on-disk layer
        {
@@ -1912,15 +1923,28 @@ impl LayeredTimeline {
                } else {
                    Lsn(0)
                };
+                // Let's consider an example:
+                //
+                // delta layer with LSN range 71-81
+                // delta layer with LSN range 81-91
+                // delta layer with LSN range 91-101
+                // image layer at LSN 100
+                //
+                // If 'lsn' is still 100, i.e. no new WAL has been processed since the last image layer,
+                // there's no need to create a new one. We check this case explicitly, to avoid passing
+                // a bogus range to count_deltas below, with start > end. It's even possible that there
+                // are some delta layers *later* than current 'lsn', if more WAL was processed and flushed
+                // after we read last_record_lsn, which is passed here in the 'lsn' argument.
+                if img_lsn < lsn {
+                    let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;

-                let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
-
-                debug!(
-                    "range {}-{}, has {} deltas on this timeline",
-                    img_range.start, img_range.end, num_deltas
-                );
-                if num_deltas >= self.get_image_creation_threshold() {
-                    return Ok(true);
+                    debug!(
+                        "key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
+                        img_range.start, img_range.end, num_deltas, img_lsn, lsn
+                    );
+                    if num_deltas >= self.get_image_creation_threshold() {
+                        return Ok(true);
+                    }
                }
            }
        }
@@ -2212,6 +2236,9 @@ impl LayeredTimeline {
                    LsnForTimestamp::Past(lsn) => {
                        debug!("past({})", lsn);
                    }
+                    LsnForTimestamp::NoData(lsn) => {
+                        debug!("nodata({})", lsn);
+                    }
                }
                debug!("pitr_cutoff_lsn = {:?}", pitr_cutoff_lsn)
            }
--- a/pageserver/src/layered_repository/README.md
+++ b/pageserver/src/layered_repository/README.md
@@ -123,7 +123,7 @@ The files are called "layer files". Each layer file covers a range of keys, and
 a range of LSNs (or a single LSN, in case of image layers). You can think of it
 as a rectangle in the two-dimensional key-LSN space. The layer files for each
 timeline are stored in the timeline's subdirectory under
-`.zenith/tenants/<tenantid>/timelines`.
+`.neon/tenants/<tenantid>/timelines`.

 There are two kind of layer files: images, and delta layers. An image file
 contains a snapshot of all keys at a particular LSN, whereas a delta file
@@ -178,7 +178,7 @@ version, and how branching and GC works is still valid.
 The full path of a delta file looks like this:

 ```
-    .zenith/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
+    .neon/tenants/941ddc8604413b88b3d208bddf90396c/timelines/4af489b06af8eed9e27a841775616962/rel_1663_13990_2609_0_10_000000000169C348_0000000001702000
 ```

 For simplicity, the examples below use a simplified notation for the
--- a/pageserver/src/layered_repository/blob_io.rs
+++ b/pageserver/src/layered_repository/blob_io.rs
@@ -34,7 +34,7 @@ pub trait BlobCursor {
    ) -> Result<(), std::io::Error>;
 }

-impl<'a, R> BlobCursor for BlockCursor<R>
+impl<R> BlobCursor for BlockCursor<R>
 where
    R: BlockReader,
 {
--- a/pageserver/src/layered_repository/image_layer.rs
+++ b/pageserver/src/layered_repository/image_layer.rs
@@ -445,7 +445,10 @@ impl ImageLayerWriter {
            },
        );
        info!("new image layer {}", path.display());
-        let mut file = VirtualFile::create(&path)?;
+        let mut file = VirtualFile::open_with_options(
+            &path,
+            std::fs::OpenOptions::new().write(true).create_new(true),
+        )?;
        // make room for the header block
        file.seek(SeekFrom::Start(PAGE_SZ as u64))?;
        let blob_writer = WriteBlobWriter::new(file, PAGE_SZ as u64);
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -13,7 +13,7 @@ pub mod repository;
 pub mod storage_sync;
 pub mod tenant_config;
 pub mod tenant_mgr;
-pub mod tenant_threads;
+pub mod tenant_tasks;
 pub mod thread_mgr;
 pub mod timelines;
 pub mod virtual_file;
@@ -24,7 +24,6 @@ pub mod walredo;

 use lazy_static::lazy_static;
 use tracing::info;
-use utils::postgres_backend;

 use crate::thread_mgr::ThreadKind;
 use metrics::{register_int_gauge_vec, IntGaugeVec};
@@ -73,7 +72,6 @@ pub fn shutdown_pageserver(exit_code: i32) {
    thread_mgr::shutdown_threads(Some(ThreadKind::LibpqEndpointListener), None, None);

    // Shut down any page service threads.
-    postgres_backend::set_pgbackend_shutdown_requested();
    thread_mgr::shutdown_threads(Some(ThreadKind::PageRequestHandler), None, None);

    // Shut down all the tenants. This flushes everything to disk and kills
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -20,7 +20,7 @@
 //! assign a buffer for a page, you must hold the mapping lock and the lock on
 //! the slot at the same time.
 //!
-//! Whenever you need to hold both locks simultenously, the slot lock must be
+//! Whenever you need to hold both locks simultaneously, the slot lock must be
 //! acquired first. This consistent ordering avoids deadlocks. To look up a page
 //! in the cache, you would first look up the mapping, while holding the mapping
 //! lock, and then lock the slot. You must release the mapping lock in between,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -13,7 +13,7 @@ use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use lazy_static::lazy_static;
 use regex::Regex;
-use std::io;
+use std::io::{self, Read};
 use std::net::TcpListener;
 use std::str;
 use std::str::FromStr;
@@ -29,6 +29,8 @@ use utils::{

 use crate::basebackup;
 use crate::config::{PageServerConf, ProfilingConfig};
+use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
+use crate::layered_repository::LayeredRepository;
 use crate::pgdatadir_mapping::{DatadirTimeline, LsnForTimestamp};
 use crate::profiling::profpoint_start;
 use crate::reltag::RelTag;
@@ -200,6 +202,96 @@ impl PagestreamBeMessage {
    }
 }

+/// Implements `Read` over the CopyData messages a client sends during CopyIn
+struct CopyInReader<'a> {
+    pgb: &'a mut PostgresBackend, // connection the CopyData messages are read from
+
+    /// Overflow buffer for bytes sent in CopyData messages
+    /// that the reader (caller of read) hasn't asked for yet.
+    /// TODO use BytesMut?
+    buf: Vec<u8>,
+
+    /// Bytes before `buf_begin` are considered as dropped.
+    /// This allows us to implement O(1) pop_front on Vec<u8>.
+    /// The Vec won't grow large because we only add to it
+    /// when it's empty.
+    buf_begin: usize,
+}
+
+impl<'a> CopyInReader<'a> {
+    // NOTE: `pgb` must already be in CopyIn state; this only wraps it with an empty buffer
+    fn new(pgb: &'a mut PostgresBackend) -> Self {
+        Self {
+            pgb,
+            buf: Vec::<_>::new(),
+            buf_begin: 0,
+        }
+    }
+}
+
+impl<'a> Drop for CopyInReader<'a> {
+    fn drop(&mut self) {
+        // Finalize copy protocol so that self.pgb can be reused:
+        // read whatever the client still sends and throw it away.
+        let mut buf: Vec<u8> = vec![]; // scratch sink for the drained bytes
+        let _ = self.read_to_end(&mut buf); // best effort: a read error is ignored here
+    }
+}
+
+impl<'a> Read for CopyInReader<'a> {
+    /// Copy buffered or newly received CopyData bytes into `buf`; `Ok(0)` follows CopyDone.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        while !thread_mgr::is_shutdown_requested() {
+            // Return from buffer if nonempty
+            if self.buf_begin < self.buf.len() {
+                let bytes_to_read = std::cmp::min(buf.len(), self.buf.len() - self.buf_begin);
+                buf[..bytes_to_read].copy_from_slice(&self.buf[self.buf_begin..][..bytes_to_read]);
+                self.buf_begin += bytes_to_read;
+                return Ok(bytes_to_read);
+            }
+            // Delete garbage
+            self.buf.clear();
+            self.buf_begin = 0;
+            // Wait for client to send CopyData bytes
+            match self.pgb.read_message() {
+                Ok(Some(message)) => {
+                    let copy_data_bytes = match message {
+                        // Skip empty CopyData: returning Ok(0) for it would look like EOF.
+                        FeMessage::CopyData(bytes) if bytes.is_empty() => continue,
+                        FeMessage::CopyData(bytes) => bytes,
+                        FeMessage::CopyDone => return Ok(0),
+                        FeMessage::Sync => continue,
+                        m => {
+                            let msg = format!("unexpected message {:?}", m);
+                            self.pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
+                            return Err(io::Error::new(io::ErrorKind::Other, msg));
+                        }
+                    };
+                    // Return as much as we can, saving the rest in self.buf
+                    let mut reader = copy_data_bytes.reader();
+                    let bytes_read = reader.read(buf)?;
+                    reader.read_to_end(&mut self.buf)?;
+                    return Ok(bytes_read);
+                }
+                Ok(None) => {
+                    let msg = "client closed connection";
+                    self.pgb.write_message(&BeMessage::ErrorResponse(msg))?;
+                    return Err(io::Error::new(io::ErrorKind::Other, msg));
+                }
+                Err(e) => {
+                    if !is_socket_read_timed_out(&e) {
+                        return Err(io::Error::new(io::ErrorKind::Other, e));
+                    }
+                }
+            }
+        }
+
+        // Shutting down
+        let msg = "Importer thread was shut down";
+        Err(io::Error::new(io::ErrorKind::Other, msg))
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////

 ///
@@ -370,6 +462,10 @@ impl PageServerHandler {
    ) -> anyhow::Result<()> {
        let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();

+        // NOTE: pagerequests handler exits when connection is closed,
+        //       so there is no need to reset the association
+        thread_mgr::associate_with(Some(tenantid), Some(timelineid));
+
        // Check that the timeline exists
        let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
            .context("Cannot load local timeline")?;
@@ -443,6 +539,98 @@ impl PageServerHandler {
        Ok(())
    }

+    /// Create an empty timeline at `base_lsn`, import the tar archive the client sends via
+    /// CopyIn into it, and flush the result to disk. `_end_lsn` is currently unused.
+    fn handle_import_basebackup(
+        &self,
+        pgb: &mut PostgresBackend,
+        tenant_id: ZTenantId,
+        timeline_id: ZTimelineId,
+        base_lsn: Lsn,
+        _end_lsn: Lsn,
+    ) -> anyhow::Result<()> {
+        thread_mgr::associate_with(Some(tenant_id), Some(timeline_id));
+        let _enter =
+            info_span!("import basebackup", timeline = %timeline_id, tenant = %tenant_id).entered();
+        // Create empty timeline
+        info!("creating new timeline");
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        let timeline = repo.create_empty_timeline(timeline_id, base_lsn)?;
+        let repartition_distance = repo.get_checkpoint_distance();
+        let mut datadir_timeline =
+            DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
+
+        // TODO mark timeline as not ready until it reaches end_lsn.
+        // We might have some wal to import as well, and we should prevent compute
+        // from connecting before that and writing conflicting wal.
+        //
+        // This is not relevant for pageserver->pageserver migrations, since there's
+        // no wal to import. But should be fixed if we want to import from postgres.
+
+        // TODO leave clean state on error. For now you can use detach to clean
+        // up broken state from a failed import.
+
+        // Import basebackup provided via CopyData
+        info!("importing basebackup");
+        pgb.write_message(&BeMessage::CopyInResponse)?;
+        let reader = CopyInReader::new(pgb);
+        import_basebackup_from_tar(&mut datadir_timeline, reader, base_lsn)?;
+
+        // TODO check checksum
+        // Meanwhile you can verify client-side by taking fullbackup
+        // and checking that it matches in size with what was imported.
+        // It wouldn't work if base came from vanilla postgres though,
+        // since we discard some log files.
+
+        // Flush data to disk, then upload to s3
+        info!("flushing layers");
+        datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
+        info!("done");
+        Ok(())
+    }
+
+    /// Ingest the tar archive of WAL the client sends via CopyIn into an existing timeline whose
+    /// last record LSN must equal `start_lsn`; fails unless it then reaches at least `end_lsn`.
+    fn handle_import_wal(
+        &self,
+        pgb: &mut PostgresBackend,
+        tenant_id: ZTenantId,
+        timeline_id: ZTimelineId,
+        start_lsn: Lsn,
+        end_lsn: Lsn,
+    ) -> anyhow::Result<()> {
+        thread_mgr::associate_with(Some(tenant_id), Some(timeline_id));
+        let _enter =
+            info_span!("import wal", timeline = %timeline_id, tenant = %tenant_id).entered();
+        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
+        let timeline = repo.get_timeline_load(timeline_id)?;
+        ensure!(timeline.get_last_record_lsn() == start_lsn);
+
+        let repartition_distance = repo.get_checkpoint_distance();
+        let mut datadir_timeline =
+            DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
+
+        // TODO leave clean state on error. For now you can use detach to clean
+        // up broken state from a failed import.
+
+        // Import wal provided via CopyData
+        info!("importing wal");
+        pgb.write_message(&BeMessage::CopyInResponse)?;
+        let reader = CopyInReader::new(pgb);
+        import_wal_from_tar(&mut datadir_timeline, reader, start_lsn, end_lsn)?;
+
+        // TODO Does it make sense to overshoot?
+        ensure!(datadir_timeline.tline.get_last_record_lsn() >= end_lsn);
+
+        // Flush data to disk, then upload to s3. No need for a forced checkpoint.
+        // We only want to persist the data, and it doesn't matter if it's in the
+        // shape of deltas or images.
+        info!("flushing layers");
+        datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
+        info!("done");
+        Ok(())
+    }
+
    /// Helper function to handle the LSN from client request.
    ///
    /// Each GetPage (and Exists and Nblocks) request includes information about
@@ -545,17 +733,10 @@ impl PageServerHandler {
        let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;

-        let all_rels = timeline.list_rels(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
-        let mut total_blocks: i64 = 0;
+        let total_blocks =
+            timeline.get_db_size(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;

-        for rel in all_rels {
-            if rel.forknum == 0 {
-                let n_blocks = timeline.get_rel_size(rel, lsn).unwrap_or(0);
-                total_blocks += n_blocks as i64;
-            }
-        }
-
-        let db_size = total_blocks * pg_constants::BLCKSZ as i64;
+        let db_size = total_blocks as i64 * pg_constants::BLCKSZ as i64;

        Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
            db_size,
@@ -592,6 +773,7 @@ impl PageServerHandler {
        timelineid: ZTimelineId,
        lsn: Option<Lsn>,
        tenantid: ZTenantId,
+        full_backup: bool,
    ) -> anyhow::Result<()> {
        let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
        let _enter = span.enter();
@@ -614,7 +796,7 @@ impl PageServerHandler {
        {
            let mut writer = CopyDataSink { pgb };

-            let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
+            let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn, full_backup)?;
            span.record("lsn", &basebackup.lsn.to_string().as_str());
            basebackup.send_tarball()?;
        }
@@ -672,6 +854,10 @@ impl postgres_backend::Handler for PageServerHandler {
        Ok(())
    }

+    fn is_shutdown_requested(&self) -> bool {
+        thread_mgr::is_shutdown_requested()
+    }
+
    fn process_query(
        &mut self,
        pgb: &mut PostgresBackend,
@@ -713,8 +899,79 @@ impl postgres_backend::Handler for PageServerHandler {
            };

            // Check that the timeline exists
-            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
+            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, false)?;
            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        }
+        // same as basebackup, but result includes relational data as well
+        else if query_string.starts_with("fullbackup ") {
+            let (_, params_raw) = query_string.split_at("fullbackup ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();
+
+            ensure!(
+                params.len() == 3,
+                "invalid param number for fullbackup command"
+            );
+
+            let tenantid = ZTenantId::from_str(params[0])?;
+            let timelineid = ZTimelineId::from_str(params[1])?;
+
+            self.check_permission(Some(tenantid))?;
+
+            // Lsn is required for fullbackup, because otherwise we would not know
+            // at which lsn to upload this backup.
+            //
+            // The caller is responsible for providing a valid lsn
+            // and using it in the subsequent import.
+            let lsn = Some(Lsn::from_str(params[2])?);
+
+            // Check that the timeline exists
+            self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, true)?;
+            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        } else if query_string.starts_with("import basebackup ") {
+            // Import the `base` section (everything but the wal) of a basebackup.
+            // Assumes the tenant already exists on this pageserver.
+            //
+            // Files are scheduled to be persisted to remote storage, and the
+            // caller should poll the http api to check when that is done.
+            //
+            // Example import command:
+            // 1. Get start/end LSN from backup_manifest file
+            // 2. Run:
+            // cat my_backup/base.tar | psql -h $PAGESERVER \
+            //     -c "import basebackup $TENANT $TIMELINE $START_LSN $END_LSN"
+            let (_, params_raw) = query_string.split_at("import basebackup ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();
+            ensure!(params.len() == 4);
+            let tenant = ZTenantId::from_str(params[0])?;
+            let timeline = ZTimelineId::from_str(params[1])?;
+            let base_lsn = Lsn::from_str(params[2])?;
+            let end_lsn = Lsn::from_str(params[3])?;
+
+            self.check_permission(Some(tenant))?;
+
+            match self.handle_import_basebackup(pgb, tenant, timeline, base_lsn, end_lsn) {
+                Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
+                Err(e) => pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?,
+            };
+        } else if query_string.starts_with("import wal ") {
+            // Import the `pg_wal` section of a basebackup.
+            //
+            // Files are scheduled to be persisted to remote storage, and the
+            // caller should poll the http api to check when that is done.
+            let (_, params_raw) = query_string.split_at("import wal ".len());
+            let params = params_raw.split_whitespace().collect::<Vec<_>>();
+            ensure!(params.len() == 4);
+            let tenant = ZTenantId::from_str(params[0])?;
+            let timeline = ZTimelineId::from_str(params[1])?;
+            let start_lsn = Lsn::from_str(params[2])?;
+            let end_lsn = Lsn::from_str(params[3])?;
+
+            self.check_permission(Some(tenant))?;
+
+            match self.handle_import_wal(pgb, tenant, timeline, start_lsn, end_lsn) {
+                Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
+                Err(e) => pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?,
+            };
        } else if query_string.to_ascii_lowercase().starts_with("set ") {
            // important because psycopg2 executes "SET datestyle TO 'ISO'"
            // on connect
@@ -802,7 +1059,6 @@ impl postgres_backend::Handler for PageServerHandler {
                .map(|h| h.as_str().parse())
                .unwrap_or_else(|| Ok(repo.get_gc_horizon()))?;

-            let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
            // Use tenant's pitr setting
            let pitr = repo.get_pitr_interval();
            let result = repo.gc_iteration(Some(timelineid), gc_horizon, pitr, true)?;
@@ -895,6 +1151,7 @@ impl postgres_backend::Handler for PageServerHandler {
                LsnForTimestamp::Present(lsn) => format!("{}", lsn),
                LsnForTimestamp::Future(_lsn) => "future".into(),
                LsnForTimestamp::Past(_lsn) => "past".into(),
+                LsnForTimestamp::NoData(_lsn) => "nodata".into(),
            };
            pgb.write_message_noflush(&BeMessage::DataRow(&[Some(result.as_bytes())]))?;
            pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -51,6 +51,7 @@ pub enum LsnForTimestamp {
    Present(Lsn),
    Future(Lsn),
    Past(Lsn),
+    NoData(Lsn),
 }

 impl<R: Repository> DatadirTimeline<R> {
@@ -123,6 +124,19 @@ impl<R: Repository> DatadirTimeline<R> {
        self.tline.get(key, lsn)
    }

+    /// Get size of a database in blocks: the sum of `get_rel_size` over `list_rels`
+    pub fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<usize> {
+        let mut db_blocks: usize = 0;
+
+        // Each tag reported for this database at `lsn` contributes its size;
+        // the first failing lookup aborts the whole computation.
+        for tag in self.list_rels(spcnode, dbnode, lsn)? {
+            db_blocks += self.get_rel_size(tag, lsn)? as usize;
+        }
+
+        Ok(db_blocks)
+    }
+
    /// Get size of a relation file
    pub fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
        ensure!(tag.relnode != 0, "invalid relnode");
@@ -250,7 +264,7 @@ impl<R: Repository> DatadirTimeline<R> {
            (false, false) => {
                // This can happen if no commit records have been processed yet, e.g.
                // just after importing a cluster.
-                bail!("no commit timestamps found");
+                Ok(LsnForTimestamp::NoData(max_lsn))
            }
            (true, false) => {
                // Didn't find any commit timestamps larger than the request
@@ -667,6 +681,10 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
    }

    pub fn drop_dbdir(&mut self, spcnode: Oid, dbnode: Oid) -> Result<()> {
+        let req_lsn = self.tline.get_last_record_lsn();
+
+        let total_blocks = self.tline.get_db_size(spcnode, dbnode, req_lsn)?;
+
        // Remove entry from dbdir
        let buf = self.get(DBDIR_KEY)?;
        let mut dir = DbDirectory::des(&buf)?;
@@ -680,7 +698,8 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
            );
        }

-        // FIXME: update pending_nblocks
+        // Update logical database size.
+        self.pending_nblocks -= total_blocks as isize;

        // Delete all relations and metadata files for the spcnode/dnode
        self.delete(dbdir_key_range(spcnode, dbnode));
@@ -749,6 +768,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
    }

    /// Extend relation
+    /// If new size is smaller, do nothing.
    pub fn put_rel_extend(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> {
        ensure!(rel.relnode != 0, "invalid relnode");

@@ -756,10 +776,13 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
        let size_key = rel_size_to_key(rel);
        let old_size = self.get(size_key)?.get_u32_le();

-        let buf = nblocks.to_le_bytes();
-        self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
+        // only extend relation here. never decrease the size
+        if nblocks > old_size {
+            let buf = nblocks.to_le_bytes();
+            self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));

-        self.pending_nblocks += nblocks as isize - old_size as isize;
+            self.pending_nblocks += nblocks as isize - old_size as isize;
+        }
        Ok(())
    }

--- a/pageserver/src/profiling.rs
+++ b/pageserver/src/profiling.rs
@@ -81,6 +81,12 @@ mod profiling_impl {

    pub struct DummyProfilerGuard;

+    impl Drop for DummyProfilerGuard {
+        fn drop(&mut self) {
+            // Intentionally empty: this impl exists only to calm Clippy down.
+        }
+    }
+
    pub fn profpoint_start(
        _conf: &PageServerConf,
        _point: ProfilingConfig,
--- a/pageserver/src/reltag.rs
+++ b/pageserver/src/reltag.rs
@@ -3,7 +3,7 @@ use std::cmp::Ordering;
 use std::fmt;

 use postgres_ffi::relfile_utils::forknumber_to_name;
-use postgres_ffi::Oid;
+use postgres_ffi::{pg_constants, Oid};

 ///
 /// Relation data file segment id throughout the Postgres cluster.
@@ -75,6 +75,30 @@ impl fmt::Display for RelTag {
    }
 }

+impl RelTag {
+    /// Relative file name of segment `segno` of this relation: `global/` or
+    /// `base/<dbnode>/`, then the relnode, `_<fork>` if the fork has a name,
+    /// and `.<segno>` for every segment except segment 0.
+    pub fn to_segfile_name(&self, segno: u32) -> String {
+        let dir = if self.spcnode == pg_constants::GLOBALTABLESPACE_OID {
+            "global/".to_string()
+        } else {
+            format!("base/{}/", self.dbnode)
+        };
+        let fork_suffix = match forknumber_to_name(self.forknum) {
+            Some(fork_name) => format!("_{}", fork_name),
+            None => String::new(),
+        };
+        // Segment 0 carries no numeric suffix.
+        let seg_suffix = match segno {
+            0 => String::new(),
+            n => format!(".{}", n),
+        };
+
+        format!("{}{}{}{}", dir, self.relnode, fork_suffix, seg_suffix)
+    }
+}
+
 ///
 /// Non-relation transaction status files (clog (a.k.a. pg_xact) and
 /// pg_multixact) in Postgres are handled by SLRU (Simple LRU) buffer,
--- a/pageserver/src/remote_storage/storage_sync/delete.rs
+++ b/pageserver/src/remote_storage/storage_sync/delete.rs
@@ -1,223 +0,0 @@
-//! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage.
-
-use anyhow::Context;
-use futures::stream::{FuturesUnordered, StreamExt};
-use tracing::{debug, error, info};
-use utils::zid::ZTenantTimelineId;
-
-use crate::remote_storage::{
-    storage_sync::{SyncQueue, SyncTask},
-    RemoteStorage,
-};
-
-use super::{LayersDeletion, SyncData};
-
-/// Attempts to remove the timleline layers from the remote storage.
-/// If the task had not adjusted the metadata before, the deletion will fail.
-pub(super) async fn delete_timeline_layers<'a, P, S>(
-    storage: &'a S,
-    sync_queue: &SyncQueue,
-    sync_id: ZTenantTimelineId,
-    mut delete_data: SyncData<LayersDeletion>,
-) -> bool
-where
-    P: std::fmt::Debug + Send + Sync + 'static,
-    S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
-{
-    if !delete_data.data.deletion_registered {
-        error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing");
-        delete_data.retries += 1;
-        sync_queue.push(sync_id, SyncTask::Delete(delete_data));
-        return false;
-    }
-
-    if delete_data.data.layers_to_delete.is_empty() {
-        info!("No layers to delete, skipping");
-        return true;
-    }
-
-    let layers_to_delete = delete_data
-        .data
-        .layers_to_delete
-        .drain()
-        .collect::<Vec<_>>();
-    debug!("Layers to delete: {layers_to_delete:?}");
-    info!("Deleting {} timeline layers", layers_to_delete.len());
-
-    let mut delete_tasks = layers_to_delete
-        .into_iter()
-        .map(|local_layer_path| async {
-            let storage_path = match storage.storage_path(&local_layer_path).with_context(|| {
-                format!(
-                    "Failed to get the layer storage path for local path '{}'",
-                    local_layer_path.display()
-                )
-            }) {
-                Ok(path) => path,
-                Err(e) => return Err((e, local_layer_path)),
-            };
-
-            match storage.delete(&storage_path).await.with_context(|| {
-                format!(
-                    "Failed to delete remote layer from storage at '{:?}'",
-                    storage_path
-                )
-            }) {
-                Ok(()) => Ok(local_layer_path),
-                Err(e) => Err((e, local_layer_path)),
-            }
-        })
-        .collect::<FuturesUnordered<_>>();
-
-    let mut errored = false;
-    while let Some(deletion_result) = delete_tasks.next().await {
-        match deletion_result {
-            Ok(local_layer_path) => {
-                debug!(
-                    "Successfully deleted layer {} for timeline {sync_id}",
-                    local_layer_path.display()
-                );
-                delete_data.data.deleted_layers.insert(local_layer_path);
-            }
-            Err((e, local_layer_path)) => {
-                errored = true;
-                error!(
-                    "Failed to delete layer {} for timeline {sync_id}: {e:?}",
-                    local_layer_path.display()
-                );
-                delete_data.data.layers_to_delete.insert(local_layer_path);
-            }
-        }
-    }
-
-    if errored {
-        debug!("Reenqueuing failed delete task for timeline {sync_id}");
-        delete_data.retries += 1;
-        sync_queue.push(sync_id, SyncTask::Delete(delete_data));
-    }
-    errored
-}
-
-#[cfg(test)]
-mod tests {
-    use std::{collections::HashSet, num::NonZeroUsize};
-
-    use itertools::Itertools;
-    use tempfile::tempdir;
-    use tokio::fs;
-    use utils::lsn::Lsn;
-
-    use crate::{
-        remote_storage::{
-            storage_sync::test_utils::{create_local_timeline, dummy_metadata},
-            LocalFs,
-        },
-        repository::repo_harness::{RepoHarness, TIMELINE_ID},
-    };
-
-    use super::*;
-
-    #[tokio::test]
-    async fn delete_timeline_negative() -> anyhow::Result<()> {
-        let harness = RepoHarness::create("delete_timeline_negative")?;
-        let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
-        let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
-        let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
-
-        let deleted = delete_timeline_layers(
-            &storage,
-            &sync_queue,
-            sync_id,
-            SyncData {
-                retries: 1,
-                data: LayersDeletion {
-                    deleted_layers: HashSet::new(),
-                    layers_to_delete: HashSet::new(),
-                    deletion_registered: false,
-                },
-            },
-        )
-        .await;
-
-        assert!(
-            !deleted,
-            "Should not start the deletion for task with delete metadata unregistered"
-        );
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn delete_timeline() -> anyhow::Result<()> {
-        let harness = RepoHarness::create("delete_timeline")?;
-        let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
-
-        let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
-        let layer_files = ["a", "b", "c", "d"];
-        let storage = LocalFs::new(tempdir()?.path().to_path_buf(), &harness.conf.workdir)?;
-        let current_retries = 3;
-        let metadata = dummy_metadata(Lsn(0x30));
-        let local_timeline_path = harness.timeline_path(&TIMELINE_ID);
-        let timeline_upload =
-            create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?;
-        for local_path in timeline_upload.layers_to_upload {
-            let remote_path = storage.storage_path(&local_path)?;
-            let remote_parent_dir = remote_path.parent().unwrap();
-            if !remote_parent_dir.exists() {
-                fs::create_dir_all(&remote_parent_dir).await?;
-            }
-            fs::copy(&local_path, &remote_path).await?;
-        }
-        assert_eq!(
-            storage
-                .list()
-                .await?
-                .into_iter()
-                .map(|remote_path| storage.local_path(&remote_path).unwrap())
-                .filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
-                .sorted()
-                .collect::<Vec<_>>(),
-            layer_files
-                .iter()
-                .map(|layer_str| layer_str.to_string())
-                .sorted()
-                .collect::<Vec<_>>(),
-            "Expect to have all layer files remotely before deletion"
-        );
-
-        let deleted = delete_timeline_layers(
-            &storage,
-            &sync_queue,
-            sync_id,
-            SyncData {
-                retries: current_retries,
-                data: LayersDeletion {
-                    deleted_layers: HashSet::new(),
-                    layers_to_delete: HashSet::from([
-                        local_timeline_path.join("a"),
-                        local_timeline_path.join("c"),
-                        local_timeline_path.join("something_different"),
-                    ]),
-                    deletion_registered: true,
-                },
-            },
-        )
-        .await;
-        assert!(deleted, "Should be able to delete timeline files");
-
-        assert_eq!(
-            storage
-                .list()
-                .await?
-                .into_iter()
-                .map(|remote_path| storage.local_path(&remote_path).unwrap())
-                .filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) })
-                .sorted()
-                .collect::<Vec<_>>(),
-            vec!["b".to_string(), "d".to_string()],
-            "Expect to have only non-deleted files remotely"
-        );
-
-        Ok(())
-    }
-}
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -195,8 +195,9 @@ impl Display for TimelineSyncStatusUpdate {
        f.write_str(s)
    }
 }
+
 ///
-/// A repository corresponds to one .zenith directory. One repository holds multiple
+/// A repository corresponds to one .neon directory. One repository holds multiple
 /// timelines, forked off from the same initial call to 'initdb'.
 pub trait Repository: Send + Sync {
    type Timeline: Timeline;
@@ -242,7 +243,7 @@ pub trait Repository: Send + Sync {
    ///
    /// 'timelineid' specifies the timeline to GC, or None for all.
    /// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval).
-    /// `checkpoint_before_gc` parameter is used to force compaction of storage before CG
+    /// `checkpoint_before_gc` parameter is used to force compaction of storage before GC
    /// to make tests more deterministic.
    /// TODO Do we still need it or we can call checkpoint explicitly in tests where needed?
    fn gc_iteration(
--- a/pageserver/src/storage_sync.rs
+++ b/pageserver/src/storage_sync.rs
@@ -186,8 +186,8 @@ use crate::{
 };

 use metrics::{
-    register_histogram_vec, register_int_counter, register_int_gauge, HistogramVec, IntCounter,
-    IntGauge,
+    register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
+    HistogramVec, IntCounter, IntCounterVec, IntGauge,
 };
 use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

@@ -208,14 +208,17 @@ lazy_static! {
    static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
        "pageserver_remote_storage_image_sync_seconds",
        "Time took to synchronize (download or upload) a whole pageserver image. \
-        Grouped by `operation_kind` (upload|download) and `status` (success|failure)",
-        &["operation_kind", "status"],
-        vec![
-            0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0,
-            8.0, 9.0, 10.0, 12.5, 15.0, 17.5, 20.0
-        ]
+        Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
+        &["tenant_id", "timeline_id", "operation_kind", "status"],
+        vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
    )
    .expect("failed to register pageserver image sync time histogram vec");
+    static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
+        "pageserver_remote_storage_remote_index_uploads_total",
+        "Number of remote index uploads",
+        &["tenant_id", "timeline_id"],
+    )
+    .expect("failed to register pageserver remote index upload vec");
 }

 static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
@@ -1146,19 +1149,19 @@ where
    .await
    {
        DownloadedTimeline::Abort => {
-            register_sync_status(sync_start, task_name, None);
+            register_sync_status(sync_id, sync_start, task_name, None);
            if let Err(e) = index.write().await.set_awaits_download(&sync_id, false) {
                error!("Timeline {sync_id} was expected to be in the remote index after a download attempt, but it's absent: {e:?}");
            }
        }
        DownloadedTimeline::FailedAndRescheduled => {
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
        }
        DownloadedTimeline::Successful(mut download_data) => {
            match update_local_metadata(conf, sync_id, current_remote_timeline).await {
                Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
                    Ok(()) => {
-                        register_sync_status(sync_start, task_name, Some(true));
+                        register_sync_status(sync_id, sync_start, task_name, Some(true));
                        return Some(TimelineSyncStatusUpdate::Downloaded);
                    }
                    Err(e) => {
@@ -1169,7 +1172,7 @@ where
                    error!("Failed to update local timeline metadata: {e:?}");
                    download_data.retries += 1;
                    sync_queue.push(sync_id, SyncTask::Download(download_data));
-                    register_sync_status(sync_start, task_name, Some(false));
+                    register_sync_status(sync_id, sync_start, task_name, Some(false));
                }
            }
        }
@@ -1265,14 +1268,14 @@ async fn delete_timeline_data<P, S>(
            error!("Failed to update remote timeline {sync_id}: {e:?}");
            new_delete_data.retries += 1;
            sync_queue.push(sync_id, SyncTask::Delete(new_delete_data));
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
            return;
        }
    }
    timeline_delete.deletion_registered = true;

    let sync_status = delete_timeline_layers(storage, sync_queue, sync_id, new_delete_data).await;
-    register_sync_status(sync_start, task_name, Some(sync_status));
+    register_sync_status(sync_id, sync_start, task_name, Some(sync_status));
 }

 async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result<TimelineMetadata> {
@@ -1306,7 +1309,7 @@ async fn upload_timeline_data<P, S>(
    .await
    {
        UploadedTimeline::FailedAndRescheduled => {
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
            return;
        }
        UploadedTimeline::Successful(upload_data) => upload_data,
@@ -1325,13 +1328,13 @@ async fn upload_timeline_data<P, S>(
    .await
    {
        Ok(()) => {
-            register_sync_status(sync_start, task_name, Some(true));
+            register_sync_status(sync_id, sync_start, task_name, Some(true));
        }
        Err(e) => {
            error!("Failed to update remote timeline {sync_id}: {e:?}");
            uploaded_data.retries += 1;
            sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
-            register_sync_status(sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, task_name, Some(false));
        }
    }
 }
@@ -1421,7 +1424,14 @@ where
        IndexPart::from_remote_timeline(&timeline_path, updated_remote_timeline)
            .context("Failed to create an index part from the updated remote timeline")?;

-    info!("Uploading remote index for the timeline");
+    debug!("Uploading remote index for the timeline");
+    REMOTE_INDEX_UPLOAD
+        .with_label_values(&[
+            &sync_id.tenant_id.to_string(),
+            &sync_id.timeline_id.to_string(),
+        ])
+        .inc();
+
    upload_index_part(conf, storage, sync_id, new_index_part)
        .await
        .context("Failed to upload new index part")
@@ -1590,12 +1600,24 @@ fn compare_local_and_remote_timeline(
    (initial_timeline_status, awaits_download)
 }

-fn register_sync_status(sync_start: Instant, sync_name: &str, sync_status: Option<bool>) {
+fn register_sync_status(
+    sync_id: ZTenantTimelineId,
+    sync_start: Instant,
+    sync_name: &str,
+    sync_status: Option<bool>,
+) {
    let secs_elapsed = sync_start.elapsed().as_secs_f64();
-    info!("Processed a sync task in {secs_elapsed:.2} seconds");
+    debug!("Processed a sync task in {secs_elapsed:.2} seconds");
+
+    let tenant_id = sync_id.tenant_id.to_string();
+    let timeline_id = sync_id.timeline_id.to_string();
    match sync_status {
-        Some(true) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "success"]),
-        Some(false) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "failure"]),
+        Some(true) => {
+            IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "success"])
+        }
+        Some(false) => {
+            IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "failure"])
+        }
        None => return,
    }
    .observe(secs_elapsed)
--- a/pageserver/src/storage_sync/upload.rs
+++ b/pageserver/src/storage_sync/upload.rs
@@ -4,6 +4,7 @@ use std::{fmt::Debug, path::PathBuf};

 use anyhow::Context;
 use futures::stream::{FuturesUnordered, StreamExt};
+use lazy_static::lazy_static;
 use remote_storage::RemoteStorage;
 use tokio::fs;
 use tracing::{debug, error, info, warn};
@@ -17,6 +18,16 @@ use super::{
 use crate::{
    config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
 };
+use metrics::{register_int_counter_vec, IntCounterVec};
+
+lazy_static! {
+    static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
+        "pageserver_remote_storage_no_layers_uploads_total",
+        "Number of skipped uploads due to no layers",
+        &["tenant_id", "timeline_id"],
+    )
+    .expect("failed to register pageserver no layers upload vec");
+}

 /// Serializes and uploads the given index part data to the remote storage.
 pub(super) async fn upload_index_part<P, S>(
@@ -102,7 +113,13 @@ where
        .collect::<Vec<_>>();

    if layers_to_upload.is_empty() {
-        info!("No layers to upload after filtering, aborting");
+        debug!("No layers to upload after filtering, aborting");
+        NO_LAYERS_UPLOAD
+            .with_label_values(&[
+                &sync_id.tenant_id.to_string(),
+                &sync_id.timeline_id.to_string(),
+            ])
+            .inc();
        return UploadedTimeline::Successful(upload_data);
    }

--- a/pageserver/src/tenant_config.rs
+++ b/pageserver/src/tenant_config.rs
@@ -37,7 +37,7 @@ pub mod defaults {
    pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
-    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
+    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
 }

 /// Per-tenant configuration options
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -230,8 +230,6 @@ pub fn shutdown_all_tenants() {
    drop(m);

    thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiverManager), None, None);
-    thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), None, None);
-    thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), None, None);

    // Ok, no background threads running anymore. Flush any remaining data in
    // memory to disk.
@@ -330,44 +328,12 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
        }
        (TenantState::Idle, TenantState::Active) => {
            info!("activating tenant {tenant_id}");
-            let compactor_spawn_result = thread_mgr::spawn(
-                ThreadKind::Compactor,
-                Some(tenant_id),
-                None,
-                "Compactor thread",
-                false,
-                move || crate::tenant_threads::compact_loop(tenant_id),
-            );
-            if compactor_spawn_result.is_err() {
-                let mut m = tenants_state::write_tenants();
-                m.get_mut(&tenant_id)
-                    .with_context(|| format!("Tenant not found for id {tenant_id}"))?
-                    .state = old_state;
-                drop(m);
-            }
-            compactor_spawn_result?;

-            let gc_spawn_result = thread_mgr::spawn(
-                ThreadKind::GarbageCollector,
-                Some(tenant_id),
-                None,
-                "GC thread",
-                false,
-                move || crate::tenant_threads::gc_loop(tenant_id),
-            )
-            .map(|_thread_id| ()) // update the `Result::Ok` type to match the outer function's return signature
-            .with_context(|| format!("Failed to launch GC thread for tenant {tenant_id}"));
-
-            if let Err(e) = &gc_spawn_result {
-                let mut m = tenants_state::write_tenants();
-                m.get_mut(&tenant_id)
-                    .with_context(|| format!("Tenant not found for id {tenant_id}"))?
-                    .state = old_state;
-                drop(m);
-                error!("Failed to start GC thread for tenant {tenant_id}, stopping its checkpointer thread: {e:?}");
-                thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
-                return gc_spawn_result;
-            }
+            // Spawn gc and compaction loops. The loops will shut themselves
+            // down when they notice that the tenant is inactive.
+            // TODO maybe use tokio::sync::watch instead?
+            crate::tenant_tasks::start_compaction_loop(tenant_id)?;
+            crate::tenant_tasks::start_gc_loop(tenant_id)?;
        }
        (TenantState::Idle, TenantState::Stopping) => {
            info!("stopping idle tenant {tenant_id}");
@@ -379,8 +345,10 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
                Some(tenant_id),
                None,
            );
-            thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), Some(tenant_id), None);
-            thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
+
+            // Wait until all gc/compaction tasks finish
+            let repo = get_repository_for_tenant(tenant_id)?;
+            let _guard = repo.file_lock.write().unwrap();
        }
    }

--- a/pageserver/src/tenant_tasks.rs
+++ b/pageserver/src/tenant_tasks.rs
@@ -0,0 +1,288 @@
+//! This module contains functions to serve per-tenant background processes,
+//! such as compaction and GC
+
+use std::collections::HashMap;
+use std::ops::ControlFlow;
+use std::time::Duration;
+
+use crate::repository::Repository;
+use crate::tenant_mgr::TenantState;
+use crate::thread_mgr::ThreadKind;
+use crate::{tenant_mgr, thread_mgr};
+use anyhow::{self, Context};
+use futures::stream::FuturesUnordered;
+use futures::StreamExt;
+use metrics::{register_int_counter_vec, IntCounterVec};
+use once_cell::sync::{Lazy, OnceCell};
+use tokio::sync::mpsc;
+use tokio::sync::watch;
+use tracing::*;
+use utils::zid::ZTenantId;
+
+static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_tenant_task_events",
+        "Number of task start/stop/fail events.",
+        &["event"],
+    )
+    .expect("Failed to register tenant_task_events metric")
+});
+
+/// Compaction task's main loop: runs one compaction iteration per compaction period
+/// via spawn_blocking, retrying after 2 seconds on failure. The loop exits when the
+/// tenant is not Active, when `file_lock.try_read()` fails, or when `cancel` changes.
+async fn compaction_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
+    loop {
+        trace!("waking up");
+
+        // Run the blocking part of the task on tokio's blocking thread pool
+        let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
+            // Break if tenant is not active
+            if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
+                return Ok(ControlFlow::Break(()));
+            }
+
+            // Break if we're not allowed to write to disk, i.e. file_lock.try_read() fails
+            let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
+            // TODO do this inside repo.compaction_iteration instead.
+            let _guard = match repo.file_lock.try_read() {
+                Ok(g) => g,
+                Err(_) => return Ok(ControlFlow::Break(())),
+            };
+
+            // Run compaction while holding the read guard; report the period to sleep for
+            let compaction_period = repo.get_compaction_period();
+            repo.compaction_iteration()?;
+            Ok(ControlFlow::Continue(compaction_period))
+        })
+        .await;
+
+        // Decide whether to sleep or break; the outer Err is a spawn_blocking join error
+        let sleep_duration = match period {
+            Ok(Ok(ControlFlow::Continue(period))) => period,
+            Ok(Ok(ControlFlow::Break(()))) => break,
+            Ok(Err(e)) => {
+                error!("Compaction failed, retrying: {}", e);
+                Duration::from_secs(2)
+            }
+            Err(e) => {
+                error!("Compaction join error, retrying: {}", e);
+                Duration::from_secs(2)
+            }
+        };
+
+        // Sleep until the next iteration, exiting early if cancellation is signalled
+        tokio::select! {
+            _ = cancel.changed() => {
+                trace!("received cancellation request");
+                break;
+            },
+            _ = tokio::time::sleep(sleep_duration) => {},
+        }
+    }
+
+    trace!(
+        "compaction loop stopped. State is {:?}",
+        tenant_mgr::get_tenant_state(tenantid)
+    );
+}
+
+static START_GC_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
+static START_COMPACTION_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
+
+/// Ask the tenant task manager to spawn a gc loop for this tenant. Call on tenant
+/// activation, after init_tenant_task_pool (errors if the channel is not set up yet).
+/// Uses `blocking_send`, which per tokio's docs must not be called from async context.
+pub fn start_gc_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
+    START_GC_LOOP
+        .get()
+        .context("Failed to get START_GC_LOOP")?
+        .blocking_send(tenantid)
+        .context("Failed to send to START_GC_LOOP channel")?;
+    Ok(())
+}
+
+/// Ask the tenant task manager to spawn a compaction loop for this tenant. Call on tenant
+/// activation, after init_tenant_task_pool (errors if the channel is not set up yet).
+/// Uses `blocking_send`, which per tokio's docs must not be called from async context.
+pub fn start_compaction_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
+    START_COMPACTION_LOOP
+        .get()
+        .context("failed to get START_COMPACTION_LOOP")?
+        .blocking_send(tenantid)
+        .context("failed to send to START_COMPACTION_LOOP")?;
+    Ok(())
+}
+
+/// Spawn the TenantTaskManager thread; it owns the tokio runtime that runs all gc/compaction loops.
+/// This needs to be called before start_gc_loop or start_compaction_loop, and only once (a second call panics).
+pub fn init_tenant_task_pool() -> anyhow::Result<()> {
+    let runtime = tokio::runtime::Builder::new_multi_thread()
+        .thread_name("tenant-task-worker")
+        .worker_threads(40) // Way more than necessary
+        .max_blocking_threads(100) // Way more than necessary
+        .enable_all()
+        .build()?;
+
+    let (gc_send, mut gc_recv) = mpsc::channel::<ZTenantId>(100);
+    START_GC_LOOP
+        .set(gc_send)
+        .expect("Failed to set START_GC_LOOP");
+
+    let (compaction_send, mut compaction_recv) = mpsc::channel::<ZTenantId>(100);
+    START_COMPACTION_LOOP
+        .set(compaction_send)
+        .expect("Failed to set START_COMPACTION_LOOP");
+
+    // Cancellation senders of the most recently started loop per tenant. TODO this is getting repetitive
+    let mut gc_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
+    let mut compaction_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
+
+    thread_mgr::spawn(
+        ThreadKind::TenantTaskManager,
+        None,
+        None,
+        "Tenant task manager main thread",
+        true,
+        move || {
+            runtime.block_on(async move {
+                let mut futures = FuturesUnordered::new();
+                loop {
+                    tokio::select! {
+                        _ = thread_mgr::shutdown_watcher() => {
+                            // Send cancellation to all tasks
+                            for (_, cancel) in gc_loops.drain() {
+                                cancel.send(()).ok();
+                            }
+                            for (_, cancel) in compaction_loops.drain() {
+                                cancel.send(()).ok();
+                            }
+
+                            // Exit after all tasks finish
+                            while let Some(result) = futures.next().await {
+                                match result {
+                                    Ok(()) => {
+                                        TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
+                                    },
+                                    Err(e) => {
+                                        TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
+                                        error!("loop join error {}", e)
+                                    },
+                                }
+                            }
+                            break;
+                        },
+                        tenantid = gc_recv.recv() => {
+                            let tenantid = tenantid.expect("Gc task channel closed unexpectedly");
+
+                            // Spawn new task, request cancellation of the old one if exists
+                            let (cancel_send, cancel_recv) = watch::channel(());
+                            let handle = tokio::spawn(gc_loop(tenantid, cancel_recv)
+                                .instrument(info_span!("gc loop", tenant = %tenantid)));
+                            if let Some(old_cancel_send) = gc_loops.insert(tenantid, cancel_send) {
+                                old_cancel_send.send(()).ok();
+                            }
+
+                            // Update metrics, remember handle
+                            TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
+                            futures.push(handle);
+                        },
+                        tenantid = compaction_recv.recv() => {
+                            let tenantid = tenantid.expect("Compaction task channel closed unexpectedly");
+
+                            // Spawn new task, request cancellation of the old one if exists
+                            let (cancel_send, cancel_recv) = watch::channel(());
+                            let handle = tokio::spawn(compaction_loop(tenantid, cancel_recv)
+                                .instrument(info_span!("compaction loop", tenant = %tenantid)));
+                            if let Some(old_cancel_send) = compaction_loops.insert(tenantid, cancel_send) {
+                                old_cancel_send.send(()).ok();
+                            }
+
+                            // Update metrics, remember handle
+                            TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
+                            futures.push(handle);
+                        },
+                        Some(result) = futures.next() => {
+                            // Count finished loops; log and count any unhandled panics (join errors)
+                            match result {
+                                Ok(()) => {
+                                    TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
+                                },
+                                Err(e) => {
+                                    TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
+                                    error!("loop join error {}", e)
+                                },
+                                // An empty set yields `None`, which disables this branch instead of busy-looping
+                            };
+                        },
+                    }
+                }
+            });
+            Ok(())
+        },
+    )?;
+
+    Ok(())
+}
+
+/// GC task's main loop: runs one gc iteration per gc period via spawn_blocking,
+/// retrying after 2 seconds on failure. The loop exits when the tenant is not
+/// Active, when `file_lock.try_read()` fails, or when `cancel` changes.
+async fn gc_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
+    loop {
+        trace!("waking up");
+
+        // Run the blocking part of the task on tokio's blocking thread pool
+        let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
+            // Break if tenant is not active
+            if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
+                return Ok(ControlFlow::Break(()));
+            }
+
+            // Break if we're not allowed to write to disk, i.e. file_lock.try_read() fails
+            let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
+            // TODO do this inside repo.gc_iteration instead.
+            let _guard = match repo.file_lock.try_read() {
+                Ok(g) => g,
+                Err(_) => return Ok(ControlFlow::Break(())),
+            };
+
+            // Run gc while holding the read guard; the iteration is skipped unless gc_horizon > 0
+            let gc_period = repo.get_gc_period();
+            let gc_horizon = repo.get_gc_horizon();
+            if gc_horizon > 0 {
+                repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
+            }
+
+            Ok(ControlFlow::Continue(gc_period))
+        })
+        .await;
+
+        // Decide whether to sleep or break; the outer Err is a spawn_blocking join error
+        let sleep_duration = match period {
+            Ok(Ok(ControlFlow::Continue(period))) => period,
+            Ok(Ok(ControlFlow::Break(()))) => break,
+            Ok(Err(e)) => {
+                error!("Gc failed, retrying: {}", e);
+                Duration::from_secs(2)
+            }
+            Err(e) => {
+                error!("Gc join error, retrying: {}", e);
+                Duration::from_secs(2)
+            }
+        };
+
+        // Sleep until the next iteration, exiting early if cancellation is signalled
+        tokio::select! {
+            _ = cancel.changed() => {
+                trace!("received cancellation request");
+                break;
+            },
+            _ = tokio::time::sleep(sleep_duration) => {},
+        }
+    }
+    trace!(
+        "GC loop stopped. State is {:?}",
+        tenant_mgr::get_tenant_state(tenantid)
+    );
+}
--- a/pageserver/src/tenant_threads.rs
+++ b/pageserver/src/tenant_threads.rs
@@ -1,79 +0,0 @@
-//! This module contains functions to serve per-tenant background processes,
-//! such as compaction and GC
-use crate::repository::Repository;
-use crate::tenant_mgr;
-use crate::tenant_mgr::TenantState;
-use anyhow::Result;
-use std::time::Duration;
-use tracing::*;
-use utils::zid::ZTenantId;
-
-///
-/// Compaction thread's main loop
-///
-pub fn compact_loop(tenantid: ZTenantId) -> Result<()> {
-    if let Err(err) = compact_loop_ext(tenantid) {
-        error!("compact loop terminated with error: {:?}", err);
-        Err(err)
-    } else {
-        Ok(())
-    }
-}
-
-fn compact_loop_ext(tenantid: ZTenantId) -> Result<()> {
-    loop {
-        if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
-            break;
-        }
-        let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
-        let compaction_period = repo.get_compaction_period();
-
-        std::thread::sleep(compaction_period);
-        trace!("compaction thread for tenant {} waking up", tenantid);
-
-        // Compact timelines
-        let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
-        repo.compaction_iteration()?;
-    }
-
-    trace!(
-        "compaction thread stopped for tenant {} state is {:?}",
-        tenantid,
-        tenant_mgr::get_tenant_state(tenantid)
-    );
-    Ok(())
-}
-
-///
-/// GC thread's main loop
-///
-pub fn gc_loop(tenantid: ZTenantId) -> Result<()> {
-    loop {
-        if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
-            break;
-        }
-
-        trace!("gc thread for tenant {} waking up", tenantid);
-        let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
-        let gc_horizon = repo.get_gc_horizon();
-        // Garbage collect old files that are not needed for PITR anymore
-        if gc_horizon > 0 {
-            repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
-        }
-
-        // TODO Write it in more adequate way using
-        // condvar.wait_timeout() or something
-        let mut sleep_time = repo.get_gc_period().as_secs();
-        while sleep_time > 0 && tenant_mgr::get_tenant_state(tenantid) == Some(TenantState::Active)
-        {
-            sleep_time -= 1;
-            std::thread::sleep(Duration::from_secs(1));
-        }
-    }
-    trace!(
-        "GC thread stopped for tenant {} state is {:?}",
-        tenantid,
-        tenant_mgr::get_tenant_state(tenantid)
-    );
-    Ok(())
-}
--- a/pageserver/src/thread_mgr.rs
+++ b/pageserver/src/thread_mgr.rs
@@ -94,11 +94,8 @@ pub enum ThreadKind {
    // Main walreceiver manager thread that ensures that every timeline spawns a connection to safekeeper, to fetch WAL.
    WalReceiverManager,

-    // Thread that handles compaction of all timelines for a tenant.
-    Compactor,
-
-    // Thread that handles GC of a tenant
-    GarbageCollector,
+    // Thread that schedules new compaction and gc jobs
+    TenantTaskManager,

    // Thread that flushes frozen in-memory layers to disk
    LayerFlushThread,
@@ -108,15 +105,21 @@ pub enum ThreadKind {
    StorageSync,
 }

+struct MutableThreadState {
+    /// Tenant and timeline that this thread is currently associated with.
+    tenant_id: Option<ZTenantId>,
+    timeline_id: Option<ZTimelineId>,
+
+    /// Handle for waiting for the thread to exit. It can be None, if the
+    /// thread has not fully started yet, or its handle was already taken for joining.
+    join_handle: Option<JoinHandle<()>>,
+}
+
 struct PageServerThread {
    _thread_id: u64,

    kind: ThreadKind,

-    /// Tenant and timeline that this thread is associated with.
-    tenant_id: Option<ZTenantId>,
-    timeline_id: Option<ZTimelineId>,
-
    name: String,

    // To request thread shutdown, set the flag, and send a dummy message to the
@@ -124,9 +127,7 @@ struct PageServerThread {
    shutdown_requested: AtomicBool,
    shutdown_tx: watch::Sender<()>,

-    /// Handle for waiting for the thread to exit. It can be None, if the
-    /// the thread has already exited.
-    join_handle: Mutex<Option<JoinHandle<()>>>,
+    mutable: Mutex<MutableThreadState>,
 }

 /// Launch a new thread
@@ -145,29 +146,27 @@ where
 {
    let (shutdown_tx, shutdown_rx) = watch::channel(());
    let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
-    let thread = PageServerThread {
+    let thread = Arc::new(PageServerThread {
        _thread_id: thread_id,
        kind,
-        tenant_id,
-        timeline_id,
        name: name.to_string(),
-
        shutdown_requested: AtomicBool::new(false),
        shutdown_tx,
-
-        join_handle: Mutex::new(None),
-    };
-
-    let thread_rc = Arc::new(thread);
-
-    let mut jh_guard = thread_rc.join_handle.lock().unwrap();
+        mutable: Mutex::new(MutableThreadState {
+            tenant_id,
+            timeline_id,
+            join_handle: None,
+        }),
+    });

    THREADS
        .lock()
        .unwrap()
-        .insert(thread_id, Arc::clone(&thread_rc));
+        .insert(thread_id, Arc::clone(&thread));

-    let thread_rc2 = Arc::clone(&thread_rc);
+    let mut thread_mut = thread.mutable.lock().unwrap();
+
+    let thread_cloned = Arc::clone(&thread);
    let thread_name = name.to_string();
    let join_handle = match thread::Builder::new()
        .name(name.to_string())
@@ -175,7 +174,7 @@ where
            thread_wrapper(
                thread_name,
                thread_id,
-                thread_rc2,
+                thread_cloned,
                shutdown_rx,
                shutdown_process_on_error,
                f,
@@ -189,8 +188,8 @@ where
            return Err(err);
        }
    };
-    *jh_guard = Some(join_handle);
-    drop(jh_guard);
+    thread_mut.join_handle = Some(join_handle);
+    drop(thread_mut);

    // The thread is now running. Nothing more to do here
    Ok(thread_id)
@@ -229,19 +228,20 @@ fn thread_wrapper<F>(
        .remove(&thread_id)
        .expect("no thread in registry");

+    let thread_mut = thread.mutable.lock().unwrap();
    match result {
        Ok(Ok(())) => debug!("Thread '{}' exited normally", thread_name),
        Ok(Err(err)) => {
            if shutdown_process_on_error {
                error!(
                    "Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
                shutdown_pageserver(1);
            } else {
                error!(
                    "Thread '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
            }
        }
@@ -249,19 +249,29 @@ fn thread_wrapper<F>(
            if shutdown_process_on_error {
                error!(
                    "Shutting down: thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
                shutdown_pageserver(1);
            } else {
                error!(
                    "Thread '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                    thread_name, thread.tenant_id, thread.timeline_id, err
+                    thread_name, thread_mut.tenant_id, thread_mut.timeline_id, err
                );
            }
        }
    }
 }

+/// Overwrites the tenant/timeline ids recorded for the calling thread (looked up via CURRENT_THREAD); panics if it is unset.
+pub fn associate_with(tenant_id: Option<ZTenantId>, timeline_id: Option<ZTimelineId>) {
+    CURRENT_THREAD.with(|ct| {
+        let borrowed = ct.borrow();
+        let mut thread_mut = borrowed.as_ref().unwrap().mutable.lock().unwrap();
+        thread_mut.tenant_id = tenant_id;
+        thread_mut.timeline_id = timeline_id;
+    });
+}
+
 /// Is there a thread running that matches the criteria

 /// Signal and wait for threads to shut down.
@@ -285,9 +295,10 @@ pub fn shutdown_threads(

    let threads = THREADS.lock().unwrap();
    for thread in threads.values() {
+        let thread_mut = thread.mutable.lock().unwrap();
        if (kind.is_none() || Some(thread.kind) == kind)
-            && (tenant_id.is_none() || thread.tenant_id == tenant_id)
-            && (timeline_id.is_none() || thread.timeline_id == timeline_id)
+            && (tenant_id.is_none() || thread_mut.tenant_id == tenant_id)
+            && (timeline_id.is_none() || thread_mut.timeline_id == timeline_id)
        {
            thread.shutdown_requested.store(true, Ordering::Relaxed);
            // FIXME: handle error?
@@ -298,8 +309,10 @@ pub fn shutdown_threads(
    drop(threads);

    for thread in victim_threads {
+        let mut thread_mut = thread.mutable.lock().unwrap();
        info!("waiting for {} to shut down", thread.name);
-        if let Some(join_handle) = thread.join_handle.lock().unwrap().take() {
+        if let Some(join_handle) = thread_mut.join_handle.take() {
+            drop(thread_mut);
            let _ = join_handle.join();
        } else {
            // The thread had not even fully started yet. Or it was shut down
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
--- a/pageserver/src/walreceiver/connection_manager.rs
+++ b/pageserver/src/walreceiver/connection_manager.rs
--- a/pageserver/src/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/walreceiver/walreceiver_connection.rs
@@ -1,5 +1,5 @@
 //! Actual Postgres connection handler to stream WAL to the server.
-//! Runs as a separate, cancellable Tokio task.
+
 use std::{
    str::FromStr,
    sync::Arc,
@@ -10,113 +10,29 @@ use anyhow::{bail, ensure, Context};
 use bytes::BytesMut;
 use fail::fail_point;
 use postgres::{SimpleQueryMessage, SimpleQueryRow};
-use postgres_ffi::waldecoder::WalStreamDecoder;
 use postgres_protocol::message::backend::ReplicationMessage;
 use postgres_types::PgLsn;
 use tokio::{pin, select, sync::watch, time};
 use tokio_postgres::{replication::ReplicationStream, Client};
 use tokio_stream::StreamExt;
 use tracing::{debug, error, info, info_span, trace, warn, Instrument};
-use utils::{
-    lsn::Lsn,
-    pq_proto::ZenithFeedback,
-    zid::{NodeId, ZTenantTimelineId},
-};

+use super::TaskEvent;
 use crate::{
    http::models::WalReceiverEntry,
    repository::{Repository, Timeline},
    tenant_mgr,
    walingest::WalIngest,
 };
+use postgres_ffi::waldecoder::WalStreamDecoder;
+use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};

-#[derive(Debug, Clone)]
-pub enum WalConnectionEvent {
-    Started,
-    NewWal(ZenithFeedback),
-    End(Result<(), String>),
-}
-
-/// A wrapper around standalone Tokio task, to poll its updates or cancel the task.
-#[derive(Debug)]
-pub struct WalReceiverConnection {
-    handle: tokio::task::JoinHandle<()>,
-    cancellation: watch::Sender<()>,
-    events_receiver: watch::Receiver<WalConnectionEvent>,
-}
-
-impl WalReceiverConnection {
-    /// Initializes the connection task, returning a set of handles on top of it.
-    /// The task is started immediately after the creation, fails if no connection is established during the timeout given.
-    pub fn open(
-        id: ZTenantTimelineId,
-        safekeeper_id: NodeId,
-        wal_producer_connstr: String,
-        connect_timeout: Duration,
-    ) -> Self {
-        let (cancellation, mut cancellation_receiver) = watch::channel(());
-        let (events_sender, events_receiver) = watch::channel(WalConnectionEvent::Started);
-
-        let handle = tokio::spawn(
-            async move {
-                let connection_result = handle_walreceiver_connection(
-                    id,
-                    &wal_producer_connstr,
-                    &events_sender,
-                    &mut cancellation_receiver,
-                    connect_timeout,
-                )
-                .await
-                .map_err(|e| {
-                    format!("Walreceiver connection for id {id} failed with error: {e:#}")
-                });
-
-                match &connection_result {
-                    Ok(()) => {
-                        debug!("Walreceiver connection for id {id} ended successfully")
-                    }
-                    Err(e) => warn!("{e}"),
-                }
-                events_sender
-                    .send(WalConnectionEvent::End(connection_result))
-                    .ok();
-            }
-            .instrument(info_span!("safekeeper_handle", sk = %safekeeper_id)),
-        );
-
-        Self {
-            handle,
-            cancellation,
-            events_receiver,
-        }
-    }
-
-    /// Polls for the next WAL receiver event, if there's any available since the last check.
-    /// Blocks if there's no new event available, returns `None` if no new events will ever occur.
-    /// Only the last event is returned, all events received between observatins are lost.
-    pub async fn next_event(&mut self) -> Option<WalConnectionEvent> {
-        match self.events_receiver.changed().await {
-            Ok(()) => Some(self.events_receiver.borrow().clone()),
-            Err(_cancellation_error) => None,
-        }
-    }
-
-    /// Gracefully aborts current WAL streaming task, waiting for the current WAL streamed.
-    pub async fn shutdown(&mut self) -> anyhow::Result<()> {
-        self.cancellation.send(()).ok();
-        let handle = &mut self.handle;
-        handle
-            .await
-            .context("Failed to join on a walreceiver connection task")?;
-        Ok(())
-    }
-}
-
-async fn handle_walreceiver_connection(
+/// Opens a connection to the given WAL producer and streams the WAL, sending progress messages during streaming.
+pub async fn handle_walreceiver_connection(
    id: ZTenantTimelineId,
    wal_producer_connstr: &str,
-    events_sender: &watch::Sender<WalConnectionEvent>,
-    cancellation: &mut watch::Receiver<()>,
+    events_sender: &watch::Sender<TaskEvent<ReplicationFeedback>>,
+    mut cancellation: watch::Receiver<()>,
    connect_timeout: Duration,
 ) -> anyhow::Result<()> {
    // Connect to the database in replication mode.
@@ -214,8 +130,6 @@ async fn handle_walreceiver_connection(

    while let Some(replication_message) = {
        select! {
-            // check for shutdown first
-            biased;
            _ = cancellation.changed() => {
                info!("walreceiver interrupted");
                None
@@ -328,7 +242,7 @@ async fn handle_walreceiver_connection(

            // Send zenith feedback message.
            // Regular standby_status_update fields are put into this message.
-            let zenith_status_update = ZenithFeedback {
+            let zenith_status_update = ReplicationFeedback {
                current_timeline_size: timeline.get_current_logical_size() as u64,
                ps_writelsn: write_lsn,
                ps_flushlsn: flush_lsn,
@@ -344,7 +258,7 @@ async fn handle_walreceiver_connection(
                .as_mut()
                .zenith_status_update(data.len() as u64, &data)
                .await?;
-            if let Err(e) = events_sender.send(WalConnectionEvent::NewWal(zenith_status_update)) {
+            if let Err(e) = events_sender.send(TaskEvent::NewEvent(zenith_status_update)) {
                warn!("Wal connection event listener dropped, aborting the connection: {e}");
                return Ok(());
            }
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -39,6 +39,8 @@ utils = { path = "../libs/utils" }
 metrics = { path = "../libs/metrics" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }

+x509-parser = "0.13.2"
+
 [dev-dependencies]
 rcgen = "0.8.14"
 rstest = "0.12"
--- a/proxy/src/auth/backend/console.rs
+++ b/proxy/src/auth/backend/console.rs
@@ -19,7 +19,7 @@ pub type Result<T> = std::result::Result<T, ConsoleAuthError>;
 #[derive(Debug, Error)]
 pub enum ConsoleAuthError {
    #[error(transparent)]
-    BadProjectName(#[from] auth::credentials::ProjectNameError),
+    BadProjectName(#[from] auth::credentials::ClientCredsParseError),

    // We shouldn't include the actual secret here.
    #[error("Bad authentication secret")]
@@ -49,6 +49,12 @@ impl UserFacingError for ConsoleAuthError {
    }
 }

+impl From<&auth::credentials::ClientCredsParseError> for ConsoleAuthError {
+    fn from(e: &auth::credentials::ClientCredsParseError) -> Self {
+        ConsoleAuthError::BadProjectName(e.clone())
+    }
+}
+
 // TODO: convert into an enum with "error"
 #[derive(Serialize, Deserialize, Debug)]
 struct GetRoleSecretResponse {
@@ -74,18 +80,12 @@ pub enum AuthInfo {
 pub(super) struct Api<'a> {
    endpoint: &'a ApiUrl,
    creds: &'a ClientCredentials,
-    /// Cache project name, since we'll need it several times.
-    project: &'a str,
 }

 impl<'a> Api<'a> {
    /// Construct an API object containing the auth parameters.
    pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Result<Self> {
-        Ok(Self {
-            endpoint,
-            creds,
-            project: creds.project_name()?,
-        })
+        Ok(Self { endpoint, creds })
    }

    /// Authenticate the existing user or throw an error.
@@ -100,7 +100,7 @@ impl<'a> Api<'a> {
        let mut url = self.endpoint.clone();
        url.path_segments_mut().push("proxy_get_role_secret");
        url.query_pairs_mut()
-            .append_pair("project", self.project)
+            .append_pair("project", self.creds.project_name.as_ref()?)
            .append_pair("role", &self.creds.user);

        // TODO: use a proper logger
@@ -123,7 +123,8 @@ impl<'a> Api<'a> {
    async fn wake_compute(&self) -> Result<DatabaseInfo> {
        let mut url = self.endpoint.clone();
        url.path_segments_mut().push("proxy_wake_compute");
-        url.query_pairs_mut().append_pair("project", self.project);
+        let project_name = self.creds.project_name.as_ref()?;
+        url.query_pairs_mut().append_pair("project", project_name);

        // TODO: use a proper logger
        println!("cplane request: {url}");
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -8,10 +8,32 @@ use std::collections::HashMap;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};

-#[derive(Debug, Error)]
+#[derive(Debug, Error, PartialEq, Eq, Clone)]
 pub enum ClientCredsParseError {
-    #[error("Parameter `{0}` is missing in startup packet")]
+    #[error("Parameter `{0}` is missing in startup packet.")]
    MissingKey(&'static str),
+
+    #[error(
+        "Project name is not specified. \
+        EITHER please upgrade the postgres client library (libpq) for SNI support \
+        OR pass the project name as a parameter: '&options=project%3D<project-name>'."
+    )]
+    MissingSNIAndProjectName,
+
+    #[error("Inconsistent project name inferred from SNI ('{0}') and project option ('{1}').")]
+    InconsistentProjectNameAndSNI(String, String),
+
+    #[error("Common name is not set.")]
+    CommonNameNotSet,
+
+    #[error(
+        "SNI ('{1}') inconsistently formatted with respect to common name ('{0}'). \
+        SNI should be formatted as '<project-name>.<common-name>'."
+    )]
+    InconsistentCommonNameAndSNI(String, String),
+
+    #[error("Project name ('{0}') must contain only alphanumeric characters and hyphens ('-').")]
+    ProjectNameContainsIllegalChars(String),
 }

 impl UserFacingError for ClientCredsParseError {}
@@ -22,10 +44,7 @@ impl UserFacingError for ClientCredsParseError {}
 pub struct ClientCredentials {
    pub user: String,
    pub dbname: String,
-
-    // New console API requires SNI info to determine the cluster name.
-    // Other Auth backends don't need it.
-    pub sni_data: Option<String>,
+    pub project_name: Result<String, ClientCredsParseError>,
 }

 impl ClientCredentials {
@@ -33,51 +52,30 @@ impl ClientCredentials {
        // This logic will likely change in the future.
        self.user.ends_with("@zenith")
    }
-}

-#[derive(Debug, Error)]
-pub enum ProjectNameError {
-    #[error("SNI is missing, please upgrade the postgres client library")]
-    Missing,
-
-    #[error("SNI is malformed")]
-    Bad,
-}
-
-impl UserFacingError for ProjectNameError {}
-
-impl ClientCredentials {
-    /// Determine project name from SNI.
-    pub fn project_name(&self) -> Result<&str, ProjectNameError> {
-        // Currently project name is passed as a top level domain
-        let sni = self.sni_data.as_ref().ok_or(ProjectNameError::Missing)?;
-        let (first, _) = sni.split_once('.').ok_or(ProjectNameError::Bad)?;
-        Ok(first)
-    }
-}
-
-impl TryFrom<HashMap<String, String>> for ClientCredentials {
-    type Error = ClientCredsParseError;
-
-    fn try_from(mut value: HashMap<String, String>) -> Result<Self, Self::Error> {
+    pub fn parse(
+        mut options: HashMap<String, String>,
+        sni_data: Option<&str>,
+        common_name: Option<&str>,
+    ) -> Result<Self, ClientCredsParseError> {
        let mut get_param = |key| {
-            value
+            options
                .remove(key)
                .ok_or(ClientCredsParseError::MissingKey(key))
        };

        let user = get_param("user")?;
        let dbname = get_param("database")?;
+        let project_name = get_param("project").ok();
+        let project_name = get_project_name(sni_data, common_name, project_name.as_deref());

        Ok(Self {
            user,
            dbname,
-            sni_data: None,
+            project_name,
        })
    }
-}

-impl ClientCredentials {
    /// Use credentials to authenticate the user.
    pub async fn authenticate(
        self,
@@ -88,3 +86,244 @@ impl ClientCredentials {
        super::backend::handle_user(config, client, self).await
    }
 }
+
+/// Infer the project name from `sni_data`, which must look like `<project-name>.<common-name>`.
+fn project_name_from_sni_data(
+    sni_data: &str,
+    common_name: &str,
+) -> Result<String, ClientCredsParseError> {
+    let common_name_with_dot = format!(".{common_name}");
+    // `strip_suffix` returns `None` unless ".{common_name}" is the actual suffix of sni_data,
+    // so a single call both validates the SNI and extracts the project name (no `unwrap`).
+    sni_data
+        .strip_suffix(&common_name_with_dot)
+        .map(str::to_string)
+        .ok_or_else(|| {
+            // report the mismatching pair in (common name, SNI) order
+            ClientCredsParseError::InconsistentCommonNameAndSNI(
+                common_name.to_string(),
+                sni_data.to_string(),
+            )
+        })
+}
+
+#[cfg(test)]
+mod tests_for_project_name_from_sni_data {
+    use super::*;
+
+    #[test]
+    fn passing() {
+        let target_project_name = "my-project-123";
+        let common_name = "localtest.me";
+        let sni_data = format!("{target_project_name}.{common_name}");
+        assert_eq!(
+            project_name_from_sni_data(&sni_data, common_name),
+            Ok(target_project_name.to_string())
+        );
+    }
+
+    #[test]
+    fn throws_inconsistent_common_name_and_sni_data() {
+        let target_project_name = "my-project-123";
+        let common_name = "localtest.me";
+        let wrong_suffix = "wrongtest.me";
+        assert_eq!(common_name.len(), wrong_suffix.len());
+        let wrong_common_name = format!("wrong{wrong_suffix}");
+        let sni_data = format!("{target_project_name}.{wrong_common_name}");
+        assert_eq!(
+            project_name_from_sni_data(&sni_data, common_name),
+            Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
+                common_name.to_string(),
+                sni_data
+            ))
+        );
+    }
+}
+
+/// Resolve the project name, preferring SNI and falling back to the `project` option.
+fn get_project_name(
+    sni_data: Option<&str>,
+    common_name: Option<&str>,
+    project_name: Option<&str>,
+) -> Result<String, ClientCredsParseError> {
+    let resolved = if let Some(sni) = sni_data {
+        // SNI is present: it is authoritative, but it needs the common name to be parsed.
+        let cn = common_name.ok_or(ClientCredsParseError::CommonNameNotSet)?;
+        let from_sni = project_name_from_sni_data(sni, cn)?;
+        // An explicitly passed project option must agree with what SNI says.
+        match project_name {
+            Some(from_option) if from_sni != from_option => {
+                return Err(ClientCredsParseError::InconsistentProjectNameAndSNI(
+                    from_sni,
+                    from_option.to_string(),
+                ));
+            }
+            _ => from_sni,
+        }
+    } else {
+        // No SNI: the project option is the only remaining source.
+        project_name
+            .ok_or(ClientCredsParseError::MissingSNIAndProjectName)?
+            .to_string()
+    };
+
+    // Formatting invariant: only alphanumeric characters and hyphens are allowed.
+    let is_legal = |c: char| c.is_alphanumeric() || c == '-';
+    if resolved.chars().any(|c| !is_legal(c)) {
+        return Err(ClientCredsParseError::ProjectNameContainsIllegalChars(resolved));
+    }
+    Ok(resolved)
+}
+
+#[cfg(test)]
+mod tests_for_project_name_only {
+    use super::*;
+
+    #[test]
+    fn passing_from_sni_data_only() {
+        let target_project_name = "my-project-123";
+        let common_name = "localtest.me";
+        let sni_data = format!("{target_project_name}.{common_name}");
+        assert_eq!(
+            get_project_name(Some(&sni_data), Some(common_name), None),
+            Ok(target_project_name.to_string())
+        );
+    }
+
+    #[test]
+    fn throws_project_name_contains_illegal_chars_from_sni_data_only() {
+        let project_name_prefix = "my-project";
+        let project_name_suffix = "123";
+        let common_name = "localtest.me";
+
+        for illegal_char_id in 0..256 {
+            let illegal_char = char::from_u32(illegal_char_id).unwrap();
+            if !(illegal_char.is_alphanumeric() || illegal_char == '-')
+                && illegal_char.to_string().len() == 1
+            {
+                let target_project_name =
+                    format!("{project_name_prefix}{illegal_char}{project_name_suffix}");
+                let sni_data = format!("{target_project_name}.{common_name}");
+                assert_eq!(
+                    get_project_name(Some(&sni_data), Some(common_name), None),
+                    Err(ClientCredsParseError::ProjectNameContainsIllegalChars(
+                        target_project_name
+                    ))
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn passing_from_project_name_only() {
+        let target_project_name = "my-project-123";
+        let common_names = [Some("localtest.me"), None];
+        for common_name in common_names {
+            assert_eq!(
+                get_project_name(None, common_name, Some(target_project_name)),
+                Ok(target_project_name.to_string())
+            );
+        }
+    }
+
+    #[test]
+    fn throws_project_name_contains_illegal_chars_from_project_name_only() {
+        let project_name_prefix = "my-project";
+        let project_name_suffix = "123";
+        let common_names = [Some("localtest.me"), None];
+
+        for common_name in common_names {
+            for illegal_char_id in 0..256 {
+                let illegal_char: char = char::from_u32(illegal_char_id).unwrap();
+                if !(illegal_char.is_alphanumeric() || illegal_char == '-')
+                    && illegal_char.to_string().len() == 1
+                {
+                    let target_project_name =
+                        format!("{project_name_prefix}{illegal_char}{project_name_suffix}");
+                    assert_eq!(
+                        get_project_name(None, common_name, Some(&target_project_name)),
+                        Err(ClientCredsParseError::ProjectNameContainsIllegalChars(
+                            target_project_name
+                        ))
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn passing_from_sni_data_and_project_name() {
+        let target_project_name = "my-project-123";
+        let common_name = "localtest.me";
+        let sni_data = format!("{target_project_name}.{common_name}");
+        assert_eq!(
+            get_project_name(
+                Some(&sni_data),
+                Some(common_name),
+                Some(target_project_name)
+            ),
+            Ok(target_project_name.to_string())
+        );
+    }
+
+    #[test]
+    fn throws_inconsistent_project_name_and_sni() {
+        let project_name_param = "my-project-123";
+        let wrong_project_name = "not-my-project-123";
+        let common_name = "localtest.me";
+        let sni_data = format!("{wrong_project_name}.{common_name}");
+        assert_eq!(
+            get_project_name(Some(&sni_data), Some(common_name), Some(project_name_param)),
+            Err(ClientCredsParseError::InconsistentProjectNameAndSNI(
+                wrong_project_name.to_string(),
+                project_name_param.to_string()
+            ))
+        );
+    }
+
+    #[test]
+    fn throws_common_name_not_set() {
+        let target_project_name = "my-project-123";
+        let wrong_project_name = "not-my-project-123";
+        let common_name = "localtest.me";
+        let sni_datas = [
+            Some(format!("{wrong_project_name}.{common_name}")),
+            Some(format!("{target_project_name}.{common_name}")),
+        ];
+        let project_names = [None, Some(target_project_name)];
+        for sni_data in sni_datas {
+            for project_name_param in project_names {
+                assert_eq!(
+                    get_project_name(sni_data.as_deref(), None, project_name_param),
+                    Err(ClientCredsParseError::CommonNameNotSet)
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn throws_inconsistent_common_name_and_sni_data() {
+        let target_project_name = "my-project-123";
+        let wrong_project_name = "not-my-project-123";
+        let common_name = "localtest.me";
+        let wrong_suffix = "wrongtest.me";
+        assert_eq!(common_name.len(), wrong_suffix.len());
+        let wrong_common_name = format!("wrong{wrong_suffix}");
+        let sni_datas = [
+            Some(format!("{wrong_project_name}.{wrong_common_name}")),
+            Some(format!("{target_project_name}.{wrong_common_name}")),
+        ];
+        let project_names = [None, Some(target_project_name)];
+        for project_name_param in project_names {
+            for sni_data in &sni_datas {
+                assert_eq!(
+                    get_project_name(sni_data.as_deref(), Some(common_name), project_name_param),
+                    Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
+                        common_name.to_string(),
+                        sni_data.clone().unwrap().to_string()
+                    ))
+                );
+            }
+        }
+    }
+}
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -36,23 +36,35 @@ pub struct ProxyConfig {
    pub auth_link_uri: ApiUrl,
 }

-pub type TlsConfig = Arc<rustls::ServerConfig>;
+pub struct TlsConfig {
+    pub config: Arc<rustls::ServerConfig>,
+    pub common_name: Option<String>,
+}
+
+impl TlsConfig {
+    pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
+        self.config.clone()
+    }
+}

 /// Configure TLS for the main endpoint.
 pub fn configure_tls(key_path: &str, cert_path: &str) -> anyhow::Result<TlsConfig> {
    let key = {
        let key_bytes = std::fs::read(key_path).context("TLS key file")?;
        let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..])
-            .context("couldn't read TLS keys")?;
+            .context(format!("Failed to read TLS keys at '{key_path}'"))?;

        ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len());
        keys.pop().map(rustls::PrivateKey).unwrap()
    };

+    let cert_chain_bytes = std::fs::read(cert_path)
+        .with_context(|| format!("Failed to read TLS cert file at '{cert_path}'."))?;
    let cert_chain = {
-        let cert_chain_bytes = std::fs::read(cert_path).context("TLS cert file")?;
        rustls_pemfile::certs(&mut &cert_chain_bytes[..])
-            .context("couldn't read TLS certificate chain")?
+            .context(format!(
+                "Failed to read TLS certificate chain from bytes from file at '{cert_path}'."
+            ))?
            .into_iter()
            .map(rustls::Certificate)
            .collect()
@@ -64,7 +76,25 @@ pub fn configure_tls(key_path: &str, cert_path: &str) -> anyhow::Result<TlsConfi
        // allow TLS 1.2 to be compatible with older client libraries
        .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])?
        .with_no_client_auth()
-        .with_single_cert(cert_chain, key)?;
+        .with_single_cert(cert_chain, key)?
+        .into();

-    Ok(config.into())
+    // determine common name from tls-cert (-c server.crt param).
+    // used in asserting project name formatting invariant.
+    let common_name = {
+        let pem = x509_parser::pem::parse_x509_pem(&cert_chain_bytes)
+            .context(format!(
+                "Failed to parse PEM object from bytes from file at '{cert_path}'."
+            ))?
+            .1;
+        let almost_common_name = pem.parse_x509()?.tbs_certificate.subject.to_string();
+        let expected_prefix = "CN=*.";
+        let common_name = almost_common_name.strip_prefix(expected_prefix);
+        common_name.map(str::to_string)
+    };
+
+    Ok(TlsConfig {
+        config,
+        common_name,
+    })
 }
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -81,7 +81,7 @@ async fn handle_client(
        NUM_CONNECTIONS_CLOSED_COUNTER.inc();
    }

-    let tls = config.tls_config.clone();
+    let tls = config.tls_config.as_ref();
    let (stream, creds) = match handshake(stream, tls, cancel_map).await? {
        Some(x) => x,
        None => return Ok(()), // it's a cancellation request
@@ -99,12 +99,14 @@ async fn handle_client(
 /// we also take an extra care of propagating only the select handshake errors to client.
 async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    stream: S,
-    mut tls: Option<TlsConfig>,
+    mut tls: Option<&TlsConfig>,
    cancel_map: &CancelMap,
 ) -> anyhow::Result<Option<(PqStream<Stream<S>>, auth::ClientCredentials)>> {
    // Client may try upgrading to each protocol only once
    let (mut tried_ssl, mut tried_gss) = (false, false);

+    let common_name = tls.and_then(|cfg| cfg.common_name.as_deref());
+
    let mut stream = PqStream::new(Stream::from_raw(stream));
    loop {
        let msg = stream.read_startup_packet().await?;
@@ -122,7 +124,9 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                    if let Some(tls) = tls.take() {
                        // Upgrade raw stream into a secure TLS-backed stream.
                        // NOTE: We've consumed `tls`; this fact will be used later.
-                        stream = PqStream::new(stream.into_inner().upgrade(tls).await?);
+                        stream = PqStream::new(
+                            stream.into_inner().upgrade(tls.to_server_config()).await?,
+                        );
                    }
                }
                _ => bail!(ERR_PROTO_VIOLATION),
@@ -143,15 +147,16 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                    stream.throw_error_str(ERR_INSECURE_CONNECTION).await?;
                }

-                // Here and forth: `or_else` demands that we use a future here
-                let mut creds: auth::ClientCredentials = async { params.try_into() }
-                    .or_else(|e| stream.throw_error(e))
-                    .await?;
+                // Get SNI info when available
+                let sni_data = match stream.get_ref() {
+                    Stream::Tls { tls } => tls.get_ref().1.sni_hostname().map(|s| s.to_owned()),
+                    _ => None,
+                };

-                // Set SNI info when available
-                if let Stream::Tls { tls } = stream.get_ref() {
-                    creds.sni_data = tls.get_ref().1.sni_hostname().map(|s| s.to_owned());
-                }
+                // Construct credentials
+                let creds =
+                    auth::ClientCredentials::parse(params, sni_data.as_deref(), common_name);
+                let creds = async { creds }.or_else(|e| stream.throw_error(e)).await?;

                break Ok(Some((stream, creds)));
            }
@@ -264,12 +269,13 @@ mod tests {
    }

    /// Generate TLS certificates and build rustls configs for client and server.
-    fn generate_tls_config(
-        hostname: &str,
-    ) -> anyhow::Result<(ClientConfig<'_>, Arc<rustls::ServerConfig>)> {
+    fn generate_tls_config<'a>(
+        hostname: &'a str,
+        common_name: &'a str,
+    ) -> anyhow::Result<(ClientConfig<'a>, TlsConfig)> {
        let (ca, cert, key) = generate_certs(hostname)?;

-        let server_config = {
+        let tls_config = {
            let config = rustls::ServerConfig::builder()
                .with_safe_defaults()
                .with_no_client_auth()
@@ -291,7 +297,12 @@ mod tests {
            ClientConfig { config, hostname }
        };

-        Ok((client_config, server_config))
+        let tls_config = TlsConfig {
+            config: tls_config,
+            common_name: Some(common_name.to_string()),
+        };
+
+        Ok((client_config, tls_config))
    }

    #[async_trait]
@@ -346,7 +357,7 @@ mod tests {
        auth: impl TestAuth + Send,
    ) -> anyhow::Result<()> {
        let cancel_map = CancelMap::default();
-        let (mut stream, _creds) = handshake(client, tls, &cancel_map)
+        let (mut stream, _creds) = handshake(client, tls.as_ref(), &cancel_map)
            .await?
            .context("handshake failed")?;

@@ -365,7 +376,8 @@ mod tests {
    async fn handshake_tls_is_enforced_by_proxy() -> anyhow::Result<()> {
        let (client, server) = tokio::io::duplex(1024);

-        let (_, server_config) = generate_tls_config("localhost")?;
+        let (_, server_config) =
+            generate_tls_config("generic-project-name.localhost", "localhost")?;
        let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth));

        let client_err = tokio_postgres::Config::new()
@@ -393,7 +405,8 @@ mod tests {
    async fn handshake_tls() -> anyhow::Result<()> {
        let (client, server) = tokio::io::duplex(1024);

-        let (client_config, server_config) = generate_tls_config("localhost")?;
+        let (client_config, server_config) =
+            generate_tls_config("generic-project-name.localhost", "localhost")?;
        let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth));

        let (_client, _conn) = tokio_postgres::Config::new()
@@ -415,6 +428,7 @@ mod tests {
        let (_client, _conn) = tokio_postgres::Config::new()
            .user("john_doe")
            .dbname("earth")
+            .options("project=generic-project-name")
            .ssl_mode(SslMode::Prefer)
            .connect_raw(server, NoTls)
            .await?;
@@ -476,7 +490,8 @@ mod tests {
    async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> {
        let (client, server) = tokio::io::duplex(1024);

-        let (client_config, server_config) = generate_tls_config("localhost")?;
+        let (client_config, server_config) =
+            generate_tls_config("generic-project-name.localhost", "localhost")?;
        let proxy = tokio::spawn(dummy_proxy(
            client,
            Some(server_config),
@@ -498,7 +513,8 @@ mod tests {
    async fn scram_auth_mock() -> anyhow::Result<()> {
        let (client, server) = tokio::io::duplex(1024);

-        let (client_config, server_config) = generate_tls_config("localhost")?;
+        let (client_config, server_config) =
+            generate_tls_config("generic-project-name.localhost", "localhost")?;
        let proxy = tokio::spawn(dummy_proxy(
            client,
            Some(server_config),
--- a/proxy/src/waiters.rs
+++ b/proxy/src/waiters.rs
@@ -115,7 +115,7 @@ mod tests {
            Ok(())
        });

-        let () = waiter.await?;
+        waiter.await?;
        notifier.await?
    }
 }
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -10,6 +10,7 @@ use remote_storage::RemoteStorageConfig;
 use std::fs::{self, File};
 use std::io::{ErrorKind, Write};
 use std::path::{Path, PathBuf};
+use std::sync::Arc;
 use std::thread;
 use tokio::sync::mpsc;
 use toml_edit::Document;
@@ -27,6 +28,7 @@ use safekeeper::timeline::GlobalTimelines;
 use safekeeper::wal_backup;
 use safekeeper::wal_service;
 use safekeeper::SafeKeeperConf;
+use utils::auth::JwtAuth;
 use utils::{
    http::endpoint, logging, project_git_version, shutdown::exit_now, signals, tcp_listener,
    zid::NodeId,
@@ -132,6 +134,12 @@ fn main() -> anyhow::Result<()> {
                .default_missing_value("true")
                .help("Enable/disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring WAL backup horizon."),
        )
+        .arg(
+            Arg::new("auth-validation-public-key-path")
+                .long("auth-validation-public-key-path")
+                .takes_value(true)
+                .help("Path to an RSA .pem public key which is used to check JWT tokens")
+        )
        .get_matches();

    if let Some(addr) = arg_matches.value_of("dump-control-file") {
@@ -204,6 +212,10 @@ fn main() -> anyhow::Result<()> {
        .parse()
        .context("failed to parse bool enable-s3-offload bool")?;

+    conf.auth_validation_public_key_path = arg_matches
+        .value_of("auth-validation-public-key-path")
+        .map(PathBuf::from);
+
    start_safekeeper(conf, given_id, arg_matches.is_present("init"))
 }

@@ -239,6 +251,19 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        e
    })?;

+    let auth = match conf.auth_validation_public_key_path.as_ref() {
+        None => {
+            info!("Auth is disabled");
+            None
+        }
+        Some(path) => {
+            info!("Loading JWT auth key from {}", path.display());
+            Some(Arc::new(
+                JwtAuth::from_key_path(path).context("failed to load the auth key")?,
+            ))
+        }
+    };
+
    // XXX: Don't spawn any threads before daemonizing!
    if conf.daemonize {
        info!("daemonizing...");
@@ -280,8 +305,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        thread::Builder::new()
            .name("http_endpoint_thread".into())
            .spawn(|| {
-                // TODO authentication
-                let router = http::make_router(conf_);
+                let router = http::make_router(conf_, auth);
                endpoint::serve_thread_main(
                    router,
                    http_listener,
@@ -295,6 +319,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
    let safekeeper_thread = thread::Builder::new()
        .name("Safekeeper thread".into())
        .spawn(|| {
+            // TODO: add auth
            if let Err(e) = wal_service::thread_main(conf_cloned, pg_listener) {
                info!("safekeeper thread terminated: {e}");
            }
@@ -309,6 +334,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
            thread::Builder::new()
                .name("broker thread".into())
                .spawn(|| {
+                    // TODO: add auth?
                    broker::thread_main(conf_);
                })?,
        );
@@ -321,6 +347,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        thread::Builder::new()
            .name("WAL removal thread".into())
            .spawn(|| {
+                // TODO: add auth?
                remove_wal::thread_main(conf_);
            })?,
    );
@@ -330,6 +357,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
        thread::Builder::new()
            .name("wal backup launcher thread".into())
            .spawn(move || {
+                // TODO: add auth?
                wal_backup::wal_backup_launcher_thread_main(conf_, wal_backup_launcher_rx);
            })?,
    );
--- a/safekeeper/src/broker.rs
+++ b/safekeeper/src/broker.rs
@@ -4,9 +4,12 @@ use anyhow::anyhow;
 use anyhow::Context;
 use anyhow::Error;
 use anyhow::Result;
-use etcd_broker::Client;
-use etcd_broker::PutOptions;
-use etcd_broker::SkTimelineSubscriptionKind;
+use etcd_broker::subscription_value::SkTimelineInfo;
+use etcd_broker::LeaseKeepAliveStream;
+use etcd_broker::LeaseKeeper;
+
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
 use std::time::Duration;
 use tokio::spawn;
 use tokio::task::JoinHandle;
@@ -15,11 +18,15 @@ use tracing::*;
 use url::Url;

 use crate::{timeline::GlobalTimelines, SafeKeeperConf};
+use etcd_broker::{
+    subscription_key::{OperationKind, SkOperationKind, SubscriptionKey},
+    Client, PutOptions,
+};
 use utils::zid::{NodeId, ZTenantTimelineId};

 const RETRY_INTERVAL_MSEC: u64 = 1000;
 const PUSH_INTERVAL_MSEC: u64 = 1000;
-const LEASE_TTL_SEC: i64 = 5;
+const LEASE_TTL_SEC: i64 = 10;

 pub fn thread_main(conf: SafeKeeperConf) {
    let runtime = runtime::Builder::new_current_thread()
@@ -43,7 +50,7 @@ fn timeline_safekeeper_path(
 ) -> String {
    format!(
        "{}/{sk_id}",
-        SkTimelineSubscriptionKind::timeline(broker_etcd_prefix, zttid).watch_key()
+        SubscriptionKey::sk_timeline_info(broker_etcd_prefix, zttid).watch_key()
    )
 }

@@ -90,7 +97,7 @@ impl ElectionLeader {
    }
 }

-pub async fn get_leader(req: &Election) -> Result<ElectionLeader> {
+pub async fn get_leader(req: &Election, leader: &mut Option<ElectionLeader>) -> Result<()> {
    let mut client = Client::connect(req.broker_endpoints.clone(), None)
        .await
        .context("Could not connect to etcd")?;
@@ -102,22 +109,27 @@ pub async fn get_leader(req: &Election) -> Result<ElectionLeader> {

    let lease_id = lease.map(|l| l.id()).unwrap();

-    let keep_alive = spawn::<_>(lease_keep_alive(client.clone(), lease_id));
+    // kill previous keepalive, if any
+    if let Some(l) = leader.take() {
+        l.give_up().await;
+    }

-    if let Err(e) = client
+    let keep_alive = spawn::<_>(lease_keep_alive(client.clone(), lease_id));
+    // immediately save handle to kill task if we get canceled below
+    *leader = Some(ElectionLeader {
+        client: client.clone(),
+        keep_alive,
+    });
+
+    client
        .campaign(
            req.election_name.clone(),
            req.candidate_name.clone(),
            lease_id,
        )
-        .await
-    {
-        keep_alive.abort();
-        let _ = keep_alive.await;
-        return Err(e.into());
-    }
+        .await?;

-    Ok(ElectionLeader { client, keep_alive })
+    Ok(())
 }

 async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
@@ -143,25 +155,52 @@ async fn lease_keep_alive(mut client: Client, lease_id: i64) -> Result<()> {
    }
 }

-pub fn get_campaign_name(
-    election_name: &str,
-    broker_prefix: &str,
-    id: ZTenantTimelineId,
-) -> String {
-    format!("{broker_prefix}/{id}/{election_name}")
-}
-
 pub fn get_candiate_name(system_id: NodeId) -> String {
    format!("id_{system_id}")
 }

+/// Publish `sk_info` under `key`, attached to `lease`, then refresh that lease once.
+///
+/// Takes `lease` by value and hands it back together with `zttid` on success, so the
+/// caller can run this function as a spawned task and afterwards store the lease again.
+/// Any failure (serialization, put, keepalive send or receive) is returned as an error.
+async fn push_sk_info(
+    zttid: ZTenantTimelineId,
+    mut client: Client,
+    key: String,
+    sk_info: SkTimelineInfo,
+    mut lease: Lease,
+) -> anyhow::Result<(ZTenantTimelineId, Lease)> {
+    // Serialization happens before the put is issued, so a bad value fails early.
+    let payload = serde_json::to_string(&sk_info)?;
+    let put_opts = PutOptions::new().with_lease(lease.id);
+    client
+        .put(key.clone(), payload, Some(put_opts))
+        .await
+        .with_context(|| format!("failed to push safekeeper info to {}", key))?;
+
+    // Revive the lease: send one keepalive request, then wait for its response.
+    let sent = lease.keeper.keep_alive().await;
+    sent.context("failed to send LeaseKeepAliveRequest")?;
+    let reply = lease.ka_stream.message().await;
+    reply.context("failed to receive LeaseKeepAliveResponse")?;
+
+    // Return the timeline id alongside the lease so the caller knows which entry
+    // this lease belongs to.
+    let refreshed = (zttid, lease);
+    Ok(refreshed)
+}
+
+struct Lease {
+    id: i64,
+    keeper: LeaseKeeper,
+    ka_stream: LeaseKeepAliveStream,
+}
+
 /// Push once in a while data about all active timelines to the broker.
 async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
    let mut client = Client::connect(&conf.broker_endpoints, None).await?;
-
-    // Get and maintain lease to automatically delete obsolete data
-    let lease = client.lease_grant(LEASE_TTL_SEC, None).await?;
-    let (mut keeper, mut ka_stream) = client.lease_keep_alive(lease.id()).await?;
+    let mut leases: HashMap<ZTenantTimelineId, Lease> = HashMap::new();

    let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);
    loop {
@@ -169,33 +208,46 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
        // is under plain mutex. That's ok, all this code is not performance
        // sensitive and there is no risk of deadlock as we don't await while
        // lock is held.
-        for zttid in GlobalTimelines::get_active_timelines() {
-            if let Some(tli) = GlobalTimelines::get_loaded(zttid) {
-                let sk_info = tli.get_public_info(&conf)?;
-                let put_opts = PutOptions::new().with_lease(lease.id());
-                client
-                    .put(
-                        timeline_safekeeper_path(
-                            conf.broker_etcd_prefix.clone(),
-                            zttid,
-                            conf.my_id,
-                        ),
-                        serde_json::to_string(&sk_info)?,
-                        Some(put_opts),
-                    )
-                    .await
-                    .context("failed to push safekeeper info")?;
+        let active_tlis = GlobalTimelines::get_active_timelines();
+
+        // Get (if not acquired yet) and maintain a per-timeline lease to automatically delete obsolete data.
+        for zttid in active_tlis.iter() {
+            if let Entry::Vacant(v) = leases.entry(*zttid) {
+                let lease = client.lease_grant(LEASE_TTL_SEC, None).await?;
+                let (keeper, ka_stream) = client.lease_keep_alive(lease.id()).await?;
+                v.insert(Lease {
+                    id: lease.id(),
+                    keeper,
+                    ka_stream,
+                });
            }
        }
-        // revive the lease
-        keeper
-            .keep_alive()
-            .await
-            .context("failed to send LeaseKeepAliveRequest")?;
-        ka_stream
-            .message()
-            .await
-            .context("failed to receive LeaseKeepAliveResponse")?;
+        leases.retain(|zttid, _| active_tlis.contains(zttid));
+
+        // Push data concurrently to not suffer from latency, with many timelines it can be slow.
+        let handles = active_tlis
+            .iter()
+            .filter_map(|zttid| GlobalTimelines::get_loaded(*zttid))
+            .map(|tli| {
+                let sk_info = tli.get_public_info(&conf);
+                let key = timeline_safekeeper_path(
+                    conf.broker_etcd_prefix.clone(),
+                    tli.zttid,
+                    conf.my_id,
+                );
+                let lease = leases.remove(&tli.zttid).unwrap();
+                tokio::spawn(push_sk_info(tli.zttid, client.clone(), key, sk_info, lease))
+            })
+            .collect::<Vec<_>>();
+        for h in handles {
+            let (zttid, lease) = h.await??;
+            // It is ugly to pull leases out of the hash map and then put them back, but
+            // spawned tasks want 'static objects, so we can't lend them a borrowed lease.
+            // The alternative is long-living per-timeline tasks, which would generate
+            // a lot of errors when etcd is down.
+            leases.insert(zttid, lease);
+        }
+
        sleep(push_interval).await;
    }
 }
@@ -204,22 +256,30 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
 async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
    let mut client = Client::connect(&conf.broker_endpoints, None).await?;

-    let mut subscription = etcd_broker::subscribe_to_safekeeper_timeline_updates(
+    let mut subscription = etcd_broker::subscribe_for_values(
        &mut client,
-        SkTimelineSubscriptionKind::all(conf.broker_etcd_prefix.clone()),
+        SubscriptionKey::all(conf.broker_etcd_prefix.clone()),
+        |full_key, value_str| {
+            if full_key.operation == OperationKind::Safekeeper(SkOperationKind::TimelineInfo) {
+                match serde_json::from_str::<SkTimelineInfo>(value_str) {
+                    Ok(new_info) => return Some(new_info),
+                    Err(e) => {
+                        error!("Failed to parse timeline info from value str '{value_str}': {e}")
+                    }
+                }
+            }
+            None
+        },
    )
    .await
    .context("failed to subscribe for safekeeper info")?;
    loop {
-        match subscription.fetch_data().await {
+        match subscription.value_updates.recv().await {
            Some(new_info) => {
-                for (zttid, sk_info) in new_info {
-                    // note: there are blocking operations below, but it's considered fine for now
-                    if let Ok(tli) = GlobalTimelines::get(&conf, zttid, false) {
-                        for (safekeeper_id, info) in sk_info {
-                            tli.record_safekeeper_info(&info, safekeeper_id).await?
-                        }
-                    }
+                // note: there are blocking operations below, but it's considered fine for now
+                if let Ok(tli) = GlobalTimelines::get(&conf, new_info.key.id, false) {
+                    tli.record_safekeeper_info(&new_info.value, new_info.key.node_id)
+                        .await?
                }
            }
            None => {
--- a/safekeeper/src/control_file_upgrade.rs
+++ b/safekeeper/src/control_file_upgrade.rs
@@ -239,6 +239,19 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
            remote_consistent_lsn: Lsn(0),
            peers: Peers(vec![]),
        });
+    } else if version == 5 {
+        info!("reading safekeeper control file version {}", version);
+        let mut oldstate = SafeKeeperState::des(&buf[..buf.len()])?;
+        if oldstate.timeline_start_lsn != Lsn(0) {
+            return Ok(oldstate);
+        }
+
+        // set special timeline_start_lsn because we don't know the real one
+        info!("setting timeline_start_lsn and local_start_lsn to Lsn(1)");
+        oldstate.timeline_start_lsn = Lsn(1);
+        oldstate.local_start_lsn = Lsn(1);
+
+        return Ok(oldstate);
    }
    bail!("unsupported safekeeper control file version {}", version)
 }
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -29,12 +29,11 @@ pub struct SafekeeperPostgresHandler {
    pub ztenantid: Option<ZTenantId>,
    pub ztimelineid: Option<ZTimelineId>,
    pub timeline: Option<Arc<Timeline>>,
-    pageserver_connstr: Option<String>,
 }

 /// Parsed Postgres command.
 enum SafekeeperPostgresCommand {
-    StartWalPush { pageserver_connstr: Option<String> },
+    StartWalPush,
    StartReplication { start_lsn: Lsn },
    IdentifySystem,
    JSONCtrl { cmd: AppendLogicalMessage },
@@ -42,11 +41,7 @@ enum SafekeeperPostgresCommand {

 fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
    if cmd.starts_with("START_WAL_PUSH") {
-        let re = Regex::new(r"START_WAL_PUSH(?: (.+))?").unwrap();
-
-        let caps = re.captures(cmd).unwrap();
-        let pageserver_connstr = caps.get(1).map(|m| m.as_str().to_owned());
-        Ok(SafekeeperPostgresCommand::StartWalPush { pageserver_connstr })
+        Ok(SafekeeperPostgresCommand::StartWalPush)
    } else if cmd.starts_with("START_REPLICATION") {
        let re =
            Regex::new(r"START_REPLICATION(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)").unwrap();
@@ -86,8 +81,6 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
                self.appname = Some(app_name.clone());
            }

-            self.pageserver_connstr = params.get("pageserver_connstr").cloned();
-
            Ok(())
        } else {
            bail!("Safekeeper received unexpected initial message: {:?}", sm);
@@ -113,14 +106,14 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
        }

        match cmd {
-            SafekeeperPostgresCommand::StartWalPush { pageserver_connstr } => {
-                ReceiveWalConn::new(pgb, pageserver_connstr)
+            SafekeeperPostgresCommand::StartWalPush => {
+                ReceiveWalConn::new(pgb)
                    .run(self)
                    .context("failed to run ReceiveWalConn")?;
            }
            SafekeeperPostgresCommand::StartReplication { start_lsn } => {
                ReplicationConn::new(pgb)
-                    .run(self, pgb, start_lsn, self.pageserver_connstr.clone())
+                    .run(self, pgb, start_lsn)
                    .context("failed to run ReplicationConn")?;
            }
            SafekeeperPostgresCommand::IdentifySystem => {
@@ -142,7 +135,6 @@ impl SafekeeperPostgresHandler {
            ztenantid: None,
            ztimelineid: None,
            timeline: None,
-            pageserver_connstr: None,
        }
    }

--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -1,9 +1,9 @@
-use etcd_broker::SkTimelineInfo;
-use hyper::{Body, Request, Response, StatusCode};
+use hyper::{Body, Request, Response, StatusCode, Uri};

+use once_cell::sync::Lazy;
 use serde::Serialize;
 use serde::Serializer;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
 use std::sync::Arc;

@@ -11,9 +11,11 @@ use crate::safekeeper::Term;
 use crate::safekeeper::TermHistory;
 use crate::timeline::{GlobalTimelines, TimelineDeleteForceResult};
 use crate::SafeKeeperConf;
+use etcd_broker::subscription_value::SkTimelineInfo;
 use utils::{
+    auth::JwtAuth,
    http::{
-        endpoint,
+        endpoint::{self, auth_middleware, check_permission},
        error::ApiError,
        json::{json_request, json_response},
        request::{ensure_no_body, parse_request_param},
@@ -32,6 +34,7 @@ struct SafekeeperStatus {

 /// Healthcheck handler.
 async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    check_permission(&request, None)?;
    let conf = get_conf(&request);
    let status = SafekeeperStatus { id: conf.my_id };
    json_response(StatusCode::OK, status)
@@ -91,6 +94,7 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
        parse_request_param(&request, "tenant_id")?,
        parse_request_param(&request, "timeline_id")?,
    );
+    check_permission(&request, Some(zttid.tenant_id))?;

    let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
    let (inmem, state) = tli.get_state();
@@ -125,6 +129,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
        tenant_id: request_data.tenant_id,
        timeline_id: request_data.timeline_id,
    };
+    check_permission(&request, Some(zttid.tenant_id))?;
    GlobalTimelines::create(get_conf(&request), zttid, request_data.peer_ids)
        .map_err(ApiError::from_err)?;

@@ -145,6 +150,7 @@ async fn timeline_delete_force_handler(
        parse_request_param(&request, "tenant_id")?,
        parse_request_param(&request, "timeline_id")?,
    );
+    check_permission(&request, Some(zttid.tenant_id))?;
    ensure_no_body(&mut request).await?;
    json_response(
        StatusCode::OK,
@@ -160,6 +166,7 @@ async fn tenant_delete_force_handler(
    mut request: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id = parse_request_param(&request, "tenant_id")?;
+    check_permission(&request, Some(tenant_id))?;
    ensure_no_body(&mut request).await?;
    json_response(
        StatusCode::OK,
@@ -178,6 +185,7 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
        parse_request_param(&request, "tenant_id")?,
        parse_request_param(&request, "timeline_id")?,
    );
+    check_permission(&request, Some(zttid.tenant_id))?;
    let safekeeper_info: SkTimelineInfo = json_request(&mut request).await?;

    let tli = GlobalTimelines::get(get_conf(&request), zttid, false).map_err(ApiError::from_err)?;
@@ -188,15 +196,33 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
 }

 /// Safekeeper http router.
-pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
-    let router = endpoint::make_router();
+pub fn make_router(
+    conf: SafeKeeperConf,
+    auth: Option<Arc<JwtAuth>>,
+) -> RouterBuilder<hyper::Body, ApiError> {
+    let mut router = endpoint::make_router();
+    if auth.is_some() {
+        router = router.middleware(auth_middleware(|request| {
+            #[allow(clippy::mutable_key_type)]
+            static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> =
+                Lazy::new(|| ["/v1/status"].iter().map(|v| v.parse().unwrap()).collect());
+            if ALLOWLIST_ROUTES.contains(request.uri()) {
+                None
+            } else {
+                // Option<Arc<JwtAuth>> is always provided as data below, hence unwrap().
+                request.data::<Option<Arc<JwtAuth>>>().unwrap().as_deref()
+            }
+        }))
+    }
    router
        .data(Arc::new(conf))
+        .data(auth)
        .get("/v1/status", status_handler)
        .get(
            "/v1/timeline/:tenant_id/:timeline_id",
            timeline_status_handler,
        )
+        // Will be used in the future instead of implicit timeline creation
        .post("/v1/timeline", timeline_create_handler)
        .delete(
            "/v1/tenant/:tenant_id/timeline/:timeline_id",
--- a/safekeeper/src/json_ctrl.rs
+++ b/safekeeper/src/json_ctrl.rs
@@ -124,7 +124,7 @@ fn send_proposer_elected(spg: &mut SafekeeperPostgresHandler, term: Term, lsn: L
        term,
        start_streaming_at: lsn,
        term_history: history,
-        timeline_start_lsn: Lsn(0),
+        timeline_start_lsn: lsn,
    });

    spg.timeline.get().process_msg(&proposer_elected_request)?;
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -57,6 +57,7 @@ pub struct SafeKeeperConf {
    pub my_id: NodeId,
    pub broker_endpoints: Vec<Url>,
    pub broker_etcd_prefix: String,
+    pub auth_validation_public_key_path: Option<PathBuf>,
 }

 impl SafeKeeperConf {
@@ -88,6 +89,7 @@ impl Default for SafeKeeperConf {
            broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
            backup_runtime_threads: DEFAULT_WAL_BACKUP_RUNTIME_THREADS,
            wal_backup_enabled: true,
+            auth_validation_public_key_path: None,
        }
    }
 }
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -242,9 +242,9 @@ impl Collector for TimelineCollector {
            let timeline_id = tli.zttid.timeline_id.to_string();
            let labels = &[tenant_id.as_str(), timeline_id.as_str()];

-            let mut most_advanced: Option<utils::pq_proto::ZenithFeedback> = None;
+            let mut most_advanced: Option<utils::pq_proto::ReplicationFeedback> = None;
            for replica in tli.replicas.iter() {
-                if let Some(replica_feedback) = replica.zenith_feedback {
+                if let Some(replica_feedback) = replica.pageserver_feedback {
                    if let Some(current) = most_advanced {
                        if current.ps_writelsn < replica_feedback.ps_writelsn {
                            most_advanced = Some(replica_feedback);
--- a/safekeeper/src/receive_wal.rs
+++ b/safekeeper/src/receive_wal.rs
@@ -32,22 +32,14 @@ pub struct ReceiveWalConn<'pg> {
    pg_backend: &'pg mut PostgresBackend,
    /// The cached result of `pg_backend.socket().peer_addr()` (roughly)
    peer_addr: SocketAddr,
-    /// Pageserver connection string forwarded from compute
-    /// NOTE that it is allowed to operate without a pageserver.
-    /// So if compute has no pageserver configured do not use it.
-    pageserver_connstr: Option<String>,
 }

 impl<'pg> ReceiveWalConn<'pg> {
-    pub fn new(
-        pg: &'pg mut PostgresBackend,
-        pageserver_connstr: Option<String>,
-    ) -> ReceiveWalConn<'pg> {
+    pub fn new(pg: &'pg mut PostgresBackend) -> ReceiveWalConn<'pg> {
        let peer_addr = *pg.get_peer_addr();
        ReceiveWalConn {
            pg_backend: pg,
            peer_addr,
-            pageserver_connstr,
        }
    }

@@ -120,9 +112,7 @@ impl<'pg> ReceiveWalConn<'pg> {
                // Register the connection and defer unregister. Do that only
                // after processing first message, as it sets wal_seg_size,
                // wanted by many.
-                spg.timeline
-                    .get()
-                    .on_compute_connect(self.pageserver_connstr.as_ref())?;
+                spg.timeline.get().on_compute_connect()?;
                _guard = Some(ComputeConnectionGuard {
                    timeline: Arc::clone(spg.timeline.get()),
                });
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -4,7 +4,7 @@ use anyhow::{bail, Context, Result};
 use byteorder::{LittleEndian, ReadBytesExt};
 use bytes::{Buf, BufMut, Bytes, BytesMut};

-use etcd_broker::SkTimelineInfo;
+use etcd_broker::subscription_value::SkTimelineInfo;
 use postgres_ffi::xlog_utils::TimeLineID;

 use postgres_ffi::xlog_utils::XLogSegNo;
@@ -23,12 +23,12 @@ use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
 use utils::{
    bin_ser::LeSer,
    lsn::Lsn,
-    pq_proto::{SystemId, ZenithFeedback},
+    pq_proto::{ReplicationFeedback, SystemId},
    zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
 };

 pub const SK_MAGIC: u32 = 0xcafeceefu32;
-pub const SK_FORMAT_VERSION: u32 = 5;
+pub const SK_FORMAT_VERSION: u32 = 6;
 const SK_PROTOCOL_VERSION: u32 = 2;
 const UNKNOWN_SERVER_VERSION: u32 = 0;

@@ -348,7 +348,7 @@ pub struct AppendResponse {
    // a criterion for walproposer --sync mode exit
    pub commit_lsn: Lsn,
    pub hs_feedback: HotStandbyFeedback,
-    pub zenith_feedback: ZenithFeedback,
+    pub pageserver_feedback: ReplicationFeedback,
 }

 impl AppendResponse {
@@ -358,7 +358,7 @@ impl AppendResponse {
            flush_lsn: Lsn(0),
            commit_lsn: Lsn(0),
            hs_feedback: HotStandbyFeedback::empty(),
-            zenith_feedback: ZenithFeedback::empty(),
+            pageserver_feedback: ReplicationFeedback::empty(),
        }
    }
 }
@@ -476,7 +476,7 @@ impl AcceptorProposerMessage {
                buf.put_u64_le(msg.hs_feedback.xmin);
                buf.put_u64_le(msg.hs_feedback.catalog_xmin);

-                msg.zenith_feedback.serialize(buf)?
+                msg.pageserver_feedback.serialize(buf)?
            }
        }

@@ -677,7 +677,7 @@ where
            commit_lsn: self.state.commit_lsn,
            // will be filled by the upper code to avoid bothering safekeeper
            hs_feedback: HotStandbyFeedback::empty(),
-            zenith_feedback: ZenithFeedback::empty(),
+            pageserver_feedback: ReplicationFeedback::empty(),
        };
        trace!("formed AppendResponse {:?}", ar);
        ar
--- a/safekeeper/src/send_wal.rs
+++ b/safekeeper/src/send_wal.rs
@@ -13,15 +13,17 @@ use serde::{Deserialize, Serialize};
 use std::cmp::min;
 use std::net::Shutdown;
 use std::sync::Arc;
-use std::thread::sleep;
 use std::time::Duration;
 use std::{str, thread};
+
+use tokio::sync::watch::Receiver;
+use tokio::time::timeout;
 use tracing::*;
 use utils::{
    bin_ser::BeSer,
    lsn::Lsn,
    postgres_backend::PostgresBackend,
-    pq_proto::{BeMessage, FeMessage, WalSndKeepAlive, XLogDataBody, ZenithFeedback},
+    pq_proto::{BeMessage, FeMessage, ReplicationFeedback, WalSndKeepAlive, XLogDataBody},
    sock_split::ReadStream,
 };

@@ -29,7 +31,7 @@ use utils::{
 const HOT_STANDBY_FEEDBACK_TAG_BYTE: u8 = b'h';
 const STANDBY_STATUS_UPDATE_TAG_BYTE: u8 = b'r';
 // zenith extension of replication protocol
-const ZENITH_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
+const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z';

 type FullTransactionId = u64;

@@ -122,15 +124,15 @@ impl ReplicationConn {
                            warn!("unexpected StandbyReply. Read-only postgres replicas are not supported in safekeepers yet.");
                            // timeline.update_replica_state(replica_id, Some(state));
                        }
-                        Some(ZENITH_STATUS_UPDATE_TAG_BYTE) => {
+                        Some(NEON_STATUS_UPDATE_TAG_BYTE) => {
                            // Note: deserializing is on m[9..] because we skip the tag byte and len bytes.
                            let buf = Bytes::copy_from_slice(&m[9..]);
-                            let reply = ZenithFeedback::parse(buf);
+                            let reply = ReplicationFeedback::parse(buf);

-                            trace!("ZenithFeedback is {:?}", reply);
-                            // Only pageserver sends ZenithFeedback, so set the flag.
+                            trace!("ReplicationFeedback is {:?}", reply);
+                            // Only pageserver sends ReplicationFeedback, so set the flag.
                            // This replica is the source of information to resend to compute.
-                            state.zenith_feedback = Some(reply);
+                            state.pageserver_feedback = Some(reply);

                            timeline.update_replica_state(replica_id, state);
                        }
@@ -162,9 +164,8 @@ impl ReplicationConn {
        spg: &mut SafekeeperPostgresHandler,
        pgb: &mut PostgresBackend,
        mut start_pos: Lsn,
-        pageserver_connstr: Option<String>,
    ) -> Result<()> {
-        let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap(), pageserver_connstr = %pageserver_connstr.as_deref().unwrap_or_default()).entered();
+        let _enter = info_span!("WAL sender", timeline = %spg.ztimelineid.unwrap()).entered();

        // spawn the background thread which receives HotStandbyFeedback messages.
        let bg_timeline = Arc::clone(spg.timeline.get());
@@ -192,100 +193,142 @@ impl ReplicationConn {
                }
            })?;

-        let mut wal_seg_size: usize;
-        loop {
-            wal_seg_size = spg.timeline.get().get_state().1.server.wal_seg_size as usize;
-            if wal_seg_size == 0 {
-                error!("Cannot start replication before connecting to wal_proposer");
-                sleep(Duration::from_secs(1));
+        let runtime = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()?;
+
+        runtime.block_on(async move {
+            let (_, persisted_state) = spg.timeline.get().get_state();
+            // TODO: add persisted_state.timeline_start_lsn == Lsn(0) check
+            if persisted_state.server.wal_seg_size == 0 {
+                bail!("Cannot start replication before connecting to walproposer");
+            }
+
+            let wal_end = spg.timeline.get().get_end_of_wal();
+            // Walproposer gets special handling: safekeeper must give proposer all
+            // local WAL till the end, whether committed or not (walproposer will
+            // hang otherwise). That's because walproposer runs the consensus and
+            // synchronizes safekeepers on the most advanced one.
+            //
+            // There is a small risk of this WAL getting concurrently garbaged if
+            // another compute rises which collects majority and starts fixing log
+            // on this safekeeper itself. That's ok as (old) proposer will never be
+            // able to commit such WAL.
+            let stop_pos: Option<Lsn> = if spg.appname == Some("wal_proposer_recovery".to_string())
+            {
+                Some(wal_end)
            } else {
+                None
+            };
+
+            info!("Start replication from {:?} till {:?}", start_pos, stop_pos);
+
+            // switch to copy
+            pgb.write_message(&BeMessage::CopyBothResponse)?;
+
+            let mut end_pos = Lsn(0);
+
+            let mut wal_reader = WalReader::new(
+                spg.conf.timeline_dir(&spg.timeline.get().zttid),
+                &persisted_state,
+                start_pos,
+                spg.conf.wal_backup_enabled,
+            )?;
+
+            // buffer for wal sending, limited by MAX_SEND_SIZE
+            let mut send_buf = vec![0u8; MAX_SEND_SIZE];
+
+            // watcher for commit_lsn updates
+            let mut commit_lsn_watch_rx = spg.timeline.get().get_commit_lsn_watch_rx();
+
+            loop {
+                if let Some(stop_pos) = stop_pos {
+                    if start_pos >= stop_pos {
+                        break; /* recovery finished */
+                    }
+                    end_pos = stop_pos;
+                } else {
+                    /* Wait until we have some data to stream */
+                    let lsn = wait_for_lsn(&mut commit_lsn_watch_rx, start_pos).await?;
+
+                    if let Some(lsn) = lsn {
+                        end_pos = lsn;
+                    } else {
+                        // TODO: also check once in a while whether we are walsender
+                        // to right pageserver.
+                        if spg.timeline.get().stop_walsender(replica_id)? {
+                            // Shut down, timeline is suspended.
+                            // TODO create proper error type for this
+                            bail!("end streaming to {:?}", spg.appname);
+                        }
+
+                        // timeout expired: request pageserver status
+                        pgb.write_message(&BeMessage::KeepAlive(WalSndKeepAlive {
+                            sent_ptr: end_pos.0,
+                            timestamp: get_current_timestamp(),
+                            request_reply: true,
+                        }))
+                        .context("Failed to send KeepAlive message")?;
+                        continue;
+                    }
+                }
+
+                let send_size = end_pos.checked_sub(start_pos).unwrap().0 as usize;
+                let send_size = min(send_size, send_buf.len());
+
+                let send_buf = &mut send_buf[..send_size];
+
+                // read wal into buffer
+                let send_size = wal_reader.read(send_buf).await?;
+                let send_buf = &send_buf[..send_size];
+
+                // Write some data to the network socket.
+                pgb.write_message(&BeMessage::XLogData(XLogDataBody {
+                    wal_start: start_pos.0,
+                    wal_end: end_pos.0,
+                    timestamp: get_current_timestamp(),
+                    data: send_buf,
+                }))
+                .context("Failed to send XLogData")?;
+
+                start_pos += send_size as u64;
+                trace!("sent WAL up to {}", start_pos);
+            }
+
+            Ok(())
+        })
+    }
+}
+
+const POLL_STATE_TIMEOUT: Duration = Duration::from_secs(1);
+
+// Wait until commit_lsn > lsn or the timeout expires. Returns Ok(Some(commit_lsn)) once it exceeds lsn, Ok(None) on timeout.
+async fn wait_for_lsn(rx: &mut Receiver<Lsn>, lsn: Lsn) -> Result<Option<Lsn>> {
+    let commit_lsn: Lsn = *rx.borrow();
+    if commit_lsn > lsn {
+        return Ok(Some(commit_lsn));
+    }
+
+    let res = timeout(POLL_STATE_TIMEOUT, async move {
+        let mut commit_lsn;
+        loop {
+            rx.changed().await?;
+            commit_lsn = *rx.borrow();
+            if commit_lsn > lsn {
                break;
            }
        }
-        let wal_end = spg.timeline.get().get_end_of_wal();
-        // Walproposer gets special handling: safekeeper must give proposer all
-        // local WAL till the end, whether committed or not (walproposer will
-        // hang otherwise). That's because walproposer runs the consensus and
-        // synchronizes safekeepers on the most advanced one.
-        //
-        // There is a small risk of this WAL getting concurrently garbaged if
-        // another compute rises which collects majority and starts fixing log
-        // on this safekeeper itself. That's ok as (old) proposer will never be
-        // able to commit such WAL.
-        let stop_pos: Option<Lsn> = if spg.appname == Some("wal_proposer_recovery".to_string()) {
-            Some(wal_end)
-        } else {
-            None
-        };
-        info!("Start replication from {:?} till {:?}", start_pos, stop_pos);

-        // switch to copy
-        pgb.write_message(&BeMessage::CopyBothResponse)?;
+        Ok(commit_lsn)
+    })
+    .await;

-        let mut end_pos = Lsn(0);
-
-        let mut wal_reader = WalReader::new(
-            spg.conf.timeline_dir(&spg.timeline.get().zttid),
-            wal_seg_size,
-            start_pos,
-        );
-
-        // buffer for wal sending, limited by MAX_SEND_SIZE
-        let mut send_buf = vec![0u8; MAX_SEND_SIZE];
-
-        loop {
-            if let Some(stop_pos) = stop_pos {
-                if start_pos >= stop_pos {
-                    break; /* recovery finished */
-                }
-                end_pos = stop_pos;
-            } else {
-                /* Wait until we have some data to stream */
-                let lsn = spg.timeline.get().wait_for_lsn(start_pos);
-
-                if let Some(lsn) = lsn {
-                    end_pos = lsn;
-                } else {
-                    // TODO: also check once in a while whether we are walsender
-                    // to right pageserver.
-                    if spg.timeline.get().stop_walsender(replica_id)? {
-                        // Shut down, timeline is suspended.
-                        // TODO create proper error type for this
-                        bail!("end streaming to {:?}", spg.appname);
-                    }
-
-                    // timeout expired: request pageserver status
-                    pgb.write_message(&BeMessage::KeepAlive(WalSndKeepAlive {
-                        sent_ptr: end_pos.0,
-                        timestamp: get_current_timestamp(),
-                        request_reply: true,
-                    }))
-                    .context("Failed to send KeepAlive message")?;
-                    continue;
-                }
-            }
-
-            let send_size = end_pos.checked_sub(start_pos).unwrap().0 as usize;
-            let send_size = min(send_size, send_buf.len());
-
-            let send_buf = &mut send_buf[..send_size];
-
-            // read wal into buffer
-            let send_size = wal_reader.read(send_buf)?;
-            let send_buf = &send_buf[..send_size];
-
-            // Write some data to the network socket.
-            pgb.write_message(&BeMessage::XLogData(XLogDataBody {
-                wal_start: start_pos.0,
-                wal_end: end_pos.0,
-                timestamp: get_current_timestamp(),
-                data: send_buf,
-            }))
-            .context("Failed to send XLogData")?;
-
-            start_pos += send_size as u64;
-            trace!("sent WAL up to {}", start_pos);
-        }
-        Ok(())
+    match res {
+        // success
+        Ok(Ok(commit_lsn)) => Ok(Some(commit_lsn)),
+        // error inside closure
+        Ok(Err(err)) => Err(err),
+        // timeout
+        Err(_) => Ok(None),
    }
 }
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -3,7 +3,7 @@

 use anyhow::{bail, Context, Result};

-use etcd_broker::SkTimelineInfo;
+use etcd_broker::subscription_value::SkTimelineInfo;
 use lazy_static::lazy_static;
 use postgres_ffi::xlog_utils::XLogSegNo;

@@ -11,17 +11,17 @@ use serde::Serialize;
 use tokio::sync::watch;

 use std::cmp::{max, min};
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::fs::{self};

-use std::sync::{Arc, Condvar, Mutex, MutexGuard};
-use std::time::Duration;
+use std::sync::{Arc, Mutex, MutexGuard};
+
 use tokio::sync::mpsc::Sender;
 use tracing::*;

 use utils::{
    lsn::Lsn,
-    pq_proto::ZenithFeedback,
+    pq_proto::ReplicationFeedback,
    zid::{NodeId, ZTenantId, ZTenantTimelineId},
 };

@@ -37,8 +37,6 @@ use crate::wal_storage;
 use crate::wal_storage::Storage as wal_storage_iface;
 use crate::SafeKeeperConf;

-const POLL_STATE_TIMEOUT: Duration = Duration::from_secs(1);
-
 /// Replica status update + hot standby feedback
 #[derive(Debug, Clone, Copy)]
 pub struct ReplicaState {
@@ -48,8 +46,8 @@ pub struct ReplicaState {
    pub remote_consistent_lsn: Lsn,
    /// combined hot standby feedback from all replicas
    pub hs_feedback: HotStandbyFeedback,
-    /// Zenith specific feedback received from pageserver, if any
-    pub zenith_feedback: Option<ZenithFeedback>,
+    /// Replication specific feedback received from pageserver, if any
+    pub pageserver_feedback: Option<ReplicationFeedback>,
 }

 impl Default for ReplicaState {
@@ -68,7 +66,7 @@ impl ReplicaState {
                xmin: u64::MAX,
                catalog_xmin: u64::MAX,
            },
-            zenith_feedback: None,
+            pageserver_feedback: None,
        }
    }
 }
@@ -77,9 +75,6 @@ impl ReplicaState {
 struct SharedState {
    /// Safekeeper object
    sk: SafeKeeper<control_file::FileStorage, wal_storage::PhysicalStorage>,
-    /// For receiving-sending wal cooperation
-    /// quorum commit LSN we've notified walsenders about
-    notified_commit_lsn: Lsn,
    /// State of replicas
    replicas: Vec<Option<ReplicaState>>,
    /// True when WAL backup launcher oversees the timeline, making sure WAL is
@@ -95,7 +90,6 @@ struct SharedState {
    /// when tli is inactive instead of having this flag.
    active: bool,
    num_computes: u32,
-    pageserver_connstr: Option<String>,
    last_removed_segno: XLogSegNo,
 }

@@ -113,13 +107,11 @@ impl SharedState {
        let sk = SafeKeeper::new(zttid.timeline_id, control_store, wal_store, conf.my_id)?;

        Ok(Self {
-            notified_commit_lsn: Lsn(0),
            sk,
            replicas: Vec::new(),
            wal_backup_active: false,
            active: false,
            num_computes: 0,
-            pageserver_connstr: None,
            last_removed_segno: 0,
        })
    }
@@ -133,13 +125,11 @@ impl SharedState {
        info!("timeline {} restored", zttid.timeline_id);

        Ok(Self {
-            notified_commit_lsn: Lsn(0),
            sk: SafeKeeper::new(zttid.timeline_id, control_store, wal_store, conf.my_id)?,
            replicas: Vec::new(),
            wal_backup_active: false,
            active: false,
            num_computes: 0,
-            pageserver_connstr: None,
            last_removed_segno: 0,
        })
    }
@@ -152,8 +142,12 @@ impl SharedState {

    /// Mark timeline active/inactive and return whether s3 offloading requires
    /// start/stop action.
-    fn update_status(&mut self) -> bool {
-        self.active = self.is_active();
+    fn update_status(&mut self, ttid: ZTenantTimelineId) -> bool {
+        let is_active = self.is_active();
+        if self.active != is_active {
+            info!("timeline {} active={} now", ttid, is_active);
+        }
+        self.active = is_active;
        self.is_wal_backup_action_pending()
    }

@@ -190,33 +184,10 @@ impl SharedState {
        self.wal_backup_active
    }

-    /// Activate timeline's walsender: start/change timeline information propagated into etcd for further pageserver connections.
-    fn activate_walsender(
-        &mut self,
-        zttid: &ZTenantTimelineId,
-        new_pageserver_connstr: Option<String>,
-    ) {
-        if self.pageserver_connstr != new_pageserver_connstr {
-            self.deactivate_walsender(zttid);
-
-            if new_pageserver_connstr.is_some() {
-                info!(
-                    "timeline {} has activated its walsender with connstr {new_pageserver_connstr:?}",
-                    zttid.timeline_id,
-                );
-            }
-            self.pageserver_connstr = new_pageserver_connstr;
-        }
-    }
-
-    /// Deactivate the timeline: stop sending the timeline data into etcd, so no pageserver can connect for WAL streaming.
-    fn deactivate_walsender(&mut self, zttid: &ZTenantTimelineId) {
-        if let Some(pageserver_connstr) = self.pageserver_connstr.take() {
-            info!(
-                "timeline {} had deactivated its wallsender with connstr {pageserver_connstr:?}",
-                zttid.timeline_id,
-            )
-        }
+    // Can this safekeeper offload to s3? Recently joined safekeepers might not
+    // have necessary WAL.
+    fn can_wal_backup(&self) -> bool {
+        self.sk.state.local_start_lsn <= self.sk.inmem.backup_lsn
    }

    fn get_wal_seg_size(&self) -> usize {
@@ -243,25 +214,25 @@ impl SharedState {
            // we need to know which pageserver compute node considers to be main.
            // See https://github.com/zenithdb/zenith/issues/1171
            //
-            if let Some(zenith_feedback) = state.zenith_feedback {
-                if let Some(acc_feedback) = acc.zenith_feedback {
-                    if acc_feedback.ps_writelsn < zenith_feedback.ps_writelsn {
+            if let Some(pageserver_feedback) = state.pageserver_feedback {
+                if let Some(acc_feedback) = acc.pageserver_feedback {
+                    if acc_feedback.ps_writelsn < pageserver_feedback.ps_writelsn {
                        warn!("More than one pageserver is streaming WAL for the timeline. Feedback resolving is not fully supported yet.");
-                        acc.zenith_feedback = Some(zenith_feedback);
+                        acc.pageserver_feedback = Some(pageserver_feedback);
                    }
                } else {
-                    acc.zenith_feedback = Some(zenith_feedback);
+                    acc.pageserver_feedback = Some(pageserver_feedback);
                }

                // last lsn received by pageserver
                // FIXME if multiple pageservers are streaming WAL, last_received_lsn must be tracked per pageserver.
                // See https://github.com/zenithdb/zenith/issues/1171
-                acc.last_received_lsn = Lsn::from(zenith_feedback.ps_writelsn);
+                acc.last_received_lsn = Lsn::from(pageserver_feedback.ps_writelsn);

                // When at least one pageserver has preserved data up to remote_consistent_lsn,
                // safekeeper is free to delete it, so choose max of all pageservers.
                acc.remote_consistent_lsn = max(
-                    Lsn::from(zenith_feedback.ps_applylsn),
+                    Lsn::from(pageserver_feedback.ps_applylsn),
                    acc.remote_consistent_lsn,
                );
            }
@@ -293,8 +264,6 @@ pub struct Timeline {
    /// For breeding receivers.
    commit_lsn_watch_rx: watch::Receiver<Lsn>,
    mutex: Mutex<SharedState>,
-    /// conditional variable used to notify wal senders
-    cond: Condvar,
 }

 impl Timeline {
@@ -311,24 +280,18 @@ impl Timeline {
            commit_lsn_watch_tx,
            commit_lsn_watch_rx,
            mutex: Mutex::new(shared_state),
-            cond: Condvar::new(),
        }
    }

    /// Register compute connection, starting timeline-related activity if it is
    /// not running yet.
    /// Can fail only if channel to a static thread got closed, which is not normal at all.
-    pub fn on_compute_connect(&self, pageserver_connstr: Option<&String>) -> Result<()> {
+    pub fn on_compute_connect(&self) -> Result<()> {
        let is_wal_backup_action_pending: bool;
        {
            let mut shared_state = self.mutex.lock().unwrap();
            shared_state.num_computes += 1;
-            is_wal_backup_action_pending = shared_state.update_status();
-            // FIXME: currently we always adopt latest pageserver connstr, but we
-            // should have kind of generations assigned by compute to distinguish
-            // the latest one or even pass it through consensus to reliably deliver
-            // to all safekeepers.
-            shared_state.activate_walsender(&self.zttid, pageserver_connstr.cloned());
+            is_wal_backup_action_pending = shared_state.update_status(self.zttid);
        }
        // Wake up wal backup launcher, if offloading not started yet.
        if is_wal_backup_action_pending {
@@ -345,7 +308,7 @@ impl Timeline {
        {
            let mut shared_state = self.mutex.lock().unwrap();
            shared_state.num_computes -= 1;
-            is_wal_backup_action_pending = shared_state.update_status();
+            is_wal_backup_action_pending = shared_state.update_status(self.zttid);
        }
        // Wake up wal backup launcher, if it is time to stop the offloading.
        if is_wal_backup_action_pending {
@@ -360,11 +323,11 @@ impl Timeline {
        let mut shared_state = self.mutex.lock().unwrap();
        if shared_state.num_computes == 0 {
            let replica_state = shared_state.replicas[replica_id].unwrap();
-            let stop = shared_state.notified_commit_lsn == Lsn(0) || // no data at all yet
+            let stop = shared_state.sk.inmem.commit_lsn == Lsn(0) || // no data at all yet
            (replica_state.remote_consistent_lsn != Lsn::MAX && // Lsn::MAX means that we don't know the latest LSN yet.
             replica_state.remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn);
            if stop {
-                shared_state.deactivate_walsender(&self.zttid);
+                shared_state.update_status(self.zttid);
                return Ok(true);
            }
        }
@@ -378,6 +341,12 @@ impl Timeline {
        shared_state.wal_backup_attend()
    }

+    // Can this safekeeper offload to s3? Recently joined safekeepers might not
+    // have necessary WAL.
+    pub fn can_wal_backup(&self) -> bool {
+        self.mutex.lock().unwrap().can_wal_backup()
+    }
+
    /// Deactivates the timeline, assuming it is being deleted.
    /// Returns whether the timeline was already active.
    ///
@@ -426,39 +395,6 @@ impl Timeline {
        })
    }

-    /// Timed wait for an LSN to be committed.
-    ///
-    /// Returns the last committed LSN, which will be at least
-    /// as high as the LSN waited for, or None if timeout expired.
-    ///
-    pub fn wait_for_lsn(&self, lsn: Lsn) -> Option<Lsn> {
-        let mut shared_state = self.mutex.lock().unwrap();
-        loop {
-            let commit_lsn = shared_state.notified_commit_lsn;
-            // This must be `>`, not `>=`.
-            if commit_lsn > lsn {
-                return Some(commit_lsn);
-            }
-            let result = self
-                .cond
-                .wait_timeout(shared_state, POLL_STATE_TIMEOUT)
-                .unwrap();
-            if result.1.timed_out() {
-                return None;
-            }
-            shared_state = result.0
-        }
-    }
-
-    // Notify caught-up WAL senders about new WAL data received
-    // TODO: replace-unify it with commit_lsn_watch.
-    fn notify_wal_senders(&self, shared_state: &mut MutexGuard<SharedState>) {
-        if shared_state.notified_commit_lsn < shared_state.sk.inmem.commit_lsn {
-            shared_state.notified_commit_lsn = shared_state.sk.inmem.commit_lsn;
-            self.cond.notify_all();
-        }
-    }
-
    pub fn get_commit_lsn_watch_rx(&self) -> watch::Receiver<Lsn> {
        self.commit_lsn_watch_rx.clone()
    }
@@ -478,13 +414,11 @@ impl Timeline {
            if let Some(AcceptorProposerMessage::AppendResponse(ref mut resp)) = rmsg {
                let state = shared_state.get_replicas_state();
                resp.hs_feedback = state.hs_feedback;
-                if let Some(zenith_feedback) = state.zenith_feedback {
-                    resp.zenith_feedback = zenith_feedback;
+                if let Some(pageserver_feedback) = state.pageserver_feedback {
+                    resp.pageserver_feedback = pageserver_feedback;
                }
            }

-            // Ping wal sender that new data might be available.
-            self.notify_wal_senders(&mut shared_state);
            commit_lsn = shared_state.sk.inmem.commit_lsn;
        }
        self.commit_lsn_watch_tx.send(commit_lsn)?;
@@ -511,9 +445,9 @@ impl Timeline {
    }

    /// Prepare public safekeeper info for reporting.
-    pub fn get_public_info(&self, conf: &SafeKeeperConf) -> anyhow::Result<SkTimelineInfo> {
+    pub fn get_public_info(&self, conf: &SafeKeeperConf) -> SkTimelineInfo {
        let shared_state = self.mutex.lock().unwrap();
-        Ok(SkTimelineInfo {
+        SkTimelineInfo {
            last_log_term: Some(shared_state.sk.get_epoch()),
            flush_lsn: Some(shared_state.sk.wal_store.flush_lsn()),
            // note: this value is not flushed to control file yet and can be lost
@@ -525,9 +459,8 @@ impl Timeline {
            )),
            peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
            safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
-            pageserver_connstr: shared_state.pageserver_connstr.clone(),
            backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
-        })
+        }
    }

    /// Update timeline state with peer safekeeper data.
@@ -546,8 +479,7 @@ impl Timeline {
                return Ok(());
            }
            shared_state.sk.record_safekeeper_info(sk_info)?;
-            self.notify_wal_senders(&mut shared_state);
-            is_wal_backup_action_pending = shared_state.update_status();
+            is_wal_backup_action_pending = shared_state.update_status(self.zttid);
            commit_lsn = shared_state.sk.inmem.commit_lsn;
        }
        self.commit_lsn_watch_tx.send(commit_lsn)?;
@@ -693,6 +625,8 @@ impl GlobalTimelines {
        zttid: ZTenantTimelineId,
        create: bool,
    ) -> Result<Arc<Timeline>> {
+        let _enter = info_span!("", timeline = %zttid.tenant_id).entered();
+
        let mut state = TIMELINES_STATE.lock().unwrap();

        match state.timelines.get(&zttid) {
@@ -735,7 +669,7 @@ impl GlobalTimelines {
    }

    /// Get ZTenantTimelineIDs of all active timelines.
-    pub fn get_active_timelines() -> Vec<ZTenantTimelineId> {
+    pub fn get_active_timelines() -> HashSet<ZTenantTimelineId> {
        let state = TIMELINES_STATE.lock().unwrap();
        state
            .timelines
--- a/safekeeper/src/wal_backup.rs
+++ b/safekeeper/src/wal_backup.rs
@@ -1,4 +1,8 @@
 use anyhow::{Context, Result};
+use etcd_broker::subscription_key::{
+    NodeKind, OperationKind, SkOperationKind, SubscriptionKey, SubscriptionKind,
+};
+use tokio::io::AsyncRead;
 use tokio::task::JoinHandle;

 use std::cmp::min;
@@ -7,7 +11,9 @@ use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::time::Duration;

-use postgres_ffi::xlog_utils::{XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr, PG_TLI};
+use postgres_ffi::xlog_utils::{
+    XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr, MAX_SEND_SIZE, PG_TLI,
+};
 use remote_storage::{GenericRemoteStorage, RemoteStorage};
 use tokio::fs::File;
 use tokio::runtime::Builder;
@@ -26,8 +32,6 @@ use crate::{broker, SafeKeeperConf};

 use once_cell::sync::OnceCell;

-const BACKUP_ELECTION_NAME: &str = "WAL_BACKUP";
-
 const BROKER_CONNECTION_RETRY_DELAY_MS: u64 = 1000;

 const UPLOAD_FAILURE_RETRY_MIN_MS: u64 = 10;
@@ -48,14 +52,10 @@ pub fn wal_backup_launcher_thread_main(
    });
 }

-/// Check whether wal backup is required for timeline and mark that launcher is
-/// aware of current status (if timeline exists).
-fn is_wal_backup_required(zttid: ZTenantTimelineId) -> bool {
-    if let Some(tli) = GlobalTimelines::get_loaded(zttid) {
-        tli.wal_backup_attend()
-    } else {
-        false
-    }
+/// Check whether wal backup is required for timeline. If yes, mark that launcher is
+/// aware of current status and return the timeline.
+fn is_wal_backup_required(zttid: ZTenantTimelineId) -> Option<Arc<Timeline>> {
+    GlobalTimelines::get_loaded(zttid).filter(|t| t.wal_backup_attend())
 }

 struct WalBackupTaskHandle {
@@ -63,6 +63,56 @@ struct WalBackupTaskHandle {
    handle: JoinHandle<()>,
 }

+struct WalBackupTimelineEntry {
+    timeline: Arc<Timeline>,
+    handle: Option<WalBackupTaskHandle>,
+}
+
+/// Start the per-timeline WAL backup task, if it makes sense for this safekeeper to offload.
+fn consider_start_task(
+    conf: &SafeKeeperConf,
+    zttid: ZTenantTimelineId,
+    task: &mut WalBackupTimelineEntry,
+) {
+    if !task.timeline.can_wal_backup() {
+        return;
+    }
+    info!("starting WAL backup task for {}", zttid);
+
+    // TODO: decide who should offload right here by simply checking current
+    // state instead of running elections in offloading task.
+    let election_name = SubscriptionKey {
+        cluster_prefix: conf.broker_etcd_prefix.clone(),
+        kind: SubscriptionKind::Operation(
+            zttid,
+            NodeKind::Safekeeper,
+            OperationKind::Safekeeper(SkOperationKind::WalBackup),
+        ),
+    }
+    .watch_key();
+    let my_candidate_name = broker::get_candiate_name(conf.my_id);
+    let election = broker::Election::new(
+        election_name,
+        my_candidate_name,
+        conf.broker_endpoints.clone(),
+    );
+
+    let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
+    let timeline_dir = conf.timeline_dir(&zttid);
+
+    let handle = tokio::spawn(
+        backup_task_main(zttid, timeline_dir, shutdown_rx, election)
+            .instrument(info_span!("WAL backup task", zttid = %zttid)),
+    );
+
+    task.handle = Some(WalBackupTaskHandle {
+        shutdown_tx,
+        handle,
+    });
+}
+
+const CHECK_TASKS_INTERVAL_MSEC: u64 = 1000;
+
 /// Sits on wal_backup_launcher_rx and starts/stops per timeline wal backup
 /// tasks. Having this in separate task simplifies locking, allows to reap
 /// panics and separate elections from offloading itself.
@@ -71,7 +121,7 @@ async fn wal_backup_launcher_main_loop(
    mut wal_backup_launcher_rx: Receiver<ZTenantTimelineId>,
 ) {
    info!(
-        "WAL backup launcher: started, remote config {:?}",
+        "WAL backup launcher started, remote config {:?}",
        conf.remote_storage
    );

@@ -82,60 +132,50 @@ async fn wal_backup_launcher_main_loop(
        })
    });

-    let mut tasks: HashMap<ZTenantTimelineId, WalBackupTaskHandle> = HashMap::new();
+    // Presence in this map means the launcher is aware that s3 offloading is
+    // needed for the timeline, but the task is started only if it makes sense
+    // to offload from this safekeeper.
+    let mut tasks: HashMap<ZTenantTimelineId, WalBackupTimelineEntry> = HashMap::new();

+    let mut ticker = tokio::time::interval(Duration::from_millis(CHECK_TASKS_INTERVAL_MSEC));
    loop {
-        // channel is never expected to get closed
-        let zttid = wal_backup_launcher_rx.recv().await.unwrap();
-        let is_wal_backup_required = is_wal_backup_required(zttid);
-        if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
-            continue; /* just drain the channel and do nothing */
-        }
-        // do we need to do anything at all?
-        if is_wal_backup_required != tasks.contains_key(&zttid) {
-            if is_wal_backup_required {
-                // need to start the task
-                info!("starting WAL backup task for {}", zttid);
+        tokio::select! {
+            zttid = wal_backup_launcher_rx.recv() => {
+                // channel is never expected to get closed
+                let zttid = zttid.unwrap();
+                if conf.remote_storage.is_none() || !conf.wal_backup_enabled {
+                    continue; /* just drain the channel and do nothing */
+                }
+                let timeline = is_wal_backup_required(zttid);
+                // do we need to do anything at all?
+                if timeline.is_some() != tasks.contains_key(&zttid) {
+                    if let Some(timeline) = timeline {
+                        // need to start the task
+                        let entry = tasks.entry(zttid).or_insert(WalBackupTimelineEntry {
+                            timeline,
+                            handle: None,
+                        });
+                        consider_start_task(&conf, zttid, entry);
+                    } else {
+                        // need to stop the task
+                        info!("stopping WAL backup task for {}", zttid);

-                // TODO: decide who should offload in launcher itself by simply checking current state
-                let election_name = broker::get_campaign_name(
-                    BACKUP_ELECTION_NAME,
-                    &conf.broker_etcd_prefix,
-                    zttid,
-                );
-                let my_candidate_name = broker::get_candiate_name(conf.my_id);
-                let election = broker::Election::new(
-                    election_name,
-                    my_candidate_name,
-                    conf.broker_endpoints.clone(),
-                );
-
-                let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
-                let timeline_dir = conf.timeline_dir(&zttid);
-
-                let handle = tokio::spawn(
-                    backup_task_main(zttid, timeline_dir, shutdown_rx, election)
-                        .instrument(info_span!("WAL backup task", zttid = %zttid)),
-                );
-
-                tasks.insert(
-                    zttid,
-                    WalBackupTaskHandle {
-                        shutdown_tx,
-                        handle,
-                    },
-                );
-            } else {
-                // need to stop the task
-                info!("stopping WAL backup task for {}", zttid);
-
-                let wb_handle = tasks.remove(&zttid).unwrap();
-                // Tell the task to shutdown. Error means task exited earlier, that's ok.
-                let _ = wb_handle.shutdown_tx.send(()).await;
-                // Await the task itself. TODO: restart panicked tasks earlier.
-                // Hm, why I can't await on reference to handle?
-                if let Err(e) = wb_handle.handle.await {
-                    warn!("WAL backup task for {} panicked: {}", zttid, e);
+                        let entry = tasks.remove(&zttid).unwrap();
+                        if let Some(wb_handle) = entry.handle {
+                            // Tell the task to shutdown. Error means task exited earlier, that's ok.
+                            let _ = wb_handle.shutdown_tx.send(()).await;
+                            // Await the task itself. TODO: restart panicked tasks earlier.
+                            if let Err(e) = wb_handle.handle.await {
+                                warn!("WAL backup task for {} panicked: {}", zttid, e);
+                            }
+                        }
+                    }
+                }
+            }
+            // Start known tasks, if needed and possible.
+            _ = ticker.tick() => {
+                for (zttid, entry) in tasks.iter_mut().filter(|(_, entry)| entry.handle.is_none()) {
+                    consider_start_task(&conf, *zttid, entry);
                }
            }
        }
@@ -200,20 +240,11 @@ impl WalBackupTask {
        loop {
            let mut retry_attempt = 0u32;

-            if let Some(l) = self.leader.take() {
-                l.give_up().await;
-            }
-
            info!("acquiring leadership");
-            match broker::get_leader(&self.election).await {
-                Ok(l) => {
-                    self.leader = Some(l);
-                }
-                Err(e) => {
-                    error!("error during leader election {:?}", e);
-                    sleep(Duration::from_millis(BROKER_CONNECTION_RETRY_DELAY_MS)).await;
-                    continue;
-                }
+            if let Err(e) = broker::get_leader(&self.election, &mut self.leader).await {
+                error!("error during leader election {:?}", e);
+                sleep(Duration::from_millis(BROKER_CONNECTION_RETRY_DELAY_MS)).await;
+                continue;
            }
            info!("acquired leadership");

@@ -417,3 +448,49 @@ async fn backup_object(source_file: &Path, size: usize) -> Result<()> {

    Ok(())
 }
+
+pub async fn read_object(
+    file_path: PathBuf,
+    offset: u64,
+) -> (impl AsyncRead, JoinHandle<Result<()>>) {
+    let storage = REMOTE_STORAGE.get().expect("failed to get remote storage");
+
+    let (mut pipe_writer, pipe_reader) = tokio::io::duplex(MAX_SEND_SIZE);
+
+    let copy_result = tokio::spawn(async move {
+        let res = match storage.as_ref().unwrap() {
+            GenericRemoteStorage::Local(local_storage) => {
+                let source = local_storage.remote_object_id(&file_path)?;
+
+                info!(
+                    "local download about to start from {} at offset {}",
+                    source.display(),
+                    offset
+                );
+                local_storage
+                    .download_byte_range(&source, offset, None, &mut pipe_writer)
+                    .await
+            }
+            GenericRemoteStorage::S3(s3_storage) => {
+                let s3key = s3_storage.remote_object_id(&file_path)?;
+
+                info!(
+                    "S3 download about to start from {:?} at offset {}",
+                    s3key, offset
+                );
+                s3_storage
+                    .download_byte_range(&s3key, offset, None, &mut pipe_writer)
+                    .await
+            }
+        };
+
+        if let Err(e) = res {
+            error!("failed to download WAL segment from remote storage: {}", e);
+            Err(e)
+        } else {
+            Ok(())
+        }
+    });
+
+    (pipe_reader, copy_result)
+}
--- a/safekeeper/src/wal_storage.rs
+++ b/safekeeper/src/wal_storage.rs
@@ -8,7 +8,9 @@
 //! Note that last file has `.partial` suffix, that's different from postgres.

 use anyhow::{anyhow, bail, Context, Result};
-use std::io::{Read, Seek, SeekFrom};
+use std::io::{self, Seek, SeekFrom};
+use std::pin::Pin;
+use tokio::io::AsyncRead;

 use lazy_static::lazy_static;
 use postgres_ffi::xlog_utils::{
@@ -26,6 +28,7 @@ use utils::{lsn::Lsn, zid::ZTenantTimelineId};

 use crate::safekeeper::SafeKeeperState;

+use crate::wal_backup::read_object;
 use crate::SafeKeeperConf;
 use postgres_ffi::xlog_utils::{XLogFileName, XLOG_BLCKSZ};

@@ -33,6 +36,8 @@ use postgres_ffi::waldecoder::WalStreamDecoder;

 use metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECONDS_BUCKETS};

+use tokio::io::{AsyncReadExt, AsyncSeekExt};
+
 lazy_static! {
    // The prometheus crate does not support u64 yet, i64 only (see `IntGauge`).
    // i64 is faster than f64, so update to u64 when available.
@@ -504,69 +509,123 @@ pub struct WalReader {
    timeline_dir: PathBuf,
    wal_seg_size: usize,
    pos: Lsn,
-    file: Option<File>,
+    wal_segment: Option<Pin<Box<dyn AsyncRead>>>,
+
+    enable_remote_read: bool,
+    // S3 will be used to read WAL if LSN is not available locally
+    local_start_lsn: Lsn,
 }

 impl WalReader {
-    pub fn new(timeline_dir: PathBuf, wal_seg_size: usize, pos: Lsn) -> Self {
-        Self {
-            timeline_dir,
-            wal_seg_size,
-            pos,
-            file: None,
+    pub fn new(
+        timeline_dir: PathBuf,
+        state: &SafeKeeperState,
+        start_pos: Lsn,
+        enable_remote_read: bool,
+    ) -> Result<Self> {
+        if start_pos < state.timeline_start_lsn {
+            bail!(
+                "Requested streaming from {}, which is before the start of the timeline {}",
+                start_pos,
+                state.timeline_start_lsn
+            );
        }
+
+        // TODO: add state.timeline_start_lsn == Lsn(0) check
+        if state.server.wal_seg_size == 0 || state.local_start_lsn == Lsn(0) {
+            bail!("state uninitialized, no data to read");
+        }
+
+        Ok(Self {
+            timeline_dir,
+            wal_seg_size: state.server.wal_seg_size as usize,
+            pos: start_pos,
+            wal_segment: None,
+            enable_remote_read,
+            local_start_lsn: state.local_start_lsn,
+        })
    }

-    pub fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
-        // Take the `File` from `wal_file`, or open a new file.
-        let mut file = match self.file.take() {
-            Some(file) => file,
-            None => {
-                // Open a new file.
-                let segno = self.pos.segment_number(self.wal_seg_size);
-                let wal_file_name = XLogFileName(PG_TLI, segno, self.wal_seg_size);
-                let wal_file_path = self.timeline_dir.join(wal_file_name);
-                Self::open_wal_file(&wal_file_path)?
-            }
+    pub async fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let mut wal_segment = match self.wal_segment.take() {
+            Some(reader) => reader,
+            None => self.open_segment().await?,
        };

-        let xlogoff = self.pos.segment_offset(self.wal_seg_size) as usize;
-
        // How much to read and send in message? We cannot cross the WAL file
        // boundary, and we don't want send more than provided buffer.
+        let xlogoff = self.pos.segment_offset(self.wal_seg_size) as usize;
        let send_size = min(buf.len(), self.wal_seg_size - xlogoff);

        // Read some data from the file.
        let buf = &mut buf[0..send_size];
-        file.seek(SeekFrom::Start(xlogoff as u64))
-            .and_then(|_| file.read_exact(buf))
-            .context("Failed to read data from WAL file")?;
-
+        let send_size = wal_segment.read_exact(buf).await?;
        self.pos += send_size as u64;

-        // Decide whether to reuse this file. If we don't set wal_file here
-        // a new file will be opened next time.
+        // Decide whether to reuse this file. If we don't set wal_segment here
+        // a new reader will be opened next time.
        if self.pos.segment_offset(self.wal_seg_size) != 0 {
-            self.file = Some(file);
+            self.wal_segment = Some(wal_segment);
        }

        Ok(send_size)
    }

+    /// Open WAL segment at the current position of the reader.
+    async fn open_segment(&self) -> Result<Pin<Box<dyn AsyncRead>>> {
+        let xlogoff = self.pos.segment_offset(self.wal_seg_size) as usize;
+        let segno = self.pos.segment_number(self.wal_seg_size);
+        let wal_file_name = XLogFileName(PG_TLI, segno, self.wal_seg_size);
+        let wal_file_path = self.timeline_dir.join(wal_file_name);
+
+        // Try to open local file, if we may have WAL locally
+        if self.pos >= self.local_start_lsn {
+            let res = Self::open_wal_file(&wal_file_path).await;
+            match res {
+                Ok(mut file) => {
+                    file.seek(SeekFrom::Start(xlogoff as u64)).await?;
+                    return Ok(Box::pin(file));
+                }
+                Err(e) => {
+                    let is_not_found = e.chain().any(|e| {
+                        if let Some(e) = e.downcast_ref::<io::Error>() {
+                            e.kind() == io::ErrorKind::NotFound
+                        } else {
+                            false
+                        }
+                    });
+                    if !is_not_found {
+                        return Err(e);
+                    }
+                    // NotFound is expected, fall through to remote read
+                }
+            };
+        }
+
+        // Try to open remote file, if remote reads are enabled
+        if self.enable_remote_read {
+            let (reader, _) = read_object(wal_file_path, xlogoff as u64).await;
+            return Ok(Box::pin(reader));
+        }
+
+        bail!("WAL segment is not found")
+    }
+
    /// Helper function for opening a wal file.
-    fn open_wal_file(wal_file_path: &Path) -> Result<File> {
+    async fn open_wal_file(wal_file_path: &Path) -> Result<tokio::fs::File> {
        // First try to open the .partial file.
        let mut partial_path = wal_file_path.to_owned();
        partial_path.set_extension("partial");
-        if let Ok(opened_file) = File::open(&partial_path) {
+        if let Ok(opened_file) = tokio::fs::File::open(&partial_path).await {
            return Ok(opened_file);
        }

        // If that failed, try it without the .partial extension.
-        File::open(&wal_file_path)
+        tokio::fs::File::open(&wal_file_path)
+            .await
            .with_context(|| format!("Failed to open WAL file {:?}", wal_file_path))
            .map_err(|e| {
-                error!("{}", e);
+                warn!("{}", e);
                e
            })
    }
--- a/scripts/export_import_betwen_pageservers.py
+++ b/scripts/export_import_betwen_pageservers.py
@@ -0,0 +1,222 @@
+#
+# Simple script to export the timelines of the given tenants from one pageserver
+# and import them into another pageserver
+#
+from os import path
+import os
+import requests
+import uuid
+import subprocess
+import argparse
+from pathlib import Path
+
+# directory to save exported tar files to
+basepath = path.dirname(path.abspath(__file__))
+
+
+class NeonPageserverApiException(Exception):
+    """Raised when a pageserver HTTP API request comes back with an error status."""
+
+
+class NeonPageserverHttpClient(requests.Session):
+    """
+    Thin `requests.Session` wrapper for the pageserver HTTP management API.
+
+    Wraps only the endpoints this script uses: `/v1/status`, `/v1/tenant` and
+    `/v1/tenant/<tenant_id>/timeline`.
+    """
+    def __init__(self, host, port):
+        """Store the pageserver `host` and HTTP `port`; every request URL is built from them."""
+        super().__init__()
+        self.host = host
+        self.port = port
+
+    def verbose_error(self, res: requests.Response):
+        """
+        Raise NeonPageserverApiException if `res` carries an error status.
+
+        The exception message is the 'msg' field of the JSON response body, or an
+        empty string when the body cannot be decoded or has no such field.
+        The original `requests` exception is chained as the cause.
+        """
+        try:
+            res.raise_for_status()
+        except requests.RequestException as e:
+            try:
+                msg = res.json()['msg']
+            except Exception:
+                # Body is not JSON or lacks 'msg'. Catch Exception instead of using a
+                # bare `except:` so KeyboardInterrupt/SystemExit are not swallowed.
+                msg = ''
+            raise NeonPageserverApiException(msg) from e
+
+    def check_status(self):
+        """GET /v1/status; raises `requests.HTTPError` on an error status."""
+        self.get(f"http://{self.host}:{self.port}/v1/status").raise_for_status()
+
+    def tenant_list(self):
+        """Return the JSON list served by GET /v1/tenant."""
+        res = self.get(f"http://{self.host}:{self.port}/v1/tenant")
+        self.verbose_error(res)
+        res_json = res.json()
+        assert isinstance(res_json, list)
+        return res_json
+
+    def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
+        """
+        POST /v1/tenant to create the tenant `new_tenant_id`.
+
+        A 409 response means the tenant already exists: it is only reported when
+        `ok_if_exists` is truthy, otherwise `requests.HTTPError` is raised.
+        Any other error status raises NeonPageserverApiException.
+
+        Returns `new_tenant_id` unchanged.
+        """
+        res = self.post(
+            f"http://{self.host}:{self.port}/v1/tenant",
+            json={
+                'new_tenant_id': new_tenant_id.hex,
+            },
+        )
+
+        if res.status_code == 409:
+            if ok_if_exists:
+                print(f'could not create tenant: already exists for id {new_tenant_id}')
+            else:
+                res.raise_for_status()
+        elif res.status_code == 201:
+            print(f'created tenant {new_tenant_id}')
+        else:
+            self.verbose_error(res)
+
+        return new_tenant_id
+
+    def timeline_list(self, tenant_id: uuid.UUID):
+        """Return the JSON list served by GET /v1/tenant/<tenant_id>/timeline."""
+        res = self.get(f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
+        self.verbose_error(res)
+        res_json = res.json()
+        assert isinstance(res_json, list)
+        return res_json
+
+
+def _timeline_file(timeline, prefix, suffix):
+    """Return the path in `basepath` of the `<prefix><tenant_id>_<timeline_id><suffix>` file."""
+    name = f"{prefix}{timeline['tenant_id']}_{timeline['timeline_id']}{suffix}"
+    return path.join(basepath, name)
+
+
+def _run_psql(cmd, what, stderr_filename, **kwargs):
+    """Run `cmd` via subprocess.run and raise RuntimeError if it exits with a non-zero status."""
+    result = subprocess.run(cmd, **kwargs)
+    if result.returncode != 0:
+        raise RuntimeError(
+            f"{what} failed with exit code {result.returncode}, see {stderr_filename}")
+
+
+def _export_timeline(psql_path, connstr, timeline, psql_env):
+    """Run `fullbackup` for `timeline` against `connstr` and save the tar output in `basepath`."""
+    query = f"fullbackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']}"
+
+    cmd = [str(psql_path), "--no-psqlrc", connstr, "-c", query]
+    print(f"Running: {cmd}")
+
+    tar_filename = _timeline_file(timeline, '', '.tar')
+    stderr_filename = _timeline_file(timeline, '', '.stderr')
+
+    # The tar stream is binary data, so the output file is opened in binary mode.
+    with open(tar_filename, 'wb') as stdout_f, open(stderr_filename, 'w') as stderr_f:
+        print(f"(capturing output to {tar_filename})")
+        _run_psql(cmd, "export", stderr_filename, stdout=stdout_f, stderr=stderr_f, env=psql_env)
+
+    print(f"Done export: {tar_filename}")
+
+
+def _import_timeline(psql_path, connstr, timeline, psql_env):
+    """Feed the previously exported tar of `timeline` to `import basebackup` on `connstr`."""
+    lsn = timeline['local']['last_record_lsn']
+    # The same LSN is passed twice, exactly as before.
+    # NOTE(review): presumably these are the base and end LSN of the import - confirm
+    # against the pageserver's `import basebackup` command.
+    import_cmd = f"import basebackup {timeline['tenant_id']} {timeline['timeline_id']} {lsn} {lsn}"
+    tar_filename = _timeline_file(timeline, '', '.tar')
+    stderr_filename = _timeline_file(timeline, 'import_', '.stderr')
+    stdout_filename = _timeline_file(timeline, 'import_', '.stdout')
+
+    # Pass an argv list and hand the tar file to psql as stdin instead of building a
+    # `cat ... | psql ...` shell string: no shell quoting issues, and psql's own exit
+    # status is what gets checked.
+    cmd = [str(psql_path), connstr, "-c", import_cmd]
+    print(f"Running: {cmd} < {tar_filename}")
+
+    with open(tar_filename, 'rb') as tar_f, \
+            open(stdout_filename, 'w') as stdout_f, \
+            open(stderr_filename, 'w') as stderr_f:
+        print(f"(capturing output to {stdout_filename})")
+        _run_psql(cmd,
+                  "import",
+                  stderr_filename,
+                  stdin=tar_f,
+                  stdout=stdout_f,
+                  stderr=stderr_f,
+                  env=psql_env)
+
+    print("Done import")
+
+
+def main(args: argparse.Namespace):
+    """
+    Migrate the tenants in `args.tenants` from the old pageserver to the new one.
+
+    For every tenant the timeline list is read from the old pageserver. Unless
+    `args.only_import` is set, the tenant is created on the new pageserver and
+    each timeline is exported to `<tenant_id>_<timeline_id>.tar` next to this
+    script. Each tar is then imported into the new pageserver.
+
+    Raises RuntimeError as soon as a psql export or import exits with a non-zero
+    status, so that a broken export is never imported.
+    """
+    old_pageserver_host = args.old_pageserver_host
+    new_pageserver_host = args.new_pageserver_host
+    tenants = args.tenants
+
+    old_http_client = NeonPageserverHttpClient(old_pageserver_host, args.old_pageserver_http_port)
+    old_http_client.check_status()
+    old_pageserver_connstr = f"postgresql://{old_pageserver_host}:{args.old_pageserver_pg_port}"
+
+    new_http_client = NeonPageserverHttpClient(new_pageserver_host, args.new_pageserver_http_port)
+    new_http_client.check_status()
+    new_pageserver_connstr = f"postgresql://{new_pageserver_host}:{args.new_pageserver_pg_port}"
+
+    psql_env = {**os.environ, 'LD_LIBRARY_PATH': '/usr/local/lib/'}
+    # The same psql binary is used for export and import.
+    psql_path = Path(args.psql_path)
+
+    for tenant_id in tenants:
+        print(f"Tenant: {tenant_id}")
+        # The timeline list always comes from the old pageserver, even with --only-import.
+        timelines = old_http_client.timeline_list(uuid.UUID(tenant_id))
+        print(f"Timelines: {timelines}")
+
+        # Create tenant in new pageserver
+        if not args.only_import:
+            new_http_client.tenant_create(uuid.UUID(tenant_id), args.ok_if_exists)
+
+        for timeline in timelines:
+            # Export timelines from old pageserver
+            if not args.only_import:
+                _export_timeline(psql_path, old_pageserver_connstr, timeline, psql_env)
+
+            # Import timelines to new pageserver
+            _import_timeline(psql_path, new_pageserver_connstr, timeline, psql_env)
+
+
+# Command-line entry point: parse the migration options and run main().
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--tenant-id',
+        dest='tenants',
+        required=True,
+        nargs='+',
+        help='Id of the tenant to migrate. You can pass multiple arguments',
+    )
+    parser.add_argument(
+        '--from-host',
+        dest='old_pageserver_host',
+        required=True,
+        help='Host of the pageserver to migrate data from',
+    )
+    parser.add_argument(
+        '--from-http-port',
+        dest='old_pageserver_http_port',
+        required=False,
+        type=int,
+        default=9898,
+        help='HTTP port of the pageserver to migrate data from. Default: 9898',
+    )
+    parser.add_argument(
+        '--from-pg-port',
+        dest='old_pageserver_pg_port',
+        required=False,
+        type=int,
+        default=6400,
+        help='pg port of the pageserver to migrate data from. Default: 6400',
+    )
+    parser.add_argument(
+        '--to-host',
+        dest='new_pageserver_host',
+        required=True,
+        help='Host of the pageserver to migrate data to',
+    )
+    parser.add_argument(
+        '--to-http-port',
+        dest='new_pageserver_http_port',
+        required=False,
+        default=9898,
+        type=int,
+        help='HTTP port of the pageserver to migrate data to. Default: 9898',
+    )
+    parser.add_argument(
+        '--to-pg-port',
+        dest='new_pageserver_pg_port',
+        required=False,
+        default=6400,
+        type=int,
+        help='pg port of the pageserver to migrate data to. Default: 6400',
+    )
+    # Without an action/nargs this option demanded a value, although it is meant
+    # to be a switch. nargs='?' with const=True lets it be passed as a bare flag
+    # while still accepting the old "flag plus value" spelling.
+    parser.add_argument(
+        '--ignore-tenant-exists',
+        dest='ok_if_exists',
+        required=False,
+        nargs='?',
+        const=True,
+        default=False,
+        help=
+        'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.',
+    )
+    parser.add_argument(
+        '--psql-path',
+        dest='psql_path',
+        required=False,
+        default='/usr/local/bin/psql',
+        help='Path to the psql binary. Default: /usr/local/bin/psql',
+    )
+    parser.add_argument(
+        '--only-import',
+        dest='only_import',
+        required=False,
+        default=False,
+        action='store_true',
+        help='Skip export and tenant creation part',
+    )
+    args = parser.parse_args()
+    main(args)
--- a/scripts/generate_perf_report_page.py
+++ b/scripts/generate_perf_report_page.py
@@ -26,6 +26,7 @@ KEY_EXCLUDE_FIELDS = frozenset({
 })
 NEGATIVE_COLOR = 'negative'
 POSITIVE_COLOR = 'positive'
+EPS = 1e-6


@dataclass
@@ -120,7 +121,8 @@ def get_row_values(columns: List[str], run_result: SuitRun,
            # this might happen when new metric is added and there is no value for it in previous run
            # let this be here, TODO add proper handling when this actually happens
            raise ValueError(f'{column} not found in previous result')
-        ratio = float(value) / float(prev_value['value']) - 1
+        # adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero
+        ratio = (float(value) + EPS) / (float(prev_value['value']) + EPS) - 1
        ratio_display, color = format_ratio(ratio, current_value['report'])
        row_values.append(RowValue(value, color, ratio_display))
    return row_values
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,6 +28,10 @@ strict = true
 # There is some work in progress, though: https://github.com/MagicStack/asyncpg/pull/577
 ignore_missing_imports = true

+[mypy-pg8000.*]
+# Used only in testing clients
+ignore_missing_imports = true
+
 [mypy-cached_property.*]
 ignore_missing_imports = true

--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -45,7 +45,7 @@ If you want to run all tests that have the string "bench" in their names:

 Useful environment variables:

-`ZENITH_BIN`: The directory where zenith binaries can be found.
+`NEON_BIN`: The directory where neon binaries can be found.
 `POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found.
 `TEST_OUTPUT`: Set the directory where test state and test output files
 should go.
--- a/test_runner/batch_others/test_ancestor_branch.py
+++ b/test_runner/batch_others/test_ancestor_branch.py
@@ -1,6 +1,3 @@
-from contextlib import closing
-
-import psycopg2.extras
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
@@ -24,7 +21,7 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
            'compaction_target_size': '4194304',
        })

-    env.pageserver.safe_psql("failpoints flush-frozen=sleep(10000)")
+    env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)")

    pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
    branch0_cur = pg_branch0.connect().cursor()
--- a/test_runner/batch_others/test_auth.py
+++ b/test_runner/batch_others/test_auth.py
@@ -1,13 +1,11 @@
 from contextlib import closing
-from typing import Iterator
-from uuid import UUID, uuid4
+from uuid import uuid4
 from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
-from requests.exceptions import HTTPError
 import pytest


 def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
-    neon_env_builder.pageserver_auth_enabled = True
+    neon_env_builder.auth_enabled = True
    env = neon_env_builder.init_start()

    ps = env.pageserver
@@ -54,7 +52,7 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):

@pytest.mark.parametrize('with_safekeepers', [False, True])
 def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
-    neon_env_builder.pageserver_auth_enabled = True
+    neon_env_builder.auth_enabled = True
    if with_safekeepers:
        neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()
--- a/test_runner/batch_others/test_backpressure.py
+++ b/test_runner/batch_others/test_backpressure.py
@@ -1,11 +1,9 @@
 from contextlib import closing, contextmanager
 import psycopg2.extras
 import pytest
-from fixtures.neon_fixtures import PgProtocol, NeonEnvBuilder
+from fixtures.neon_fixtures import NeonEnvBuilder
 from fixtures.log_helper import log
-import os
 import time
-import asyncpg
 from fixtures.neon_fixtures import Postgres
 import threading

--- a/test_runner/batch_others/test_basebackup_error.py
+++ b/test_runner/batch_others/test_basebackup_error.py
@@ -1,8 +1,6 @@
 import pytest
-from contextlib import closing

 from fixtures.neon_fixtures import NeonEnv
-from fixtures.log_helper import log


 #
--- a/test_runner/batch_others/test_branch_behind.py
+++ b/test_runner/batch_others/test_branch_behind.py
@@ -1,4 +1,3 @@
-import subprocess
 from contextlib import closing

 import psycopg2.extras
--- a/test_runner/batch_others/test_createdropdb.py
+++ b/test_runner/batch_others/test_createdropdb.py
@@ -35,9 +35,14 @@ def test_createdb(neon_simple_env: NeonEnv):
        with closing(db.connect(dbname='foodb')) as conn:
            with conn.cursor() as cur:
                # Check database size in both branches
-                cur.execute(
-                    'select pg_size_pretty(pg_database_size(%s)), pg_size_pretty(sum(pg_relation_size(oid))) from pg_class where relisshared is false;',
-                    ('foodb', ))
+                cur.execute("""
+                    select pg_size_pretty(pg_database_size('foodb')),
+                    pg_size_pretty(
+                    sum(pg_relation_size(oid, 'main'))
+                    +sum(pg_relation_size(oid, 'vm'))
+                    +sum(pg_relation_size(oid, 'fsm'))
+                    ) FROM pg_class where relisshared is false
+                   """)
                res = cur.fetchone()
                # check that dbsize equals sum of all relation sizes, excluding shared ones
                # This is how we define dbsize in neon for now
--- a/test_runner/batch_others/test_fullbackup.py
+++ b/test_runner/batch_others/test_fullbackup.py
@@ -0,0 +1,68 @@
+from contextlib import closing
+
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, PortDistributor, VanillaPostgres
+from fixtures.neon_fixtures import pg_distrib_dir
+import os
+from fixtures.utils import subprocess_capture
+
+num_rows = 1000
+
+
+# Ensure that regular postgres can start from fullbackup
+def test_fullbackup(neon_env_builder: NeonEnvBuilder,
+                    pg_bin: PgBin,
+                    port_distributor: PortDistributor):
+    """
+    Load `num_rows` rows into a table on a branch, fetch a `fullbackup` tar from
+    the pageserver at the current WAL insert LSN, unpack it into a new data
+    directory, run pg_resetwal on it, then start a vanilla postgres on that
+    directory and check that it sees all the rows.
+    """
+
+    neon_env_builder.num_safekeepers = 1
+    env = neon_env_builder.init_start()
+
+    env.neon_cli.create_branch('test_fullbackup')
+    pgmain = env.postgres.create_start('test_fullbackup')
+    log.info("postgres is running on 'test_fullbackup' branch")
+
+    timeline = pgmain.safe_psql("SHOW neon.timeline_id")[0][0]
+
+    with closing(pgmain.connect()) as conn:
+        with conn.cursor() as cur:
+            # data loading may take a while, so increase statement timeout
+            cur.execute("SET statement_timeout='300s'")
+            cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g
+                        from generate_series(1,{num_rows}) g''')
+            cur.execute("CHECKPOINT")
+
+            # LSN at which the fullbackup is requested below
+            cur.execute('SELECT pg_current_wal_insert_lsn()')
+            lsn = cur.fetchone()[0]
+            log.info(f"start_backup_lsn = {lsn}")
+
+    # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
+    # PgBin sets it automatically, but here we need to pipe psql output to the tar command.
+    psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
+
+    # Get and unpack fullbackup from pageserver
+    restored_dir_path = env.repo_dir / "restored_datadir"
+    os.mkdir(restored_dir_path, 0o750)
+    query = f"fullbackup {env.initial_tenant.hex} {timeline} {lsn}"
+    cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
+    result_basepath = pg_bin.run_capture(cmd, env=psql_env)
+    # the captured psql stdout is the tar stream itself
+    tar_output_file = result_basepath + ".stdout"
+    subprocess_capture(str(env.repo_dir),
+                       ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)])
+
+    # HACK
+    # fullbackup returns neon specific pg_control and first WAL segment
+    # use resetwal to overwrite it
+    pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, 'pg_resetwal')
+    cmd = [pg_resetwal_path, "-D", str(restored_dir_path)]
+    pg_bin.run_capture(cmd, env=psql_env)
+
+    # Restore from the backup and find the data we inserted
+    port = port_distributor.get_port()
+    with VanillaPostgres(restored_dir_path, pg_bin, port, init=False) as vanilla_pg:
+        # TODO make port an optional argument
+        vanilla_pg.configure([
+            f"port={port}",
+        ])
+        vanilla_pg.start()
+        num_rows_found = vanilla_pg.safe_psql('select count(*) from tbl;', user="cloud_admin")[0][0]
+        assert num_rows == num_rows_found
--- a/test_runner/batch_others/test_import.py
+++ b/test_runner/batch_others/test_import.py
@@ -0,0 +1,198 @@
+import pytest
+from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_upload, wait_for_last_record_lsn
+from fixtures.utils import lsn_from_hex, lsn_to_hex
+from uuid import UUID, uuid4
+import tarfile
+import os
+import shutil
+from pathlib import Path
+import json
+from fixtures.utils import subprocess_capture
+from fixtures.log_helper import log
+from contextlib import closing
+from fixtures.neon_fixtures import pg_distrib_dir
+
+
+@pytest.mark.timeout(600)
+def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_builder):
+    """
+    Take a pg_basebackup of a vanilla postgres holding 300000 rows and import it
+    into a new pageserver tenant with `timeline import`. A copy of the base tar
+    without pg_control must be rejected; the intact backup must import and the
+    rows must be readable from a compute node started on the imported timeline.
+    """
+    # Put data in vanilla pg
+    vanilla_pg.start()
+    vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
+    vanilla_pg.safe_psql('''create table t as select 'long string to consume some space' || g
+     from generate_series(1,300000) g''')
+    assert vanilla_pg.safe_psql('select count(*) from t') == [(300000, )]
+
+    # Take basebackup
+    basebackup_dir = os.path.join(test_output_dir, "basebackup")
+    base_tar = os.path.join(basebackup_dir, "base.tar")
+    wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
+    os.mkdir(basebackup_dir)
+    vanilla_pg.safe_psql("CHECKPOINT")
+    pg_bin.run([
+        "pg_basebackup",
+        "-F",
+        "tar",
+        "-d",
+        vanilla_pg.connstr(),
+        "-D",
+        basebackup_dir,
+    ])
+
+    # Make corrupt base tar with missing pg_control
+    unpacked_base = os.path.join(basebackup_dir, "unpacked-base")
+    corrupt_base_tar = os.path.join(unpacked_base, "corrupt-base.tar")
+    os.mkdir(unpacked_base, 0o750)
+    subprocess_capture(str(test_output_dir), ["tar", "-xf", base_tar, "-C", unpacked_base])
+    os.remove(os.path.join(unpacked_base, "global/pg_control"))
+    # tar runs with cwd=unpacked_base, so the archive is written to corrupt_base_tar;
+    # the member list is computed before the archive file exists
+    subprocess_capture(str(test_output_dir),
+                       ["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base),
+                       cwd=unpacked_base)
+
+    # Get start_lsn and end_lsn
+    with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
+        manifest = json.load(f)
+        start_lsn = manifest["WAL-Ranges"][0]["Start-LSN"]
+        end_lsn = manifest["WAL-Ranges"][0]["End-LSN"]
+
+    node_name = "import_from_vanilla"
+    tenant = uuid4()
+    timeline = uuid4()
+
+    # Set up pageserver for import
+    neon_env_builder.enable_local_fs_remote_storage()
+    env = neon_env_builder.init_start()
+    env.pageserver.http_client().tenant_create(tenant)
+
+    # Helper: run `timeline import` for the tenant/timeline above with the given
+    # base and WAL tar files
+    def import_tar(base, wal):
+        env.neon_cli.raw_cli([
+            "timeline",
+            "import",
+            "--tenant-id",
+            tenant.hex,
+            "--timeline-id",
+            timeline.hex,
+            "--node-name",
+            node_name,
+            "--base-lsn",
+            start_lsn,
+            "--base-tarfile",
+            base,
+            "--end-lsn",
+            end_lsn,
+            "--wal-tarfile",
+            wal,
+        ])
+
+    # Importing corrupt backup fails
+    with pytest.raises(Exception):
+        import_tar(corrupt_base_tar, wal_tar)
+
+    # Clean up
+    # TODO it should clean itself
+    client = env.pageserver.http_client()
+    client.timeline_detach(tenant, timeline)
+
+    # Importing correct backup works
+    import_tar(base_tar, wal_tar)
+
+    # Wait for data to land in s3
+    wait_for_last_record_lsn(client, tenant, timeline, lsn_from_hex(end_lsn))
+    wait_for_upload(client, tenant, timeline, lsn_from_hex(end_lsn))
+
+    # Check it worked
+    pg = env.postgres.create_start(node_name, tenant_id=tenant)
+    assert pg.safe_psql('select count(*) from t') == [(300000, )]
+
+
+@pytest.mark.timeout(600)
+def test_import_from_pageserver(test_output_dir, pg_bin, vanilla_pg, neon_env_builder):
+    """
+    Take a `fullbackup` from the pageserver, wipe the pageserver's tenants
+    directory, import the backup under a new tenant id and check that the data
+    is readable, that a second fullbackup has the same size as the first, and
+    that a `do_gc` command can be issued for the imported timeline.
+    """
+
+    num_rows = 3000
+    neon_env_builder.num_safekeepers = 1
+    neon_env_builder.enable_local_fs_remote_storage()
+    env = neon_env_builder.init_start()
+
+    env.neon_cli.create_branch('test_import_from_pageserver')
+    pgmain = env.postgres.create_start('test_import_from_pageserver')
+    log.info("postgres is running on 'test_import_from_pageserver' branch")
+
+    timeline = pgmain.safe_psql("SHOW neon.timeline_id")[0][0]
+
+    with closing(pgmain.connect()) as conn:
+        with conn.cursor() as cur:
+            # data loading may take a while, so increase statement timeout
+            cur.execute("SET statement_timeout='300s'")
+            cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g
+                        from generate_series(1,{num_rows}) g''')
+            cur.execute("CHECKPOINT")
+
+            # LSN used for both fullbackups and as the import base LSN below
+            cur.execute('SELECT pg_current_wal_insert_lsn()')
+            lsn = cur.fetchone()[0]
+            log.info(f"start_backup_lsn = {lsn}")
+
+    # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
+    # PgBin sets it automatically, but here we need to pipe psql output to the tar command.
+    psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
+
+    # Get a fullbackup from pageserver
+    query = f"fullbackup { env.initial_tenant.hex} {timeline} {lsn}"
+    cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
+    result_basepath = pg_bin.run_capture(cmd, env=psql_env)
+    tar_output_file = result_basepath + ".stdout"
+
+    # Stop the first pageserver instance, erase all its data
+    env.postgres.stop_all()
+    env.pageserver.stop()
+
+    dir_to_clear = Path(env.repo_dir) / 'tenants'
+    shutil.rmtree(dir_to_clear)
+    os.mkdir(dir_to_clear)
+
+    # Start the pageserver again
+    env.pageserver.start()
+
+    # Import using another tenantid, because we use the same pageserver.
+    # TODO Create another pageserver to make the test more realistic.
+    tenant = uuid4()
+
+    # Import to pageserver
+    node_name = "import_from_pageserver"
+    client = env.pageserver.http_client()
+    client.tenant_create(tenant)
+    env.neon_cli.raw_cli([
+        "timeline",
+        "import",
+        "--tenant-id",
+        tenant.hex,
+        "--timeline-id",
+        timeline,
+        "--node-name",
+        node_name,
+        "--base-lsn",
+        lsn,
+        "--base-tarfile",
+        os.path.join(tar_output_file),
+    ])
+
+    # Wait for data to land in s3
+    wait_for_last_record_lsn(client, tenant, UUID(timeline), lsn_from_hex(lsn))
+    wait_for_upload(client, tenant, UUID(timeline), lsn_from_hex(lsn))
+
+    # Check it worked
+    pg = env.postgres.create_start(node_name, tenant_id=tenant)
+    assert pg.safe_psql('select count(*) from tbl') == [(num_rows, )]
+
+    # Take another fullbackup
+    query = f"fullbackup { tenant.hex} {timeline} {lsn}"
+    cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
+    result_basepath = pg_bin.run_capture(cmd, env=psql_env)
+    new_tar_output_file = result_basepath + ".stdout"
+
+    # Check it's the same as the first fullbackup
+    # TODO pageserver should be checking checksum
+    # (only the file sizes are compared here, not the contents)
+    assert os.path.getsize(tar_output_file) == os.path.getsize(new_tar_output_file)
+
+    # Check that gc works
+    psconn = env.pageserver.connect()
+    pscur = psconn.cursor()
+    pscur.execute(f"do_gc {tenant.hex} {timeline} 0")
--- a/test_runner/batch_others/test_normal_work.py
+++ b/test_runner/batch_others/test_normal_work.py
@@ -1,5 +1,6 @@
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
+import pytest


 def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
@@ -26,7 +27,8 @@ def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
    pageserver_http.timeline_detach(tenant_id, timeline_id)


-def test_normal_work(neon_env_builder: NeonEnvBuilder):
+@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
+def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int):
    """
    Basic test:
    * create new tenant with a timeline
@@ -40,8 +42,9 @@ def test_normal_work(neon_env_builder: NeonEnvBuilder):
    Repeat check for several tenants/timelines.
    """

+    neon_env_builder.num_safekeepers = num_safekeepers
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()

-    for _ in range(3):
+    for _ in range(num_timelines):
        check_tenant(env, pageserver_http)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Anastasia Lubennikova	263a3ea5e3	Add script export_import_betwen_pageservers.py to migrate projects between pageservers	2022-07-05 15:27:31 +03:00
Heikki Linnakangas	bb69e0920c	Do not overwrite an existing image layer. See github issues #1594 and #1690 Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>	2022-07-05 14:45:31 +03:00
Alexander Bayandin	05f6a1394d	Add tests for different Postgres client libraries (#2008 ) * Add tests for different postgres clients * test/fixtures: sanitize test name for test_output_dir * test/fixtures: do not look for etcd before runtime * Add workflow for testing Postgres client libraries	2022-07-05 12:22:58 +01:00
Heikki Linnakangas	844832ffe4	Bump vendor/postgres Contains changes from two PRs in vendor/postgres: - https://github.com/neondatabase/postgres/pull/163 - https://github.com/neondatabase/postgres/pull/176	2022-07-05 10:55:03 +03:00
bojanserafimov	d29c545b5d	Gc/compaction thread pool, take 2 (#1933 ) Decrease the number of pageserver threads by running gc and compaction in a blocking tokio thread pool	2022-07-05 02:06:40 -04:00
Kirill Bulatov	6abdb12724	Fix 1.62 Clippy errors	2022-07-04 23:46:37 +03:00
Alexander Bayandin	7898e72990	Remove duplicated checks from LocalEnv	2022-07-04 22:35:00 +03:00
Dmitry Rodionov	65704708fa	remove unused imports, make more use of pathlib.Path	2022-07-01 18:56:51 +03:00
Arseny Sher	6100a02d0f	Prefix WAL files in s3 with environment name. It wasn't merged to prod yet, so safe to enable.	2022-07-01 19:21:28 +04:00
Arseny Sher	97fed38213	Fix `cadaca010c` for older ssh clients.	2022-07-01 19:20:59 +04:00
Arseny Sher	cadaca010c	Make ansible work with storage nodes through teleport from local box.	2022-07-01 16:58:34 +03:00
Bojan Serafimov	f09c09438a	Fix gc after import	2022-07-01 11:10:49 +03:00
Dmitry Rodionov	00fc696606	replace extra urlencode dependency with already present url library	2022-06-30 14:32:15 +03:00
Kirill Bulatov	1d0706cf25	Fix walreceiver connection selection mechanism * Avoid reconnecting to safekeeper immediately after its failure by limiting candidates to those with fewest connection attempts. Thus we don't have to wait lagging_wal_timeout (10s by default) before switch happens even if no new changes are generated, and current test_restarts_under_load expects some commits to happen within 4s. * Make default max_lsn_wal_lag larger, otherwise constant reconnections happen during normal work. * Fix wal_connection_attempts maintenance, preventing busy loop of reconnections.	2022-06-30 00:40:12 +03:00
Dmitry Ivanov	5ee19b0758	Fix bloated coverage uploads (#2005 ) Move coverage data to a better directory, merge it better and don't publish it from CircleCI pipeline	2022-06-29 17:59:19 +03:00
Kirill Bulatov	cef90d9220	Disable cachepot for GH Actions builds (#2007 )	2022-06-29 17:56:02 +03:00
Kirill Bulatov	4a05413a4c	More code coverage fixes in GH Actions (#2002 )	2022-06-27 22:40:20 +03:00
Kirill Bulatov	dd61f3558f	Fix coverage upload credentials retrieval (#2001 )	2022-06-27 20:41:09 +03:00
Kirill Bulatov	8a714f1ebf	Add coverage to GH actions and rework part of them (#1987 )	2022-06-27 19:15:56 +03:00
Arseny Sher	137291dc24	Push to etcd from safekeeper many timelines concurrently. Mitigates latency fee, making push throughput 1-1.5 order of magnitude bigger. Also make leases per timeline, not per whole safekeeper, avoiding storing garbage in etcd for deleted timelines while safekeeper is alive.	2022-06-27 16:30:21 +03:00
Kirill Bulatov	eb8926083e	Use the updated base build Docker image (#1972 )	2022-06-27 13:12:58 +03:00
Johan Eliasson	26bca6ddba	Add `openssl` to OSX dependencies (#1994 )	2022-06-26 21:54:07 +03:00
Arthur Petukhovsky	55192384c3	Fix zero timeline_start_lsn (#1981 ) * Fix zero timeline_start_lsn * Log more info on control file upgrade * Fix formatting Co-authored-by: Anastasia Lubennikova <anastasia@neon.tech>	2022-06-24 13:59:37 +03:00
KlimentSerafimov	392cd8b1fc	Refactored extracting project_name in console.rs. (#1982 )	2022-06-24 05:57:33 -04:00
Alexey Kondratov	3cc531d093	Fix CREATE EXTENSION for non-db-owner users (#1408 ) Previously, we were granting create only to db owner, but now we have a dedicated 'web_access' role to connect via web UI and proxy link auth. We anyway grant read / write all data to all roles, so let's grant create to everyone too. This creates some privilege objects in each db, which we need to drop before deleting the role. So now we reassign all owned objects to each db owner before deletion. This also fixes deletion of roles that created some data in any db previously. Will be tested by https://github.com/neondatabase/cloud/pull/1673 Later we should stop messing with Postgres ACL that much.	2022-06-23 21:36:53 +02:00
bojanserafimov	84b9fcbbd5	Increase a few test timeouts (#1977 )	2022-06-23 11:51:56 -04:00
Bojan Serafimov	93e050afe3	Don't require project name for link auth	2022-06-23 15:38:05 +03:00
Anastasia Lubennikova	6d7dc384a5	Add zenith-us-stage-ps-3 to deploy	2022-06-23 14:52:32 +03:00
Anastasia Lubennikova	3c2b03cd87	Update timeline size on dropdb. Add the test (#1973 ) In addition, fix database size calculation: count not only main fork of the relation, but also vm and fsm.	2022-06-23 12:28:12 +03:00
Kirill Bulatov	7c49abe7d1	Rework etcd timeline updates and their handling	2022-06-23 09:11:27 +03:00
KlimentSerafimov	d059e588a6	Added invariant check for project name. (#1921 ) Summary: Added invariant checking for project name. Refactored ClientCredentials and TlsConfig. * Added formatting invariant check for project name: \forall c \in project_name . c \in [alnum] U {'-'}. sni_data == <project_name>.<common_name> * Added exhaustive tests for get_project_name. * Refactored TlsConfig to contain common_name : Option<String>. * Refactored ClientCredentials construction to construct project_name directly. * Merged ProjectNameError into ClientCredsParseError. * Tweaked proxy tests to accommodate refactored ClientCredentials construction semantics. * [Pytests] Added project option argument to test_proxy_select_1. * Removed project param from Api since now it's contained in creds. * Refactored &Option<String> -> Option<&str>. Co-authored-by: Dmitrii Ivanov <dima@neon.tech>.	2022-06-22 09:34:24 -04:00
Sergey Melnikov	6222a0012b	Migrate from CircleCI to Github Actions: python codestyle, build and tests (#1647 ) Duplicate postgres and neon build and test jobs from CircleCI to Github actions.	2022-06-22 11:40:59 +03:00
bojanserafimov	1ca28e6f3c	Import basebackup into pageserver (#1925 ) Allow importing basebackup taken from vanilla postgres or another pageserver via psql copy in protocol.	2022-06-21 11:04:10 -04:00
Arthur Petukhovsky	6c4d6a2183	Remove timeline_start_lsn check temporary. (#1964 )	2022-06-21 02:02:24 +03:00
Thang Pham	37465dafe3	Add wal backpressure tests (#1919 ) Resolves #1889. This PR adds new tests to measure the WAL backpressure's performance under different workloads. ## Changes - add new performance tests in `test_wal_backpressure.py` - allow safekeeper's fsync to be configurable when running tests	2022-06-20 11:40:55 -04:00
Joshua D. Drake	ec0064c442	Small README.md changes (#1957 ) * Update make instructions for release and debug build. Update PostgreSQL glossary to proper version (14) * Continued cleanup of build instructions including removal of redundancies	2022-06-20 10:05:10 -04:00
Heikki Linnakangas	83c7e6ce52	Bump vendor/postgres. This brings in the change to not use a shared memory in the WAL redo process, to avoid running out of sysv shmem segments in the page server. Also, removal of callmemaybe bits.	2022-06-20 15:28:43 +03:00
Arthur Petukhovsky	f862373ac0	Fix WAL timeout in test_s3_wal_replay (#1953 )	2022-06-17 20:43:54 +03:00
Arthur Petukhovsky	699f46cd84	Download WAL from S3 if it's not available in safekeeper dir (#1932 ) `send_wal.rs` and `WalReader` are now async. `test_s3_wal_replay` checks that WAL can be replayed after offloaded.	2022-06-17 15:33:39 +03:00
Anastasia Lubennikova	36ee182d26	Implement page service 'fullbackup' endpoint (#1923 ) * Implement page service 'fullbackup' endpoint that works like basebackup, but also sends relational files * Add test_runner/batch_others/test_fullbackup.py Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>	2022-06-16 14:07:11 +03:00
Anastasia Lubennikova	d11c9f9fcb	Use random ports for the proxy and local pg in tests Fixes #1931 Author: Dmitry Ivanov	2022-06-15 20:21:58 +03:00
Kirill Bulatov	d8a37452c8	Rename ZenithFeedback (#1912 )	2022-06-11 00:44:05 +03:00
chaitanya sharma	e1336f451d	renamed .zenith data-dir to .neon.	2022-06-09 18:19:18 +02:00
Arseny Sher	a4d8261390	Save Postgres log in test_find_end_of_wal_* tests.	2022-06-09 19:16:43 +04:00
Egor Suvorov	e2a5a31595	Safekeeper HTTP router: add comment about /v1/timeline	2022-06-09 17:14:46 +02:00
Egor Suvorov	0ac0fba77a	test_runner: test Safekeeper HTTP API Auth All endpoints except for POST /v1/timeline are tested, this one is not tested in any way yet. Three attempts for each endpoint: correctly authenticated, badly authenticated, unauthenticated.	2022-06-09 17:14:46 +02:00
Egor Suvorov	a001052cdd	test_runner: SafekeeperHttpClient: support auth	2022-06-09 17:14:46 +02:00
Egor Suvorov	1f1d852204	ZenithEnvBuilder: rename pageserver_auth_enabled --> auth_enabled	2022-06-09 17:14:46 +02:00
Egor Suvorov	f7b878611a	Implement JWT authentication in Safekeeper HTTP API (#1753 ) * `control_plane` crate (used by `neon_local`) now parses an `auth_enabled` bool for each Safekeeper * If auth is enabled, a Safekeeper is passed a path to a public key via a new command line argument * Added TODO comments to other places needing auth	2022-06-09 17:14:46 +02:00
Arseny Sher	a51b2dac9a	Don't s3 offload from newly joined safekeeper not having required WAL. I made the check at launcher level with the perspective of generally moving election (decision who offloads) there. Also log timeline 'active' changes.	2022-06-09 18:30:16 +04:00
Thang Pham	e22d9cee3a	fix `ZeroDivisionError` in `scripts/generate_perf_report_page` (#1906 ) Fixes the `ZeroDivisionError` error by adding `EPS=1e-6` when doing the calculation.	2022-06-08 09:15:12 -04:00
Arthur Petukhovsky	a01999bc4a	Replace most common remote logs with metrics (#1909 )	2022-06-08 13:36:49 +03:00
chaitanya sharma	32e64afd54	Use better parallel build instructions in readme.md (#1908 )	2022-06-08 11:25:37 +03:00
Kirill Bulatov	8a53472e4f	Force etcd broker keys to not to intersect	2022-06-08 11:21:05 +03:00
Dmitry Rodionov	6e26588d17	Allow to customize shutdown condition in PostgresBackend Use it in PageServerHandler to check per thread shutdown condition from thread_mgr which takes into account tenants and timelines	2022-06-07 22:11:54 +03:00
Arseny Sher	0b93253b3c	Fix leaked keepalive task in s3 offloading leader election. I still don't like the surroundings and feel we'd better get away without using election API at all, but this is a quick fix to keep CI green. ref #1815	2022-06-07 15:17:57 +04:00
Dmitry Rodionov	7dc6beacbd	make it possible to associate thread with a tenant after thread start	2022-06-07 12:59:35 +03:00
Thang Pham	6cfebc096f	Add read/write throughput performance tests (#1883 ) Part of #1467 This PR adds several performance tests that compare the [PG statistics](https://www.postgresql.org/docs/current/monitoring-stats.html) obtained when running PG benchmarks against Neon and vanilla PG to measure the read/write throughput of the DB.	2022-06-06 12:32:10 -04:00
KlimentSerafimov	fecad1ca34	Resolving issue #1745 . Added cluster option for SNI data (#1813 ) * Added project option in case SNI data is missing. Resolving issue #1745. * Added invariant checking for project name: if both sni_data and project_name are available then they should match.	2022-06-06 08:14:41 -04:00
bojanserafimov	92de8423af	Remove dead code (#1886 )	2022-06-05 09:18:11 -04:00
Dmitry Rodionov	e442f5357b	unify two identical failpoints in flush_frozen_layer probably a merge artifact	2022-06-03 19:36:09 +03:00
Arseny Sher	5a723d44cd	Parametrize test_normal_work. I like to run small test locally, but let's avoid duplication.	2022-06-03 20:32:53 +04:00
Kirill Bulatov	2623193876	Remove pageserver_connstr from WAL stream logic	2022-06-03 17:30:36 +03:00
Arseny Sher	70a53c4b03	Get back test_safekeeper_normal_work, but skip by default. It is handy for development.	2022-06-03 16:12:14 +04:00
Arseny Sher	9e108102b3	Silence etcd safekeeper info key parse errors. When we subscribe to everything, it is ok to receive not only safekeeper timeline updates.	2022-06-03 16:12:14 +04:00
huming	9c846a93e8	chore(doc)	2022-06-03 14:24:27 +03:00