Try larger sleep

Wait for pid death
Add hacky solution
2026-05-13 03:00:37 +00:00 · 2022-08-12 09:52:40 -04:00 · 2022-08-12 09:21:44 -04:00 · 2022-08-12 09:05:51 -04:00 · 2022-08-12 09:01:17 -04:00 · 2022-08-12 19:13:42 +07:00
153 changed files with 8163 additions and 5548 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,369 +0,0 @@
-version: 2.1
-
-executors:
-  neon-xlarge-executor:
-    resource_class: xlarge
-    docker:
-      # NB: when changed, do not forget to update rust image tag in all Dockerfiles
-      - image: neondatabase/rust:1.58
-  neon-executor:
-    docker:
-      - image: neondatabase/rust:1.58
-
-jobs:
-  # A job to build postgres
-  build-postgres:
-    executor: neon-xlarge-executor
-    parameters:
-      build_type:
-        type: enum
-        enum: ["debug", "release"]
-    environment:
-      BUILD_TYPE: << parameters.build_type >>
-    steps:
-        # Checkout the git repo (circleci doesn't have a flag to enable submodules here)
-      - checkout
-
-        # Grab the postgres git revision to build a cache key.
-        # Append makefile as it could change the way postgres is built.
-        # Note this works even though the submodule hasn't been checkout out yet.
-      - run:
-          name: Get postgres cache key
-          command: |
-              git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
-              cat Makefile >> /tmp/cache-key-postgres
-
-      - restore_cache:
-          name: Restore postgres cache
-          keys:
-            # Restore ONLY if the rev key matches exactly
-            - v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
-
-        # Build postgres if the restore_cache didn't find a build.
-        # `make` can't figure out whether the cache is valid, since
-        # it only compares file timestamps.
-      - run:
-          name: build postgres
-          command: |
-            if [ ! -e tmp_install/bin/postgres ]; then
-              # "depth 1" saves some time by not cloning the whole repo
-              git submodule update --init --depth 1
-              # bail out on any warnings
-              COPT='-Werror' mold -run make postgres -j$(nproc)
-            fi
-
-      - save_cache:
-          name: Save postgres cache
-          key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
-          paths:
-            - tmp_install
-
-  # A job to build Neon rust code
-  build-neon:
-    executor: neon-xlarge-executor
-    parameters:
-      build_type:
-        type: enum
-        enum: ["debug", "release"]
-    environment:
-      BUILD_TYPE: << parameters.build_type >>
-    steps:
-        # Checkout the git repo (without submodules)
-      - checkout
-
-        # Grab the postgres git revision to build a cache key.
-        # Append makefile as it could change the way postgres is built.
-        # Note this works even though the submodule hasn't been checkout out yet.
-      - run:
-          name: Get postgres cache key
-          command: |
-            git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
-            cat Makefile >> /tmp/cache-key-postgres
-
-
-      - restore_cache:
-          name: Restore postgres cache
-          keys:
-            # Restore ONLY if the rev key matches exactly
-            - v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
-
-      - restore_cache:
-          name: Restore rust cache
-          keys:
-            # Require an exact match. While an out of date cache might speed up the build,
-            # there's no way to clean out old packages, so the cache grows every time something
-            # changes.
-            - v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
-
-        # Build the rust code, including test binaries
-      - run:
-          name: Rust build << parameters.build_type >>
-          command: |
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              CARGO_FLAGS=
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              CARGO_FLAGS="--release --features profiling"
-            fi
-
-            export CARGO_INCREMENTAL=0
-            export CACHEPOT_BUCKET=zenith-rust-cachepot
-            export RUSTC_WRAPPER=""
-            export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
-            export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
-            mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
-            cachepot -s
-
-      - save_cache:
-          name: Save rust cache
-          key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
-          paths:
-            - ~/.cargo/registry
-            - ~/.cargo/git
-            - target
-
-        # Run rust unit tests
-      - run:
-          name: cargo test
-          command: |
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              CARGO_FLAGS=
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              CARGO_FLAGS=--release
-            fi
-
-            cargo test $CARGO_FLAGS
-
-        # Install the rust binaries, for use by test jobs
-      - run:
-          name: Install rust binaries
-          command: |
-            binaries=$(
-              cargo metadata --format-version=1 --no-deps |
-              jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
-            )
-
-            mkdir -p /tmp/zenith/bin
-            mkdir -p /tmp/zenith/test_bin
-            mkdir -p /tmp/zenith/etc
-
-            # Install target binaries
-            for bin in $binaries; do
-              SRC=target/$BUILD_TYPE/$bin
-              DST=/tmp/zenith/bin/$bin
-              cp $SRC $DST
-            done
-
-        # Install the postgres binaries, for use by test jobs
-      - run:
-          name: Install postgres binaries
-          command: |
-            cp -a tmp_install /tmp/zenith/pg_install
-
-      # Save rust binaries for other jobs in the workflow
-      - persist_to_workspace:
-          root: /tmp/zenith
-          paths:
-            - "*"
-
-  check-codestyle-python:
-    executor: neon-executor
-    steps:
-      - checkout
-      - restore_cache:
-          keys:
-            - v2-python-deps-{{ checksum "poetry.lock" }}
-      - run:
-          name: Install deps
-          command: ./scripts/pysync
-      - save_cache:
-          key: v2-python-deps-{{ checksum "poetry.lock" }}
-          paths:
-            - /home/circleci/.cache/pypoetry/virtualenvs
-      - run:
-          name: Print versions
-          when: always
-          command: |
-              poetry run python --version
-              poetry show
-      - run:
-          name: Run yapf to ensure code format
-          when: always
-          command: poetry run yapf --recursive --diff .
-      - run:
-          name: Run mypy to check types
-          when: always
-          command: poetry run mypy .
-
-  run-pytest:
-    executor: neon-executor
-    parameters:
-      # pytest args to specify the tests to run.
-      #
-      # This can be a test file name, e.g. 'test_pgbench.py, or a subdirectory,
-      # or '-k foobar' to run tests containing string 'foobar'. See pytest man page
-      # section SPECIFYING TESTS / SELECTING TESTS for details.
-      #
-      # Select the type of Rust build. Must be "release" or "debug".
-      build_type:
-        type: string
-        default: "debug"
-      # This parameter is required, to prevent the mistake of running all tests in one job.
-      test_selection:
-        type: string
-        default: ""
-      # Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
-      extra_params:
-        type: string
-        default: ""
-      needs_postgres_source:
-        type: boolean
-        default: false
-      run_in_parallel:
-        type: boolean
-        default: true
-      save_perf_report:
-        type: boolean
-        default: false
-    environment:
-      BUILD_TYPE: << parameters.build_type >>
-    steps:
-      - attach_workspace:
-          at: /tmp/zenith
-      - checkout
-      - when:
-          condition: << parameters.needs_postgres_source >>
-          steps:
-            - run: git submodule update --init --depth 1
-      - restore_cache:
-          keys:
-            - v2-python-deps-{{ checksum "poetry.lock" }}
-      - run:
-          name: Install deps
-          command: ./scripts/pysync
-      - save_cache:
-          key: v2-python-deps-{{ checksum "poetry.lock" }}
-          paths:
-            - /home/circleci/.cache/pypoetry/virtualenvs
-      - run:
-          name: Run pytest
-          # pytest doesn't output test logs in real time, so CI job may fail with
-          # `Too long with no output` error, if a test is running for a long time.
-          # In that case, tests should have internal timeouts that are less than
-          # no_output_timeout, specified here.
-          no_output_timeout: 10m
-          environment:
-            - NEON_BIN: /tmp/zenith/bin
-            - POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
-            - TEST_OUTPUT: /tmp/test_output
-            # this variable will be embedded in perf test report
-            # and is needed to distinguish different environments
-            - PLATFORM: zenith-local-ci
-          command: |
-            PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
-            rm -rf $PERF_REPORT_DIR
-
-            TEST_SELECTION="test_runner/<< parameters.test_selection >>"
-            EXTRA_PARAMS="<< parameters.extra_params >>"
-            if [ -z "$TEST_SELECTION" ]; then
-              echo "test_selection must be set"
-              exit 1
-            fi
-            if << parameters.run_in_parallel >>; then
-              EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
-            fi
-            if << parameters.save_perf_report >>; then
-              if [[ $CIRCLE_BRANCH == "main" ]]; then
-                mkdir -p "$PERF_REPORT_DIR"
-                EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
-              fi
-            fi
-
-            export GITHUB_SHA=$CIRCLE_SHA1
-
-            # Run the tests.
-            #
-            # The junit.xml file allows CircleCI to display more fine-grained test information
-            # in its "Tests" tab in the results page.
-            # --verbose prints name of each test (helpful when there are
-            # multiple tests in one file)
-            # -rA prints summary in the end
-            # -n4 uses four processes to run tests via pytest-xdist
-            # -s is not used to prevent pytest from capturing output, because tests are running
-            # in parallel and logs are mixed between different tests
-            ./scripts/pytest \
-              --junitxml=$TEST_OUTPUT/junit.xml \
-              --tb=short \
-              --verbose \
-              -m "not remote_cluster" \
-              -rA $TEST_SELECTION $EXTRA_PARAMS
-
-            if << parameters.save_perf_report >>; then
-              if [[ $CIRCLE_BRANCH == "main" ]]; then
-                export REPORT_FROM="$PERF_REPORT_DIR"
-                export REPORT_TO=local
-                scripts/generate_and_push_perf_report.sh
-              fi
-            fi
-      - run:
-          # CircleCI artifacts are preserved one file at a time, so skipping
-          # this step isn't a good idea. If you want to extract the
-          # pageserver state, perhaps a tarball would be a better idea.
-          name: Delete all data but logs
-          when: always
-          command: |
-            du -sh /tmp/test_output/*
-            find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
-            du -sh /tmp/test_output/*
-      - store_artifacts:
-          path: /tmp/test_output
-      # The store_test_results step tells CircleCI where to find the junit.xml file.
-      - store_test_results:
-          path: /tmp/test_output
-      # Save data (if any)
-      - persist_to_workspace:
-          root: /tmp/zenith
-          paths:
-            - "*"
-
-workflows:
-  build_and_test:
-    jobs:
-      - check-codestyle-python
-      - build-postgres:
-          name: build-postgres-<< matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-      - build-neon:
-          name: build-neon-<< matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-          requires:
-            - build-postgres-<< matrix.build_type >>
-      - run-pytest:
-          name: pg_regress-tests-<< matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-          test_selection: batch_pg_regress
-          needs_postgres_source: true
-          requires:
-            - build-neon-<< matrix.build_type >>
-      - run-pytest:
-          name: other-tests-<< matrix.build_type >>
-          matrix:
-            parameters:
-              build_type: ["debug", "release"]
-          test_selection: batch_others
-          requires:
-            - build-neon-<< matrix.build_type >>
-      - run-pytest:
-          name: benchmarks
-          context: PERF_TEST_RESULT_CONNSTR
-          build_type: release
-          test_selection: performance
-          run_in_parallel: false
-          save_perf_report: true
-          requires:
-            - build-neon-release
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -0,0 +1,56 @@
+name: "Download an artifact"
+description: "Custom download action"
+inputs:
+  name:
+    description: "Artifact name"
+    required: true
+  path:
+    description: "A directory to put artifact into"
+    default: "."
+    required: false
+  skip-if-does-not-exist:
+    description: "Allow to skip if file doesn't exist, fail otherwise"
+    default: false
+    required: false
+
+runs:
+  using: "composite"
+  steps:
+    - name: Download artifact
+      id: download-artifact
+      shell: bash -euxo pipefail {0}
+      env:
+        TARGET: ${{ inputs.path }}
+        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
+        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
+      run: |
+        BUCKET=neon-github-public-dev
+        PREFIX=artifacts/${GITHUB_RUN_ID}
+        FILENAME=$(basename $ARCHIVE)
+        # Last matching key under this run's prefix in version-sort order; empty when nothing matches.
+        S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep -F "${FILENAME}" | sort --version-sort | tail -1 || true)
+        if [ -z "${S3_KEY}" ]; then
+          if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
+            echo '::set-output name=SKIPPED::true'
+            exit 0
+          else
+            echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME} nor its version from previous attempts exist"
+            exit 1
+          fi
+        fi
+
+        echo '::set-output name=SKIPPED::false'
+
+        mkdir -p $(dirname $ARCHIVE)
+        time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}
+
+    - name: Extract artifact
+      if: ${{ steps.download-artifact.outputs.SKIPPED == 'false' }}
+      shell: bash -euxo pipefail {0}
+      env:
+        TARGET: ${{ inputs.path }}
+        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
+      run: |
+        mkdir -p ${TARGET}
+        time tar -xf ${ARCHIVE} -C ${TARGET}
+        rm -f ${ARCHIVE}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -27,22 +27,35 @@ inputs:
    description: 'Whether to upload the performance report'
    required: false
    default: 'false'
+  run_with_real_s3:
+    description: 'Whether to pass real s3 credentials to the test suite'
+    required: false
+    default: 'false'
+  real_s3_bucket:
+    description: 'Bucket name for real s3 tests'
+    required: false
+    default: ''
+  real_s3_region:
+    description: 'Region name for real s3 tests'
+    required: false
+    default: ''
+  real_s3_access_key_id:
+    description: 'Access key id'
+    required: false
+    default: ''
+  real_s3_secret_access_key:
+    description: 'Secret access key'
+    required: false
+    default: ''

 runs:
  using: "composite"
  steps:
-    - name: Get Neon artifact for restoration
-      uses: actions/download-artifact@v3
+    - name: Get Neon artifact
+      uses: ./.github/actions/download
      with:
        name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
-        path: ./neon-artifact/
-
-    - name: Extract Neon artifact
-      shell: bash -ex {0}
-      run: |
-        mkdir -p /tmp/neon/
-        tar -xf ./neon-artifact/neon.tar.zst -C /tmp/neon/
-        rm -rf ./neon-artifact/
+        path: /tmp/neon

    - name: Checkout
      if: inputs.needs_postgres_source == 'true'
@@ -59,7 +72,7 @@ runs:
        key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}

    - name: Install Python deps
-      shell: bash -ex {0}
+      shell: bash -euxo pipefail {0}
      run: ./scripts/pysync

    - name: Run pytest
@@ -70,7 +83,10 @@ runs:
        # this variable will be embedded in perf test report
        # and is needed to distinguish different environments
        PLATFORM: github-actions-selfhosted
-      shell: bash -ex {0}
+        BUILD_TYPE: ${{ inputs.build_type }}
+        AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
+        AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
+      shell: bash -euxo pipefail {0}
      run: |
        PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
        rm -rf $PERF_REPORT_DIR
@@ -84,6 +100,14 @@ runs:
        if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
          EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
        fi
+
+        if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
+          echo "REAL S3 ENABLED"
+          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
+          export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
+          export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
+        fi
+
        if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            mkdir -p "$PERF_REPORT_DIR"
@@ -99,7 +123,7 @@ runs:

        # Run the tests.
        #
-        # The junit.xml file allows CircleCI to display more fine-grained test information
+        # The junit.xml file allows CI tools to display more fine-grained test information
        # in its "Tests" tab in the results page.
        # --verbose prints name of each test (helpful when there are
        # multiple tests in one file)
@@ -123,7 +147,7 @@ runs:
        fi

    - name: Delete all data but logs
-      shell: bash -ex {0}
+      shell: bash -euxo pipefail {0}
      if: always()
      run: |
        du -sh /tmp/test_output/*
@@ -132,9 +156,7 @@ runs:

    - name: Upload python test logs
      if: always()
-      uses: actions/upload-artifact@v3
+      uses: ./.github/actions/upload
      with:
-        retention-days: 7
-        if-no-files-found: error
        name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
        path: /tmp/test_output/
--- a/.github/actions/save-coverage-data/action.yml
+++ b/.github/actions/save-coverage-data/action.yml
@@ -5,13 +5,18 @@ runs:
  using: "composite"
  steps:
    - name: Merge coverage data
-      shell: bash -ex {0}
+      shell: bash -euxo pipefail {0}
      run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge

-    - name: Upload coverage data
-      uses: actions/upload-artifact@v3
+    - name: Download previous coverage data into the same directory
+      uses: ./.github/actions/download
      with:
-        retention-days: 7
-        if-no-files-found: error
        name: coverage-data-artifact
-        path: /tmp/coverage/
+        path: /tmp/coverage
+        skip-if-does-not-exist: true # skip if there's no previous coverage to download
+
+    - name: Upload coverage data
+      uses: ./.github/actions/upload
+      with:
+        name: coverage-data-artifact
+        path: /tmp/coverage
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -0,0 +1,51 @@
+name: "Upload an artifact"
+description: "Custom upload action"
+inputs:
+  name:
+    description: "Artifact name"
+    required: true
+  path:
+    description: "A directory or file to upload"
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Prepare artifact
+      shell: bash -euxo pipefail {0}
+      env:
+        SOURCE: ${{ inputs.path }}
+        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
+      run: |
+        mkdir -p $(dirname $ARCHIVE)
+
+        if [ -f ${ARCHIVE} ]; then
+          echo >&2 "File ${ARCHIVE} already exist. Something went wrong before"
+          exit 1
+        fi
+        export ZSTD_NBTHREADS=0 # must be exported, otherwise the compressor spawned by tar never sees it
+        if [ -d  ${SOURCE} ]; then
+          time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
+        elif [ -f ${SOURCE} ]; then
+          time tar -cf ${ARCHIVE} --zstd ${SOURCE}
+        else
+          echo >&2 "${SOURCE} neither directory nor file, don't know how to handle it"
+          exit 1
+        fi
+
+    - name: Upload artifact
+      shell: bash -euxo pipefail {0}
+      env:
+        SOURCE: ${{ inputs.path }}
+        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
+      run: |
+        BUCKET=neon-github-public-dev
+        PREFIX=artifacts/${GITHUB_RUN_ID}
+        FILENAME=$(basename $ARCHIVE)
+
+        FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)
+
+        time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}
+
+        # Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
+        echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}
--- a/.github/ansible/production.hosts
+++ b/.github/ansible/production.hosts
@@ -17,4 +17,4 @@ env_name = prod-1
 console_mgmt_base_url = http://console-release.local
 bucket_name           = zenith-storage-oregon
 bucket_region         = us-west-2
-etcd_endpoints        = etcd-release.local:2379
+etcd_endpoints        = zenith-1-etcd.local:2379
--- a/.github/ansible/scripts/init_safekeeper.sh
+++ b/.github/ansible/scripts/init_safekeeper.sh
@@ -12,10 +12,9 @@ cat <<EOF | tee /tmp/payload
  "version": 1,
  "host": "${HOST}",
  "port": 6500,
+  "http_port": 7676,
  "region_id": {{ console_region_id }},
-  "instance_id": "${INSTANCE_ID}",
-  "http_host": "${HOST}",
-  "http_port": 7676
+  "instance_id": "${INSTANCE_ID}"
 }
 EOF

--- a/.github/ansible/staging.hosts
+++ b/.github/ansible/staging.hosts
@@ -17,4 +17,4 @@ env_name = us-stage
 console_mgmt_base_url = http://console-staging.local
 bucket_name           = zenith-staging-storage-us-east-1
 bucket_region         = us-east-1
-etcd_endpoints        = etcd-staging.local:2379
+etcd_endpoints        = zenith-us-stage-etcd.local:2379
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -60,7 +60,7 @@ jobs:
    - name: Setup cluster
      env:
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
-      shell: bash
+      shell: bash -euxo pipefail {0}
      run: |
        set -e

--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -3,13 +3,13 @@ name: Test and Deploy
 on:
  push:
    branches:
-    - main
-    - release
+      - main
+      - release
  pull_request:

 defaults:
  run:
-    shell: bash -ex {0}
+    shell: bash -euxo pipefail {0}

 concurrency:
  # Allow only one workflow per any non-`main` branch.
@@ -22,7 +22,8 @@ env:

 jobs:
  build-neon:
-    runs-on: [ self-hosted, Linux, k8s-runner ]
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
    strategy:
      fail-fast: false
      matrix:
@@ -31,8 +32,19 @@ jobs:

    env:
      BUILD_TYPE: ${{ matrix.build_type }}
+      GIT_VERSION: ${{ github.sha }}

    steps:
+      - name: Fix git ownership
+        run: |
+          # Workaround for `fatal: detected dubious ownership in repository at ...`
+          #
+          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
+          #   Ref https://github.com/actions/checkout/issues/785
+          #
+          git config --global --add safe.directory "${{ github.workspace }}"
+          git config --global --add safe.directory "${GITHUB_WORKSPACE}"
+
      - name: Checkout
        uses: actions/checkout@v3
        with:
@@ -123,6 +135,7 @@ jobs:
            mkdir -p /tmp/coverage/

            mkdir -p /tmp/neon/test_bin/
+
            test_exe_paths=$(
              ${cov_prefix} cargo test $CARGO_FLAGS --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
@@ -145,25 +158,20 @@ jobs:
      - name: Install postgres binaries
        run: cp -a tmp_install /tmp/neon/pg_install

-      - name: Prepare neon artifact
-        run: ZSTD_NBTHREADS=0 tar -C /tmp/neon/ -cf ./neon.tar.zst --zstd .
-
-      - name: Upload neon binaries
-        uses: actions/upload-artifact@v3
+      - name: Upload Neon artifact
+        uses: ./.github/actions/upload
        with:
-          retention-days: 7
-          if-no-files-found: error
          name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
-          path: ./neon.tar.zst
+          path: /tmp/neon

      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug'
        uses: ./.github/actions/save-coverage-data

-
  pg_regress-tests:
-    runs-on: [ self-hosted, Linux, k8s-runner ]
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
    needs: [ build-neon ]
    strategy:
      fail-fast: false
@@ -190,7 +198,8 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  other-tests:
-    runs-on: [ self-hosted, Linux, k8s-runner ]
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
    needs: [ build-neon ]
    strategy:
      fail-fast: false
@@ -210,14 +219,20 @@ jobs:
          build_type: ${{ matrix.build_type }}
          rust_toolchain: ${{ matrix.rust_toolchain }}
          test_selection: batch_others
-
+          run_with_real_s3: true
+          real_s3_bucket: ci-tests-s3
+          real_s3_region: us-west-2
+          real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
+          real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug'
        uses: ./.github/actions/save-coverage-data

  benchmarks:
-    runs-on: [ self-hosted, Linux, k8s-runner ]
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
    needs: [ build-neon ]
+    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
    strategy:
      fail-fast: false
      matrix:
@@ -245,7 +260,8 @@ jobs:
      # while coverage is currently collected for the debug ones

  coverage-report:
-    runs-on: [ self-hosted, Linux, k8s-runner ]
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
    needs: [ other-tests, pg_regress-tests ]
    strategy:
      fail-fast: false
@@ -270,23 +286,17 @@ jobs:
            target/
          key: v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}

-      - name: Get Neon artifact for restoration
-        uses: actions/download-artifact@v3
+      - name: Get Neon artifact
+        uses: ./.github/actions/download
        with:
          name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
-          path: ./neon-artifact/
+          path: /tmp/neon

-      - name: Extract Neon artifact
-        run: |
-          mkdir -p /tmp/neon/
-          tar -xf ./neon-artifact/neon.tar.zst -C /tmp/neon/
-          rm -rf ./neon-artifact/
-
-      - name: Restore coverage data
-        uses: actions/download-artifact@v3
+      - name: Get coverage artifact
+        uses: ./.github/actions/download
        with:
          name: coverage-data-artifact
-          path: /tmp/coverage/
+          path: /tmp/coverage

      - name: Merge coverage data
        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
@@ -324,40 +334,40 @@ jobs:
            }"

  trigger-e2e-tests:
-   runs-on: [ self-hosted, Linux, k8s-runner ]
-   needs: [ build-neon ]
-   steps:
-     - name: Set PR's status to pending and request a remote CI test
-       run: |
-         COMMIT_SHA=${{ github.event.pull_request.head.sha }}
-         COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-neon ]
+    steps:
+      - name: Set PR's status to pending and request a remote CI test
+        run: |
+          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
+          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}

-         REMOTE_REPO="${{ github.repository_owner }}/cloud"
+          REMOTE_REPO="${{ github.repository_owner }}/cloud"

-         curl -f -X POST \
-         https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-         -H "Accept: application/vnd.github.v3+json" \
-         --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-         --data \
-           "{
-             \"state\": \"pending\",
-             \"context\": \"neon-cloud-e2e\",
-             \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
-           }"
+          curl -f -X POST \
+          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
+          -H "Accept: application/vnd.github.v3+json" \
+          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+          --data \
+            "{
+              \"state\": \"pending\",
+              \"context\": \"neon-cloud-e2e\",
+              \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
+            }"

-         curl -f -X POST \
-         https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-         -H "Accept: application/vnd.github.v3+json" \
-         --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-         --data \
-           "{
-             \"ref\": \"main\",
-             \"inputs\": {
-               \"ci_job_name\": \"neon-cloud-e2e\",
-               \"commit_hash\": \"$COMMIT_SHA\",
-               \"remote_repo\": \"${{ github.repository }}\"
-             }
-           }"
+          curl -f -X POST \
+          https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
+          -H "Accept: application/vnd.github.v3+json" \
+          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+          --data \
+            "{
+              \"ref\": \"main\",
+              \"inputs\": {
+                \"ci_job_name\": \"neon-cloud-e2e\",
+                \"commit_hash\": \"$COMMIT_SHA\",
+                \"remote_repo\": \"${{ github.repository }}\"
+              }
+            }"

  docker-image:
    runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -517,7 +527,7 @@ jobs:
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
            STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
            NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
-            echo "::set-output name=include::[$STAGING]"
+            echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
            PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
            echo "::set-output name=include::[$PRODUCTION]"
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -8,7 +8,7 @@ on:

 defaults:
  run:
-    shell: bash -ex {0}
+    shell: bash -euxo pipefail {0}

 concurrency:
  # Allow only one workflow per any non-`main` branch.
@@ -27,7 +27,7 @@ jobs:
        # Rust toolchains (e.g. nightly or 1.37.0), add them here.
        rust_toolchain: [1.58]
        os: [ubuntu-latest, macos-latest]
-    timeout-minutes: 50
+    timeout-minutes: 60
    name: run regression test suite
    runs-on: ${{ matrix.os }}

--- a/.github/workflows/pg_clients.yml
+++ b/.github/workflows/pg_clients.yml
@@ -40,7 +40,7 @@ jobs:
        key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}

    - name: Install Python deps
-      shell: bash -ex {0}
+      shell: bash -euxo pipefail {0}
      run: ./scripts/pysync

    - name: Run pytest
@@ -49,7 +49,7 @@ jobs:
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
        TEST_OUTPUT: /tmp/test_output
        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      shell: bash -ex {0}
+      shell: bash -euxo pipefail {0}
      run: |
        # Test framework expects we have psql binary;
        # but since we don't really need it in this test, let's mock it
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,17 +11,15 @@ than it was before.

 ## Submitting changes

-1. Make a PR for every change.
-
-   Even seemingly trivial patches can break things in surprising ways.
-Use of common sense is OK. If you're only fixing a typo in a comment,
-it's probably fine to just push it. But if in doubt, open a PR.
-
-2. Get at least one +1 on your PR before you push.
+1. Get at least one +1 on your PR before you push.

   For simple patches, it will only take a minute for someone to review
 it.

+2. Don't force push small changes after making the PR ready for review.
+Doing so will force readers to re-read your entire PR, which will delay
+the review process.
+
 3. Always keep the CI green.

   Do not push, if the CI failed on your PR. Even if you think it's not
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -154,9 +154,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "axum"
-version = "0.5.12"
+version = "0.5.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d16705af05732b7d3258ec0f7b73c03a658a28925e050d8852d5b568ee8bcf4e"
+checksum = "6b9496f0c1d1afb7a2af4338bbe1d969cddfead41d87a9fb3aaa6d0bbc7af648"
 dependencies = [
 "async-trait",
 "axum-core",
@@ -317,15 +317,6 @@ dependencies = [
 "serde",
 ]

-[[package]]
-name = "cast"
-version = "0.2.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
-dependencies = [
- "rustc_version",
-]
-
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -467,7 +458,6 @@ dependencies = [
 "clap 3.2.12",
 "env_logger",
 "hyper",
- "libc",
 "log",
 "postgres",
 "regex",
@@ -505,8 +495,8 @@ name = "control_plane"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "lazy_static",
 "nix",
+ "once_cell",
 "pageserver",
 "postgres",
 "regex",
@@ -517,7 +507,6 @@ dependencies = [
 "tar",
 "thiserror",
 "toml",
- "url",
 "utils",
 "workspace_hack",
 ]
@@ -581,7 +570,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f"
 dependencies = [
 "atty",
- "cast 0.3.0",
+ "cast",
 "clap 2.34.0",
 "criterion-plot",
 "csv",
@@ -602,11 +591,11 @@ dependencies = [

 [[package]]
 name = "criterion-plot"
-version = "0.4.4"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
+checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
 dependencies = [
- "cast 0.2.7",
+ "cast",
 "itertools",
 ]

@@ -682,9 +671,9 @@ dependencies = [

 [[package]]
 name = "crypto-common"
-version = "0.1.5"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ccfd8c0ee4cce11e45b3fd6f9d5e69e0cc62912aa6a0cb1bf4617b0eba5a12f"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
 dependencies = [
 "generic-array",
 "typenum",
@@ -1118,9 +1107,9 @@ dependencies = [

 [[package]]
 name = "gimli"
-version = "0.26.1"
+version = "0.26.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"
+checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d"

 [[package]]
 name = "git-version"
@@ -1186,9 +1175,9 @@ dependencies = [

 [[package]]
 name = "hashbrown"
-version = "0.12.2"
+version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"

 [[package]]
 name = "heck"
@@ -1390,7 +1379,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
 dependencies = [
 "autocfg",
- "hashbrown 0.12.2",
+ "hashbrown 0.12.3",
 ]

 [[package]]
@@ -1602,7 +1591,6 @@ dependencies = [
 name = "metrics"
 version = "0.1.0"
 dependencies = [
- "lazy_static",
 "libc",
 "once_cell",
 "prometheus",
@@ -1677,7 +1665,6 @@ dependencies = [
 "git-version",
 "pageserver",
 "postgres",
- "postgres_ffi",
 "safekeeper",
 "serde_json",
 "utils",
@@ -1855,9 +1842,9 @@ dependencies = [

 [[package]]
 name = "os_str_bytes"
-version = "6.1.0"
+version = "6.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
+checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"

 [[package]]
 name = "pageserver"
@@ -1883,7 +1870,6 @@ dependencies = [
 "humantime-serde",
 "hyper",
 "itertools",
- "lazy_static",
 "metrics",
 "nix",
 "once_cell",
@@ -1905,7 +1891,6 @@ dependencies = [
 "thiserror",
 "tokio",
 "tokio-postgres",
- "tokio-stream",
 "toml_edit",
 "tracing",
 "url",
@@ -2130,9 +2115,9 @@ dependencies = [
 "crc32c",
 "env_logger",
 "hex",
- "lazy_static",
 "log",
 "memoffset",
+ "once_cell",
 "postgres",
 "rand",
 "regex",
@@ -2292,9 +2277,9 @@ dependencies = [
 "hex",
 "hmac 0.12.1",
 "hyper",
- "lazy_static",
 "md5",
 "metrics",
+ "once_cell",
 "parking_lot 0.12.1",
 "pin-project-lite",
 "rand",
@@ -2740,9 +2725,9 @@ dependencies = [

 [[package]]
 name = "rustversion"
-version = "1.0.7"
+version = "1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
+checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8"

 [[package]]
 name = "ryu"
@@ -2764,12 +2749,10 @@ dependencies = [
 "daemonize",
 "etcd_broker",
 "fs2",
- "futures",
 "git-version",
 "hex",
 "humantime",
 "hyper",
- "lazy_static",
 "metrics",
 "once_cell",
 "postgres",
@@ -2784,12 +2767,10 @@ dependencies = [
 "tempfile",
 "tokio",
 "tokio-postgres",
- "tokio-util",
 "toml_edit",
 "tracing",
 "url",
 "utils",
- "walkdir",
 "workspace_hack",
 ]

@@ -3625,9 +3606,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"

 [[package]]
 name = "unicode-ident"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
+checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"

 [[package]]
 name = "unicode-normalization"
@@ -3688,9 +3669,9 @@ dependencies = [
 "hex-literal",
 "hyper",
 "jsonwebtoken",
- "lazy_static",
 "metrics",
 "nix",
+ "once_cell",
 "pin-project-lite",
 "postgres",
 "postgres-protocol",
--- a/4
+++ b/4
@@ -17,6 +17,10 @@ RUN set -e \
 FROM neondatabase/rust:1.58 AS build
 ARG GIT_VERSION=local

+# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
+# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
+# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
+ARG RUSTC_WRAPPER=cachepot
 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
 ARG AWS_SECRET_ACCESS_KEY
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,7 +1,11 @@
 # First transient image to build compute_tools binaries
-# NB: keep in sync with rust image version in .circle/config.yml
+# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
 FROM neondatabase/rust:1.58 AS rust-build

+# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
+# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
+# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
+ARG RUSTC_WRAPPER=cachepot
 ARG CACHEPOT_BUCKET=zenith-rust-cachepot
 ARG AWS_ACCESS_KEY_ID
 ARG AWS_SECRET_ACCESS_KEY
--- a/4
+++ b/4
@@ -29,9 +29,11 @@ else
 endif

 # macOS with brew-installed openssl requires explicit paths
+# It can be configured with the OPENSSL_PREFIX variable
 UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Darwin)
-    PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
+    OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
+    PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
 endif

 # Choose whether we should be silent or verbose
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Neon

-Neon is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
+Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.

 The project used to be called "Zenith". Many of the commands and code comments
 still refer to "zenith", but we are in the process of renaming things.
@@ -12,32 +12,31 @@ Alternatively, compile and run the project [locally](#running-local-installation

 ## Architecture overview

-A Neon installation consists of compute nodes and Neon storage engine.
+A Neon installation consists of compute nodes and a Neon storage engine.

-Compute nodes are stateless PostgreSQL nodes, backed by Neon storage engine.
+Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.

-Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for compute nodes.
- WAL service. The service that receives WAL from compute node and ensures that it is stored durably.
+The Neon storage engine consists of two major components:
+- Pageserver. Scalable storage backend for the compute nodes.
+- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.

 Pageserver consists of:
 - Repository - Neon storage implementation.
 - WAL receiver - service that receives WAL from WAL service and stores it in the repository.
 - Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request.
-
+- WAL redo - service that builds pages from base images and WAL records on Page service request.
 ## Running local installation


 #### Installing dependencies on Linux
-1. Install build dependencies and other useful packages
+1. Install build dependencies and other applicable packages

-* On Ubuntu or Debian this set of packages should be sufficient to build the code:
+* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
 ```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
 libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
 ```
-* On Fedora these packages are needed:
+* On Fedora, these packages are needed:
 ```bash
 dnf install flex bison readline-devel zlib-devel openssl-devel \
  libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
@@ -69,7 +68,7 @@ brew install libpq
 brew link --force libpq
 ```

-#### Building on Linux and OSX
+#### Building on Linux

 1. Build neon and patched postgres
 ```
@@ -80,19 +79,35 @@ cd neon

 # The preferred and default is to make a debug build. This will create a 
 # demonstrably slower build than a release build. If you want to use a release
-# build, utilize "`BUILD_TYPE=release make -j`nproc``" 
+# build, utilize "BUILD_TYPE=release make -j`nproc`" 

 make -j`nproc`
 ```

-#### dependency installation notes
+#### Building on OSX
+
+1. Build neon and patched postgres
+```
+# Note: The path to the neon sources can not contain a space.
+
+git clone --recursive https://github.com/neondatabase/neon.git
+cd neon
+
+# The preferred and default is to make a debug build. This will create a 
+# demonstrably slower build than a release build. If you want to use a release
+# build, utilize "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`" 
+
+make -j`sysctl -n hw.logicalcpu`
+```
+
+#### Dependency installation notes
 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
+Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry](https://python-poetry.org/)) in the project directory.


-#### running neon database
+#### Running neon database
 1. Start pageserver and postgres on top of it (should be called from repo root):
 ```sh
 # Create repository in .neon with proper paths to binaries and data
@@ -123,7 +138,7 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=pos
 main  127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
 ```

-2. Now it is possible to connect to postgres and run some queries:
+2. Now, it is possible to connect to postgres and run some queries:
 ```text
 > psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# CREATE TABLE t(key int primary key, value text);
@@ -181,14 +196,16 @@ postgres=# select * from t;
 (1 row)
 ```

-4. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
-   you have just started. You can stop them all with one command:
+4. If you want to run tests afterward (see below), you must stop all the running pageserver, safekeeper, and postgres instances
+   you have just started. You can terminate them all with one command:
 ```sh
 > ./target/debug/neon_local stop
 ```

 ## Running tests

+Ensure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).
+
 ```sh
 git clone --recursive https://github.com/neondatabase/neon.git
 make # builds also postgres and installs it to ./tmp_install
@@ -205,8 +222,8 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d

 ### Postgres-specific terms

-Due to Neon's very close relation with PostgreSQL internals, there are numerous specific terms used.
-Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
+Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
+The same applies to certain spellings: e.g., we use MB to denote 1024 * 1024 bytes; while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.

 To get more familiar with this aspect, refer to:

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -4,7 +4,6 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
-libc = "0.2"
 anyhow = "1.0"
 chrono = "0.4"
 clap = "3.0"
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -157,7 +157,7 @@ fn main() -> Result<()> {
            exit(code)
        }
        Err(error) => {
-            error!("could not start the compute node: {}", error);
+            error!("could not start the compute node: {:?}", error);

            let mut state = compute.state.write().unwrap();
            state.error = Some(format!("{:?}", error));
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -9,12 +9,11 @@ postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8
 serde = { version = "1.0", features = ["derive"] }
 serde_with = "1.12.0"
 toml = "0.5"
-lazy_static = "1.4"
+once_cell = "1.13.0"
 regex = "1"
 anyhow = "1.0"
 thiserror = "1"
 nix = "0.23"
-url = "2.2.2"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

 pageserver = { path = "../pageserver" }
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -30,14 +30,14 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_stdout_file =
        fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
            format!(
-                "Failed to create ectd stout file in directory {}",
+                "Failed to create etcd stout file in directory {}",
                etcd_data_dir.display()
            )
        })?;
    let etcd_stderr_file =
        fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
            format!(
-                "Failed to create ectd stderr file in directory {}",
+                "Failed to create etcd stderr file in directory {}",
                etcd_data_dir.display()
            )
        })?;
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -51,7 +51,11 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
 }

 fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
-    for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
+    for env_key in [
+        "AWS_ACCESS_KEY_ID",
+        "AWS_SECRET_ACCESS_KEY",
+        "AWS_SESSION_TOKEN",
+    ] {
        if let Ok(value) = std::env::var(env_key) {
            cmd = cmd.env(env_key, value);
        }
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -5,7 +5,7 @@
 /// enough to extract a few settings we need in Zenith, assuming you don't do
 /// funny stuff like include-directives or funny escaping.
 use anyhow::{bail, Context, Result};
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use regex::Regex;
 use std::collections::HashMap;
 use std::fmt;
@@ -19,9 +19,7 @@ pub struct PostgresConf {
    hash: HashMap<String, String>,
 }

-lazy_static! {
-    static ref CONF_LINE_RE: Regex = Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap();
-}
+static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());

 impl PostgresConf {
    pub fn new() -> PostgresConf {
@@ -139,10 +137,10 @@ fn escape_str(s: &str) -> String {
    //
    // This regex is a bit more conservative than the rules in guc-file.l, so we quote some
    // strings that PostgreSQL would accept without quoting, but that's OK.
-    lazy_static! {
-        static ref UNQUOTED_RE: Regex =
-            Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap();
-    }
+
+    static UNQUOTED_RE: Lazy<Regex> =
+        Lazy::new(|| Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap());
+
    if UNQUOTED_RE.is_match(s) {
        s.to_string()
    } else {
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -247,7 +247,7 @@ impl SafekeeperNode {
        // Shutting down may take a long time,
        // if safekeeper flushes a lot of data
        let mut tcp_stopped = false;
-        for _ in 0..100 {
+        for i in 0..600 {
            if !tcp_stopped {
                if let Err(err) = TcpStream::connect(&address) {
                    tcp_stopped = true;
@@ -272,9 +272,11 @@ impl SafekeeperNode {
                    }
                }
            }
-            print!(".");
-            io::stdout().flush().unwrap();
-            thread::sleep(Duration::from_secs(1));
+            if i % 10 == 0 {
+                print!(".");
+                io::stdout().flush().unwrap();
+            }
+            thread::sleep(Duration::from_millis(100));
        }

        bail!("Failed to stop safekeeper with pid {}", pid);
@@ -304,10 +306,9 @@ impl SafekeeperNode {
        Ok(self
            .http_request(
                Method::POST,
-                format!("{}/{}", self.http_base_url, "timeline"),
+                format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
            )
            .json(&TimelineCreateRequest {
-                tenant_id,
                timeline_id,
                peer_ids,
            })
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -12,9 +12,9 @@ use anyhow::{bail, Context};
 use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
-use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
-use pageserver::tenant_mgr::TenantInfo;
-use pageserver::timelines::TimelineInfo;
+use pageserver::http::models::{
+    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
+};
 use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
@@ -318,7 +318,7 @@ impl PageServerNode {
        // Shutting down may take a long time,
        // if pageserver checkpoints a lot of data
        let mut tcp_stopped = false;
-        for _ in 0..100 {
+        for i in 0..600 {
            if !tcp_stopped {
                if let Err(err) = TcpStream::connect(&address) {
                    tcp_stopped = true;
@@ -344,9 +344,11 @@ impl PageServerNode {
                    }
                }
            }
-            print!(".");
-            io::stdout().flush().unwrap();
-            thread::sleep(Duration::from_secs(1));
+            if i % 10 == 0 {
+                print!(".");
+                io::stdout().flush().unwrap();
+            }
+            thread::sleep(Duration::from_millis(100));
        }

        bail!("Failed to stop pageserver with pid {}", pid);
@@ -399,6 +401,7 @@ impl PageServerNode {
                    .get("checkpoint_distance")
                    .map(|x| x.parse::<u64>())
                    .transpose()?,
+                checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
                compaction_target_size: settings
                    .get("compaction_target_size")
                    .map(|x| x.parse::<u64>())
@@ -453,6 +456,7 @@ impl PageServerNode {
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'checkpoint_distance' as an integer")?,
+                checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
                compaction_target_size: settings
                    .get("compaction_target_size")
                    .map(|x| x.parse::<u64>())
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -1,6 +1,8 @@
 #!/bin/sh
 set -eux

+pageserver_id_param="${NODE_ID:-10}"
+
 broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
 if [ "$broker_endpoints_param" != "absent" ]; then
    broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
@@ -8,10 +10,12 @@ else
    broker_endpoints_param=''
 fi

+remote_storage_param="${REMOTE_STORAGE:-}"
+
 if [ "$1" = 'pageserver' ]; then
    if [ ! -d "/data/tenants" ]; then
        echo "Initializing pageserver data directory"
-        pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
+        pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=${pageserver_id_param}" $broker_endpoints_param $remote_storage_param
    fi
    echo "Staring pageserver at 0.0.0.0:6400"
    pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -52,10 +52,8 @@
 - [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
 - [settings.md](./settings.md)
 #FIXME: move these under sourcetree.md
-#- [pageserver/README.md](/pageserver/README.md)
 #- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)
 #- [test_runner/README.md](/test_runner/README.md)
-#- [safekeeper/README.md](/safekeeper/README.md)


 # RFCs
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -75,7 +75,7 @@ layer's Segment and range of LSNs.
 There are two kinds of layers, in-memory and on-disk layers. In-memory
 layers are used to ingest incoming WAL, and provide fast access
 to the recent page versions. On-disk layers are stored as files on disk, and
-are immutable. See pageserver/src/layered_repository/README.md for more.
+are immutable. See [pageserver-storage.md](./pageserver-storage.md) for more.

 ### Layer file (on-disk layer)

@@ -111,7 +111,7 @@ PostgreSQL LSNs and functions to monitor them:
 * `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
 [source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):

-Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
+Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md) for more information.
 * `CommitLSN`: position in WAL confirmed by quorum safekeepers.
 * `RestartLSN`: position in WAL confirmed by all safekeepers.
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
--- a/docs/pageserver-services.md
+++ b/docs/pageserver-services.md
@@ -68,8 +68,6 @@ There are the following implementations present:
 * local filesystem — to use in tests mainly
 * AWS S3           - to use in production

-Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and corresponding Rust file docs, parameters documentation can be found at [settings docs](../docs/settings.md).
-
 The backup service is disabled by default and can be enabled to interact with a single remote storage.

 CLI examples:
@@ -118,7 +116,7 @@ implemented by the LayeredRepository object in
 `layered_repository.rs`. There is only that one implementation of the
 Repository trait, but it's still a useful abstraction that keeps the
 interface for the low-level storage functionality clean. The layered
-storage format is described in layered_repository/README.md.
+storage format is described in [pageserver-storage.md](./pageserver-storage.md).

 Each repository consists of multiple Timelines. Timeline is a
 workhorse that accepts page changes from the WAL, and serves
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -15,7 +15,7 @@ listen_pg_addr = '127.0.0.1:64000'
 listen_http_addr = '127.0.0.1:9898'

 checkpoint_distance = '268435456' # in bytes
-checkpoint_period = '1 s'
+checkpoint_timeout = '10m'

 gc_period = '100 s'
 gc_horizon = '67108864'
@@ -46,7 +46,7 @@ Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#ta

 All values can be passed as an argument to the pageserver binary, using the `-c` parameter and specified as a valid TOML string. All tables should be passed in the inline form.

-Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage={local_path='/some/local/path/'}"`
+Example: `${PAGESERVER_BIN} -c "checkpoint_timeout = '10 m'" -c "remote_storage={local_path='/some/local/path/'}"`

 Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.

@@ -82,6 +82,14 @@ S3.

 The unit is # of bytes.

+#### checkpoint_timeout
+
+Apart from `checkpoint_distance`, flushing of the open layer is also triggered
+once `checkpoint_timeout` has elapsed since the last flush. This ensures that WAL
+is eventually uploaded to S3 even when activity has stopped.
+
+The default is 10m.
+
 #### compaction_period

 Every `compaction_period` seconds, the page server checks if
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -28,7 +28,7 @@ The pageserver has a few different duties:
 - Receive WAL from the WAL service and decode it.
 - Replay WAL that's applicable to the chunks that the Page Server maintains

-For more detailed info, see [/pageserver/README](/pageserver/README.md)
+For more detailed info, see [pageserver-services.md](./pageserver-services.md)

 `/proxy`:

@@ -57,7 +57,7 @@ PostgreSQL extension that contains functions needed for testing and debugging.
 The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
 It acts as a holding area and redistribution center for recently generated WAL.

-For more detailed info, see [/safekeeper/README](/safekeeper/README.md)
+For more detailed info, see [walservice.md](./walservice.md)

 `/workspace_hack`:
 The workspace_hack crate exists only to pin down some dependencies.
--- a/docs/walservice.md
+++ b/docs/walservice.md
@@ -75,8 +75,8 @@ safekeepers. The Paxos and crash recovery algorithm ensures that only
 one primary node can be actively streaming WAL to the quorum of
 safekeepers.

-See README_PROTO.md for a more detailed description of the consensus
-protocol. spec/ contains TLA+ specification of it.
+See [this section](safekeeper-protocol.md) for a more detailed description of
+the consensus protocol. spec/ contains a TLA+ specification of it.

 # Q&A

--- a/libs/etcd_broker/Cargo.toml
+++ b/libs/etcd_broker/Cargo.toml
@@ -9,7 +9,7 @@
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_with = "1.12.0"
- once_cell = "1.8.0"
+ once_cell = "1.13.0"

 utils = { path = "../utils" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/metrics/Cargo.toml
+++ b/libs/metrics/Cargo.toml
@@ -6,6 +6,5 @@ edition = "2021"
 [dependencies]
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
 libc = "0.2"
-lazy_static = "1.4"
-once_cell = "1.8.0"
+once_cell = "1.13.0"
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -2,7 +2,10 @@
 //! make sure that we use the same dep version everywhere.
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
+use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
+pub use prometheus::opts;
+pub use prometheus::register;
 pub use prometheus::{core, default_registry, proto};
 pub use prometheus::{exponential_buckets, linear_buckets};
 pub use prometheus::{register_gauge, Gauge};
@@ -18,6 +21,17 @@ pub use prometheus::{Encoder, TextEncoder};
 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};

+pub type UIntGauge = GenericGauge<AtomicU64>;
+pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
+
+#[macro_export]
+macro_rules! register_uint_gauge_vec {
+    ($NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{
+        let gauge_vec = UIntGaugeVec::new($crate::opts!($NAME, $HELP), $LABELS_NAMES).unwrap();
+        $crate::register(Box::new(gauge_vec.clone())).map(|_| gauge_vec)
+    }};
+}
+
 /// Gathers all Prometheus metrics and records the I/O stats just before that.
 ///
 /// Metrics gathering is a relatively simple and standalone operation, so
@@ -27,19 +41,22 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
    prometheus::gather()
 }

-lazy_static! {
-    static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
+static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
        "libmetrics_disk_io_bytes_total",
        "Bytes written and read from disk, grouped by the operation (read|write)",
        &["io_operation"]
    )
-    .expect("Failed to register disk i/o bytes int gauge vec");
-    static ref MAXRSS_KB: IntGauge = register_int_gauge!(
+    .expect("Failed to register disk i/o bytes int gauge vec")
+});
+
+static MAXRSS_KB: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!(
        "libmetrics_maxrss_kb",
        "Memory usage (Maximum Resident Set Size)"
    )
-    .expect("Failed to register maxrss_kb int gauge");
-}
+    .expect("Failed to register maxrss_kb int gauge")
+});

 pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
    0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
--- a/libs/metrics/src/wrappers.rs
+++ b/libs/metrics/src/wrappers.rs
@@ -10,13 +10,13 @@ use std::io::{Read, Result, Write};
 /// # use std::io::{Result, Read};
 /// # use metrics::{register_int_counter, IntCounter};
 /// # use metrics::CountedReader;
+/// # use once_cell::sync::Lazy;
 /// #
-/// # lazy_static::lazy_static! {
-/// #     static ref INT_COUNTER: IntCounter = register_int_counter!(
+/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
 /// #         "int_counter",
 /// #         "let's count something!"
-/// #     ).unwrap();
-/// # }
+/// #     ).unwrap()
+/// # });
 /// #
 /// fn do_some_reads(stream: impl Read, count: usize) -> Result<Vec<u8>> {
 ///     let mut reader = CountedReader::new(stream, |cnt| {
@@ -85,13 +85,13 @@ impl<T: Read> Read for CountedReader<'_, T> {
 /// # use std::io::{Result, Write};
 /// # use metrics::{register_int_counter, IntCounter};
 /// # use metrics::CountedWriter;
+/// # use once_cell::sync::Lazy;
 /// #
-/// # lazy_static::lazy_static! {
-/// #     static ref INT_COUNTER: IntCounter = register_int_counter!(
+/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
 /// #         "int_counter",
 /// #         "let's count something!"
-/// #     ).unwrap();
-/// # }
+/// #     ).unwrap()
+/// # });
 /// #
 /// fn do_some_writes(stream: impl Write, payload: &[u8]) -> Result<()> {
 ///     let mut writer = CountedWriter::new(stream, |cnt| {
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -12,7 +12,7 @@ byteorder = "1.4.3"
 anyhow = "1.0"
 crc32c = "0.6.0"
 hex = "0.4.3"
-lazy_static = "1.4"
+once_cell = "1.13.0"
 log = "0.4.14"
 memoffset = "0.6.2"
 thiserror = "1.0"
--- a/libs/postgres_ffi/src/relfile_utils.rs
+++ b/libs/postgres_ffi/src/relfile_utils.rs
@@ -2,7 +2,7 @@
 //! Common utilities for dealing with PostgreSQL relation files.
 //!
 use crate::pg_constants;
-use lazy_static::lazy_static;
+use once_cell::sync::OnceCell;
 use regex::Regex;

 #[derive(Debug, Clone, thiserror::Error, PartialEq)]
@@ -54,11 +54,14 @@ pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
 /// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
 ///
 pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
-    lazy_static! {
-        static ref RELFILE_RE: Regex =
-            Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
-    }
+    static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
+    RELFILE_RE.get_or_init(|| {
+        Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
+    });
+
    let caps = RELFILE_RE
+        .get()
+        .unwrap()
        .captures(fname)
        .ok_or(FilePathError::InvalidFileName)?;

--- a/libs/postgres_ffi/src/waldecoder.rs
+++ b/libs/postgres_ffi/src/waldecoder.rs
@@ -13,24 +13,30 @@ use super::xlog_utils::*;
 use super::XLogLongPageHeaderData;
 use super::XLogPageHeaderData;
 use super::XLogRecord;
+use super::XLOG_PAGE_MAGIC;
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use crc32c::*;
 use log::*;
 use std::cmp::min;
+use std::num::NonZeroU32;
 use thiserror::Error;
 use utils::lsn::Lsn;

+enum State {
+    WaitingForRecord,
+    ReassemblingRecord {
+        recordbuf: BytesMut,
+        contlen: NonZeroU32,
+    },
+    SkippingEverything {
+        skip_until_lsn: Lsn,
+    },
+}
+
 pub struct WalStreamDecoder {
    lsn: Lsn,
-
-    startlsn: Lsn, // LSN where this record starts
-    contlen: u32,
-    padlen: u32,
-
    inputbuf: BytesMut,
-
-    /// buffer used to reassemble records that cross page boundaries.
-    recordbuf: BytesMut,
+    state: State,
 }

 #[derive(Error, Debug, Clone)]
@@ -48,13 +54,8 @@ impl WalStreamDecoder {
    pub fn new(lsn: Lsn) -> WalStreamDecoder {
        WalStreamDecoder {
            lsn,
-
-            startlsn: Lsn(0),
-            contlen: 0,
-            padlen: 0,
-
            inputbuf: BytesMut::new(),
-            recordbuf: BytesMut::new(),
+            state: State::WaitingForRecord,
        }
    }

@@ -67,6 +68,58 @@ impl WalStreamDecoder {
        self.inputbuf.extend_from_slice(buf);
    }

+    fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError> {
+        let validate_impl = || {
+            if hdr.xlp_magic != XLOG_PAGE_MAGIC as u16 {
+                return Err(format!(
+                    "invalid xlog page header: xlp_magic={}, expected {}",
+                    hdr.xlp_magic, XLOG_PAGE_MAGIC
+                ));
+            }
+            if hdr.xlp_pageaddr != self.lsn.0 {
+                return Err(format!(
+                    "invalid xlog page header: xlp_pageaddr={}, expected {}",
+                    hdr.xlp_pageaddr, self.lsn
+                ));
+            }
+            match self.state {
+                State::WaitingForRecord => {
+                    if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {
+                        return Err(
+                            "invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD".into(),
+                        );
+                    }
+                    if hdr.xlp_rem_len != 0 {
+                        return Err(format!(
+                            "invalid xlog page header: xlp_rem_len={}, but it's not a contrecord",
+                            hdr.xlp_rem_len
+                        ));
+                    }
+                }
+                State::ReassemblingRecord { contlen, .. } => {
+                    if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD == 0 {
+                        return Err(
+                            "invalid xlog page header: XLP_FIRST_IS_CONTRECORD expected, not found"
+                                .into(),
+                        );
+                    }
+                    if hdr.xlp_rem_len != contlen.get() {
+                        return Err(format!(
+                            "invalid xlog page header: xlp_rem_len={}, expected {}",
+                            hdr.xlp_rem_len,
+                            contlen.get()
+                        ));
+                    }
+                }
+                State::SkippingEverything { .. } => {
+                    panic!("Should not be validating page header in the SkippingEverything state");
+                }
+            };
+            Ok(())
+        };
+        validate_impl().map_err(|msg| WalDecodeError { msg, lsn: self.lsn })
+    }
+
    /// Attempt to decode another WAL record from the input that has been fed to the
    /// decoder so far.
    ///
@@ -76,128 +129,121 @@ impl WalStreamDecoder {
    ///     Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
    ///
    pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
-        let recordbuf;
-
        // Run state machine that validates page headers, and reassembles records
        // that cross page boundaries.
        loop {
            // parse and verify page boundaries as we go
-            if self.padlen > 0 {
-                // We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
-                if self.inputbuf.remaining() < self.padlen as usize {
-                    return Ok(None);
-                }
+            // However, we may have to skip some page headers if we're processing the XLOG_SWITCH record or skipping padding for whatever reason.
+            match self.state {
+                State::WaitingForRecord | State::ReassemblingRecord { .. } => {
+                    if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
+                        // parse long header

-                // skip padding
-                self.inputbuf.advance(self.padlen as usize);
-                self.lsn += self.padlen as u64;
-                self.padlen = 0;
-            } else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
-                // parse long header
+                        if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
+                            return Ok(None);
+                        }

-                if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
-                    return Ok(None);
-                }
+                        let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(
+                            |e| WalDecodeError {
+                                msg: format!("long header deserialization failed {}", e),
+                                lsn: self.lsn,
+                            },
+                        )?;

-                let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
-                    WalDecodeError {
-                        msg: format!("long header deserialization failed {}", e),
-                        lsn: self.lsn,
+                        self.validate_page_header(&hdr.std)?;
+
+                        self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
+                    } else if self.lsn.block_offset() == 0 {
+                        if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
+                            return Ok(None);
+                        }
+
+                        let hdr =
+                            XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
+                                WalDecodeError {
+                                    msg: format!("header deserialization failed {}", e),
+                                    lsn: self.lsn,
+                                }
+                            })?;
+
+                        self.validate_page_header(&hdr)?;
+
+                        self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
                    }
-                })?;
-
-                if hdr.std.xlp_pageaddr != self.lsn.0 {
-                    return Err(WalDecodeError {
-                        msg: "invalid xlog segment header".into(),
-                        lsn: self.lsn,
-                    });
                }
-                // TODO: verify the remaining fields in the header
-
-                self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
-                continue;
-            } else if self.lsn.block_offset() == 0 {
-                if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
-                    return Ok(None);
-                }
-
-                let hdr = XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
-                    WalDecodeError {
-                        msg: format!("header deserialization failed {}", e),
-                        lsn: self.lsn,
+                State::SkippingEverything { .. } => {}
+            }
+            match &mut self.state {
+                State::WaitingForRecord => {
+                    // need to have at least the xl_tot_len field
+                    if self.inputbuf.remaining() < 4 {
+                        return Ok(None);
                    }
-                })?;

-                if hdr.xlp_pageaddr != self.lsn.0 {
-                    return Err(WalDecodeError {
-                        msg: "invalid xlog page header".into(),
-                        lsn: self.lsn,
-                    });
+                    // peek xl_tot_len at the beginning of the record.
+                    // FIXME: assumes little-endian
+                    let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
+                    if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
+                        return Err(WalDecodeError {
+                            msg: format!("invalid xl_tot_len {}", xl_tot_len),
+                            lsn: self.lsn,
+                        });
+                    }
+                    // Fast path for the common case that the whole record fits on the page.
+                    let pageleft = self.lsn.remaining_in_block() as u32;
+                    if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
+                        self.lsn += xl_tot_len as u64;
+                        let recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
+                        return Ok(Some(self.complete_record(recordbuf)?));
+                    } else {
+                        // Need to assemble the record from pieces. Remember the size of the
+                        // record, and loop back. On next iteration, we will reach the
+                        // `ReassemblingRecord` arm below, and copy the part of the record that
+                        // was on this page to 'recordbuf'.  Subsequent iterations will skip
+                        // page headers, and append the continuations from the next pages to it.
+                        self.state = State::ReassemblingRecord {
+                            recordbuf: BytesMut::with_capacity(xl_tot_len as usize),
+                            contlen: NonZeroU32::new(xl_tot_len).unwrap(),
+                        }
+                    }
                }
-                // TODO: verify the remaining fields in the header
+                State::ReassemblingRecord { recordbuf, contlen } => {
+                    // we're continuing a record, possibly from previous page.
+                    let pageleft = self.lsn.remaining_in_block() as u32;

-                self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
-                continue;
-            } else if self.contlen == 0 {
-                assert!(self.recordbuf.is_empty());
+                    // read the rest of the record, or as much as fits on this page.
+                    let n = min(contlen.get(), pageleft) as usize;

-                // need to have at least the xl_tot_len field
-                if self.inputbuf.remaining() < 4 {
-                    return Ok(None);
+                    if self.inputbuf.remaining() < n {
+                        return Ok(None);
+                    }
+
+                    recordbuf.put(self.inputbuf.split_to(n));
+                    self.lsn += n as u64;
+                    *contlen = match NonZeroU32::new(contlen.get() - n as u32) {
+                        Some(x) => x,
+                        None => {
+                            // The record is now complete.
+                            let recordbuf = std::mem::replace(recordbuf, BytesMut::new()).freeze();
+                            return Ok(Some(self.complete_record(recordbuf)?));
+                        }
+                    }
                }
-
-                // peek xl_tot_len at the beginning of the record.
-                // FIXME: assumes little-endian
-                self.startlsn = self.lsn;
-                let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
-                if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
-                    return Err(WalDecodeError {
-                        msg: format!("invalid xl_tot_len {}", xl_tot_len),
-                        lsn: self.lsn,
-                    });
+                State::SkippingEverything { skip_until_lsn } => {
+                    assert!(*skip_until_lsn >= self.lsn);
+                    let n = skip_until_lsn.0 - self.lsn.0;
+                    if self.inputbuf.remaining() < n as usize {
+                        return Ok(None);
+                    }
+                    self.inputbuf.advance(n as usize);
+                    self.lsn += n;
+                    self.state = State::WaitingForRecord;
                }
-
-                // Fast path for the common case that the whole record fits on the page.
-                let pageleft = self.lsn.remaining_in_block() as u32;
-                if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
-                    // Take the record from the 'inputbuf', and validate it.
-                    recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
-                    self.lsn += xl_tot_len as u64;
-                    break;
-                } else {
-                    // Need to assemble the record from pieces. Remember the size of the
-                    // record, and loop back. On next iteration, we will reach the 'else'
-                    // branch below, and copy the part of the record that was on this page
-                    // to 'recordbuf'.  Subsequent iterations will skip page headers, and
-                    // append the continuations from the next pages to 'recordbuf'.
-                    self.recordbuf.reserve(xl_tot_len as usize);
-                    self.contlen = xl_tot_len;
-                    continue;
-                }
-            } else {
-                // we're continuing a record, possibly from previous page.
-                let pageleft = self.lsn.remaining_in_block() as u32;
-
-                // read the rest of the record, or as much as fits on this page.
-                let n = min(self.contlen, pageleft) as usize;
-
-                if self.inputbuf.remaining() < n {
-                    return Ok(None);
-                }
-
-                self.recordbuf.put(self.inputbuf.split_to(n));
-                self.lsn += n as u64;
-                self.contlen -= n as u32;
-
-                if self.contlen == 0 {
-                    // The record is now complete.
-                    recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new()).freeze();
-                    break;
-                }
-                continue;
            }
        }
+    }

+    fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError> {
        // We now have a record in the 'recordbuf' local variable.
        let xlogrec =
            XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]).map_err(|e| {
@@ -219,18 +265,20 @@ impl WalStreamDecoder {

        // XLOG_SWITCH records are special. If we see one, we need to skip
        // to the next WAL segment.
-        if xlogrec.is_xlog_switch_record() {
+        let next_lsn = if xlogrec.is_xlog_switch_record() {
            trace!("saw xlog switch record at {}", self.lsn);
-            self.padlen = self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
+            self.lsn + self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64)
        } else {
            // Pad to an 8-byte boundary
-            self.padlen = self.lsn.calc_padding(8u32) as u32;
-        }
+            self.lsn.align()
+        };
+        self.state = State::SkippingEverything {
+            skip_until_lsn: next_lsn,
+        };

        // We should return LSN of the next record, not the last byte of this record or
        // the byte immediately after. Note that this handles both XLOG_SWITCH and usual
        // records, the former "spans" until the next WAL segment (see test_xlog_switch).
-        let result = (self.lsn + self.padlen as u64, recordbuf);
-        Ok(Some(result))
+        Ok((next_lsn, recordbuf))
    }
 }
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -16,7 +16,7 @@ use crate::XLogRecord;
 use crate::XLOG_PAGE_MAGIC;

 use crate::pg_constants::WAL_SEGMENT_SIZE;
-use anyhow::{bail, ensure};
+use anyhow::{anyhow, bail, ensure};
 use byteorder::{ByteOrder, LittleEndian};
 use bytes::BytesMut;
 use bytes::{Buf, Bytes};
@@ -159,7 +159,7 @@ fn find_end_of_wal_segment(
    let mut buf = [0u8; XLOG_BLCKSZ];
    let file_name = XLogFileName(tli, segno, wal_seg_size);
    let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
-    let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
+    let mut file = File::open(data_dir.join(file_name.clone() + ".partial"))?;
    file.seek(SeekFrom::Start(offs as u64))?;
    // xl_crc is the last field in XLogRecord, will not be read into rec_hdr
    const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
@@ -396,10 +396,13 @@ pub fn find_end_of_wal(
    let mut high_tli: TimeLineID = 0;
    let mut high_ispartial = false;

-    for entry in fs::read_dir(data_dir).unwrap().flatten() {
+    for entry in fs::read_dir(data_dir)?.flatten() {
        let ispartial: bool;
        let entry_name = entry.file_name();
-        let fname = entry_name.to_str().unwrap();
+        let fname = entry_name
+            .to_str()
+            .ok_or_else(|| anyhow!("Invalid file name"))?;
+
        /*
         * Check if the filename looks like an xlog file, or a .partial file.
         */
@@ -411,7 +414,7 @@ pub fn find_end_of_wal(
            continue;
        }
        let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
-        if !ispartial && entry.metadata().unwrap().len() != wal_seg_size as u64 {
+        if !ispartial && entry.metadata()?.len() != wal_seg_size as u64 {
            continue;
        }
        if segno > high_segno
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -10,7 +10,7 @@ anyhow = "1.0"
 clap = "3.0"
 env_logger = "0.9"
 log = "0.4"
-once_cell = "1.8.0"
+once_cell = "1.13.0"
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 postgres_ffi = { path = "../" }
 tempfile = "3.2"
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2021"
 anyhow = { version = "1.0", features = ["backtrace"] }
 async-trait = "0.1"
 metrics = { version = "0.1", path = "../metrics" }
-once_cell = "1.8.0"
+once_cell = "1.13.0"
 rusoto_core = "0.48"
 rusoto_s3 = "0.48"
 serde = { version = "1.0", features = ["derive"] }
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -66,6 +66,9 @@ pub trait RemoteStorage: Send + Sync {
    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;

    /// Lists all top level subdirectories for a given prefix
+    /// Note: here we assume that if the prefix is passed, it was obtained via remote_object_id,
+    /// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS),
+    /// so this method doesn't need to.
    async fn list_prefixes(
        &self,
        prefix: Option<Self::RemoteObjectId>,
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -116,7 +116,7 @@ impl RemoteStorage for LocalFs {
        prefix: Option<Self::RemoteObjectId>,
    ) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
        let path = match prefix {
-            Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
+            Some(prefix) => Cow::Owned(prefix),
            None => Cow::Borrowed(&self.storage_root),
        };
        get_all_files(path.as_ref(), false).await
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -171,17 +171,25 @@ impl S3Bucket {

        let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
        let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
+        // The session token is used when authorizing through SSO,
+        // which is typically the case when testing locally on a developer machine.
+        let session_token = std::env::var("AWS_SESSION_TOKEN").ok();

        let client = if access_key_id.is_none() && secret_access_key.is_none() {
            debug!("Using IAM-based AWS access");
            S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
        } else {
-            debug!("Using credentials-based AWS access");
+            debug!(
+                "Using credentials-based AWS access. Session token is set: {}",
+                session_token.is_some()
+            );
            S3Client::new_with(
                request_dispatcher,
-                StaticProvider::new_minimal(
+                StaticProvider::new(
                    access_key_id.unwrap_or_default(),
                    secret_access_key.unwrap_or_default(),
+                    session_token,
+                    None,
                ),
                region,
            )
@@ -304,32 +312,24 @@ impl RemoteStorage for S3Bucket {
        Ok(document_keys)
    }

+    /// See the doc for `RemoteStorage::list_prefixes`
    /// Note: it wont include empty "directories"
    async fn list_prefixes(
        &self,
        prefix: Option<Self::RemoteObjectId>,
    ) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
-        let list_prefix = match prefix {
-            Some(prefix) => {
-                let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
-                // if there is no trailing / in default prefix and
-                // supplied prefix does not start with "/" insert it
-                if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
-                    || prefix.0.starts_with(S3_PREFIX_SEPARATOR))
-                {
-                    prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
-                }
-
-                prefix_in_bucket.push_str(&prefix.0);
+        // get the passed prefix or if it is not set use prefix_in_bucket value
+        let list_prefix = prefix
+            .map(|p| p.0)
+            .or_else(|| self.prefix_in_bucket.clone())
+            .map(|mut p| {
                // required to end with a separator
                // otherwise request will return only the entry of a prefix
-                if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
-                    prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
+                if !p.ends_with(S3_PREFIX_SEPARATOR) {
+                    p.push(S3_PREFIX_SEPARATOR);
                }
-                Some(prefix_in_bucket)
-            }
-            None => self.prefix_in_bucket.clone(),
-        };
+                p
+            });

        let mut document_keys = Vec::new();

--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -8,7 +8,6 @@ anyhow = "1.0"
 bincode = "1.3"
 bytes = "1.0.1"
 hyper = { version = "0.14.7", features = ["full"] }
-lazy_static = "1.4.0"
 pin-project-lite = "0.2.7"
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
@@ -28,6 +27,8 @@ rustls = "0.20.2"
 rustls-split = "0.3.0"
 git-version = "0.3.5"
 serde_with = "1.12.0"
+once_cell = "1.13.0"
+

 metrics = { path = "../metrics" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -4,8 +4,8 @@ use crate::zid::ZTenantId;
 use anyhow::anyhow;
 use hyper::header::AUTHORIZATION;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
-use lazy_static::lazy_static;
 use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
+use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
@@ -16,13 +16,13 @@ use std::net::TcpListener;

 use super::error::ApiError;

-lazy_static! {
-    static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
+static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
        "libmetrics_metric_handler_requests_total",
        "Number of metric requests made"
    )
-    .expect("failed to define a metric");
-}
+    .expect("failed to define a metric")
+});

 async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
    info!("{} {} {}", info.method(), info.uri().path(), res.status(),);
--- a/libs/utils/src/pq_proto.rs
+++ b/libs/utils/src/pq_proto.rs
@@ -47,10 +47,12 @@ pub enum FeStartupPacket {
    StartupMessage {
        major_version: u32,
        minor_version: u32,
-        params: HashMap<String, String>,
+        params: StartupMessageParams,
    },
 }

+pub type StartupMessageParams = HashMap<String, String>;
+
 #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
 pub struct CancelKeyData {
    pub backend_pid: i32,
--- a/libs/utils/tests/ssl_test.rs
+++ b/libs/utils/tests/ssl_test.rs
@@ -7,7 +7,7 @@ use std::{

 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;

 use utils::postgres_backend::{AuthType, Handler, PostgresBackend};

@@ -19,16 +19,15 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
    (server_stream, client_stream)
 }

-lazy_static! {
-    static ref KEY: rustls::PrivateKey = {
-        let mut cursor = Cursor::new(include_bytes!("key.pem"));
-        rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
-    };
-    static ref CERT: rustls::Certificate = {
-        let mut cursor = Cursor::new(include_bytes!("cert.pem"));
-        rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
-    };
-}
+static KEY: Lazy<rustls::PrivateKey> = Lazy::new(|| {
+    let mut cursor = Cursor::new(include_bytes!("key.pem"));
+    rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
+});
+
+static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
+    let mut cursor = Cursor::new(include_bytes!("cert.pem"));
+    rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
+});

 #[test]
 fn ssl() {
--- a/neon_local/Cargo.toml
+++ b/neon_local/Cargo.toml
@@ -15,6 +15,5 @@ git-version = "0.3.5"
 pageserver = { path = "../pageserver" }
 control_plane = { path = "../control_plane" }
 safekeeper = { path = "../safekeeper" }
-postgres_ffi = { path = "../libs/postgres_ffi" }
 utils = { path = "../libs/utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/neon_local/src/main.rs
+++ b/neon_local/src/main.rs
@@ -9,6 +9,7 @@ use pageserver::config::defaults::{
    DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
    DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
 };
+use pageserver::http::models::TimelineInfo;
 use safekeeper::defaults::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
@@ -25,8 +26,6 @@ use utils::{
    zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
 };

-use pageserver::timelines::TimelineInfo;
-
 // Default id of a safekeeper node, if not specified on the command line.
 const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
 const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
@@ -885,7 +884,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
    match sub_match.subcommand() {
        Some(("start", start_match)) => {
            if let Err(e) = pageserver.start(&pageserver_config_overrides(start_match)) {
-                eprintln!("pageserver start failed: {}", e);
+                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
        }
@@ -907,10 +906,19 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
            }

            if let Err(e) = pageserver.start(&pageserver_config_overrides(restart_match)) {
-                eprintln!("pageserver start failed: {}", e);
+                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
        }
+
+        Some(("status", _)) => match PageServerNode::from_env(env).check_status() {
+            Ok(_) => println!("Page server is up and running"),
+            Err(err) => {
+                eprintln!("Page server is not available: {}", err);
+                exit(1);
+            }
+        },
+
        Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
        None => bail!("no pageserver subcommand provided"),
    }
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -21,7 +21,6 @@ futures = "0.3.13"
 hex = "0.4.3"
 hyper = "0.14"
 itertools = "0.10.3"
-lazy_static = "1.4.0"
 clap = "3.0"
 daemonize = "0.4.1"
 tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
@@ -29,7 +28,6 @@ postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d
 postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
-tokio-stream = "0.1.8"
 anyhow = { version = "1.0", features = ["backtrace"] }
 crc32c = "0.6.0"
 thiserror = "1.0"
@@ -49,7 +47,7 @@ tracing = "0.1.27"
 signal-hook = "0.3.10"
 url = "2"
 nix = "0.23"
-once_cell = "1.8.0"
+once_cell = "1.13.0"
 crossbeam-utils = "0.8.5"
 fail = "0.5.0"
 git-version = "0.3.5"
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -23,8 +23,7 @@ use tar::{Builder, EntryType, Header};
 use tracing::*;

 use crate::reltag::{RelTag, SlruKind};
-use crate::repository::Timeline;
-use crate::DatadirTimelineImpl;
+use crate::DatadirTimeline;
 use postgres_ffi::xlog_utils::*;
 use postgres_ffi::*;
 use utils::lsn::Lsn;
@@ -32,12 +31,13 @@ use utils::lsn::Lsn;
 /// This is short-living object only for the time of tarball creation,
 /// created mostly to avoid passing a lot of parameters between various functions
 /// used for constructing tarball.
-pub struct Basebackup<'a, W>
+pub struct Basebackup<'a, W, T>
 where
    W: Write,
+    T: DatadirTimeline,
 {
    ar: Builder<AbortableWrite<W>>,
-    timeline: &'a Arc<DatadirTimelineImpl>,
+    timeline: &'a Arc<T>,
    pub lsn: Lsn,
    prev_record_lsn: Lsn,
    full_backup: bool,
@@ -52,17 +52,18 @@ where
 //  * When working without safekeepers. In this situation it is important to match the lsn
 //    we are taking basebackup on with the lsn that is used in pageserver's walreceiver
 //    to start the replication.
-impl<'a, W> Basebackup<'a, W>
+impl<'a, W, T> Basebackup<'a, W, T>
 where
    W: Write,
+    T: DatadirTimeline,
 {
    pub fn new(
        write: W,
-        timeline: &'a Arc<DatadirTimelineImpl>,
+        timeline: &'a Arc<T>,
        req_lsn: Option<Lsn>,
        prev_lsn: Option<Lsn>,
        full_backup: bool,
-    ) -> Result<Basebackup<'a, W>> {
+    ) -> Result<Basebackup<'a, W, T>> {
        // Compute postgres doesn't have any previous WAL files, but the first
        // record that it's going to write needs to include the LSN of the
        // previous record (xl_prev). We include prev_record_lsn in the
@@ -79,13 +80,13 @@ where
        let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
            // Backup was requested at a particular LSN. Wait for it to arrive.
            info!("waiting for {}", req_lsn);
-            timeline.tline.wait_lsn(req_lsn)?;
+            timeline.wait_lsn(req_lsn)?;

            // If the requested point is the end of the timeline, we can
            // provide prev_lsn. (get_last_record_rlsn() might return it as
            // zero, though, if no WAL has been generated on this timeline
            // yet.)
-            let end_of_timeline = timeline.tline.get_last_record_rlsn();
+            let end_of_timeline = timeline.get_last_record_rlsn();
            if req_lsn == end_of_timeline.last {
                (end_of_timeline.prev, req_lsn)
            } else {
@@ -93,7 +94,7 @@ where
            }
        } else {
            // Backup was requested at end of the timeline.
-            let end_of_timeline = timeline.tline.get_last_record_rlsn();
+            let end_of_timeline = timeline.get_last_record_rlsn();
            (end_of_timeline.prev, end_of_timeline.last)
        };

@@ -371,7 +372,7 @@ where
        // add zenith.signal file
        let mut zenith_signal = String::new();
        if self.prev_record_lsn == Lsn(0) {
-            if self.lsn == self.timeline.tline.get_ancestor_lsn() {
+            if self.lsn == self.timeline.get_ancestor_lsn() {
                write!(zenith_signal, "PREV LSN: none")?;
            } else {
                write!(zenith_signal, "PREV LSN: invalid")?;
@@ -402,9 +403,10 @@ where
    }
 }

-impl<'a, W> Drop for Basebackup<'a, W>
+impl<'a, W, T> Drop for Basebackup<'a, W, T>
 where
    W: Write,
+    T: DatadirTimeline,
 {
    /// If the basebackup was not finished, prevent the Archive::drop() from
    /// writing the end-of-archive marker.
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -59,6 +59,7 @@ pub mod defaults {

 # [tenant_config]
 #checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
+#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
 #compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
 #compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
 #compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
@@ -452,6 +453,13 @@ impl PageServerConf {
                Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
        }

+        if let Some(checkpoint_timeout) = item.get("checkpoint_timeout") {
+            t_conf.checkpoint_timeout = Some(parse_toml_duration(
+                "checkpoint_timeout",
+                checkpoint_timeout,
+            )?);
+        }
+
        if let Some(compaction_target_size) = item.get("compaction_target_size") {
            t_conf.compaction_target_size = Some(parse_toml_u64(
                "compaction_target_size",
--- a/pageserver/src/http/models.rs
+++ b/pageserver/src/http/models.rs
@@ -7,6 +7,10 @@ use utils::{
    zid::{NodeId, ZTenantId, ZTimelineId},
 };

+// These enums are used in the API response fields.
+use crate::repository::LocalTimelineState;
+use crate::tenant_mgr::TenantState;
+
 #[serde_as]
 #[derive(Serialize, Deserialize)]
 pub struct TimelineCreateRequest {
@@ -28,6 +32,7 @@ pub struct TenantCreateRequest {
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub new_tenant_id: Option<ZTenantId>,
    pub checkpoint_distance: Option<u64>,
+    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
    pub compaction_period: Option<String>,
    pub compaction_threshold: Option<usize>,
@@ -66,6 +71,7 @@ pub struct TenantConfigRequest {
    #[serde(default)]
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub checkpoint_distance: Option<u64>,
+    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
    pub compaction_period: Option<String>,
    pub compaction_threshold: Option<usize>,
@@ -83,6 +89,7 @@ impl TenantConfigRequest {
        TenantConfigRequest {
            tenant_id,
            checkpoint_distance: None,
+            checkpoint_timeout: None,
            compaction_target_size: None,
            compaction_period: None,
            compaction_threshold: None,
@@ -97,14 +104,59 @@ impl TenantConfigRequest {
    }
 }

-/// A WAL receiver's data stored inside the global `WAL_RECEIVERS`.
-/// We keep one WAL receiver active per timeline.
+#[serde_as]
+#[derive(Serialize, Deserialize, Clone)]
+pub struct TenantInfo { // tenant summary serialized into HTTP API responses (e.g. by `tenant_status`)
+    #[serde_as(as = "DisplayFromStr")]
+    pub id: ZTenantId, // (de)serialized as a string through Display/FromStr
+    pub state: Option<TenantState>,
+    pub current_physical_size: Option<u64>, // only filled in by the `tenant_status` endpoint, as the sum over the tenant's local timelines
+    pub has_in_progress_downloads: Option<bool>, // derived from the tenant's entry in the remote index
+}
+
 #[serde_as]
 #[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct WalReceiverEntry {
-    pub wal_producer_connstr: Option<String>,
+pub struct LocalTimelineInfo { // state of a timeline as seen by the local repository (loaded or unloaded)
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub ancestor_timeline_id: Option<ZTimelineId>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub ancestor_lsn: Option<Lsn>, // None when the ancestor LSN is 0
+    #[serde_as(as = "DisplayFromStr")]
+    pub last_record_lsn: Lsn, // for an unloaded timeline this repeats disk_consistent_lsn
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub prev_record_lsn: Option<Lsn>,
+    #[serde_as(as = "DisplayFromStr")]
+    pub latest_gc_cutoff_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub disk_consistent_lsn: Lsn,
+    pub current_logical_size: Option<usize>, // is None when timeline is Unloaded
+    pub current_physical_size: Option<u64>,  // is None when timeline is Unloaded
+    pub current_logical_size_non_incremental: Option<usize>, // filled only when requested via `include-non-incremental-logical-size`
+    pub current_physical_size_non_incremental: Option<u64>, // filled only when requested via `include-non-incremental-physical-size`
+    pub timeline_state: LocalTimelineState,
+
+    pub wal_source_connstr: Option<String>, // this and the two fields below mirror the timeline's `last_received_wal`; None if nothing is recorded
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub last_received_msg_lsn: Option<Lsn>,
    /// the timestamp (in microseconds) of the last received message
    pub last_received_msg_ts: Option<u128>,
 }
+
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RemoteTimelineInfo { // remote-storage half of `TimelineInfo`
+    #[serde_as(as = "DisplayFromStr")]
+    pub remote_consistent_lsn: Lsn, // (de)serialized as a string through Display/FromStr
+    pub awaits_download: bool, // NOTE(review): presumably true while the timeline still has to be downloaded — confirm
+}
+
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct TimelineInfo { // API description of one timeline: identifiers plus optional local and remote parts
+    #[serde_as(as = "DisplayFromStr")]
+    pub tenant_id: ZTenantId,
+    #[serde_as(as = "DisplayFromStr")]
+    pub timeline_id: ZTimelineId,
+    pub local: Option<LocalTimelineInfo>, // view from the local repository, when available
+    pub remote: Option<RemoteTimelineInfo>, // None for a freshly created timeline (see `timeline_create_handler`)
+}
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -78,6 +78,11 @@ paths:
        schema:
          type: string
          description: Controls calculation of current_logical_size_non_incremental
+      - name: include-non-incremental-physical-size
+        in: query
+        schema:
+          type: string
+          description: Controls calculation of current_physical_size_non_incremental
    get:
      description: Get timelines for tenant
      responses:
@@ -136,6 +141,11 @@ paths:
          schema:
            type: string
          description: Controls calculation of current_logical_size_non_incremental
+        - name: include-non-incremental-physical-size
+          in: query
+          schema:
+            type: string
+          description: Controls calculation of current_physical_size_non_incremental
      responses:
        "200":
          description: TimelineInfo
@@ -197,54 +207,6 @@ paths:
              schema:
                $ref: "#/components/schemas/Error"

-  /v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
-    parameters:
-      - name: tenant_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-      - name: timeline_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-    get:
-      description: Get wal receiver's data attached to the timeline
-      responses:
-        "200":
-          description: WalReceiverEntry
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/WalReceiverEntry"
-        "401":
-          description: Unauthorized Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/UnauthorizedError"
-        "403":
-          description: Forbidden Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ForbiddenError"
-        "404":
-          description: Error when no wal receiver is running or found
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/NotFoundError"
-        "500":
-          description: Generic operation error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
-
  /v1/tenant/{tenant_id}/attach:
    parameters:
      - name: tenant_id
@@ -577,6 +539,8 @@ components:
          type: string
        state:
          type: string
+        current_physical_size:
+          type: integer
        has_in_progress_downloads:
          type: boolean
    TenantCreateInfo:
@@ -596,6 +560,8 @@ components:
          type: string
        checkpoint_distance:
          type: integer
+        checkpoint_timeout:
+          type: string
        compaction_period:
          type: string
        compaction_threshold:
@@ -614,6 +580,8 @@ components:
          type: string
        checkpoint_distance:
          type: integer
+        checkpoint_timeout:
+          type: string
        compaction_period:
          type: string
        compaction_threshold:
@@ -671,18 +639,13 @@ components:
          format: hex
        current_logical_size:
          type: integer
+        current_physical_size:
+          type: integer
        current_logical_size_non_incremental:
          type: integer
-
-    WalReceiverEntry:
-      type: object
-      required:
-        - thread_id
-        - wal_producer_connstr
-      properties:
-        thread_id:
+        current_physical_size_non_incremental:
          type: integer
-        wal_producer_connstr:
+        wal_source_connstr:
          type: string
        last_received_msg_lsn:
          type: string
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -6,16 +6,19 @@ use hyper::{Body, Request, Response, Uri};
 use remote_storage::GenericRemoteStorage;
 use tracing::*;

+use super::models::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
 use super::models::{
-    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse,
+    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
    TimelineCreateRequest,
 };
-use crate::repository::Repository;
+use crate::layered_repository::metadata::TimelineMetadata;
+use crate::pgdatadir_mapping::DatadirTimeline;
+use crate::repository::{LocalTimelineState, RepositoryTimeline};
+use crate::repository::{Repository, Timeline};
 use crate::storage_sync;
 use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
 use crate::tenant_config::TenantConfOpt;
-use crate::tenant_mgr::TenantInfo;
-use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
+use crate::TimelineImpl;
 use crate::{config::PageServerConf, tenant_mgr, timelines};
 use utils::{
    auth::JwtAuth,
@@ -26,6 +29,7 @@ use utils::{
        request::parse_request_param,
        RequestExt, RouterBuilder,
    },
+    lsn::Lsn,
    zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
 };

@@ -79,6 +83,123 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
    get_state(request).conf
 }

+// Helper functions to construct a LocalTimelineInfo struct for a timeline
+
+/// Builds the `LocalTimelineInfo` for a timeline that is loaded in memory. The two flags
+/// opt in to the non-incremental size calculations, whose errors are propagated.
+fn local_timeline_info_from_loaded_timeline(
+    timeline: &TimelineImpl,
+    include_non_incremental_logical_size: bool,
+    include_non_incremental_physical_size: bool,
+) -> anyhow::Result<LocalTimelineInfo> {
+    let last_record_lsn = timeline.get_last_record_lsn();
+    // Copy out whatever `last_received_wal` currently holds; the lock guard is a temporary
+    // of this statement and is released when it ends.
+    let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) =
+        match timeline.last_received_wal.lock().unwrap().as_ref() {
+            Some(wal) => (
+                Some(wal.wal_source_connstr.clone()),
+                Some(wal.last_received_msg_lsn),
+                Some(wal.last_received_msg_ts),
+            ),
+            None => (None, None, None),
+        };
+
+    // An ancestor LSN of 0 is reported as "no ancestor LSN".
+    let ancestor_lsn = match timeline.get_ancestor_lsn() {
+        Lsn(0) => None,
+        lsn => Some(lsn),
+    };
+
+    let current_logical_size_non_incremental = include_non_incremental_logical_size
+        .then(|| timeline.get_current_logical_size_non_incremental(last_record_lsn))
+        .transpose()?;
+    let current_physical_size_non_incremental = include_non_incremental_physical_size
+        .then(|| timeline.get_physical_size_non_incremental())
+        .transpose()?;
+
+    Ok(LocalTimelineInfo {
+        ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
+        ancestor_lsn,
+        disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
+        last_record_lsn,
+        prev_record_lsn: Some(timeline.get_prev_record_lsn()),
+        latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
+        timeline_state: LocalTimelineState::Loaded,
+        current_logical_size: Some(timeline.get_current_logical_size()),
+        current_physical_size: Some(timeline.get_physical_size()),
+        current_logical_size_non_incremental,
+        current_physical_size_non_incremental,
+        wal_source_connstr,
+        last_received_msg_lsn,
+        last_received_msg_ts,
+    })
+}
+
+/// Builds the `LocalTimelineInfo` for a timeline known only through its `TimelineMetadata`:
+/// sizes and WAL receiver details are `None`, and `last_record_lsn` repeats the disk LSN.
+fn local_timeline_info_from_unloaded_timeline(metadata: &TimelineMetadata) -> LocalTimelineInfo {
+    let disk_consistent_lsn = metadata.disk_consistent_lsn();
+    // An ancestor LSN of 0 is reported as "no ancestor LSN".
+    let ancestor_lsn = Some(metadata.ancestor_lsn()).filter(|lsn| *lsn != Lsn(0));
+    LocalTimelineInfo {
+        ancestor_timeline_id: metadata.ancestor_timeline(),
+        ancestor_lsn,
+        disk_consistent_lsn,
+        last_record_lsn: disk_consistent_lsn,
+        prev_record_lsn: metadata.prev_record_lsn(),
+        latest_gc_cutoff_lsn: metadata.latest_gc_cutoff_lsn(),
+        timeline_state: LocalTimelineState::Unloaded,
+        current_logical_size: None,
+        current_physical_size: None,
+        current_logical_size_non_incremental: None,
+        current_physical_size_non_incremental: None,
+        wal_source_connstr: None,
+        last_received_msg_lsn: None,
+        last_received_msg_ts: None,
+    }
+}
+
+/// Dispatches on the timeline's state; the size flags only matter for a loaded timeline.
+fn local_timeline_info_from_repo_timeline(
+    repo_timeline: &RepositoryTimeline<TimelineImpl>,
+    include_non_incremental_logical_size: bool,
+    include_non_incremental_physical_size: bool,
+) -> anyhow::Result<LocalTimelineInfo> {
+    use RepositoryTimeline::{Loaded, Unloaded};
+    match repo_timeline {
+        Unloaded { metadata } => Ok(local_timeline_info_from_unloaded_timeline(metadata)),
+        Loaded(timeline) => local_timeline_info_from_loaded_timeline(
+            timeline,
+            include_non_incremental_logical_size,
+            include_non_incremental_physical_size,
+        ),
+    }
+}
+
+/// Lists every timeline of the tenant's local repository together with its
+/// `LocalTimelineInfo`. Fails if the repository cannot be obtained or if building
+/// the info for any timeline fails (the first such error is returned).
+fn list_local_timelines(
+    tenant_id: ZTenantId,
+    include_non_incremental_logical_size: bool,
+    include_non_incremental_physical_size: bool,
+) -> Result<Vec<(ZTimelineId, LocalTimelineInfo)>> {
+    let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
+        .with_context(|| format!("Failed to get repo for tenant {}", tenant_id))?;
+    repo.list_timelines()
+        .into_iter()
+        .map(|(timeline_id, repo_timeline)| {
+            local_timeline_info_from_repo_timeline(
+                &repo_timeline,
+                include_non_incremental_logical_size,
+                include_non_incremental_physical_size,
+            )
+            .map(|info| (timeline_id, info))
+        })
+        .collect()
+}
+
 // healthcheck handler
 async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let config = get_config(&request);
@@ -93,16 +214,30 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<

    let new_timeline_info = tokio::task::spawn_blocking(move || {
        let _enter = info_span!("/timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, lsn=?request_data.ancestor_start_lsn).entered();
-        timelines::create_timeline(
+
+        match timelines::create_timeline(
            get_config(&request),
            tenant_id,
            request_data.new_timeline_id.map(ZTimelineId::from),
            request_data.ancestor_timeline_id.map(ZTimelineId::from),
            request_data.ancestor_start_lsn,
-        )
+        ) {
+            Ok(Some((new_timeline_id, new_timeline))) => {
+                // Created. Construct a TimelineInfo for it.
+                let local_info = local_timeline_info_from_loaded_timeline(new_timeline.as_ref(), false, false)?;
+                Ok(Some(TimelineInfo {
+                    tenant_id,
+                    timeline_id: new_timeline_id,
+                    local: Some(local_info),
+                    remote: None,
+                }))
+            }
+            Ok(None) => Ok(None), // timeline already exists
+            Err(err) => Err(err),
+        }
    })
    .await
-    .map_err(ApiError::from_err)??;
+        .map_err(ApiError::from_err)??;

    Ok(match new_timeline_info {
        Some(info) => json_response(StatusCode::CREATED, info)?,
@@ -113,10 +248,17 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
 async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
-    let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
+    let include_non_incremental_logical_size =
+        query_param_present(&request, "include-non-incremental-logical-size");
+    let include_non_incremental_physical_size =
+        query_param_present(&request, "include-non-incremental-physical-size");
    let local_timeline_infos = tokio::task::spawn_blocking(move || {
        let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
-        crate::timelines::get_local_timelines(tenant_id, include_non_incremental_logical_size)
+        list_local_timelines(
+            tenant_id,
+            include_non_incremental_logical_size,
+            include_non_incremental_physical_size,
+        )
    })
    .await
    .map_err(ApiError::from_err)??;
@@ -145,17 +287,15 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
    json_response(StatusCode::OK, response_data)
 }

-// Gate non incremental logical size calculation behind a flag
-// after pgbench -i -s100 calculation took 28ms so if multiplied by the number of timelines
-// and tenants it can take noticeable amount of time. Also the value currently used only in tests
-fn get_include_non_incremental_logical_size(request: &Request<Body>) -> bool {
+/// Returns true if `param` occurs as a key in the request's URL query string; values are ignored.
+fn query_param_present(request: &Request<Body>, param: &str) -> bool {
    request
        .uri()
        .query()
        .map(|v| {
            url::form_urlencoded::parse(v.as_bytes())
                .into_owned()
-                .any(|(param, _)| param == "include-non-incremental-logical-size")
+                .any(|(p, _)| p == param) // only the parsed key is compared
        })
        .unwrap_or(false)
 }
@@ -165,7 +305,10 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
    check_permission(&request, Some(tenant_id))?;

    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
-    let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
+    let include_non_incremental_logical_size =
+        query_param_present(&request, "include-non-incremental-logical-size");
+    let include_non_incremental_physical_size =
+        query_param_present(&request, "include-non-incremental-physical-size");

    let (local_timeline_info, remote_timeline_info) = async {
        // any error here will render local timeline as None
@@ -176,11 +319,10 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
                repo.get_timeline(timeline_id)
                    .as_ref()
                    .map(|timeline| {
-                        LocalTimelineInfo::from_repo_timeline(
-                            tenant_id,
-                            timeline_id,
+                        local_timeline_info_from_repo_timeline(
                            timeline,
                            include_non_incremental_logical_size,
+                            include_non_incremental_physical_size,
                        )
                    })
                    .transpose()?
@@ -225,23 +367,6 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
    json_response(StatusCode::OK, timeline_info)
 }

-async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
-
-    let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
-    let wal_receiver_entry = crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
-        .instrument(info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id))
-        .await
-        .ok_or_else(|| {
-            ApiError::NotFound(format!(
-                "WAL receiver data not found for tenant {tenant_id} and timeline {timeline_id}"
-            ))
-        })?;
-
-    json_response(StatusCode::OK, &wal_receiver_entry)
-}
-
 // TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
 async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
@@ -429,14 +554,36 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
    let index_accessor = remote_index.read().await;
    let has_in_progress_downloads = index_accessor
        .tenant_entry(&tenant_id)
-        .ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))?
-        .has_in_progress_downloads();
+        .map(|t| t.has_in_progress_downloads())
+        .unwrap_or_else(|| {
+            info!("Tenant {tenant_id} not found in remote index");
+            false
+        });
+
+    let current_physical_size =
+        match tokio::task::spawn_blocking(move || list_local_timelines(tenant_id, false, false))
+            .await
+            .map_err(ApiError::from_err)?
+        {
+            Err(err) => {
+                // No local repo on disk (e.g. tenant data is being downloaded): log a warning and carry on.
+                warn!("Failed to get local timelines for tenant {tenant_id}: {err}");
+                None
+            }
+            // Unloaded timelines have no physical size (`None`); count them as 0 instead of panicking.
+            Ok(local_timeline_infos) => Some(
+                local_timeline_infos
+                    .into_iter()
+                    .fold(0, |acc, x| acc + x.1.current_physical_size.unwrap_or(0)),
+            ),
+        };

    json_response(
        StatusCode::OK,
        TenantInfo {
            id: tenant_id,
            state: tenant_state,
+            current_physical_size,
            has_in_progress_downloads: Some(has_in_progress_downloads),
        },
    )
@@ -476,6 +623,11 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
    }

    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
+    if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
+        tenant_conf.checkpoint_timeout =
+            Some(humantime::parse_duration(&checkpoint_timeout).map_err(ApiError::from_err)?);
+    }
+
    tenant_conf.compaction_target_size = request_data.compaction_target_size;
    tenant_conf.compaction_threshold = request_data.compaction_threshold;

@@ -536,6 +688,10 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
    }

    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
+    if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
+        tenant_conf.checkpoint_timeout =
+            Some(humantime::parse_duration(&checkpoint_timeout).map_err(ApiError::from_err)?);
+    }
    tenant_conf.compaction_target_size = request_data.compaction_target_size;
    tenant_conf.compaction_threshold = request_data.compaction_threshold;

@@ -606,9 +762,5 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
            timeline_delete_handler,
        )
-        .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
-            wal_receiver_get_handler,
-        )
        .any(handler_404))
 }
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -13,9 +13,8 @@ use walkdir::WalkDir;

 use crate::pgdatadir_mapping::*;
 use crate::reltag::{RelTag, SlruKind};
-use crate::repository::Repository;
-use crate::repository::Timeline;
 use crate::walingest::WalIngest;
+use crate::walrecord::DecodedWALRecord;
 use postgres_ffi::relfile_utils::*;
 use postgres_ffi::waldecoder::*;
 use postgres_ffi::xlog_utils::*;
@@ -29,9 +28,9 @@ use utils::lsn::Lsn;
 /// This is currently only used to import a cluster freshly created by initdb.
 /// The code that deals with the checkpoint would not work right if the
 /// cluster was not shut down cleanly.
-pub fn import_timeline_from_postgres_datadir<R: Repository>(
+pub fn import_timeline_from_postgres_datadir<T: DatadirTimeline>(
    path: &Path,
-    tline: &mut DatadirTimeline<R>,
+    tline: &T,
    lsn: Lsn,
 ) -> Result<()> {
    let mut pg_control: Option<ControlFileData> = None;
@@ -89,8 +88,8 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
 }

 // subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
-fn import_rel<R: Repository, Reader: Read>(
-    modification: &mut DatadirModification<R>,
+fn import_rel<T: DatadirTimeline, Reader: Read>(
+    modification: &mut DatadirModification<T>,
    path: &Path,
    spcoid: Oid,
    dboid: Oid,
@@ -169,8 +168,8 @@ fn import_rel<R: Repository, Reader: Read>(

 /// Import an SLRU segment file
 ///
-fn import_slru<R: Repository, Reader: Read>(
-    modification: &mut DatadirModification<R>,
+fn import_slru<T: DatadirTimeline, Reader: Read>(
+    modification: &mut DatadirModification<T>,
    slru: SlruKind,
    path: &Path,
    mut reader: Reader,
@@ -225,9 +224,9 @@ fn import_slru<R: Repository, Reader: Read>(

 /// Scan PostgreSQL WAL files in given directory and load all records between
 /// 'startpoint' and 'endpoint' into the repository.
-fn import_wal<R: Repository>(
+fn import_wal<T: DatadirTimeline>(
    walpath: &Path,
-    tline: &mut DatadirTimeline<R>,
+    tline: &T,
    startpoint: Lsn,
    endpoint: Lsn,
 ) -> Result<()> {
@@ -268,9 +267,11 @@ fn import_wal<R: Repository>(
        waldecoder.feed_bytes(&buf);

        let mut nrecords = 0;
+        let mut modification = tline.begin_modification(endpoint);
+        let mut decoded = DecodedWALRecord::default();
        while last_lsn <= endpoint {
            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                walingest.ingest_record(tline, recdata, lsn)?;
+                walingest.ingest_record(recdata, lsn, &mut modification, &mut decoded)?;
                last_lsn = lsn;

                nrecords += 1;
@@ -294,8 +295,8 @@ fn import_wal<R: Repository>(
    Ok(())
 }

-pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
-    tline: &mut DatadirTimeline<R>,
+pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
+    tline: &T,
    reader: Reader,
    base_lsn: Lsn,
 ) -> Result<()> {
@@ -336,8 +337,8 @@ pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
    Ok(())
 }

-pub fn import_wal_from_tar<R: Repository, Reader: Read>(
-    tline: &mut DatadirTimeline<R>,
+pub fn import_wal_from_tar<T: DatadirTimeline, Reader: Read>(
+    tline: &T,
    reader: Reader,
    start_lsn: Lsn,
    end_lsn: Lsn,
@@ -384,9 +385,11 @@ pub fn import_wal_from_tar<R: Repository, Reader: Read>(

        waldecoder.feed_bytes(&bytes[offset..]);

+        let mut modification = tline.begin_modification(end_lsn);
+        let mut decoded = DecodedWALRecord::default();
        while last_lsn <= end_lsn {
            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                walingest.ingest_record(tline, recdata, lsn)?;
+                walingest.ingest_record(recdata, lsn, &mut modification, &mut decoded)?;
                last_lsn = lsn;

                debug!("imported record at {} (end {})", lsn, end_lsn);
@@ -415,8 +418,8 @@ pub fn import_wal_from_tar<R: Repository, Reader: Read>(
    Ok(())
 }

-pub fn import_file<R: Repository, Reader: Read>(
-    modification: &mut DatadirModification<R>,
+pub fn import_file<T: DatadirTimeline, Reader: Read>(
+    modification: &mut DatadirModification<T>,
    file_path: &Path,
    reader: Reader,
    len: usize,
@@ -535,7 +538,7 @@ pub fn import_file<R: Repository, Reader: Read>(
        // zenith.signal is not necessarily the last file, that we handle
        // but it is ok to call `finish_write()`, because final `modification.commit()`
        // will update lsn once more to the final one.
-        let writer = modification.tline.tline.writer();
+        let writer = modification.tline.writer();
        writer.finish_write(prev_lsn);

        debug!("imported zenith signal {}", prev_lsn);
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
--- a/pageserver/src/layered_repository/block_io.rs
+++ b/pageserver/src/layered_repository/block_io.rs
@@ -5,7 +5,7 @@
 use crate::page_cache;
 use crate::page_cache::{ReadBufResult, PAGE_SZ};
 use bytes::Bytes;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use std::ops::{Deref, DerefMut};
 use std::os::unix::fs::FileExt;
 use std::sync::atomic::AtomicU64;
@@ -117,9 +117,7 @@ where
    }
 }

-lazy_static! {
-    static ref NEXT_ID: AtomicU64 = AtomicU64::new(1);
-}
+static NEXT_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));

 /// An adapter for reading a (virtual) file using the page cache.
 ///
--- a/pageserver/src/layered_repository/delta_layer.rs
+++ b/pageserver/src/layered_repository/delta_layer.rs
@@ -316,6 +316,18 @@ impl Layer for DeltaLayer {
        }
    }

+    fn key_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'a> {
+        let inner = match self.load() {
+            Ok(inner) => inner,
+            Err(e) => panic!("Failed to load a delta layer: {e:?}"),
+        };
+
+        match DeltaKeyIter::new(inner) {
+            Ok(iter) => Box::new(iter),
+            Err(e) => panic!("Layer index is corrupted: {e:?}"),
+        }
+    }
+
    fn delete(&self) -> Result<()> {
        // delete underlying file
        fs::remove_file(self.path())?;
@@ -660,11 +672,21 @@ impl DeltaLayerWriter {
    /// The values must be appended in key, lsn order.
    ///
    pub fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
+        self.put_value_bytes(key, lsn, &Value::ser(&val)?, val.will_init())
+    }
+
+    pub fn put_value_bytes(
+        &mut self,
+        key: Key,
+        lsn: Lsn,
+        val: &[u8],
+        will_init: bool,
+    ) -> Result<()> {
        assert!(self.lsn_range.start <= lsn);

-        let off = self.blob_writer.write_blob(&Value::ser(&val)?)?;
+        let off = self.blob_writer.write_blob(val)?;

-        let blob_ref = BlobRef::new(off, val.will_init());
+        let blob_ref = BlobRef::new(off, will_init);

        let delta_key = DeltaKey::from_key_lsn(&key, lsn);
        self.tree.append(&delta_key.0, blob_ref.0)?;
@@ -822,3 +844,75 @@ impl<'a> DeltaValueIter<'a> {
        }
    }
 }
+///
+/// Iterator over all keys stored in a delta layer
+///
+/// FIXME: This creates a Vector to hold all keys.
+/// That takes up quite a lot of memory. Should do this in a more streaming
+/// fashion.
+///
+struct DeltaKeyIter {
+    all_keys: Vec<(DeltaKey, u64)>,
+    next_idx: usize,
+}
+
+impl Iterator for DeltaKeyIter {
+    type Item = (Key, Lsn, u64);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.next_idx < self.all_keys.len() {
+            let (delta_key, size) = &self.all_keys[self.next_idx];
+
+            let key = delta_key.key();
+            let lsn = delta_key.lsn();
+
+            self.next_idx += 1;
+            Some((key, lsn, *size))
+        } else {
+            None
+        }
+    }
+}
+
+impl<'a> DeltaKeyIter {
+    fn new(inner: RwLockReadGuard<'a, DeltaLayerInner>) -> Result<Self> {
+        let file = inner.file.as_ref().unwrap();
+        let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
+            inner.index_start_blk,
+            inner.index_root_blk,
+            file,
+        );
+
+        let mut all_keys: Vec<(DeltaKey, u64)> = Vec::new();
+        tree_reader.visit(
+            &[0u8; DELTA_KEY_SIZE],
+            VisitDirection::Forwards,
+            |key, value| {
+                let delta_key = DeltaKey::from_slice(key);
+                let pos = BlobRef(value).pos();
+                if let Some(last) = all_keys.last_mut() {
+                    if last.0.key() == delta_key.key() {
+                        return true;
+                    } else {
+                        // Subtract the offset of this key's first blob from the offset of the
+                        // new key's blob to get the total size of values stored for this key.
+                        let first_pos = last.1;
+                        last.1 = pos - first_pos;
+                    }
+                }
+                all_keys.push((delta_key, pos));
+                true
+            },
+        )?;
+        if let Some(last) = all_keys.last_mut() {
+            // Last key occupies all space till end of layer
+            last.1 = std::fs::metadata(&file.file.path)?.len() - last.1;
+        }
+        let iter = DeltaKeyIter {
+            all_keys,
+            next_idx: 0,
+        };
+
+        Ok(iter)
+    }
+}
--- a/pageserver/src/layered_repository/ephemeral_file.rs
+++ b/pageserver/src/layered_repository/ephemeral_file.rs
@@ -8,7 +8,7 @@ use crate::page_cache;
 use crate::page_cache::PAGE_SZ;
 use crate::page_cache::{ReadBufResult, WriteBufResult};
 use crate::virtual_file::VirtualFile;
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use std::cmp::min;
 use std::collections::HashMap;
 use std::fs::OpenOptions;
@@ -21,15 +21,15 @@ use utils::zid::{ZTenantId, ZTimelineId};

 use std::os::unix::fs::FileExt;

-lazy_static! {
-    ///
-    /// This is the global cache of file descriptors (File objects).
-    ///
-    static ref EPHEMERAL_FILES: RwLock<EphemeralFiles> = RwLock::new(EphemeralFiles {
+///
+/// This is the global cache of file descriptors (File objects).
+///
+static EPHEMERAL_FILES: Lazy<RwLock<EphemeralFiles>> = Lazy::new(|| {
+    RwLock::new(EphemeralFiles {
        next_file_id: 1,
        files: HashMap::new(),
-    });
-}
+    })
+});

 pub struct EphemeralFiles {
    next_file_id: u64,
@@ -43,7 +43,7 @@ pub struct EphemeralFile {
    _timelineid: ZTimelineId,
    file: Arc<VirtualFile>,

-    size: u64,
+    pub size: u64,
 }

 impl EphemeralFile {
--- a/pageserver/src/layered_repository/inmemory_layer.rs
+++ b/pageserver/src/layered_repository/inmemory_layer.rs
@@ -15,6 +15,7 @@ use crate::layered_repository::storage_layer::{
 use crate::repository::{Key, Value};
 use crate::walrecord;
 use anyhow::{bail, ensure, Result};
+use std::cell::RefCell;
 use std::collections::HashMap;
 use tracing::*;
 use utils::{
@@ -30,6 +31,12 @@ use std::ops::Range;
 use std::path::PathBuf;
 use std::sync::RwLock;

+thread_local! {
+    /// A buffer for serializing object during [`InMemoryLayer::put_value`].
+    /// This buffer is reused for each serialization to avoid additional malloc calls.
+    static SER_BUFFER: RefCell<Vec<u8>> = RefCell::new(Vec::new());
+}
+
 pub struct InMemoryLayer {
    conf: &'static PageServerConf,
    tenantid: ZTenantId,
@@ -233,6 +240,14 @@ impl Layer for InMemoryLayer {
 }

 impl InMemoryLayer {
+    ///
+    /// Get layer size on the disk
+    ///
+    pub fn size(&self) -> Result<u64> {
+        let inner = self.inner.read().unwrap();
+        Ok(inner.file.size)
+    }
+
    ///
    /// Create a new, empty, in-memory layer
    ///
@@ -270,10 +285,17 @@ impl InMemoryLayer {
    pub fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
        trace!("put_value key {} at {}/{}", key, self.timelineid, lsn);
        let mut inner = self.inner.write().unwrap();
-
        inner.assert_writeable();

-        let off = inner.file.write_blob(&Value::ser(val)?)?;
+        let off = {
+            SER_BUFFER.with(|x| -> Result<_> {
+                let mut buf = x.borrow_mut();
+                buf.clear();
+                val.ser_into(&mut (*buf))?;
+                let off = inner.file.write_blob(&buf)?;
+                Ok(off)
+            })?
+        };

        let vec_map = inner.index.entry(key).or_default();
        let old = vec_map.append_or_update_last(lsn, off).unwrap().0;
@@ -342,8 +364,8 @@ impl InMemoryLayer {
            // Write all page versions
            for (lsn, pos) in vec_map.as_slice() {
                cursor.read_blob_into_buf(*pos, &mut buf)?;
-                let val = Value::des(&buf)?;
-                delta_layer_writer.put_value(key, *lsn, val)?;
+                let will_init = Value::des(&buf)?.will_init();
+                delta_layer_writer.put_value_bytes(key, *lsn, &buf, will_init)?;
            }
        }

--- a/pageserver/src/layered_repository/layer_map.rs
+++ b/pageserver/src/layered_repository/layer_map.rs
@@ -10,24 +10,23 @@
 //! corresponding files are written to disk.
 //!

+use crate::layered_repository::inmemory_layer::InMemoryLayer;
 use crate::layered_repository::storage_layer::Layer;
 use crate::layered_repository::storage_layer::{range_eq, range_overlaps};
-use crate::layered_repository::InMemoryLayer;
 use crate::repository::Key;
 use anyhow::Result;
-use lazy_static::lazy_static;
 use metrics::{register_int_gauge, IntGauge};
+use once_cell::sync::Lazy;
 use std::collections::VecDeque;
 use std::ops::Range;
 use std::sync::Arc;
 use tracing::*;
 use utils::lsn::Lsn;

-lazy_static! {
-    static ref NUM_ONDISK_LAYERS: IntGauge =
-        register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
-            .expect("failed to define a metric");
-}
+static NUM_ONDISK_LAYERS: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
+        .expect("failed to define a metric")
+});

 ///
 /// LayerMap tracks what layers exist on a timeline.
--- a/pageserver/src/layered_repository/storage_layer.rs
+++ b/pageserver/src/layered_repository/storage_layer.rs
@@ -139,6 +139,12 @@ pub trait Layer: Send + Sync {
    /// Iterate through all keys and values stored in the layer
    fn iter(&self) -> Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + '_>;

+    /// Iterate through all keys stored in the layer. Returns key, LSN and value size.
+    /// Used only for compaction; currently only DeltaLayer implements it (the default panics).
+    fn key_iter(&self) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + '_> {
+        panic!("Not implemented")
+    }
+
    /// Permanently remove this layer from disk.
    fn delete(&self) -> Result<()>;

--- a/pageserver/src/layered_repository/timeline.rs
+++ b/pageserver/src/layered_repository/timeline.rs
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -22,7 +22,7 @@ pub mod walreceiver;
 pub mod walrecord;
 pub mod walredo;

-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use tracing::info;

 use crate::thread_mgr::ThreadKind;
@@ -42,14 +42,14 @@ pub const STORAGE_FORMAT_VERSION: u16 = 3;
 pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
 pub const DELTA_FILE_MAGIC: u16 = 0x5A61;

-lazy_static! {
-    static ref LIVE_CONNECTIONS_COUNT: IntGaugeVec = register_int_gauge_vec!(
+static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
        "pageserver_live_connections",
        "Number of live network connections",
        &["pageserver_connection_kind"]
    )
-    .expect("failed to define a metric");
-}
+    .expect("failed to define a metric")
+});

 pub const LOG_FILE_NAME: &str = "pageserver.log";

@@ -63,8 +63,7 @@ pub enum CheckpointConfig {
 }

 pub type RepositoryImpl = LayeredRepository;
-
-pub type DatadirTimelineImpl = DatadirTimeline<RepositoryImpl>;
+pub type TimelineImpl = <LayeredRepository as repository::Repository>::Timeline;

 pub fn shutdown_pageserver(exit_code: i32) {
    // Shut down the libpq endpoint thread. This prevents new connections from
@@ -94,3 +93,56 @@ pub fn shutdown_pageserver(exit_code: i32) {
    info!("Shut down successfully completed");
    std::process::exit(exit_code);
 }
+
+const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;
+const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;
+
+async fn exponential_backoff(n: u32, base_increment: f64, max_seconds: f64) {
+    let backoff_duration_seconds =
+        exponential_backoff_duration_seconds(n, base_increment, max_seconds);
+    if backoff_duration_seconds > 0.0 {
+        info!(
+            "Backoff: waiting {backoff_duration_seconds} seconds before processing with the task",
+        );
+        tokio::time::sleep(std::time::Duration::from_secs_f64(backoff_duration_seconds)).await;
+    }
+}
+
+fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 {
+    if n == 0 {
+        0.0
+    } else {
+        (1.0 + base_increment).powf(f64::from(n)).min(max_seconds)
+    }
+}
+
+#[cfg(test)]
+mod backoff_defaults_tests {
+    use super::*;
+
+    #[test]
+    fn backoff_defaults_produce_growing_backoff_sequence() {
+        let mut current_backoff_value = None;
+
+        for i in 0..10_000 {
+            let new_backoff_value = exponential_backoff_duration_seconds(
+                i,
+                DEFAULT_BASE_BACKOFF_SECONDS,
+                DEFAULT_MAX_BACKOFF_SECONDS,
+            );
+
+            if let Some(old_backoff_value) = current_backoff_value.replace(new_backoff_value) {
+                assert!(
+                    old_backoff_value <= new_backoff_value,
+                    "{i}th backoff value {new_backoff_value} is smaller than the previous one {old_backoff_value}"
+                )
+            }
+        }
+
+        assert_eq!(
+            current_backoff_value.expect("Should have produced backoff values to compare"),
+            DEFAULT_MAX_BACKOFF_SECONDS,
+            "Given big enough of retries, backoff should reach its allowed max value"
+        );
+    }
+}
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -11,7 +11,7 @@

 use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use regex::Regex;
 use std::io::{self, Read};
 use std::net::TcpListener;
@@ -30,7 +30,6 @@ use utils::{
 use crate::basebackup;
 use crate::config::{PageServerConf, ProfilingConfig};
 use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
-use crate::layered_repository::LayeredRepository;
 use crate::pgdatadir_mapping::{DatadirTimeline, LsnForTimestamp};
 use crate::profiling::profpoint_start;
 use crate::reltag::RelTag;
@@ -435,15 +434,15 @@ const TIME_BUCKETS: &[f64] = &[
    0.1,  // 1/10 s
 ];

-lazy_static! {
-    static ref SMGR_QUERY_TIME: HistogramVec = register_histogram_vec!(
+static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
        "pageserver_smgr_query_seconds",
        "Time spent on smgr query handling",
        &["smgr_query_type", "tenant_id", "timeline_id"],
        TIME_BUCKETS.into()
    )
-    .expect("failed to define a metric");
-}
+    .expect("failed to define a metric")
+});

 impl PageServerHandler {
    pub fn new(conf: &'static PageServerConf, auth: Option<Arc<JwtAuth>>) -> Self {
@@ -555,9 +554,6 @@ impl PageServerHandler {
        info!("creating new timeline");
        let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
        let timeline = repo.create_empty_timeline(timeline_id, base_lsn)?;
-        let repartition_distance = repo.get_checkpoint_distance();
-        let mut datadir_timeline =
-            DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);

        // TODO mark timeline as not ready until it reaches end_lsn.
        // We might have some wal to import as well, and we should prevent compute
@@ -573,7 +569,7 @@ impl PageServerHandler {
        info!("importing basebackup");
        pgb.write_message(&BeMessage::CopyInResponse)?;
        let reader = CopyInReader::new(pgb);
-        import_basebackup_from_tar(&mut datadir_timeline, reader, base_lsn)?;
+        import_basebackup_from_tar(&*timeline, reader, base_lsn)?;

        // TODO check checksum
        // Meanwhile you can verify client-side by taking fullbackup
@@ -583,7 +579,7 @@ impl PageServerHandler {

        // Flush data to disk, then upload to s3
        info!("flushing layers");
-        datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
+        timeline.checkpoint(CheckpointConfig::Flush)?;

        info!("done");
        Ok(())
@@ -605,10 +601,6 @@ impl PageServerHandler {
        let timeline = repo.get_timeline_load(timeline_id)?;
        ensure!(timeline.get_last_record_lsn() == start_lsn);

-        let repartition_distance = repo.get_checkpoint_distance();
-        let mut datadir_timeline =
-            DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
-
        // TODO leave clean state on error. For now you can use detach to clean
        // up broken state from a failed import.

@@ -616,16 +608,16 @@ impl PageServerHandler {
        info!("importing wal");
        pgb.write_message(&BeMessage::CopyInResponse)?;
        let reader = CopyInReader::new(pgb);
-        import_wal_from_tar(&mut datadir_timeline, reader, start_lsn, end_lsn)?;
+        import_wal_from_tar(&*timeline, reader, start_lsn, end_lsn)?;

        // TODO Does it make sense to overshoot?
-        ensure!(datadir_timeline.tline.get_last_record_lsn() >= end_lsn);
+        ensure!(timeline.get_last_record_lsn() >= end_lsn);

        // Flush data to disk, then upload to s3. No need for a forced checkpoint.
        // We only want to persist the data, and it doesn't matter if it's in the
        // shape of deltas or images.
        info!("flushing layers");
-        datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
+        timeline.checkpoint(CheckpointConfig::Flush)?;

        info!("done");
        Ok(())
@@ -643,8 +635,8 @@ impl PageServerHandler {
    /// In either case, if the page server hasn't received the WAL up to the
    /// requested LSN yet, we will wait for it to arrive. The return value is
    /// the LSN that should be used to look up the page versions.
-    fn wait_or_get_last_lsn<R: Repository>(
-        timeline: &DatadirTimeline<R>,
+    fn wait_or_get_last_lsn<T: DatadirTimeline>(
+        timeline: &T,
        mut lsn: Lsn,
        latest: bool,
        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
@@ -671,7 +663,7 @@ impl PageServerHandler {
            if lsn <= last_record_lsn {
                lsn = last_record_lsn;
            } else {
-                timeline.tline.wait_lsn(lsn)?;
+                timeline.wait_lsn(lsn)?;
                // Since we waited for 'lsn' to arrive, that is now the last
                // record LSN. (Or close enough for our purposes; the
                // last-record LSN can advance immediately after we return
@@ -681,7 +673,7 @@ impl PageServerHandler {
            if lsn == Lsn(0) {
                bail!("invalid LSN(0) in request");
            }
-            timeline.tline.wait_lsn(lsn)?;
+            timeline.wait_lsn(lsn)?;
        }
        ensure!(
            lsn >= **latest_gc_cutoff_lsn,
@@ -691,14 +683,14 @@ impl PageServerHandler {
        Ok(lsn)
    }

-    fn handle_get_rel_exists_request<R: Repository>(
+    fn handle_get_rel_exists_request<T: DatadirTimeline>(
        &self,
-        timeline: &DatadirTimeline<R>,
+        timeline: &T,
        req: &PagestreamExistsRequest,
    ) -> Result<PagestreamBeMessage> {
        let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();

-        let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;

        let exists = timeline.get_rel_exists(req.rel, lsn)?;
@@ -708,13 +700,13 @@ impl PageServerHandler {
        }))
    }

-    fn handle_get_nblocks_request<R: Repository>(
+    fn handle_get_nblocks_request<T: DatadirTimeline>(
        &self,
-        timeline: &DatadirTimeline<R>,
+        timeline: &T,
        req: &PagestreamNblocksRequest,
    ) -> Result<PagestreamBeMessage> {
        let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
-        let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;

        let n_blocks = timeline.get_rel_size(req.rel, lsn)?;
@@ -724,13 +716,13 @@ impl PageServerHandler {
        }))
    }

-    fn handle_db_size_request<R: Repository>(
+    fn handle_db_size_request<T: DatadirTimeline>(
        &self,
-        timeline: &DatadirTimeline<R>,
+        timeline: &T,
        req: &PagestreamDbSizeRequest,
    ) -> Result<PagestreamBeMessage> {
        let _enter = info_span!("get_db_size", dbnode = %req.dbnode, req_lsn = %req.lsn).entered();
-        let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;

        let total_blocks =
@@ -743,14 +735,14 @@ impl PageServerHandler {
        }))
    }

-    fn handle_get_page_at_lsn_request<R: Repository>(
+    fn handle_get_page_at_lsn_request<T: DatadirTimeline>(
        &self,
-        timeline: &DatadirTimeline<R>,
+        timeline: &T,
        req: &PagestreamGetPageRequest,
    ) -> Result<PagestreamBeMessage> {
        let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
            .entered();
-        let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
        /*
        // Add a 1s delay to some requests. The delayed causes the requests to
@@ -783,7 +775,7 @@ impl PageServerHandler {
        // check that the timeline exists
        let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
            .context("Cannot load local timeline")?;
-        let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        if let Some(lsn) = lsn {
            timeline
                .check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)
@@ -921,7 +913,7 @@ impl postgres_backend::Handler for PageServerHandler {
            let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
                .context("Cannot load local timeline")?;

-            let end_of_timeline = timeline.tline.get_last_record_rlsn();
+            let end_of_timeline = timeline.get_last_record_rlsn();

            pgb.write_message_noflush(&BeMessage::RowDescription(&[
                RowDescriptor::text_col(b"prev_lsn"),
@@ -1052,6 +1044,7 @@ impl postgres_backend::Handler for PageServerHandler {
            let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
            pgb.write_message_noflush(&BeMessage::RowDescription(&[
                RowDescriptor::int8_col(b"checkpoint_distance"),
+                RowDescriptor::int8_col(b"checkpoint_timeout"),
                RowDescriptor::int8_col(b"compaction_target_size"),
                RowDescriptor::int8_col(b"compaction_period"),
                RowDescriptor::int8_col(b"compaction_threshold"),
@@ -1062,6 +1055,12 @@ impl postgres_backend::Handler for PageServerHandler {
            ]))?
            .write_message_noflush(&BeMessage::DataRow(&[
                Some(repo.get_checkpoint_distance().to_string().as_bytes()),
+                Some(
+                    repo.get_checkpoint_timeout()
+                        .as_secs()
+                        .to_string()
+                        .as_bytes(),
+                ),
                Some(repo.get_compaction_target_size().to_string().as_bytes()),
                Some(
                    repo.get_compaction_period()
@@ -1139,7 +1138,7 @@ impl postgres_backend::Handler for PageServerHandler {
            let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
            let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
                .context("Couldn't load timeline")?;
-            timeline.tline.compact()?;
+            timeline.compact()?;

            pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
                .write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
@@ -1159,13 +1158,8 @@ impl postgres_backend::Handler for PageServerHandler {
            let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
                .context("Cannot load local timeline")?;

-            timeline.tline.checkpoint(CheckpointConfig::Forced)?;
-
-            // Also compact it.
-            //
-            // FIXME: This probably shouldn't be part of a "checkpoint" command, but a
-            // separate operation. Update the tests if you change this.
-            timeline.tline.compact()?;
+            // Checkpoint the timeline and also compact it (due to `CheckpointConfig::Forced`).
+            timeline.checkpoint(CheckpointConfig::Forced)?;

            pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
                .write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -6,10 +6,10 @@
 //! walingest.rs handles a few things like implicit relation creation and extension.
 //! Clarify that)
 //!
-use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceAccum};
+use crate::keyspace::{KeySpace, KeySpaceAccum};
 use crate::reltag::{RelTag, SlruKind};
+use crate::repository::Timeline;
 use crate::repository::*;
-use crate::repository::{Repository, Timeline};
 use crate::walrecord::ZenithWalRecord;
 use anyhow::{bail, ensure, Result};
 use bytes::{Buf, Bytes};
@@ -18,34 +18,12 @@ use postgres_ffi::{pg_constants, Oid, TransactionId};
 use serde::{Deserialize, Serialize};
 use std::collections::{HashMap, HashSet};
 use std::ops::Range;
-use std::sync::atomic::{AtomicIsize, Ordering};
-use std::sync::{Arc, Mutex, RwLockReadGuard};
-use tracing::{debug, error, trace, warn};
+use tracing::{debug, trace, warn};
 use utils::{bin_ser::BeSer, lsn::Lsn};

 /// Block number within a relation or SLRU. This matches PostgreSQL's BlockNumber type.
 pub type BlockNumber = u32;

-pub struct DatadirTimeline<R>
-where
-    R: Repository,
-{
-    /// The underlying key-value store. Callers should not read or modify the
-    /// data in the underlying store directly. However, it is exposed to have
-    /// access to information like last-LSN, ancestor, and operations like
-    /// compaction.
-    pub tline: Arc<R::Timeline>,
-
-    /// When did we last calculate the partitioning?
-    partitioning: Mutex<(KeyPartitioning, Lsn)>,
-
-    /// Configuration: how often should the partitioning be recalculated.
-    repartition_threshold: u64,
-
-    /// Current logical size of the "datadir", at the last LSN.
-    current_logical_size: AtomicIsize,
-}
-
 #[derive(Debug)]
 pub enum LsnForTimestamp {
    Present(Lsn),
@@ -54,33 +32,29 @@ pub enum LsnForTimestamp {
    NoData(Lsn),
 }

-impl<R: Repository> DatadirTimeline<R> {
-    pub fn new(tline: Arc<R::Timeline>, repartition_threshold: u64) -> Self {
-        DatadirTimeline {
-            tline,
-            partitioning: Mutex::new((KeyPartitioning::new(), Lsn(0))),
-            current_logical_size: AtomicIsize::new(0),
-            repartition_threshold,
-        }
-    }
-
-    /// (Re-)calculate the logical size of the database at the latest LSN.
-    ///
-    /// This can be a slow operation.
-    pub fn init_logical_size(&self) -> Result<()> {
-        let last_lsn = self.tline.get_last_record_lsn();
-        self.current_logical_size.store(
-            self.get_current_logical_size_non_incremental(last_lsn)? as isize,
-            Ordering::SeqCst,
-        );
-        Ok(())
-    }
-
+///
+/// This trait provides all the functionality to store PostgreSQL relations, SLRUs,
+/// and other special kinds of files, in a versioned key-value store. The
+/// Timeline trait provides the key-value store.
+///
+/// This is a trait so that we can easily include all these functions in a Timeline
+/// implementation. You're not expected to have different implementations of this trait;
+/// rather, it provides an interface and its implementation on top of Timeline.
+///
+/// If you wanted to store other kinds of data in the Neon repository, e.g.
+/// flat files or MySQL, you would create a new trait like this, with all the
+/// functions that make sense for the kind of data you're storing. For flat files,
+/// for example, you might have a function like "fn read(path, offset, size)".
+/// We might also have that situation in the future, to support multiple PostgreSQL
+/// versions, if there are big changes in how the data is organized in the data
+/// directory, or if new special files are introduced.
+///
+pub trait DatadirTimeline: Timeline {
    /// Start ingesting a WAL record, or other atomic modification of
    /// the timeline.
    ///
    /// This provides a transaction-like interface to perform a bunch
-    /// of modifications atomically, all stamped with one LSN.
+    /// of modifications atomically.
    ///
    /// To ingest a WAL record, call begin_modification(lsn) to get a
    /// DatadirModification object. Use the functions in the object to
@@ -88,18 +62,27 @@ impl<R: Repository> DatadirTimeline<R> {
    /// that the WAL record affects. When you're done, call commit() to
    /// commit the changes.
    ///
+    /// The Lsn stored in the modification is advanced by `ingest_record` and
+    /// is used by `commit()` to update `last_record_lsn`.
+    ///
+    /// Calling commit() will flush all the changes and reset the state,
+    /// so the `DatadirModification` struct can be reused to perform the next modification.
+    ///
    /// Note that any pending modifications you make through the
    /// modification object won't be visible to calls to the 'get' and list
    /// functions of the timeline until you finish! And if you update the
    /// same page twice, the last update wins.
    ///
-    pub fn begin_modification(&self, lsn: Lsn) -> DatadirModification<R> {
+    fn begin_modification(&self, lsn: Lsn) -> DatadirModification<Self>
+    where
+        Self: Sized,
+    {
        DatadirModification {
            tline: self,
-            lsn,
            pending_updates: HashMap::new(),
            pending_deletions: Vec::new(),
            pending_nblocks: 0,
+            lsn,
        }
    }

@@ -108,7 +91,7 @@ impl<R: Repository> DatadirTimeline<R> {
    //------------------------------------------------------------------------------

    /// Look up given page version.
-    pub fn get_rel_page_at_lsn(&self, tag: RelTag, blknum: BlockNumber, lsn: Lsn) -> Result<Bytes> {
+    fn get_rel_page_at_lsn(&self, tag: RelTag, blknum: BlockNumber, lsn: Lsn) -> Result<Bytes> {
        ensure!(tag.relnode != 0, "invalid relnode");

        let nblocks = self.get_rel_size(tag, lsn)?;
@@ -121,11 +104,11 @@ impl<R: Repository> DatadirTimeline<R> {
        }

        let key = rel_block_to_key(tag, blknum);
-        self.tline.get(key, lsn)
+        self.get(key, lsn)
    }

    // Get size of a database in blocks
-    pub fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<usize> {
+    fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<usize> {
        let mut total_blocks = 0;

        let rels = self.list_rels(spcnode, dbnode, lsn)?;
@@ -138,9 +121,13 @@ impl<R: Repository> DatadirTimeline<R> {
    }

    /// Get size of a relation file
-    pub fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
+    fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
        ensure!(tag.relnode != 0, "invalid relnode");

+        if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
+            return Ok(nblocks);
+        }
+
        if (tag.forknum == pg_constants::FSM_FORKNUM
            || tag.forknum == pg_constants::VISIBILITYMAP_FORKNUM)
            && !self.get_rel_exists(tag, lsn)?
@@ -153,17 +140,25 @@ impl<R: Repository> DatadirTimeline<R> {
        }

        let key = rel_size_to_key(tag);
-        let mut buf = self.tline.get(key, lsn)?;
-        Ok(buf.get_u32_le())
+        let mut buf = self.get(key, lsn)?;
+        let nblocks = buf.get_u32_le();
+
+        // Update relation size cache
+        self.update_cached_rel_size(tag, lsn, nblocks);
+        Ok(nblocks)
    }

    /// Does relation exist?
-    pub fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool> {
+    fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool> {
        ensure!(tag.relnode != 0, "invalid relnode");

+        // first try to lookup relation in cache
+        if let Some(_nblocks) = self.get_cached_rel_size(&tag, lsn) {
+            return Ok(true);
+        }
        // fetch directory listing
        let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
-        let buf = self.tline.get(key, lsn)?;
+        let buf = self.get(key, lsn)?;
        let dir = RelDirectory::des(&buf)?;

        let exists = dir.rels.get(&(tag.relnode, tag.forknum)).is_some();
@@ -172,10 +167,10 @@ impl<R: Repository> DatadirTimeline<R> {
    }

    /// Get a list of all existing relations in given tablespace and database.
-    pub fn list_rels(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<HashSet<RelTag>> {
+    fn list_rels(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<HashSet<RelTag>> {
        // fetch directory listing
        let key = rel_dir_to_key(spcnode, dbnode);
-        let buf = self.tline.get(key, lsn)?;
+        let buf = self.get(key, lsn)?;
        let dir = RelDirectory::des(&buf)?;

        let rels: HashSet<RelTag> =
@@ -190,7 +185,7 @@ impl<R: Repository> DatadirTimeline<R> {
    }

    /// Look up given SLRU page version.
-    pub fn get_slru_page_at_lsn(
+    fn get_slru_page_at_lsn(
        &self,
        kind: SlruKind,
        segno: u32,
@@ -198,26 +193,21 @@ impl<R: Repository> DatadirTimeline<R> {
        lsn: Lsn,
    ) -> Result<Bytes> {
        let key = slru_block_to_key(kind, segno, blknum);
-        self.tline.get(key, lsn)
+        self.get(key, lsn)
    }

    /// Get size of an SLRU segment
-    pub fn get_slru_segment_size(
-        &self,
-        kind: SlruKind,
-        segno: u32,
-        lsn: Lsn,
-    ) -> Result<BlockNumber> {
+    fn get_slru_segment_size(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result<BlockNumber> {
        let key = slru_segment_size_to_key(kind, segno);
-        let mut buf = self.tline.get(key, lsn)?;
+        let mut buf = self.get(key, lsn)?;
        Ok(buf.get_u32_le())
    }

    /// Get size of an SLRU segment
-    pub fn get_slru_segment_exists(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result<bool> {
+    fn get_slru_segment_exists(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result<bool> {
        // fetch directory listing
        let key = slru_dir_to_key(kind);
-        let buf = self.tline.get(key, lsn)?;
+        let buf = self.get(key, lsn)?;
        let dir = SlruSegmentDirectory::des(&buf)?;

        let exists = dir.segments.get(&segno).is_some();
@@ -231,10 +221,10 @@ impl<R: Repository> DatadirTimeline<R> {
    /// so it's not well defined which LSN you get if there were multiple commits
    /// "in flight" at that point in time.
    ///
-    pub fn find_lsn_for_timestamp(&self, search_timestamp: TimestampTz) -> Result<LsnForTimestamp> {
-        let gc_cutoff_lsn_guard = self.tline.get_latest_gc_cutoff_lsn();
+    fn find_lsn_for_timestamp(&self, search_timestamp: TimestampTz) -> Result<LsnForTimestamp> {
+        let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
        let min_lsn = *gc_cutoff_lsn_guard;
-        let max_lsn = self.tline.get_last_record_lsn();
+        let max_lsn = self.get_last_record_lsn();

        // LSNs are always 8-byte aligned. low/mid/high represent the
        // LSN divided by 8.
@@ -325,88 +315,51 @@ impl<R: Repository> DatadirTimeline<R> {
    }

    /// Get a list of SLRU segments
-    pub fn list_slru_segments(&self, kind: SlruKind, lsn: Lsn) -> Result<HashSet<u32>> {
+    fn list_slru_segments(&self, kind: SlruKind, lsn: Lsn) -> Result<HashSet<u32>> {
        // fetch directory entry
        let key = slru_dir_to_key(kind);

-        let buf = self.tline.get(key, lsn)?;
+        let buf = self.get(key, lsn)?;
        let dir = SlruSegmentDirectory::des(&buf)?;

        Ok(dir.segments)
    }

-    pub fn get_relmap_file(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<Bytes> {
+    fn get_relmap_file(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<Bytes> {
        let key = relmap_file_key(spcnode, dbnode);

-        let buf = self.tline.get(key, lsn)?;
+        let buf = self.get(key, lsn)?;
        Ok(buf)
    }

-    pub fn list_dbdirs(&self, lsn: Lsn) -> Result<HashMap<(Oid, Oid), bool>> {
+    fn list_dbdirs(&self, lsn: Lsn) -> Result<HashMap<(Oid, Oid), bool>> {
        // fetch directory entry
-        let buf = self.tline.get(DBDIR_KEY, lsn)?;
+        let buf = self.get(DBDIR_KEY, lsn)?;
        let dir = DbDirectory::des(&buf)?;

        Ok(dir.dbdirs)
    }

-    pub fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> Result<Bytes> {
+    fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> Result<Bytes> {
        let key = twophase_file_key(xid);
-        let buf = self.tline.get(key, lsn)?;
+        let buf = self.get(key, lsn)?;
        Ok(buf)
    }

-    pub fn list_twophase_files(&self, lsn: Lsn) -> Result<HashSet<TransactionId>> {
+    fn list_twophase_files(&self, lsn: Lsn) -> Result<HashSet<TransactionId>> {
        // fetch directory entry
-        let buf = self.tline.get(TWOPHASEDIR_KEY, lsn)?;
+        let buf = self.get(TWOPHASEDIR_KEY, lsn)?;
        let dir = TwoPhaseDirectory::des(&buf)?;

        Ok(dir.xids)
    }

-    pub fn get_control_file(&self, lsn: Lsn) -> Result<Bytes> {
-        self.tline.get(CONTROLFILE_KEY, lsn)
+    fn get_control_file(&self, lsn: Lsn) -> Result<Bytes> {
+        self.get(CONTROLFILE_KEY, lsn)
    }

-    pub fn get_checkpoint(&self, lsn: Lsn) -> Result<Bytes> {
-        self.tline.get(CHECKPOINT_KEY, lsn)
-    }
-
-    /// Get the LSN of the last ingested WAL record.
-    ///
-    /// This is just a convenience wrapper that calls through to the underlying
-    /// repository.
-    pub fn get_last_record_lsn(&self) -> Lsn {
-        self.tline.get_last_record_lsn()
-    }
-
-    /// Check that it is valid to request operations with that lsn.
-    ///
-    /// This is just a convenience wrapper that calls through to the underlying
-    /// repository.
-    pub fn check_lsn_is_in_scope(
-        &self,
-        lsn: Lsn,
-        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
-    ) -> Result<()> {
-        self.tline.check_lsn_is_in_scope(lsn, latest_gc_cutoff_lsn)
-    }
-
-    /// Retrieve current logical size of the timeline
-    ///
-    /// NOTE: counted incrementally, includes ancestors,
-    pub fn get_current_logical_size(&self) -> usize {
-        let current_logical_size = self.current_logical_size.load(Ordering::Acquire);
-        match usize::try_from(current_logical_size) {
-            Ok(sz) => sz,
-            Err(_) => {
-                error!(
-                    "current_logical_size is out of range: {}",
-                    current_logical_size
-                );
-                0
-            }
-        }
+    fn get_checkpoint(&self, lsn: Lsn) -> Result<Bytes> {
+        self.get(CHECKPOINT_KEY, lsn)
    }

    /// Does the same as get_current_logical_size but counted on demand.
@@ -414,16 +367,16 @@ impl<R: Repository> DatadirTimeline<R> {
    ///
    /// Only relation blocks are counted currently. That excludes metadata,
    /// SLRUs, twophase files etc.
-    pub fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
+    fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
        // Fetch list of database dirs and iterate them
-        let buf = self.tline.get(DBDIR_KEY, lsn)?;
+        let buf = self.get(DBDIR_KEY, lsn)?;
        let dbdir = DbDirectory::des(&buf)?;

        let mut total_size: usize = 0;
        for (spcnode, dbnode) in dbdir.dbdirs.keys() {
            for rel in self.list_rels(*spcnode, *dbnode, lsn)? {
                let relsize_key = rel_size_to_key(rel);
-                let mut buf = self.tline.get(relsize_key, lsn)?;
+                let mut buf = self.get(relsize_key, lsn)?;
                let relsize = buf.get_u32_le();

                total_size += relsize as usize;
@@ -444,7 +397,7 @@ impl<R: Repository> DatadirTimeline<R> {
        result.add_key(DBDIR_KEY);

        // Fetch list of database dirs and iterate them
-        let buf = self.tline.get(DBDIR_KEY, lsn)?;
+        let buf = self.get(DBDIR_KEY, lsn)?;
        let dbdir = DbDirectory::des(&buf)?;

        let mut dbs: Vec<(Oid, Oid)> = dbdir.dbdirs.keys().cloned().collect();
@@ -461,7 +414,7 @@ impl<R: Repository> DatadirTimeline<R> {
            rels.sort_unstable();
            for rel in rels {
                let relsize_key = rel_size_to_key(rel);
-                let mut buf = self.tline.get(relsize_key, lsn)?;
+                let mut buf = self.get(relsize_key, lsn)?;
                let relsize = buf.get_u32_le();

                result.add_range(rel_block_to_key(rel, 0)..rel_block_to_key(rel, relsize));
@@ -477,13 +430,13 @@ impl<R: Repository> DatadirTimeline<R> {
        ] {
            let slrudir_key = slru_dir_to_key(kind);
            result.add_key(slrudir_key);
-            let buf = self.tline.get(slrudir_key, lsn)?;
+            let buf = self.get(slrudir_key, lsn)?;
            let dir = SlruSegmentDirectory::des(&buf)?;
            let mut segments: Vec<u32> = dir.segments.iter().cloned().collect();
            segments.sort_unstable();
            for segno in segments {
                let segsize_key = slru_segment_size_to_key(kind, segno);
-                let mut buf = self.tline.get(segsize_key, lsn)?;
+                let mut buf = self.get(segsize_key, lsn)?;
                let segsize = buf.get_u32_le();

                result.add_range(
@@ -495,7 +448,7 @@ impl<R: Repository> DatadirTimeline<R> {

        // Then pg_twophase
        result.add_key(TWOPHASEDIR_KEY);
-        let buf = self.tline.get(TWOPHASEDIR_KEY, lsn)?;
+        let buf = self.get(TWOPHASEDIR_KEY, lsn)?;
        let twophase_dir = TwoPhaseDirectory::des(&buf)?;
        let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
        xids.sort_unstable();
@@ -509,31 +462,31 @@ impl<R: Repository> DatadirTimeline<R> {
        Ok(result.to_keyspace())
    }

-    pub fn repartition(&self, lsn: Lsn, partition_size: u64) -> Result<(KeyPartitioning, Lsn)> {
-        let mut partitioning_guard = self.partitioning.lock().unwrap();
-        if partitioning_guard.1 == Lsn(0)
-            || lsn.0 - partitioning_guard.1 .0 > self.repartition_threshold
-        {
-            let keyspace = self.collect_keyspace(lsn)?;
-            let partitioning = keyspace.partition(partition_size);
-            *partitioning_guard = (partitioning, lsn);
-            return Ok((partitioning_guard.0.clone(), lsn));
-        }
-        Ok((partitioning_guard.0.clone(), partitioning_guard.1))
-    }
+    /// Get cached size of relation if it was not updated after the specified LSN
+    fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber>;
+
+    /// Update cached relation size if there is no more recent update
+    fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
+
+    /// Store cached relation size
+    fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
+
+    /// Remove cached relation size
+    fn remove_cached_rel_size(&self, tag: &RelTag);
 }

 /// DatadirModification represents an operation to ingest an atomic set of
 /// updates to the repository. It is created by the 'begin_record'
 /// function. It is called for each WAL record, so that all the modifications
 /// by a one WAL record appear atomic.
-pub struct DatadirModification<'a, R: Repository> {
+pub struct DatadirModification<'a, T: DatadirTimeline> {
    /// The timeline this modification applies to. You can access this to
    /// read the state, but note that any pending updates are *not* reflected
    /// in the state in 'tline' yet.
-    pub tline: &'a DatadirTimeline<R>,
+    pub tline: &'a T,

-    lsn: Lsn,
+    /// Lsn assigned by begin_modification
+    pub lsn: Lsn,

    // The modifications are not applied directly to the underlying key-value store.
    // The put-functions add the modifications here, and they are flushed to the
@@ -543,7 +496,7 @@ pub struct DatadirModification<'a, R: Repository> {
    pending_nblocks: isize,
 }

-impl<'a, R: Repository> DatadirModification<'a, R> {
+impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
    /// Initialize a completely new repository.
    ///
    /// This inserts the directory metadata entries that are assumed to
@@ -744,26 +697,36 @@ impl<'a, R: Repository> DatadirModification<'a, R> {

        self.pending_nblocks += nblocks as isize;

+        // Update relation size cache
+        self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
+
        // Even if nblocks > 0, we don't insert any actual blocks here. That's up to the
        // caller.
-
        Ok(())
    }

    /// Truncate relation
    pub fn put_rel_truncation(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> {
        ensure!(rel.relnode != 0, "invalid relnode");
-        let size_key = rel_size_to_key(rel);
+        let last_lsn = self.tline.get_last_record_lsn();
+        if self.tline.get_rel_exists(rel, last_lsn)? {
+            let size_key = rel_size_to_key(rel);
+            // Fetch the old size first
+            let old_size = self.get(size_key)?.get_u32_le();

-        // Fetch the old size first
-        let old_size = self.get(size_key)?.get_u32_le();
+            // Update the entry with the new size.
+            let buf = nblocks.to_le_bytes();
+            self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));

-        // Update the entry with the new size.
-        let buf = nblocks.to_le_bytes();
-        self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
+            // Update relation size cache
+            self.tline.set_cached_rel_size(rel, self.lsn, nblocks);

-        // Update logical database size.
-        self.pending_nblocks -= old_size as isize - nblocks as isize;
+            // Update relation size cache
+            self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
+
+            // Update logical database size.
+            self.pending_nblocks -= old_size as isize - nblocks as isize;
+        }
        Ok(())
    }

@@ -781,6 +744,9 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
            let buf = nblocks.to_le_bytes();
            self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));

+            // Update relation size cache
+            self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
+
            self.pending_nblocks += nblocks as isize - old_size as isize;
        }
        Ok(())
@@ -806,6 +772,9 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
        let old_size = self.get(size_key)?.get_u32_le();
        self.pending_nblocks -= old_size as isize;

+        // Remove entry from relation size cache
+        self.tline.remove_cached_rel_size(&rel);
+
        // Delete size entry, as well as all blocks
        self.delete(rel_key_range(rel));

@@ -928,7 +897,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
            return Ok(());
        }

-        let writer = self.tline.tline.writer();
+        let writer = self.tline.writer();

        // Flush relation and  SLRU data blocks, keep metadata.
        let mut result: Result<()> = Ok(());
@@ -943,10 +912,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
        result?;

        if pending_nblocks != 0 {
-            self.tline.current_logical_size.fetch_add(
-                pending_nblocks * pg_constants::BLCKSZ as isize,
-                Ordering::SeqCst,
-            );
+            writer.update_current_logical_size(pending_nblocks * pg_constants::BLCKSZ as isize);
            self.pending_nblocks = 0;
        }

@@ -956,26 +922,25 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
    ///
    /// Finish this atomic update, writing all the updated keys to the
    /// underlying timeline.
+    /// All the modifications in this atomic update are stamped with `self.lsn`.
    ///
-    pub fn commit(self) -> Result<()> {
-        let writer = self.tline.tline.writer();
-
+    pub fn commit(&mut self) -> Result<()> {
+        let writer = self.tline.writer();
+        let lsn = self.lsn;
        let pending_nblocks = self.pending_nblocks;
+        self.pending_nblocks = 0;

-        for (key, value) in self.pending_updates {
-            writer.put(key, self.lsn, &value)?;
+        for (key, value) in self.pending_updates.drain() {
+            writer.put(key, lsn, &value)?;
        }
-        for key_range in self.pending_deletions {
-            writer.delete(key_range.clone(), self.lsn)?;
+        for key_range in self.pending_deletions.drain(..) {
+            writer.delete(key_range, lsn)?;
        }

-        writer.finish_write(self.lsn);
+        writer.finish_write(lsn);

        if pending_nblocks != 0 {
-            self.tline.current_logical_size.fetch_add(
-                pending_nblocks * pg_constants::BLCKSZ as isize,
-                Ordering::SeqCst,
-            );
+            writer.update_current_logical_size(pending_nblocks * pg_constants::BLCKSZ as isize);
        }

        Ok(())
@@ -1001,8 +966,8 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
                bail!("unexpected pending WAL record");
            }
        } else {
-            let last_lsn = self.tline.get_last_record_lsn();
-            self.tline.tline.get(key, last_lsn)
+            let lsn = Lsn::max(self.tline.get_last_record_lsn(), self.lsn);
+            self.tline.get(key, lsn)
        }
    }

@@ -1404,13 +1369,12 @@ fn is_slru_block_key(key: Key) -> bool {
 pub fn create_test_timeline<R: Repository>(
    repo: R,
    timeline_id: utils::zid::ZTimelineId,
-) -> Result<Arc<crate::DatadirTimeline<R>>> {
+) -> Result<std::sync::Arc<R::Timeline>> {
    let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?;
-    let tline = DatadirTimeline::new(tline, 256 * 1024);
    let mut m = tline.begin_modification(Lsn(8));
    m.init_empty()?;
    m.commit()?;
-    Ok(Arc::new(tline))
+    Ok(tline)
 }

 #[allow(clippy::bool_assert_comparison)]
@@ -1483,7 +1447,7 @@ mod tests {
            .contains(&TESTREL_A));

        // Run checkpoint and garbage collection and check that it's still not visible
-        newtline.tline.checkpoint(CheckpointConfig::Forced)?;
+        newtline.checkpoint(CheckpointConfig::Forced)?;
        repo.gc_iteration(Some(NEW_TIMELINE_ID), 0, true)?;

        assert!(!newtline
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -185,7 +185,7 @@ impl Value {
 /// A repository corresponds to one .neon directory. One repository holds multiple
 /// timelines, forked off from the same initial call to 'initdb'.
 pub trait Repository: Send + Sync {
-    type Timeline: Timeline;
+    type Timeline: crate::DatadirTimeline;

    /// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization.
    /// See [`crate::remote_storage`] for more details about the synchronization.
@@ -277,15 +277,6 @@ pub enum LocalTimelineState {
    Unloaded,
 }

-impl<'a, T> From<&'a RepositoryTimeline<T>> for LocalTimelineState {
-    fn from(local_timeline_entry: &'a RepositoryTimeline<T>) -> Self {
-        match local_timeline_entry {
-            RepositoryTimeline::Loaded(_) => LocalTimelineState::Loaded,
-            RepositoryTimeline::Unloaded { .. } => LocalTimelineState::Unloaded,
-        }
-    }
-}
-
 ///
 /// Result of performing GC
 ///
@@ -382,6 +373,11 @@ pub trait Timeline: Send + Sync {
        lsn: Lsn,
        latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
    ) -> Result<()>;
+
+    /// Get the physical size of the timeline at the latest LSN
+    fn get_physical_size(&self) -> u64;
+    /// Get the physical size of the timeline at the latest LSN non incrementally
+    fn get_physical_size_non_incremental(&self) -> Result<u64>;
 }

 /// Various functions to mutate the timeline.
@@ -405,12 +401,14 @@ pub trait TimelineWriter<'a> {
    /// the 'lsn' or anything older. The previous last record LSN is stored alongside
    /// the latest and can be read.
    fn finish_write(&self, lsn: Lsn);
+
+    fn update_current_logical_size(&self, delta: isize);
 }

 #[cfg(test)]
 pub mod repo_harness {
    use bytes::BytesMut;
-    use lazy_static::lazy_static;
+    use once_cell::sync::Lazy;
    use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
    use std::{fs, path::PathBuf};

@@ -441,14 +439,13 @@ pub mod repo_harness {
        buf.freeze()
    }

-    lazy_static! {
-        static ref LOCK: RwLock<()> = RwLock::new(());
-    }
+    static LOCK: Lazy<RwLock<()>> = Lazy::new(|| RwLock::new(()));

    impl From<TenantConf> for TenantConfOpt {
        fn from(tenant_conf: TenantConf) -> Self {
            Self {
                checkpoint_distance: Some(tenant_conf.checkpoint_distance),
+                checkpoint_timeout: Some(tenant_conf.checkpoint_timeout),
                compaction_target_size: Some(tenant_conf.compaction_target_size),
                compaction_period: Some(tenant_conf.compaction_period),
                compaction_threshold: Some(tenant_conf.compaction_threshold),
@@ -591,11 +588,10 @@ mod tests {
    //use std::sync::Arc;
    use bytes::BytesMut;
    use hex_literal::hex;
-    use lazy_static::lazy_static;
+    use once_cell::sync::Lazy;

-    lazy_static! {
-        static ref TEST_KEY: Key = Key::from_slice(&hex!("112222222233333333444444445500000001"));
-    }
+    static TEST_KEY: Lazy<Key> =
+        Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001")));

    #[test]
    fn test_basic() -> Result<()> {
--- a/pageserver/src/storage_sync.rs
+++ b/pageserver/src/storage_sync.rs
@@ -155,8 +155,7 @@ use std::{

 use anyhow::{anyhow, bail, Context};
 use futures::stream::{FuturesUnordered, StreamExt};
-use lazy_static::lazy_static;
-use once_cell::sync::OnceCell;
+use once_cell::sync::{Lazy, OnceCell};
 use remote_storage::{GenericRemoteStorage, RemoteStorage};
 use tokio::{
    fs,
@@ -173,10 +172,10 @@ use self::{
 };
 use crate::{
    config::PageServerConf,
+    exponential_backoff,
    layered_repository::{
        ephemeral_file::is_ephemeral_file,
        metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
-        LayeredRepository,
    },
    storage_sync::{self, index::RemoteIndex},
    tenant_mgr::attach_downloaded_tenants,
@@ -185,8 +184,8 @@ use crate::{
 };

 use metrics::{
-    register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
-    HistogramVec, IntCounter, IntCounterVec, IntGauge,
+    register_histogram_vec, register_int_counter_vec, register_int_gauge, HistogramVec,
+    IntCounterVec, IntGauge,
 };
 use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

@@ -194,32 +193,33 @@ use self::download::download_index_parts;
 pub use self::download::gather_tenant_timelines_index_parts;
 pub use self::download::TEMP_DOWNLOAD_EXTENSION;

-lazy_static! {
-    static ref REMAINING_SYNC_ITEMS: IntGauge = register_int_gauge!(
+static REMAINING_SYNC_ITEMS: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!(
        "pageserver_remote_storage_remaining_sync_items",
        "Number of storage sync items left in the queue"
    )
-    .expect("failed to register pageserver remote storage remaining sync items int gauge");
-    static ref FATAL_TASK_FAILURES: IntCounter = register_int_counter!(
-        "pageserver_remote_storage_fatal_task_failures_total",
-        "Number of critically failed tasks"
-    )
-    .expect("failed to register pageserver remote storage remaining sync items int gauge");
-    static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
+    .expect("failed to register pageserver remote storage remaining sync items int gauge")
+});
+
+static IMAGE_SYNC_TIME: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
        "pageserver_remote_storage_image_sync_seconds",
        "Time took to synchronize (download or upload) a whole pageserver image. \
        Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
        &["tenant_id", "timeline_id", "operation_kind", "status"],
        vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
    )
-    .expect("failed to register pageserver image sync time histogram vec");
-    static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
+    .expect("failed to register pageserver image sync time histogram vec")
+});
+
+static REMOTE_INDEX_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
        "pageserver_remote_storage_remote_index_uploads_total",
        "Number of remote index uploads",
        &["tenant_id", "timeline_id"],
    )
-    .expect("failed to register pageserver remote index upload vec");
-}
+    .expect("failed to register pageserver remote index upload vec")
+});

 static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();

@@ -970,14 +970,19 @@ fn storage_sync_loop<P, S>(
    }
 }

-// needed to check whether the download happened
-// more informative than just a bool
 #[derive(Debug)]
-enum DownloadMarker {
+enum DownloadStatus {
    Downloaded,
    Nothing,
 }

+#[derive(Debug)]
+enum UploadStatus {
+    Uploaded,
+    Failed,
+    Nothing,
+}
+
 async fn process_batches<P, S>(
    conf: &'static PageServerConf,
    max_sync_errors: NonZeroU32,
@@ -1017,7 +1022,7 @@ where
            "Finished storage sync task for sync id {sync_id} download marker {:?}",
            download_marker
        );
-        if matches!(download_marker, DownloadMarker::Downloaded) {
+        if matches!(download_marker, DownloadStatus::Downloaded) {
            downloaded_timelines.insert(sync_id.tenant_id);
        }
    }
@@ -1031,7 +1036,7 @@ async fn process_sync_task_batch<P, S>(
    max_sync_errors: NonZeroU32,
    sync_id: ZTenantTimelineId,
    batch: SyncTaskBatch,
-) -> DownloadMarker
+) -> DownloadStatus
 where
    P: Debug + Send + Sync + 'static,
    S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1048,7 +1053,7 @@ where
    // When operating in a system without tasks failing over the error threshold,
    // current batching and task processing systems aim to update the layer set and metadata files (remote and local),
    // without "losing" such layer files.
-    let (upload_result, status_update) = tokio::join!(
+    let (upload_status, download_status) = tokio::join!(
        async {
            if let Some(upload_data) = upload_data {
                match validate_task_retries(upload_data, max_sync_errors)
@@ -1066,7 +1071,7 @@ where
                            "upload",
                        )
                        .await;
-                        return Some(());
+                        UploadStatus::Uploaded
                    }
                    ControlFlow::Break(failed_upload_data) => {
                        if let Err(e) = update_remote_data(
@@ -1083,10 +1088,13 @@ where
                        {
                            error!("Failed to update remote timeline {sync_id}: {e:?}");
                        }
+
+                        UploadStatus::Failed
                    }
                }
+            } else {
+                UploadStatus::Nothing
            }
-            None
        }
        .instrument(info_span!("upload_timeline_data")),
        async {
@@ -1116,50 +1124,53 @@ where
                    }
                }
            }
-            DownloadMarker::Nothing
+            DownloadStatus::Nothing
        }
        .instrument(info_span!("download_timeline_data")),
    );

    if let Some(delete_data) = batch.delete {
-        if upload_result.is_some() {
-            match validate_task_retries(delete_data, max_sync_errors)
-                .instrument(info_span!("retries_validation"))
-                .await
-            {
-                ControlFlow::Continue(new_delete_data) => {
-                    delete_timeline_data(
-                        conf,
-                        (storage.as_ref(), &index, sync_queue),
-                        sync_id,
-                        new_delete_data,
-                        sync_start,
-                        "delete",
-                    )
-                    .instrument(info_span!("delete_timeline_data"))
-                    .await;
-                }
-                ControlFlow::Break(failed_delete_data) => {
-                    if let Err(e) = update_remote_data(
-                        conf,
-                        storage.as_ref(),
-                        &index,
-                        sync_id,
-                        RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
-                    )
+        match upload_status {
+            UploadStatus::Uploaded | UploadStatus::Nothing => {
+                match validate_task_retries(delete_data, max_sync_errors)
+                    .instrument(info_span!("retries_validation"))
                    .await
-                    {
-                        error!("Failed to update remote timeline {sync_id}: {e:?}");
+                {
+                    ControlFlow::Continue(new_delete_data) => {
+                        delete_timeline_data(
+                            conf,
+                            (storage.as_ref(), &index, sync_queue),
+                            sync_id,
+                            new_delete_data,
+                            sync_start,
+                            "delete",
+                        )
+                        .instrument(info_span!("delete_timeline_data"))
+                        .await;
+                    }
+                    ControlFlow::Break(failed_delete_data) => {
+                        if let Err(e) = update_remote_data(
+                            conf,
+                            storage.as_ref(),
+                            &index,
+                            sync_id,
+                            RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
+                        )
+                        .await
+                        {
+                            error!("Failed to update remote timeline {sync_id}: {e:?}");
+                        }
                    }
                }
            }
-        } else {
-            sync_queue.push(sync_id, SyncTask::Delete(delete_data));
-            warn!("Skipping delete task due to failed upload tasks, reenqueuing");
+            UploadStatus::Failed => {
+                warn!("Skipping delete task due to failed upload tasks, reenqueuing");
+                sync_queue.push(sync_id, SyncTask::Delete(delete_data));
+            }
        }
    }

-    status_update
+    download_status
 }

 async fn download_timeline_data<P, S>(
@@ -1170,7 +1181,7 @@ async fn download_timeline_data<P, S>(
    new_download_data: SyncData<LayersDownload>,
    sync_start: Instant,
    task_name: &str,
-) -> DownloadMarker
+) -> DownloadStatus
 where
    P: Debug + Send + Sync + 'static,
    S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
@@ -1199,7 +1210,7 @@ where
                Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
                    Ok(()) => {
                        register_sync_status(sync_id, sync_start, task_name, Some(true));
-                        return DownloadMarker::Downloaded;
+                        return DownloadStatus::Downloaded;
                    }
                    Err(e) => {
                        error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
@@ -1215,7 +1226,7 @@ where
        }
    }

-    DownloadMarker::Nothing
+    DownloadStatus::Nothing
 }

 async fn update_local_metadata(
@@ -1257,7 +1268,13 @@ async fn update_local_metadata(
            timeline_id,
        } = sync_id;
        tokio::task::spawn_blocking(move || {
-            LayeredRepository::save_metadata(conf, timeline_id, tenant_id, &cloned_metadata, true)
+            crate::layered_repository::save_metadata(
+                conf,
+                timeline_id,
+                tenant_id,
+                &cloned_metadata,
+                true,
+            )
        })
        .await
        .with_context(|| {
@@ -1487,11 +1504,7 @@ async fn validate_task_retries<T>(
        return ControlFlow::Break(sync_data);
    }

-    if current_attempt > 0 {
-        let seconds_to_wait = 2.0_f64.powf(current_attempt as f64 - 1.0).min(30.0);
-        info!("Waiting {seconds_to_wait} seconds before starting the task");
-        tokio::time::sleep(Duration::from_secs_f64(seconds_to_wait)).await;
-    }
+    exponential_backoff(current_attempt, 1.0, 30.0).await;
    ControlFlow::Continue(sync_data)
 }

--- a/pageserver/src/storage_sync/download.rs
+++ b/pageserver/src/storage_sync/download.rs
@@ -130,6 +130,7 @@ where
            tenant_path.display()
        )
    })?;
+
    let timelines = storage
        .list_prefixes(Some(tenant_storage_path))
        .await
@@ -140,6 +141,13 @@ where
            )
        })?;

+    if timelines.is_empty() {
+        anyhow::bail!(
+            "no timelines found on the remote storage for tenant {}",
+            tenant_id
+        )
+    }
+
    let mut sync_ids = HashSet::new();

    for timeline_remote_storage_key in timelines {
--- a/pageserver/src/storage_sync/upload.rs
+++ b/pageserver/src/storage_sync/upload.rs
@@ -4,7 +4,7 @@ use std::{fmt::Debug, path::PathBuf};

 use anyhow::Context;
 use futures::stream::{FuturesUnordered, StreamExt};
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use remote_storage::RemoteStorage;
 use tokio::fs;
 use tracing::{debug, error, info, warn};
@@ -20,14 +20,14 @@ use crate::{
 };
 use metrics::{register_int_counter_vec, IntCounterVec};

-lazy_static! {
-    static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
+static NO_LAYERS_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
        "pageserver_remote_storage_no_layers_uploads_total",
        "Number of skipped uploads due to no layers",
        &["tenant_id", "timeline_id"],
    )
-    .expect("failed to register pageserver no layers upload vec");
-}
+    .expect("failed to register pageserver no layers upload vec")
+});

 /// Serializes and uploads the given index part data to the remote storage.
 pub(super) async fn upload_index_part<P, S>(
--- a/pageserver/src/tenant_config.rs
+++ b/pageserver/src/tenant_config.rs
@@ -23,6 +23,7 @@ pub mod defaults {
    // which is good for now to trigger bugs.
    // This parameter actually determines L0 layer file size.
    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
+    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";

    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
@@ -48,6 +49,9 @@ pub struct TenantConf {
    // page server crashes.
    // This parameter actually determines L0 layer file size.
    pub checkpoint_distance: u64,
+    // The in-memory layer is also flushed at least once every checkpoint_timeout,
+    // so that WAL is eventually uploaded after activity has stopped.
+    pub checkpoint_timeout: Duration,
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub compaction_target_size: u64,
@@ -90,6 +94,7 @@ pub struct TenantConf {
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
 pub struct TenantConfOpt {
    pub checkpoint_distance: Option<u64>,
+    pub checkpoint_timeout: Option<Duration>,
    pub compaction_target_size: Option<u64>,
    #[serde(with = "humantime_serde")]
    pub compaction_period: Option<Duration>,
@@ -113,6 +118,9 @@ impl TenantConfOpt {
            checkpoint_distance: self
                .checkpoint_distance
                .unwrap_or(global_conf.checkpoint_distance),
+            checkpoint_timeout: self
+                .checkpoint_timeout
+                .unwrap_or(global_conf.checkpoint_timeout),
            compaction_target_size: self
                .compaction_target_size
                .unwrap_or(global_conf.compaction_target_size),
@@ -142,6 +150,9 @@ impl TenantConfOpt {
        if let Some(checkpoint_distance) = other.checkpoint_distance {
            self.checkpoint_distance = Some(checkpoint_distance);
        }
+        if let Some(checkpoint_timeout) = other.checkpoint_timeout {
+            self.checkpoint_timeout = Some(checkpoint_timeout);
+        }
        if let Some(compaction_target_size) = other.compaction_target_size {
            self.compaction_target_size = Some(compaction_target_size);
        }
@@ -181,6 +192,8 @@ impl TenantConf {

        TenantConf {
            checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
+            checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
+                .expect("cannot parse default checkpoint timeout"),
            compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
            compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
                .expect("cannot parse default compaction period"),
@@ -212,6 +225,7 @@ impl TenantConf {
    pub fn dummy_conf() -> Self {
        TenantConf {
            checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
+            checkpoint_timeout: Duration::from_secs(600),
            compaction_target_size: 4 * 1024 * 1024,
            compaction_period: Duration::from_secs(10),
            compaction_threshold: defaults::DEFAULT_COMPACTION_THRESHOLD,
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -2,8 +2,8 @@
 //! page server.

 use crate::config::PageServerConf;
+use crate::http::models::TenantInfo;
 use crate::layered_repository::{load_metadata, LayeredRepository};
-use crate::pgdatadir_mapping::DatadirTimeline;
 use crate::repository::Repository;
 use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
 use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
@@ -12,10 +12,9 @@ use crate::thread_mgr::ThreadKind;
 use crate::timelines::CreateRepo;
 use crate::walredo::PostgresRedoManager;
 use crate::{thread_mgr, timelines, walreceiver};
-use crate::{DatadirTimelineImpl, RepositoryImpl};
+use crate::{RepositoryImpl, TimelineImpl};
 use anyhow::Context;
 use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
 use std::collections::hash_map::Entry;
 use std::collections::{HashMap, HashSet};
 use std::fmt;
@@ -28,23 +27,25 @@ use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};

 mod tenants_state {
    use anyhow::ensure;
+    use once_cell::sync::Lazy;
    use std::{
        collections::HashMap,
        sync::{RwLock, RwLockReadGuard, RwLockWriteGuard},
    };
    use tokio::sync::mpsc;
    use tracing::{debug, error};
-
    use utils::zid::ZTenantId;

    use crate::tenant_mgr::{LocalTimelineUpdate, Tenant};

-    lazy_static::lazy_static! {
-        static ref TENANTS: RwLock<HashMap<ZTenantId, Tenant>> = RwLock::new(HashMap::new());
-        /// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
-        /// so that it can enable/disable corresponding processes.
-        static ref TIMELINE_UPDATE_SENDER: RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>> = RwLock::new(None);
-    }
+    static TENANTS: Lazy<RwLock<HashMap<ZTenantId, Tenant>>> =
+        Lazy::new(|| RwLock::new(HashMap::new()));
+
+    /// Sends updates to the local timelines (creation and deletion) to the WAL receiver,
+    /// so that it can enable/disable corresponding processes.
+    static TIMELINE_UPDATE_SENDER: Lazy<
+        RwLock<Option<mpsc::UnboundedSender<LocalTimelineUpdate>>>,
+    > = Lazy::new(|| RwLock::new(None));

    pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<ZTenantId, Tenant>> {
        TENANTS
@@ -101,7 +102,7 @@ struct Tenant {
    ///
    /// Local timelines have more metadata that's loaded into memory,
    /// that is located in the `repo.timelines` field, [`crate::layered_repository::LayeredTimelineEntry`].
-    local_timelines: HashMap<ZTimelineId, Arc<DatadirTimelineImpl>>,
+    local_timelines: HashMap<ZTimelineId, Arc<<RepositoryImpl as Repository>::Timeline>>,
 }

 #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
@@ -178,7 +179,7 @@ pub enum LocalTimelineUpdate {
    },
    Attach {
        id: ZTenantTimelineId,
-        datadir: Arc<DatadirTimelineImpl>,
+        datadir: Arc<<RepositoryImpl as Repository>::Timeline>,
    },
 }

@@ -382,7 +383,7 @@ pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<Rep
 pub fn get_local_timeline_with_load(
    tenant_id: ZTenantId,
    timeline_id: ZTimelineId,
-) -> anyhow::Result<Arc<DatadirTimelineImpl>> {
+) -> anyhow::Result<Arc<TimelineImpl>> {
    let mut m = tenants_state::write_tenants();
    let tenant = m
        .get_mut(&tenant_id)
@@ -489,34 +490,23 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any
 fn load_local_timeline(
    repo: &RepositoryImpl,
    timeline_id: ZTimelineId,
-) -> anyhow::Result<Arc<DatadirTimeline<LayeredRepository>>> {
+) -> anyhow::Result<Arc<TimelineImpl>> {
    let inmem_timeline = repo.get_timeline_load(timeline_id).with_context(|| {
        format!("Inmem timeline {timeline_id} not found in tenant's repository")
    })?;
-    let repartition_distance = repo.get_checkpoint_distance() / 10;
-    let page_tline = Arc::new(DatadirTimelineImpl::new(
-        inmem_timeline,
-        repartition_distance,
-    ));
-    page_tline.init_logical_size()?;
+    inmem_timeline.init_logical_size()?;

    tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach {
        id: ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
-        datadir: Arc::clone(&page_tline),
+        datadir: Arc::clone(&inmem_timeline),
    });

-    Ok(page_tline)
-}
-
-#[serde_as]
-#[derive(Serialize, Deserialize, Clone)]
-pub struct TenantInfo {
-    #[serde_as(as = "DisplayFromStr")]
-    pub id: ZTenantId,
-    pub state: Option<TenantState>,
-    pub has_in_progress_downloads: Option<bool>,
+    Ok(inmem_timeline)
 }

+///
+/// Get list of tenants, for the mgmt API
+///
 pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
    tenants_state::read_tenants()
        .iter()
@@ -532,6 +522,7 @@ pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
            TenantInfo {
                id: *id,
                state: Some(tenant.state),
+                current_physical_size: None,
                has_in_progress_downloads,
            }
        })
--- a/pageserver/src/tenant_tasks.rs
+++ b/pageserver/src/tenant_tasks.rs
@@ -120,6 +120,10 @@ pub fn init_tenant_task_pool() -> anyhow::Result<()> {
    let runtime = tokio::runtime::Builder::new_multi_thread()
        .thread_name("tenant-task-worker")
        .enable_all()
+        .on_thread_start(|| {
+            thread_mgr::register(ThreadKind::TenantTaskWorker, "tenant-task-worker")
+        })
+        .on_thread_stop(thread_mgr::deregister)
        .build()?;

    let (gc_send, mut gc_recv) = mpsc::channel::<ZTenantId>(100);
--- a/pageserver/src/thread_mgr.rs
+++ b/pageserver/src/thread_mgr.rs
@@ -45,21 +45,20 @@ use tokio::sync::watch;

 use tracing::{debug, error, info, warn};

-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;

 use utils::zid::{ZTenantId, ZTimelineId};

 use crate::shutdown_pageserver;

-lazy_static! {
-    /// Each thread that we track is associated with a "thread ID". It's just
-    /// an increasing number that we assign, not related to any system thread
-    /// id.
-    static ref NEXT_THREAD_ID: AtomicU64 = AtomicU64::new(1);
+/// Each thread that we track is associated with a "thread ID". It's just
+/// an increasing number that we assign, not related to any system thread
+/// id.
+static NEXT_THREAD_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));

-    /// Global registry of threads
-    static ref THREADS: Mutex<HashMap<u64, Arc<PageServerThread>>> = Mutex::new(HashMap::new());
-}
+/// Global registry of threads
+static THREADS: Lazy<Mutex<HashMap<u64, Arc<PageServerThread>>>> =
+    Lazy::new(|| Mutex::new(HashMap::new()));

 // There is a Tokio watch channel for each thread, which can be used to signal the
 // thread that it needs to shut down. This thread local variable holds the receiving
@@ -97,6 +96,9 @@ pub enum ThreadKind {
    // Thread that schedules new compaction and gc jobs
    TenantTaskManager,

+    // Worker thread for tenant tasks thread pool
+    TenantTaskWorker,
+
    // Thread that flushes frozen in-memory layers to disk
    LayerFlushThread,

@@ -105,18 +107,20 @@ pub enum ThreadKind {
    StorageSync,
 }

+#[derive(Default)]
 struct MutableThreadState {
    /// Tenant and timeline that this thread is associated with.
    tenant_id: Option<ZTenantId>,
    timeline_id: Option<ZTimelineId>,

    /// Handle for waiting for the thread to exit. It can be None, if the
-    /// the thread has already exited.
+    /// thread has already exited, or if this thread is managed externally
+    /// and was not spawned through the `spawn` function in thread_mgr.rs.
    join_handle: Option<JoinHandle<()>>,
 }

 struct PageServerThread {
-    _thread_id: u64,
+    thread_id: u64,

    kind: ThreadKind,

@@ -147,7 +151,7 @@ where
    let (shutdown_tx, shutdown_rx) = watch::channel(());
    let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
    let thread = Arc::new(PageServerThread {
-        _thread_id: thread_id,
+        thread_id,
        kind,
        name: name.to_string(),
        shutdown_requested: AtomicBool::new(false),
@@ -315,8 +319,10 @@ pub fn shutdown_threads(
            drop(thread_mut);
            let _ = join_handle.join();
        } else {
-            // The thread had not even fully started yet. Or it was shut down
-            // concurrently and already exited
+            // Possibly one of:
+            //  * The thread had not even fully started yet.
+            //  * The thread was shut down concurrently and has already exited.
+            //  * The thread is managed through `register`/`deregister` fns without providing a join handle.
        }
    }
 }
@@ -348,3 +354,56 @@ pub fn is_shutdown_requested() -> bool {
        }
    })
 }
+
+/// Registers the current thread if it was not spawned through the `spawn` function,
+/// for example a tokio blocking thread. Expected to be used in tandem with
+/// `deregister`. Panics if the current thread is already registered.
+/// NOTE: threads registered through this function cannot be joined.
+pub fn register(kind: ThreadKind, name: &str) {
+    CURRENT_THREAD.with(|ct| {
+        let mut borrowed = ct.borrow_mut();
+        if borrowed.is_some() {
+            panic!("thread already registered")
+        };
+        let (shutdown_tx, shutdown_rx) = watch::channel(());
+        let thread_id = NEXT_THREAD_ID.fetch_add(1, Ordering::Relaxed);
+
+        let thread = Arc::new(PageServerThread {
+            thread_id,
+            kind,
+            name: name.to_owned(),
+            shutdown_requested: AtomicBool::new(false),
+            shutdown_tx,
+            mutable: Mutex::new(MutableThreadState {
+                tenant_id: None,
+                timeline_id: None,
+                join_handle: None,
+            }),
+        });
+
+        *borrowed = Some(Arc::clone(&thread));
+
+        SHUTDOWN_RX.with(|rx| {
+            *rx.borrow_mut() = Some(shutdown_rx);
+        });
+
+        THREADS.lock().unwrap().insert(thread_id, thread);
+    });
+}
+
+/// Expected to be used in tandem with `register`; see the docs on `register` for more details.
+pub fn deregister() {
+    CURRENT_THREAD.with(|ct| {
+        let mut borrowed = ct.borrow_mut();
+        let thread = match borrowed.take() {
+            Some(thread) => thread,
+            None => panic!("calling deregister on unregistered thread"),
+        };
+
+        SHUTDOWN_RX.with(|rx| {
+            *rx.borrow_mut() = None;
+        });
+
+        THREADS.lock().unwrap().remove(&thread.thread_id)
+    });
+}
--- a/pageserver/src/timelines.rs
+++ b/pageserver/src/timelines.rs
@@ -4,8 +4,6 @@

 use anyhow::{bail, ensure, Context, Result};
 use postgres_ffi::ControlFileData;
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
 use std::{
    fs,
    path::Path,
@@ -20,123 +18,15 @@ use utils::{
    zid::{ZTenantId, ZTimelineId},
 };

+use crate::tenant_mgr;
 use crate::{
-    config::PageServerConf,
-    layered_repository::metadata::TimelineMetadata,
-    repository::{LocalTimelineState, Repository},
-    storage_sync::index::RemoteIndex,
-    tenant_config::TenantConfOpt,
-    DatadirTimeline, RepositoryImpl,
+    config::PageServerConf, repository::Repository, storage_sync::index::RemoteIndex,
+    tenant_config::TenantConfOpt, RepositoryImpl, TimelineImpl,
 };
 use crate::{import_datadir, LOG_FILE_NAME};
 use crate::{layered_repository::LayeredRepository, walredo::WalRedoManager};
-use crate::{repository::RepositoryTimeline, tenant_mgr};
 use crate::{repository::Timeline, CheckpointConfig};

-#[serde_as]
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct LocalTimelineInfo {
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub ancestor_timeline_id: Option<ZTimelineId>,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub ancestor_lsn: Option<Lsn>,
-    #[serde_as(as = "DisplayFromStr")]
-    pub last_record_lsn: Lsn,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub prev_record_lsn: Option<Lsn>,
-    #[serde_as(as = "DisplayFromStr")]
-    pub latest_gc_cutoff_lsn: Lsn,
-    #[serde_as(as = "DisplayFromStr")]
-    pub disk_consistent_lsn: Lsn,
-    pub current_logical_size: Option<usize>, // is None when timeline is Unloaded
-    pub current_logical_size_non_incremental: Option<usize>,
-    pub timeline_state: LocalTimelineState,
-}
-
-impl LocalTimelineInfo {
-    pub fn from_loaded_timeline<R: Repository>(
-        datadir_tline: &DatadirTimeline<R>,
-        include_non_incremental_logical_size: bool,
-    ) -> anyhow::Result<Self> {
-        let last_record_lsn = datadir_tline.tline.get_last_record_lsn();
-        let info = LocalTimelineInfo {
-            ancestor_timeline_id: datadir_tline.tline.get_ancestor_timeline_id(),
-            ancestor_lsn: {
-                match datadir_tline.tline.get_ancestor_lsn() {
-                    Lsn(0) => None,
-                    lsn @ Lsn(_) => Some(lsn),
-                }
-            },
-            disk_consistent_lsn: datadir_tline.tline.get_disk_consistent_lsn(),
-            last_record_lsn,
-            prev_record_lsn: Some(datadir_tline.tline.get_prev_record_lsn()),
-            latest_gc_cutoff_lsn: *datadir_tline.tline.get_latest_gc_cutoff_lsn(),
-            timeline_state: LocalTimelineState::Loaded,
-            current_logical_size: Some(datadir_tline.get_current_logical_size()),
-            current_logical_size_non_incremental: if include_non_incremental_logical_size {
-                Some(datadir_tline.get_current_logical_size_non_incremental(last_record_lsn)?)
-            } else {
-                None
-            },
-        };
-        Ok(info)
-    }
-
-    pub fn from_unloaded_timeline(metadata: &TimelineMetadata) -> Self {
-        LocalTimelineInfo {
-            ancestor_timeline_id: metadata.ancestor_timeline(),
-            ancestor_lsn: {
-                match metadata.ancestor_lsn() {
-                    Lsn(0) => None,
-                    lsn @ Lsn(_) => Some(lsn),
-                }
-            },
-            disk_consistent_lsn: metadata.disk_consistent_lsn(),
-            last_record_lsn: metadata.disk_consistent_lsn(),
-            prev_record_lsn: metadata.prev_record_lsn(),
-            latest_gc_cutoff_lsn: metadata.latest_gc_cutoff_lsn(),
-            timeline_state: LocalTimelineState::Unloaded,
-            current_logical_size: None,
-            current_logical_size_non_incremental: None,
-        }
-    }
-
-    pub fn from_repo_timeline<T>(
-        tenant_id: ZTenantId,
-        timeline_id: ZTimelineId,
-        repo_timeline: &RepositoryTimeline<T>,
-        include_non_incremental_logical_size: bool,
-    ) -> anyhow::Result<Self> {
-        match repo_timeline {
-            RepositoryTimeline::Loaded(_) => {
-                let datadir_tline =
-                    tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id)?;
-                Self::from_loaded_timeline(&datadir_tline, include_non_incremental_logical_size)
-            }
-            RepositoryTimeline::Unloaded { metadata } => Ok(Self::from_unloaded_timeline(metadata)),
-        }
-    }
-}
-
-#[serde_as]
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct RemoteTimelineInfo {
-    #[serde_as(as = "DisplayFromStr")]
-    pub remote_consistent_lsn: Lsn,
-    pub awaits_download: bool,
-}
-
-#[serde_as]
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct TimelineInfo {
-    #[serde_as(as = "DisplayFromStr")]
-    pub tenant_id: ZTenantId,
-    #[serde_as(as = "DisplayFromStr")]
-    pub timeline_id: ZTimelineId,
-    pub local: Option<LocalTimelineInfo>,
-    pub remote: Option<RemoteTimelineInfo>,
-}
-
 #[derive(Debug, Clone, Copy)]
 pub struct PointInTime {
    pub timeline_id: ZTimelineId,
@@ -298,19 +188,18 @@ fn bootstrap_timeline<R: Repository>(
    // Initdb lsn will be equal to last_record_lsn which will be set after import.
    // Because we know it upfront avoid having an option or dummy zero value by passing it to create_empty_timeline.
    let timeline = repo.create_empty_timeline(tli, lsn)?;
-    let mut page_tline: DatadirTimeline<R> = DatadirTimeline::new(timeline, u64::MAX);
-    import_datadir::import_timeline_from_postgres_datadir(&pgdata_path, &mut page_tline, lsn)?;
+    import_datadir::import_timeline_from_postgres_datadir(&pgdata_path, &*timeline, lsn)?;

    fail::fail_point!("before-checkpoint-new-timeline", |_| {
        bail!("failpoint before-checkpoint-new-timeline");
    });

-    page_tline.tline.checkpoint(CheckpointConfig::Forced)?;
+    timeline.checkpoint(CheckpointConfig::Forced)?;

    info!(
        "created root timeline {} timeline.lsn {}",
        tli,
-        page_tline.tline.get_last_record_lsn()
+        timeline.get_last_record_lsn()
    );

    // Remove temp dir. We don't need it anymore
@@ -319,36 +208,22 @@ fn bootstrap_timeline<R: Repository>(
    Ok(())
 }

-pub(crate) fn get_local_timelines(
-    tenant_id: ZTenantId,
-    include_non_incremental_logical_size: bool,
-) -> Result<Vec<(ZTimelineId, LocalTimelineInfo)>> {
-    let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
-        .with_context(|| format!("Failed to get repo for tenant {}", tenant_id))?;
-    let repo_timelines = repo.list_timelines();
-
-    let mut local_timeline_info = Vec::with_capacity(repo_timelines.len());
-    for (timeline_id, repository_timeline) in repo_timelines {
-        local_timeline_info.push((
-            timeline_id,
-            LocalTimelineInfo::from_repo_timeline(
-                tenant_id,
-                timeline_id,
-                &repository_timeline,
-                include_non_incremental_logical_size,
-            )?,
-        ))
-    }
-    Ok(local_timeline_info)
-}
-
+///
+/// Create a new timeline.
+///
+/// Returns the new timeline ID and a reference to its Timeline object.
+///
+/// If the caller specified the timeline ID to use (`new_timeline_id`) and a timeline with
+/// the same ID already exists, returns `Ok(None)`. If `new_timeline_id` is not given,
+/// a new unique ID is generated.
+///
 pub(crate) fn create_timeline(
    conf: &'static PageServerConf,
    tenant_id: ZTenantId,
    new_timeline_id: Option<ZTimelineId>,
    ancestor_timeline_id: Option<ZTimelineId>,
    mut ancestor_start_lsn: Option<Lsn>,
-) -> Result<Option<TimelineInfo>> {
+) -> Result<Option<(ZTimelineId, Arc<TimelineImpl>)>> {
    let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
    let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;

@@ -357,7 +232,7 @@ pub(crate) fn create_timeline(
        return Ok(None);
    }

-    let new_timeline_info = match ancestor_timeline_id {
+    match ancestor_timeline_id {
        Some(ancestor_timeline_id) => {
            let ancestor_timeline = repo
                .get_timeline_load(ancestor_timeline_id)
@@ -385,26 +260,13 @@ pub(crate) fn create_timeline(
                }
            }

-            repo.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?;
-            // load the timeline into memory
-            let loaded_timeline =
-                tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;
-            LocalTimelineInfo::from_loaded_timeline(&loaded_timeline, false)
-                .context("cannot fill timeline info")?
-        }
-        None => {
-            bootstrap_timeline(conf, tenant_id, new_timeline_id, repo.as_ref())?;
-            // load the timeline into memory
-            let new_timeline =
-                tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;
-            LocalTimelineInfo::from_loaded_timeline(&new_timeline, false)
-                .context("cannot fill timeline info")?
+            repo.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
        }
+        None => bootstrap_timeline(conf, tenant_id, new_timeline_id, repo.as_ref())?,
    };
-    Ok(Some(TimelineInfo {
-        tenant_id,
-        timeline_id: new_timeline_id,
-        local: Some(new_timeline_info),
-        remote: None,
-    }))
+
+    // load the timeline into memory
+    let loaded_timeline = tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;
+
+    Ok(Some((new_timeline_id, loaded_timeline)))
 }
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -10,7 +10,7 @@
 //! This is similar to PostgreSQL's virtual file descriptor facility in
 //! src/backend/storage/file/fd.c
 //!
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use once_cell::sync::OnceCell;
 use std::fs::{File, OpenOptions};
 use std::io::{Error, ErrorKind, Read, Seek, SeekFrom, Write};
@@ -32,23 +32,24 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    1.0,      // 1 sec
 ];

-lazy_static! {
-    static ref STORAGE_IO_TIME: HistogramVec = register_histogram_vec!(
+static STORAGE_IO_TIME: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
        "pageserver_io_operations_seconds",
        "Time spent in IO operations",
        &["operation", "tenant_id", "timeline_id"],
        STORAGE_IO_TIME_BUCKETS.into()
    )
-    .expect("failed to define a metric");
-}
-lazy_static! {
-    static ref STORAGE_IO_SIZE: IntGaugeVec = register_int_gauge_vec!(
+    .expect("failed to define a metric")
+});
+
+static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
        "pageserver_io_operations_bytes_total",
        "Total amount of bytes read/written in IO operations",
        &["operation", "tenant_id", "timeline_id"]
    )
-    .expect("failed to define a metric");
-}
+    .expect("failed to define a metric")
+});

 ///
 /// A virtual file descriptor. You can use this just like std::fs::File, but internally
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -30,11 +30,8 @@ use anyhow::Result;
 use bytes::{Buf, Bytes, BytesMut};
 use tracing::*;

-use std::collections::HashMap;
-
 use crate::pgdatadir_mapping::*;
 use crate::reltag::{RelTag, SlruKind};
-use crate::repository::Repository;
 use crate::walrecord::*;
 use postgres_ffi::nonrelfile_utils::mx_offset_to_member_segment;
 use postgres_ffi::xlog_utils::*;
@@ -44,17 +41,15 @@ use utils::lsn::Lsn;

 static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);

-pub struct WalIngest<'a, R: Repository> {
-    timeline: &'a DatadirTimeline<R>,
+pub struct WalIngest<'a, T: DatadirTimeline> {
+    timeline: &'a T,

    checkpoint: CheckPoint,
    checkpoint_modified: bool,
-
-    relsize_cache: HashMap<RelTag, BlockNumber>,
 }

-impl<'a, R: Repository> WalIngest<'a, R> {
-    pub fn new(timeline: &DatadirTimeline<R>, startpoint: Lsn) -> Result<WalIngest<R>> {
+impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
+    pub fn new(timeline: &T, startpoint: Lsn) -> Result<WalIngest<T>> {
        // Fetch the latest checkpoint into memory, so that we can compare with it
        // quickly in `ingest_record` and update it when it changes.
        let checkpoint_bytes = timeline.get_checkpoint(startpoint)?;
@@ -65,26 +60,27 @@ impl<'a, R: Repository> WalIngest<'a, R> {
            timeline,
            checkpoint,
            checkpoint_modified: false,
-            relsize_cache: HashMap::new(),
        })
    }

    ///
    /// Decode a PostgreSQL WAL record and store it in the repository, in the given timeline.
    ///
+    /// This function updates the `lsn` field of the given `DatadirModification`.
    ///
    /// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
    /// relations/pages that the record affects.
    ///
    pub fn ingest_record(
        &mut self,
-        timeline: &DatadirTimeline<R>,
        recdata: Bytes,
        lsn: Lsn,
+        modification: &mut DatadirModification<T>,
+        decoded: &mut DecodedWALRecord,
    ) -> Result<()> {
-        let mut modification = timeline.begin_modification(lsn);
+        modification.lsn = lsn;
+        decode_wal_record(recdata, decoded).context("failed decoding wal record")?;

-        let mut decoded = decode_wal_record(recdata).context("failed decoding wal record")?;
        let mut buf = decoded.record.clone();
        buf.advance(decoded.main_data_offset);

@@ -98,7 +94,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
        if decoded.xl_rmid == pg_constants::RM_HEAP_ID
            || decoded.xl_rmid == pg_constants::RM_HEAP2_ID
        {
-            self.ingest_heapam_record(&mut buf, &mut modification, &mut decoded)?;
+            self.ingest_heapam_record(&mut buf, modification, decoded)?;
        }
        // Handle other special record types
        if decoded.xl_rmid == pg_constants::RM_SMGR_ID
@@ -106,19 +102,19 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                == pg_constants::XLOG_SMGR_CREATE
        {
            let create = XlSmgrCreate::decode(&mut buf);
-            self.ingest_xlog_smgr_create(&mut modification, &create)?;
+            self.ingest_xlog_smgr_create(modification, &create)?;
        } else if decoded.xl_rmid == pg_constants::RM_SMGR_ID
            && (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
                == pg_constants::XLOG_SMGR_TRUNCATE
        {
            let truncate = XlSmgrTruncate::decode(&mut buf);
-            self.ingest_xlog_smgr_truncate(&mut modification, &truncate)?;
+            self.ingest_xlog_smgr_truncate(modification, &truncate)?;
        } else if decoded.xl_rmid == pg_constants::RM_DBASE_ID {
            if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
                == pg_constants::XLOG_DBASE_CREATE
            {
                let createdb = XlCreateDatabase::decode(&mut buf);
-                self.ingest_xlog_dbase_create(&mut modification, &createdb)?;
+                self.ingest_xlog_dbase_create(modification, &createdb)?;
            } else if (decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK)
                == pg_constants::XLOG_DBASE_DROP
            {
@@ -137,7 +133,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                self.put_slru_page_image(
-                    &mut modification,
+                    modification,
                    SlruKind::Clog,
                    segno,
                    rpageno,
@@ -146,7 +142,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
            } else {
                assert!(info == pg_constants::CLOG_TRUNCATE);
                let xlrec = XlClogTruncate::decode(&mut buf);
-                self.ingest_clog_truncate_record(&mut modification, &xlrec)?;
+                self.ingest_clog_truncate_record(modification, &xlrec)?;
            }
        } else if decoded.xl_rmid == pg_constants::RM_XACT_ID {
            let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK;
@@ -154,7 +150,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                let parsed_xact =
                    XlXactParsedRecord::decode(&mut buf, decoded.xl_xid, decoded.xl_info);
                self.ingest_xact_record(
-                    &mut modification,
+                    modification,
                    &parsed_xact,
                    info == pg_constants::XLOG_XACT_COMMIT,
                )?;
@@ -164,7 +160,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                let parsed_xact =
                    XlXactParsedRecord::decode(&mut buf, decoded.xl_xid, decoded.xl_info);
                self.ingest_xact_record(
-                    &mut modification,
+                    modification,
                    &parsed_xact,
                    info == pg_constants::XLOG_XACT_COMMIT_PREPARED,
                )?;
@@ -187,7 +183,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                self.put_slru_page_image(
-                    &mut modification,
+                    modification,
                    SlruKind::MultiXactOffsets,
                    segno,
                    rpageno,
@@ -198,7 +194,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                self.put_slru_page_image(
-                    &mut modification,
+                    modification,
                    SlruKind::MultiXactMembers,
                    segno,
                    rpageno,
@@ -206,14 +202,14 @@ impl<'a, R: Repository> WalIngest<'a, R> {
                )?;
            } else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
                let xlrec = XlMultiXactCreate::decode(&mut buf);
-                self.ingest_multixact_create_record(&mut modification, &xlrec)?;
+                self.ingest_multixact_create_record(modification, &xlrec)?;
            } else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID {
                let xlrec = XlMultiXactTruncate::decode(&mut buf);
-                self.ingest_multixact_truncate_record(&mut modification, &xlrec)?;
+                self.ingest_multixact_truncate_record(modification, &xlrec)?;
            }
        } else if decoded.xl_rmid == pg_constants::RM_RELMAP_ID {
            let xlrec = XlRelmapUpdate::decode(&mut buf);
-            self.ingest_relmap_page(&mut modification, &xlrec, &decoded)?;
+            self.ingest_relmap_page(modification, &xlrec, decoded)?;
        } else if decoded.xl_rmid == pg_constants::RM_XLOG_ID {
            let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
            if info == pg_constants::XLOG_NEXTOID {
@@ -248,7 +244,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
        // Iterate through all the blocks that the record modifies, and
        // "put" a separate copy of the record for each block.
        for blk in decoded.blocks.iter() {
-            self.ingest_decoded_block(&mut modification, lsn, &decoded, blk)?;
+            self.ingest_decoded_block(modification, lsn, decoded, blk)?;
        }

        // If checkpoint data was updated, store the new version in the repository
@@ -268,7 +264,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn ingest_decoded_block(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        lsn: Lsn,
        decoded: &DecodedWALRecord,
        blk: &DecodedBkpBlock,
@@ -328,7 +324,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
    fn ingest_heapam_record(
        &mut self,
        buf: &mut Bytes,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        decoded: &mut DecodedWALRecord,
    ) -> Result<()> {
        // Handle VM bit updates that are implicitly part of heap records.
@@ -409,7 +405,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
            // replaying it would fail to find the previous image of the page, because
            // it doesn't exist. So check if the VM page(s) exist, and skip the WAL
            // record if it doesn't.
-            let vm_size = self.get_relsize(vm_rel)?;
+            let vm_size = self.get_relsize(vm_rel, modification.lsn)?;
            if let Some(blknum) = new_vm_blk {
                if blknum >= vm_size {
                    new_vm_blk = None;
@@ -472,7 +468,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
    /// Subroutine of ingest_record(), to handle an XLOG_DBASE_CREATE record.
    fn ingest_xlog_dbase_create(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rec: &XlCreateDatabase,
    ) -> Result<()> {
        let db_id = rec.db_id;
@@ -539,7 +535,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn ingest_xlog_smgr_create(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rec: &XlSmgrCreate,
    ) -> Result<()> {
        let rel = RelTag {
@@ -557,7 +553,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
    /// This is the same logic as in PostgreSQL's smgr_redo() function.
    fn ingest_xlog_smgr_truncate(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rec: &XlSmgrTruncate,
    ) -> Result<()> {
        let spcnode = rec.rnode.spcnode;
@@ -622,7 +618,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {
    ///
    fn ingest_xact_record(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        parsed: &XlXactParsedRecord,
        is_commit: bool,
    ) -> Result<()> {
@@ -691,7 +687,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn ingest_clog_truncate_record(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        xlrec: &XlClogTruncate,
    ) -> Result<()> {
        info!(
@@ -749,7 +745,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn ingest_multixact_create_record(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        xlrec: &XlMultiXactCreate,
    ) -> Result<()> {
        // Create WAL record for updating the multixact-offsets page
@@ -828,7 +824,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn ingest_multixact_truncate_record(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        xlrec: &XlMultiXactTruncate,
    ) -> Result<()> {
        self.checkpoint.oldestMulti = xlrec.end_trunc_off;
@@ -862,7 +858,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn ingest_relmap_page(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        xlrec: &XlRelmapUpdate,
        decoded: &DecodedWALRecord,
    ) -> Result<()> {
@@ -878,17 +874,16 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn put_rel_creation(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rel: RelTag,
    ) -> Result<()> {
-        self.relsize_cache.insert(rel, 0);
        modification.put_rel_creation(rel, 0)?;
        Ok(())
    }

    fn put_rel_page_image(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rel: RelTag,
        blknum: BlockNumber,
        img: Bytes,
@@ -900,7 +895,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn put_rel_wal_record(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rel: RelTag,
        blknum: BlockNumber,
        rec: ZenithWalRecord,
@@ -912,63 +907,49 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn put_rel_truncation(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rel: RelTag,
        nblocks: BlockNumber,
    ) -> Result<()> {
        modification.put_rel_truncation(rel, nblocks)?;
-        self.relsize_cache.insert(rel, nblocks);
        Ok(())
    }

    fn put_rel_drop(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rel: RelTag,
    ) -> Result<()> {
        modification.put_rel_drop(rel)?;
-        self.relsize_cache.remove(&rel);
        Ok(())
    }

-    fn get_relsize(&mut self, rel: RelTag) -> Result<BlockNumber> {
-        if let Some(nblocks) = self.relsize_cache.get(&rel) {
-            Ok(*nblocks)
+    fn get_relsize(&mut self, rel: RelTag, lsn: Lsn) -> Result<BlockNumber> {
+        let nblocks = if !self.timeline.get_rel_exists(rel, lsn)? {
+            0
        } else {
-            let last_lsn = self.timeline.get_last_record_lsn();
-            let nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
-                0
-            } else {
-                self.timeline.get_rel_size(rel, last_lsn)?
-            };
-            self.relsize_cache.insert(rel, nblocks);
-            Ok(nblocks)
-        }
+            self.timeline.get_rel_size(rel, lsn)?
+        };
+        Ok(nblocks)
    }

    fn handle_rel_extend(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        rel: RelTag,
        blknum: BlockNumber,
    ) -> Result<()> {
        let new_nblocks = blknum + 1;
-        let old_nblocks = if let Some(nblocks) = self.relsize_cache.get(&rel) {
-            *nblocks
+        // Check if the relation exists. We implicitly create relations on first
+        // record.
+        // TODO: it would be nice to be more explicit about it
+        let last_lsn = modification.lsn;
+        let old_nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
+            // create it with 0 size initially, the logic below will extend it
+            modification.put_rel_creation(rel, 0)?;
+            0
        } else {
-            // Check if the relation exists. We implicitly create relations on first
-            // record.
-            // TODO: would be nice if to be more explicit about it
-            let last_lsn = self.timeline.get_last_record_lsn();
-            let nblocks = if !self.timeline.get_rel_exists(rel, last_lsn)? {
-                // create it with 0 size initially, the logic below will extend it
-                modification.put_rel_creation(rel, 0)?;
-                0
-            } else {
-                self.timeline.get_rel_size(rel, last_lsn)?
-            };
-            self.relsize_cache.insert(rel, nblocks);
-            nblocks
+            self.timeline.get_rel_size(rel, last_lsn)?
        };

        if new_nblocks > old_nblocks {
@@ -979,14 +960,13 @@ impl<'a, R: Repository> WalIngest<'a, R> {
            for gap_blknum in old_nblocks..blknum {
                modification.put_rel_page_image(rel, gap_blknum, ZERO_PAGE.clone())?;
            }
-            self.relsize_cache.insert(rel, new_nblocks);
        }
        Ok(())
    }

    fn put_slru_page_image(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        kind: SlruKind,
        segno: u32,
        blknum: BlockNumber,
@@ -999,7 +979,7 @@ impl<'a, R: Repository> WalIngest<'a, R> {

    fn handle_slru_extend(
        &mut self,
-        modification: &mut DatadirModification<R>,
+        modification: &mut DatadirModification<T>,
        kind: SlruKind,
        segno: u32,
        blknum: BlockNumber,
@@ -1052,6 +1032,7 @@ mod tests {
    use super::*;
    use crate::pgdatadir_mapping::create_test_timeline;
    use crate::repository::repo_harness::*;
+    use crate::repository::Timeline;
    use postgres_ffi::pg_constants;

    /// Arbitrary relation tag, for testing.
@@ -1062,13 +1043,13 @@ mod tests {
        forknum: 0,
    };

-    fn assert_current_logical_size<R: Repository>(_timeline: &DatadirTimeline<R>, _lsn: Lsn) {
+    fn assert_current_logical_size<T: Timeline>(_timeline: &T, _lsn: Lsn) {
        // TODO
    }

    static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);

-    fn init_walingest_test<R: Repository>(tline: &DatadirTimeline<R>) -> Result<WalIngest<R>> {
+    fn init_walingest_test<T: DatadirTimeline>(tline: &T) -> Result<WalIngest<T>> {
        let mut m = tline.begin_modification(Lsn(0x10));
        m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
        m.put_relmap_file(0, 111, Bytes::from(""))?; // dummy relmapper file
@@ -1082,7 +1063,7 @@ mod tests {
    fn test_relsize() -> Result<()> {
        let repo = RepoHarness::create("test_relsize")?.load();
        let tline = create_test_timeline(repo, TIMELINE_ID)?;
-        let mut walingest = init_walingest_test(&tline)?;
+        let mut walingest = init_walingest_test(&*tline)?;

        let mut m = tline.begin_modification(Lsn(0x20));
        walingest.put_rel_creation(&mut m, TESTREL_A)?;
@@ -1098,7 +1079,7 @@ mod tests {
        walingest.put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"))?;
        m.commit()?;

-        assert_current_logical_size(&tline, Lsn(0x50));
+        assert_current_logical_size(&*tline, Lsn(0x50));

        // The relation was created at LSN 2, not visible at LSN 1 yet.
        assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x10))?, false);
@@ -1145,7 +1126,7 @@ mod tests {
        let mut m = tline.begin_modification(Lsn(0x60));
        walingest.put_rel_truncation(&mut m, TESTREL_A, 2)?;
        m.commit()?;
-        assert_current_logical_size(&tline, Lsn(0x60));
+        assert_current_logical_size(&*tline, Lsn(0x60));

        // Check reported size and contents after truncation
        assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(0x60))?, 2);
@@ -1210,7 +1191,7 @@ mod tests {
    fn test_drop_extend() -> Result<()> {
        let repo = RepoHarness::create("test_drop_extend")?.load();
        let tline = create_test_timeline(repo, TIMELINE_ID)?;
-        let mut walingest = init_walingest_test(&tline)?;
+        let mut walingest = init_walingest_test(&*tline)?;

        let mut m = tline.begin_modification(Lsn(0x20));
        walingest.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"))?;
@@ -1250,7 +1231,7 @@ mod tests {
    fn test_truncate_extend() -> Result<()> {
        let repo = RepoHarness::create("test_truncate_extend")?.load();
        let tline = create_test_timeline(repo, TIMELINE_ID)?;
-        let mut walingest = init_walingest_test(&tline)?;
+        let mut walingest = init_walingest_test(&*tline)?;

        // Create a 20 MB relation (the size is arbitrary)
        let relsize = 20 * 1024 * 1024 / 8192;
@@ -1338,7 +1319,7 @@ mod tests {
    fn test_large_rel() -> Result<()> {
        let repo = RepoHarness::create("test_large_rel")?.load();
        let tline = create_test_timeline(repo, TIMELINE_ID)?;
-        let mut walingest = init_walingest_test(&tline)?;
+        let mut walingest = init_walingest_test(&*tline)?;

        let mut lsn = 0x10;
        for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
@@ -1349,7 +1330,7 @@ mod tests {
            m.commit()?;
        }

-        assert_current_logical_size(&tline, Lsn(lsn));
+        assert_current_logical_size(&*tline, Lsn(lsn));

        assert_eq!(
            tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
@@ -1365,7 +1346,7 @@ mod tests {
            tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
            pg_constants::RELSEG_SIZE
        );
-        assert_current_logical_size(&tline, Lsn(lsn));
+        assert_current_logical_size(&*tline, Lsn(lsn));

        // Truncate another block
        lsn += 0x10;
@@ -1376,7 +1357,7 @@ mod tests {
            tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
            pg_constants::RELSEG_SIZE - 1
        );
-        assert_current_logical_size(&tline, Lsn(lsn));
+        assert_current_logical_size(&*tline, Lsn(lsn));

        // Truncate to 1500, and then truncate all the way down to 0, one block at a time
        // This tests the behavior at segment boundaries
@@ -1393,7 +1374,7 @@ mod tests {

            size -= 1;
        }
-        assert_current_logical_size(&tline, Lsn(lsn));
+        assert_current_logical_size(&*tline, Lsn(lsn));

        Ok(())
    }
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -26,7 +26,6 @@ mod walreceiver_connection;
 use anyhow::{ensure, Context};
 use etcd_broker::Client;
 use itertools::Itertools;
-use once_cell::sync::Lazy;
 use std::cell::Cell;
 use std::collections::{hash_map, HashMap, HashSet};
 use std::future::Future;
@@ -36,14 +35,13 @@ use std::thread_local;
 use std::time::Duration;
 use tokio::{
    select,
-    sync::{mpsc, watch, RwLock},
+    sync::{mpsc, watch},
    task::JoinHandle,
 };
 use tracing::*;
 use url::Url;

 use crate::config::PageServerConf;
-use crate::http::models::WalReceiverEntry;
 use crate::tenant_mgr::{self, LocalTimelineUpdate, TenantState};
 use crate::thread_mgr::{self, ThreadKind};
 use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
@@ -55,23 +53,6 @@ thread_local! {
    pub(crate) static IS_WAL_RECEIVER: Cell<bool> = Cell::new(false);
 }

-/// WAL receiver state for sharing with the outside world.
-/// Only entries for timelines currently available in pageserver are stored.
-static WAL_RECEIVER_ENTRIES: Lazy<RwLock<HashMap<ZTenantTimelineId, WalReceiverEntry>>> =
-    Lazy::new(|| RwLock::new(HashMap::new()));
-
-/// Gets the public WAL streaming entry for a certain timeline.
-pub async fn get_wal_receiver_entry(
-    tenant_id: ZTenantId,
-    timeline_id: ZTimelineId,
-) -> Option<WalReceiverEntry> {
-    WAL_RECEIVER_ENTRIES
-        .read()
-        .await
-        .get(&ZTenantTimelineId::new(tenant_id, timeline_id))
-        .cloned()
-}
-
 /// Sets up the main WAL receiver thread that manages the rest of the subtasks inside of it, per timeline.
 /// See comments in [`wal_receiver_main_thread_loop_step`] for more details on per timeline activities.
 pub fn init_wal_receiver_main_thread(
@@ -85,7 +66,7 @@ pub fn init_wal_receiver_main_thread(
    );
    let broker_prefix = &conf.broker_etcd_prefix;
    info!(
-        "Starting wal receiver main thread, etdc endpoints: {}",
+        "Starting wal receiver main thread, etcd endpoints: {}",
        etcd_endpoints.iter().map(Url::to_string).join(", ")
    );

@@ -281,13 +262,10 @@ async fn wal_receiver_main_thread_loop_step<'a>(
                        }
                        None => warn!("Timeline {id} does not have a tenant entry in wal receiver main thread"),
                    };
-                    {
-                        WAL_RECEIVER_ENTRIES.write().await.remove(&id);
-                        if let Err(e) = join_confirmation_sender.send(()) {
-                            warn!("cannot send wal_receiver shutdown confirmation {e}")
-                        } else {
-                            info!("confirm walreceiver shutdown for {id}");
-                        }
+                    if let Err(e) = join_confirmation_sender.send(()) {
+                        warn!("cannot send wal_receiver shutdown confirmation {e}")
+                    } else {
+                        info!("confirm walreceiver shutdown for {id}");
                    }
                }
                // Timeline got attached, retrieve all necessary information to start its broker loop and maintain this loop endlessly.
@@ -322,17 +300,6 @@ async fn wal_receiver_main_thread_loop_step<'a>(
                            }
                        };

-                    {
-                        WAL_RECEIVER_ENTRIES.write().await.insert(
-                            id,
-                            WalReceiverEntry {
-                                wal_producer_connstr: None,
-                                last_received_msg_lsn: None,
-                                last_received_msg_ts: None,
-                            },
-                        );
-                    }
-
                    vacant_connection_manager_entry.insert(
                        connection_manager::spawn_connection_manager_task(
                            id,
--- a/pageserver/src/walreceiver/connection_manager.rs
+++ b/pageserver/src/walreceiver/connection_manager.rs
@@ -25,7 +25,12 @@ use etcd_broker::{
 use tokio::select;
 use tracing::*;

-use crate::DatadirTimelineImpl;
+use crate::{
+    exponential_backoff,
+    repository::{Repository, Timeline},
+    DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
+};
+use crate::{RepositoryImpl, TimelineImpl};
 use utils::{
    lsn::Lsn,
    pq_proto::ReplicationFeedback,
@@ -39,7 +44,7 @@ pub(super) fn spawn_connection_manager_task(
    id: ZTenantTimelineId,
    broker_loop_prefix: String,
    mut client: Client,
-    local_timeline: Arc<DatadirTimelineImpl>,
+    local_timeline: Arc<TimelineImpl>,
    wal_connect_timeout: Duration,
    lagging_wal_timeout: Duration,
    max_lsn_wal_lag: NonZeroU64,
@@ -167,7 +172,7 @@ async fn connection_manager_loop_step(
            walreceiver_state
                .change_connection(
                    new_candidate.safekeeper_id,
-                    new_candidate.wal_producer_connstr,
+                    new_candidate.wal_source_connstr,
                )
                .await
        }
@@ -229,23 +234,11 @@ async fn subscribe_for_timeline_updates(
    }
 }

-const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 2.0;
-const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 60.0;
-
-async fn exponential_backoff(n: u32, base: f64, max_seconds: f64) {
-    if n == 0 {
-        return;
-    }
-    let seconds_to_wait = base.powf(f64::from(n) - 1.0).min(max_seconds);
-    info!("Backoff: waiting {seconds_to_wait} seconds before proceeding with the task");
-    tokio::time::sleep(Duration::from_secs_f64(seconds_to_wait)).await;
-}
-
 /// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible.
 struct WalreceiverState {
    id: ZTenantTimelineId,
    /// Use pageserver data about the timeline to filter out some of the safekeepers.
-    local_timeline: Arc<DatadirTimelineImpl>,
+    local_timeline: Arc<TimelineImpl>,
    /// The timeout on the connection to safekeeper for WAL streaming.
    wal_connect_timeout: Duration,
    /// The timeout to use to determine when the current connection is "stale" and reconnect to the other one.
@@ -283,7 +276,7 @@ struct EtcdSkTimeline {
 impl WalreceiverState {
    fn new(
        id: ZTenantTimelineId,
-        local_timeline: Arc<DatadirTimelineImpl>,
+        local_timeline: Arc<<RepositoryImpl as Repository>::Timeline>,
        wal_connect_timeout: Duration,
        lagging_wal_timeout: Duration,
        max_lsn_wal_lag: NonZeroU64,
@@ -301,7 +294,7 @@ impl WalreceiverState {
    }

    /// Shuts down the current connection (if any) and immediately starts another one with the given connection string.
-    async fn change_connection(&mut self, new_sk_id: NodeId, new_wal_producer_connstr: String) {
+    async fn change_connection(&mut self, new_sk_id: NodeId, new_wal_source_connstr: String) {
        if let Some(old_connection) = self.wal_connection.take() {
            old_connection.connection_task.shutdown().await
        }
@@ -323,7 +316,7 @@ impl WalreceiverState {
                .await;
                super::walreceiver_connection::handle_walreceiver_connection(
                    id,
-                    &new_wal_producer_connstr,
+                    &new_wal_source_connstr,
                    events_sender.as_ref(),
                    cancellation,
                    connect_timeout,
@@ -386,7 +379,7 @@ impl WalreceiverState {
            Some(existing_wal_connection) => {
                let connected_sk_node = existing_wal_connection.sk_id;

-                let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) =
+                let (new_sk_id, new_safekeeper_etcd_data, new_wal_source_connstr) =
                    self.select_connection_candidate(Some(connected_sk_node))?;

                let now = Utc::now().naive_utc();
@@ -396,7 +389,7 @@ impl WalreceiverState {
                    if latest_interaciton > self.lagging_wal_timeout {
                        return Some(NewWalConnectionCandidate {
                            safekeeper_id: new_sk_id,
-                            wal_producer_connstr: new_wal_producer_connstr,
+                            wal_source_connstr: new_wal_source_connstr,
                            reason: ReconnectReason::NoWalTimeout {
                                last_wal_interaction: Some(
                                    existing_wal_connection.latest_connection_update,
@@ -422,7 +415,7 @@ impl WalreceiverState {
                                        return Some(
                                            NewWalConnectionCandidate {
                                                safekeeper_id: new_sk_id,
-                                                wal_producer_connstr: new_wal_producer_connstr,
+                                                wal_source_connstr: new_wal_source_connstr,
                                                reason: ReconnectReason::LaggingWal { current_lsn, new_lsn, threshold: self.max_lsn_wal_lag },
                                            });
                                    }
@@ -433,18 +426,18 @@ impl WalreceiverState {
                    None => {
                        return Some(NewWalConnectionCandidate {
                            safekeeper_id: new_sk_id,
-                            wal_producer_connstr: new_wal_producer_connstr,
+                            wal_source_connstr: new_wal_source_connstr,
                            reason: ReconnectReason::NoEtcdDataForExistingConnection,
                        })
                    }
                }
            }
            None => {
-                let (new_sk_id, _, new_wal_producer_connstr) =
+                let (new_sk_id, _, new_wal_source_connstr) =
                    self.select_connection_candidate(None)?;
                return Some(NewWalConnectionCandidate {
                    safekeeper_id: new_sk_id,
-                    wal_producer_connstr: new_wal_producer_connstr,
+                    wal_source_connstr: new_wal_source_connstr,
                    reason: ReconnectReason::NoExistingConnection,
                });
            }
@@ -545,7 +538,7 @@ impl WalreceiverState {
 #[derive(Debug, PartialEq, Eq)]
 struct NewWalConnectionCandidate {
    safekeeper_id: NodeId,
-    wal_producer_connstr: String,
+    wal_source_connstr: String,
    reason: ReconnectReason,
 }

@@ -802,7 +795,7 @@ mod tests {
            "Should select new safekeeper due to missing connection, even if there's also a lag in the wal over the threshold"
        );
        assert!(only_candidate
-            .wal_producer_connstr
+            .wal_source_connstr
            .contains(DUMMY_SAFEKEEPER_CONNSTR));

        let selected_lsn = 100_000;
@@ -867,7 +860,7 @@ mod tests {
            "Should select new safekeeper due to missing connection, even if there's also a lag in the wal over the threshold"
        );
        assert!(biggest_wal_candidate
-            .wal_producer_connstr
+            .wal_source_connstr
            .contains(DUMMY_SAFEKEEPER_CONNSTR));

        Ok(())
@@ -984,7 +977,7 @@ mod tests {
            "Should select new safekeeper due to missing etcd data, even if there's an existing connection with this safekeeper"
        );
        assert!(only_candidate
-            .wal_producer_connstr
+            .wal_source_connstr
            .contains(DUMMY_SAFEKEEPER_CONNSTR));

        Ok(())
@@ -1066,7 +1059,7 @@ mod tests {
            "Should select bigger WAL safekeeper if it starts to lag enough"
        );
        assert!(over_threshcurrent_candidate
-            .wal_producer_connstr
+            .wal_source_connstr
            .contains("advanced by Lsn safekeeper"));

        Ok(())
@@ -1133,7 +1126,7 @@ mod tests {
            unexpected => panic!("Unexpected reason: {unexpected:?}"),
        }
        assert!(over_threshcurrent_candidate
-            .wal_producer_connstr
+            .wal_source_connstr
            .contains(DUMMY_SAFEKEEPER_CONNSTR));

        Ok(())
@@ -1189,7 +1182,7 @@ mod tests {
            unexpected => panic!("Unexpected reason: {unexpected:?}"),
        }
        assert!(over_threshcurrent_candidate
-            .wal_producer_connstr
+            .wal_source_connstr
            .contains(DUMMY_SAFEKEEPER_CONNSTR));

        Ok(())
@@ -1203,13 +1196,10 @@ mod tests {
                tenant_id: harness.tenant_id,
                timeline_id: TIMELINE_ID,
            },
-            local_timeline: Arc::new(DatadirTimelineImpl::new(
-                harness
-                    .load()
-                    .create_empty_timeline(TIMELINE_ID, Lsn(0))
-                    .expect("Failed to create an empty timeline for dummy wal connection manager"),
-                10_000,
-            )),
+            local_timeline: harness
+                .load()
+                .create_empty_timeline(TIMELINE_ID, Lsn(0))
+                .expect("Failed to create an empty timeline for dummy wal connection manager"),
            wal_connect_timeout: Duration::from_secs(1),
            lagging_wal_timeout: Duration::from_secs(1),
            max_lsn_wal_lag: NonZeroU64::new(1).unwrap(),
--- a/pageserver/src/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/walreceiver/walreceiver_connection.rs
@@ -9,36 +9,38 @@ use std::{
 use anyhow::{bail, ensure, Context};
 use bytes::BytesMut;
 use fail::fail_point;
+use futures::StreamExt;
 use postgres::{SimpleQueryMessage, SimpleQueryRow};
 use postgres_protocol::message::backend::ReplicationMessage;
 use postgres_types::PgLsn;
 use tokio::{pin, select, sync::watch, time};
 use tokio_postgres::{replication::ReplicationStream, Client};
-use tokio_stream::StreamExt;
 use tracing::{debug, error, info, info_span, trace, warn, Instrument};

 use super::TaskEvent;
 use crate::{
-    http::models::WalReceiverEntry,
+    layered_repository::WalReceiverInfo,
+    pgdatadir_mapping::DatadirTimeline,
    repository::{Repository, Timeline},
    tenant_mgr,
    walingest::WalIngest,
+    walrecord::DecodedWALRecord,
 };
 use postgres_ffi::waldecoder::WalStreamDecoder;
 use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};

-/// Opens a conneciton to the given wal producer and streams the WAL, sending progress messages during streaming.
+/// Open a connection to the given safekeeper and receive WAL, sending back progress
+/// messages as we go.
 pub async fn handle_walreceiver_connection(
    id: ZTenantTimelineId,
-    wal_producer_connstr: &str,
+    wal_source_connstr: &str,
    events_sender: &watch::Sender<TaskEvent<ReplicationFeedback>>,
    mut cancellation: watch::Receiver<()>,
    connect_timeout: Duration,
 ) -> anyhow::Result<()> {
    // Connect to the database in replication mode.
-    info!("connecting to {wal_producer_connstr}");
-    let connect_cfg =
-        format!("{wal_producer_connstr} application_name=pageserver replication=true");
+    info!("connecting to {wal_source_connstr}");
+    let connect_cfg = format!("{wal_source_connstr} application_name=pageserver replication=true");

    let (mut replication_client, connection) = time::timeout(
        connect_timeout,
@@ -150,19 +152,25 @@ pub async fn handle_walreceiver_connection(

                waldecoder.feed_bytes(data);

-                while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                    let _enter = info_span!("processing record", lsn = %lsn).entered();
+                {
+                    let mut decoded = DecodedWALRecord::default();
+                    let mut modification = timeline.begin_modification(endlsn);
+                    while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
+                        // let _enter = info_span!("processing record", lsn = %lsn).entered();

-                    // It is important to deal with the aligned records as lsn in getPage@LSN is
-                    // aligned and can be several bytes bigger. Without this alignment we are
-                    // at risk of hitting a deadlock.
-                    ensure!(lsn.is_aligned());
+                        // It is important to deal with the aligned records as lsn in getPage@LSN is
+                        // aligned and can be several bytes bigger. Without this alignment we are
+                        // at risk of hitting a deadlock.
+                        ensure!(lsn.is_aligned());

-                    walingest.ingest_record(&timeline, recdata, lsn)?;
+                        walingest
+                            .ingest_record(recdata, lsn, &mut modification, &mut decoded)
+                            .with_context(|| format!("could not ingest record at {lsn}"))?;

-                    fail_point!("walreceiver-after-ingest");
+                        fail_point!("walreceiver-after-ingest");

-                    last_rec_lsn = lsn;
+                        last_rec_lsn = lsn;
+                    }
                }

                if !caught_up && endlsn >= end_of_wal {
@@ -170,16 +178,6 @@ pub async fn handle_walreceiver_connection(
                    caught_up = true;
                }

-                let timeline_to_check = Arc::clone(&timeline.tline);
-                tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
-                    .await
-                    .with_context(|| {
-                        format!("Spawned checkpoint check task panicked for timeline {id}")
-                    })?
-                    .with_context(|| {
-                        format!("Failed to check checkpoint distance for timeline {id}")
-                    })?;
-
                Some(endlsn)
            }

@@ -200,6 +198,12 @@ pub async fn handle_walreceiver_connection(
            _ => None,
        };

+        let timeline_to_check = Arc::clone(&timeline);
+        tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
+            .await
+            .with_context(|| format!("Spawned checkpoint check task panicked for timeline {id}"))?
+            .with_context(|| format!("Failed to check checkpoint distance for timeline {id}"))?;
+
        if let Some(last_lsn) = status_update {
            let remote_index = repo.get_remote_index();
            let timeline_remote_consistent_lsn = remote_index
@@ -218,27 +222,22 @@ pub async fn handle_walreceiver_connection(
            // The last LSN we processed. It is not guaranteed to survive pageserver crash.
            let write_lsn = u64::from(last_lsn);
            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
-            let flush_lsn = u64::from(timeline.tline.get_disk_consistent_lsn());
+            let flush_lsn = u64::from(timeline.get_disk_consistent_lsn());
            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
            let apply_lsn = u64::from(timeline_remote_consistent_lsn);
            let ts = SystemTime::now();

-            // Update the current WAL receiver's data stored inside the global hash table `WAL_RECEIVERS`
-            {
-                super::WAL_RECEIVER_ENTRIES.write().await.insert(
-                    id,
-                    WalReceiverEntry {
-                        wal_producer_connstr: Some(wal_producer_connstr.to_owned()),
-                        last_received_msg_lsn: Some(last_lsn),
-                        last_received_msg_ts: Some(
-                            ts.duration_since(SystemTime::UNIX_EPOCH)
-                                .expect("Received message time should be before UNIX EPOCH!")
-                                .as_micros(),
-                        ),
-                    },
-                );
-            }
+            // Update the status about what we just received. This is shown in the mgmt API.
+            let last_received_wal = WalReceiverInfo {
+                wal_source_connstr: wal_source_connstr.to_owned(),
+                last_received_msg_lsn: last_lsn,
+                last_received_msg_ts: ts
+                    .duration_since(SystemTime::UNIX_EPOCH)
+                    .expect("Received message time should be after UNIX EPOCH!")
+                    .as_micros(),
+            };
+            *timeline.last_received_wal.lock().unwrap() = Some(last_received_wal);

            // Send zenith feedback message.
            // Regular standby_status_update fields are put into this message.
--- a/pageserver/src/walrecord.rs
+++ b/pageserver/src/walrecord.rs
@@ -96,6 +96,7 @@ impl DecodedBkpBlock {
    }
 }

+#[derive(Default)]
 pub struct DecodedWALRecord {
    pub xl_xid: TransactionId,
    pub xl_info: u8,
@@ -505,7 +506,17 @@ impl XlMultiXactTruncate {
 //      block data
 //      ...
 //      main data
-pub fn decode_wal_record(record: Bytes) -> Result<DecodedWALRecord, DeserializeError> {
+//
+//
+// For performance reasons, the caller provides the DecodedWALRecord struct and the function just fills it in.
+// It would be more natural for this function to return a DecodedWALRecord as return value,
+// but reusing the caller-supplied struct avoids an allocation.
+// This code is in the hot path for digesting incoming WAL, and is very performance sensitive.
+//
+pub fn decode_wal_record(
+    record: Bytes,
+    decoded: &mut DecodedWALRecord,
+) -> Result<(), DeserializeError> {
    let mut rnode_spcnode: u32 = 0;
    let mut rnode_dbnode: u32 = 0;
    let mut rnode_relnode: u32 = 0;
@@ -534,7 +545,7 @@ pub fn decode_wal_record(record: Bytes) -> Result<DecodedWALRecord, DeserializeE
    let mut blocks_total_len: u32 = 0;
    let mut main_data_len = 0;
    let mut datatotal: u32 = 0;
-    let mut blocks: Vec<DecodedBkpBlock> = Vec::new();
+    decoded.blocks.clear();

    // 2. Decode the headers.
    // XLogRecordBlockHeaders if any,
@@ -713,7 +724,7 @@ pub fn decode_wal_record(record: Bytes) -> Result<DecodedWALRecord, DeserializeE
                    blk.blkno
                );

-                blocks.push(blk);
+                decoded.blocks.push(blk);
            }

            _ => {
@@ -724,7 +735,7 @@ pub fn decode_wal_record(record: Bytes) -> Result<DecodedWALRecord, DeserializeE

    // 3. Decode blocks.
    let mut ptr = record.len() - buf.remaining();
-    for blk in blocks.iter_mut() {
+    for blk in decoded.blocks.iter_mut() {
        if blk.has_image {
            blk.bimg_offset = ptr as u32;
            ptr += blk.bimg_len as usize;
@@ -744,14 +755,13 @@ pub fn decode_wal_record(record: Bytes) -> Result<DecodedWALRecord, DeserializeE
        assert_eq!(buf.remaining(), main_data_len as usize);
    }

-    Ok(DecodedWALRecord {
-        xl_xid: xlogrec.xl_xid,
-        xl_info: xlogrec.xl_info,
-        xl_rmid: xlogrec.xl_rmid,
-        record,
-        blocks,
-        main_data_offset,
-    })
+    decoded.xl_xid = xlogrec.xl_xid;
+    decoded.xl_info = xlogrec.xl_info;
+    decoded.xl_rmid = xlogrec.xl_rmid;
+    decoded.record = record;
+    decoded.main_data_offset = main_data_offset;
+
+    Ok(())
 }

 ///
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -20,8 +20,8 @@
 //!
 use byteorder::{ByteOrder, LittleEndian};
 use bytes::{BufMut, Bytes, BytesMut};
-use lazy_static::lazy_static;
 use nix::poll::*;
+use once_cell::sync::Lazy;
 use serde::Serialize;
 use std::fs;
 use std::fs::OpenOptions;
@@ -105,21 +105,27 @@ impl crate::walredo::WalRedoManager for DummyRedoManager {
 // We collect the time spent in actual WAL redo ('redo'), and time waiting
 // for access to the postgres process ('wait') since there is only one for
 // each tenant.
-lazy_static! {
-    static ref WAL_REDO_TIME: Histogram =
-        register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo")
-            .expect("failed to define a metric");
-    static ref WAL_REDO_WAIT_TIME: Histogram = register_histogram!(
+
+static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo")
+        .expect("failed to define a metric")
+});
+
+static WAL_REDO_WAIT_TIME: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
        "pageserver_wal_redo_wait_seconds",
        "Time spent waiting for access to the WAL redo process"
    )
-    .expect("failed to define a metric");
-    static ref WAL_REDO_RECORD_COUNTER: IntCounter = register_int_counter!(
+    .expect("failed to define a metric")
+});
+
+static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
        "pageserver_replayed_wal_records_total",
        "Number of WAL records replayed in WAL redo process"
    )
-    .unwrap();
-}
+    .unwrap()
+});

 ///
 /// This is the real implementation that uses a Postgres process to
--- a/poetry.lock
+++ b/poetry.lock
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -14,7 +14,7 @@ hashbrown = "0.11.2"
 hex = "0.4.3"
 hmac = "0.12.1"
 hyper = "0.14"
-lazy_static = "1.4.0"
+once_cell = "1.13.0"
 md5 = "0.7.0"
 parking_lot = "0.12"
 pin-project-lite = "0.2.7"
--- a/proxy/src/auth.rs
+++ b/proxy/src/auth.rs
@@ -1,11 +1,14 @@
 //! Client authentication mechanisms.

 pub mod backend;
-pub use backend::DatabaseInfo;
+pub use backend::{BackendType, DatabaseInfo};

 mod credentials;
 pub use credentials::ClientCredentials;

+mod password_hack;
+use password_hack::PasswordHackPayload;
+
 mod flow;
 pub use flow::*;

@@ -29,9 +32,8 @@ pub enum AuthErrorImpl {
    #[error(transparent)]
    Sasl(#[from] crate::sasl::Error),

-    /// For passwords that couldn't be processed by [`backend::legacy_console::parse_password`].
-    #[error("Malformed password message")]
-    MalformedPassword,
+    #[error("Malformed password message: {0}")]
+    MalformedPassword(&'static str),

    /// Errors produced by [`crate::stream::PqStream`].
    #[error(transparent)]
@@ -76,7 +78,7 @@ impl UserFacingError for AuthError {
            Console(e) => e.to_string_client(),
            GetAuthInfo(e) => e.to_string_client(),
            Sasl(e) => e.to_string_client(),
-            MalformedPassword => self.to_string(),
+            MalformedPassword(_) => self.to_string(),
            _ => "Internal error".to_string(),
        }
    }
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -1,26 +1,23 @@
-mod legacy_console;
 mod link;
 mod postgres;

 pub mod console;

+mod legacy_console;
 pub use legacy_console::{AuthError, AuthErrorImpl};

-use super::ClientCredentials;
 use crate::{
-    compute,
-    config::{AuthBackendType, ProxyConfig},
-    mgmt,
+    auth::{self, AuthFlow, ClientCredentials},
+    compute, config, mgmt,
    stream::PqStream,
    waiters::{self, Waiter, Waiters},
 };
-use lazy_static::lazy_static;
+
+use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};
 use tokio::io::{AsyncRead, AsyncWrite};

-lazy_static! {
-    static ref CPLANE_WAITERS: Waiters<mgmt::ComputeReady> = Default::default();
-}
+static CPLANE_WAITERS: Lazy<Waiters<mgmt::ComputeReady>> = Lazy::new(Default::default);

 /// Give caller an opportunity to wait for the cloud's reply.
 pub async fn with_waiter<R, T, E>(
@@ -78,32 +75,158 @@ impl From<DatabaseInfo> for tokio_postgres::Config {
    }
 }

-pub(super) async fn handle_user(
-    config: &ProxyConfig,
-    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    creds: ClientCredentials,
-) -> super::Result<compute::NodeInfo> {
-    use AuthBackendType::*;
-    match config.auth_backend {
-        LegacyConsole => {
-            legacy_console::handle_user(
-                &config.auth_endpoint,
-                &config.auth_link_uri,
-                client,
-                &creds,
-            )
-            .await
+/// This type serves two purposes:
+///
+/// * When `T` is `()`, it's just a regular auth backend selector
+///   which we use in [`crate::config::ProxyConfig`].
+///
+/// * However, when we substitute `T` with [`ClientCredentials`],
+///   this helps us provide the credentials only to those auth
+///   backends which require them for the authentication process.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum BackendType<T> {
+    /// Legacy Cloud API (V1) + link auth.
+    LegacyConsole(T),
+    /// Current Cloud API (V2).
+    Console(T),
+    /// Local mock of Cloud API (V2).
+    Postgres(T),
+    /// Authentication via a web browser.
+    Link,
+}
+
+impl<T> BackendType<T> {
+    /// Very similar to [`std::option::Option::map`].
+    /// Maps [`BackendType<T>`] to [`BackendType<R>`] by applying
+    /// a function to a contained value.
+    pub fn map<R>(self, f: impl FnOnce(T) -> R) -> BackendType<R> {
+        use BackendType::*;
+        match self {
+            LegacyConsole(x) => LegacyConsole(f(x)),
+            Console(x) => Console(f(x)),
+            Postgres(x) => Postgres(f(x)),
+            Link => Link,
+        }
+    }
+}
+
+impl<T, E> BackendType<Result<T, E>> {
+    /// Very similar to [`std::option::Option::transpose`].
+    /// This is most useful for error handling.
+    pub fn transpose(self) -> Result<BackendType<T>, E> {
+        use BackendType::*;
+        match self {
+            LegacyConsole(x) => x.map(LegacyConsole),
+            Console(x) => x.map(Console),
+            Postgres(x) => x.map(Postgres),
+            Link => Ok(Link),
+        }
+    }
+}
+
+impl BackendType<ClientCredentials> {
+    /// Authenticate the client via the requested backend, possibly using credentials.
+    pub async fn authenticate(
+        mut self,
+        urls: &config::AuthUrls,
+        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
+    ) -> super::Result<compute::NodeInfo> {
+        use BackendType::*;
+
+        if let Console(creds) | Postgres(creds) = &mut self {
+            // If there's no project so far, that entails that client doesn't
+            // support SNI or other means of passing the project name.
+            // We now expect to see a very specific payload in the place of password.
+            if creds.project().is_none() {
+                let payload = AuthFlow::new(client)
+                    .begin(auth::PasswordHack)
+                    .await?
+                    .authenticate()
+                    .await?;
+
+                // Finally we may finish the initialization of `creds`.
+                // TODO: add missing type safety to ClientCredentials.
+                creds.project = Some(payload.project);
+
+                let mut config = match &self {
+                    Console(creds) => {
+                        console::Api::new(&urls.auth_endpoint, creds)
+                            .wake_compute()
+                            .await?
+                    }
+                    Postgres(creds) => {
+                        postgres::Api::new(&urls.auth_endpoint, creds)
+                            .wake_compute()
+                            .await?
+                    }
+                    _ => unreachable!("see the patterns above"),
+                };
+
+                // We should use a password from payload as well.
+                config.password(payload.password);
+
+                return Ok(compute::NodeInfo {
+                    reported_auth_ok: false,
+                    config,
+                });
+            }
+        }
+
+        match self {
+            LegacyConsole(creds) => {
+                legacy_console::handle_user(
+                    &urls.auth_endpoint,
+                    &urls.auth_link_uri,
+                    &creds,
+                    client,
+                )
+                .await
+            }
+            Console(creds) => {
+                console::Api::new(&urls.auth_endpoint, &creds)
+                    .handle_user(client)
+                    .await
+            }
+            Postgres(creds) => {
+                postgres::Api::new(&urls.auth_endpoint, &creds)
+                    .handle_user(client)
+                    .await
+            }
+            // NOTE: this auth backend doesn't use client credentials.
+            Link => link::handle_user(&urls.auth_link_uri, client).await,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_backend_type_map() {
+        let values = [
+            BackendType::LegacyConsole(0),
+            BackendType::Console(0),
+            BackendType::Postgres(0),
+            BackendType::Link,
+        ];
+
+        for value in values {
+            assert_eq!(value.map(|x| x), value);
+        }
+    }
+
+    #[test]
+    fn test_backend_type_transpose() {
+        let values = [
+            BackendType::LegacyConsole(Ok::<_, ()>(0)),
+            BackendType::Console(Ok(0)),
+            BackendType::Postgres(Ok(0)),
+            BackendType::Link,
+        ];
+
+        for value in values {
+            assert_eq!(value.map(Result::unwrap), value.transpose().unwrap());
        }
-        Console => {
-            console::Api::new(&config.auth_endpoint, &creds)?
-                .handle_user(client)
-                .await
-        }
-        Postgres => {
-            postgres::Api::new(&config.auth_endpoint, &creds)?
-                .handle_user(client)
-                .await
-        }
-        Link => link::handle_user(&config.auth_link_uri, client).await,
    }
 }
--- a/proxy/src/auth/backend/console.rs
+++ b/proxy/src/auth/backend/console.rs
@@ -1,18 +1,17 @@
 //! Cloud API V2.

 use crate::{
-    auth::{self, AuthFlow, ClientCredentials, DatabaseInfo},
-    compute,
-    error::UserFacingError,
+    auth::{self, AuthFlow, ClientCredentials},
+    compute::{self, ComputeConnCfg},
+    error::{io_error, UserFacingError},
    scram,
    stream::PqStream,
    url::ApiUrl,
 };
 use serde::{Deserialize, Serialize};
-use std::{future::Future, io};
+use std::future::Future;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
-use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};

 pub type Result<T> = std::result::Result<T, ConsoleAuthError>;

@@ -84,8 +83,8 @@ pub(super) struct Api<'a> {

 impl<'a> Api<'a> {
    /// Construct an API object containing the auth parameters.
-    pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Result<Self> {
-        Ok(Self { endpoint, creds })
+    pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Self {
+        Self { endpoint, creds }
    }

    /// Authenticate the existing user or throw an error.
@@ -100,7 +99,7 @@ impl<'a> Api<'a> {
        let mut url = self.endpoint.clone();
        url.path_segments_mut().push("proxy_get_role_secret");
        url.query_pairs_mut()
-            .append_pair("project", self.creds.project_name.as_ref()?)
+            .append_pair("project", self.creds.project().expect("impossible"))
            .append_pair("role", &self.creds.user);

        // TODO: use a proper logger
@@ -120,11 +119,11 @@ impl<'a> Api<'a> {
    }

    /// Wake up the compute node and return the corresponding connection info.
-    async fn wake_compute(&self) -> Result<DatabaseInfo> {
+    pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg> {
        let mut url = self.endpoint.clone();
        url.path_segments_mut().push("proxy_wake_compute");
-        let project_name = self.creds.project_name.as_ref()?;
-        url.query_pairs_mut().append_pair("project", project_name);
+        url.query_pairs_mut()
+            .append_pair("project", self.creds.project().expect("impossible"));

        // TODO: use a proper logger
        println!("cplane request: {url}");
@@ -137,16 +136,20 @@ impl<'a> Api<'a> {
        let response: GetWakeComputeResponse =
            serde_json::from_str(&resp.text().await.map_err(io_error)?)?;

-        let (host, port) = parse_host_port(&response.address)
-            .ok_or(ConsoleAuthError::BadComputeAddress(response.address))?;
+        // Unfortunately, ownership won't let us use `Option::ok_or` here.
+        let (host, port) = match parse_host_port(&response.address) {
+            None => return Err(ConsoleAuthError::BadComputeAddress(response.address)),
+            Some(x) => x,
+        };

-        Ok(DatabaseInfo {
-            host,
-            port,
-            dbname: self.creds.dbname.to_owned(),
-            user: self.creds.user.to_owned(),
-            password: None,
-        })
+        let mut config = ComputeConnCfg::new();
+        config
+            .host(host)
+            .port(port)
+            .dbname(&self.creds.dbname)
+            .user(&self.creds.user);
+
+        Ok(config)
    }
 }

@@ -160,7 +163,7 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
 ) -> auth::Result<compute::NodeInfo>
 where
    GetAuthInfo: Future<Output = Result<AuthInfo>>,
-    WakeCompute: Future<Output = Result<DatabaseInfo>>,
+    WakeCompute: Future<Output = Result<ComputeConnCfg>>,
 {
    let auth_info = get_auth_info(endpoint).await?;

@@ -179,48 +182,18 @@ where
        }
    };

-    client
-        .write_message_noflush(&Be::AuthenticationOk)?
-        .write_message_noflush(&BeParameterStatusMessage::encoding())?;
+    let mut config = wake_compute(endpoint).await?;
+    if let Some(keys) = scram_keys {
+        config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
+    }

    Ok(compute::NodeInfo {
-        db_info: wake_compute(endpoint).await?,
-        scram_keys,
+        reported_auth_ok: false,
+        config,
    })
 }

-/// Upcast (almost) any error into an opaque [`io::Error`].
-pub(super) fn io_error(e: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> io::Error {
-    io::Error::new(io::ErrorKind::Other, e)
-}
-
-fn parse_host_port(input: &str) -> Option<(String, u16)> {
+fn parse_host_port(input: &str) -> Option<(&str, u16)> {
    let (host, port) = input.split_once(':')?;
-    Some((host.to_owned(), port.parse().ok()?))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use serde_json::json;
-
-    #[test]
-    fn parse_db_info() -> anyhow::Result<()> {
-        let _: DatabaseInfo = serde_json::from_value(json!({
-            "host": "localhost",
-            "port": 5432,
-            "dbname": "postgres",
-            "user": "john_doe",
-            "password": "password",
-        }))?;
-
-        let _: DatabaseInfo = serde_json::from_value(json!({
-            "host": "localhost",
-            "port": 5432,
-            "dbname": "postgres",
-            "user": "john_doe",
-        }))?;
-
-        Ok(())
-    }
+    Some((host, port.parse().ok()?))
 }
--- a/proxy/src/auth/backend/legacy_console.rs
+++ b/proxy/src/auth/backend/legacy_console.rs
@@ -11,7 +11,7 @@ use crate::{
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
-use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};
+use utils::pq_proto::BeMessage as Be;

 #[derive(Debug, Error)]
 pub enum AuthErrorImpl {
@@ -76,6 +76,12 @@ enum ProxyAuthResponse {
    NotReady { ready: bool }, // TODO: get rid of `ready`
 }

+impl ClientCredentials {
+    fn is_existing_user(&self) -> bool {
+        self.user.ends_with("@zenith")
+    }
+}
+
 async fn authenticate_proxy_client(
    auth_endpoint: &reqwest::Url,
    creds: &ClientCredentials,
@@ -100,7 +106,7 @@ async fn authenticate_proxy_client(
        }

        let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text().await?.as_str())?;
-        println!("got auth info: #{:?}", auth_info);
+        println!("got auth info: {:?}", auth_info);

        use ProxyAuthResponse::*;
        let db_info = match auth_info {
@@ -128,7 +134,9 @@ async fn handle_existing_user(

    // Read client's password hash
    let msg = client.read_password_message().await?;
-    let md5_response = parse_password(&msg).ok_or(auth::AuthErrorImpl::MalformedPassword)?;
+    let md5_response = parse_password(&msg).ok_or(auth::AuthErrorImpl::MalformedPassword(
+        "the password should be a valid null-terminated utf-8 string",
+    ))?;

    let db_info = authenticate_proxy_client(
        auth_endpoint,
@@ -139,21 +147,17 @@ async fn handle_existing_user(
    )
    .await?;

-    client
-        .write_message_noflush(&Be::AuthenticationOk)?
-        .write_message_noflush(&BeParameterStatusMessage::encoding())?;
-
    Ok(compute::NodeInfo {
-        db_info,
-        scram_keys: None,
+        reported_auth_ok: false,
+        config: db_info.into(),
    })
 }

 pub async fn handle_user(
    auth_endpoint: &reqwest::Url,
    auth_link_uri: &reqwest::Url,
-    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
    creds: &ClientCredentials,
+    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
 ) -> auth::Result<compute::NodeInfo> {
    if creds.is_existing_user() {
        handle_existing_user(auth_endpoint, client, creds).await
@@ -201,4 +205,24 @@ mod tests {
        .unwrap();
        assert!(matches!(auth, ProxyAuthResponse::NotReady { .. }));
    }
+
+    #[test]
+    fn parse_db_info() -> anyhow::Result<()> {
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "password": "password",
+        }))?;
+
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+        }))?;
+
+        Ok(())
+    }
 }
--- a/proxy/src/auth/backend/link.rs
+++ b/proxy/src/auth/backend/link.rs
@@ -41,7 +41,7 @@ pub async fn handle_user(
    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;

    Ok(compute::NodeInfo {
-        db_info,
-        scram_keys: None,
+        reported_auth_ok: true,
+        config: db_info.into(),
    })
 }
--- a/proxy/src/auth/backend/postgres.rs
+++ b/proxy/src/auth/backend/postgres.rs
@@ -3,10 +3,12 @@
 use crate::{
    auth::{
        self,
-        backend::console::{self, io_error, AuthInfo, Result},
-        ClientCredentials, DatabaseInfo,
+        backend::console::{self, AuthInfo, Result},
+        ClientCredentials,
    },
-    compute, scram,
+    compute::{self, ComputeConnCfg},
+    error::io_error,
+    scram,
    stream::PqStream,
    url::ApiUrl,
 };
@@ -20,8 +22,8 @@ pub(super) struct Api<'a> {

 impl<'a> Api<'a> {
    /// Construct an API object containing the auth parameters.
-    pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Result<Self> {
-        Ok(Self { endpoint, creds })
+    pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Self {
+        Self { endpoint, creds }
    }

    /// Authenticate the existing user or throw an error.
@@ -56,7 +58,10 @@ impl<'a> Api<'a> {

            // We shouldn't get more than one row anyway.
            [row, ..] => {
-                let entry = row.try_get(0).map_err(io_error)?;
+                let entry = row
+                    .try_get("rolpassword")
+                    .map_err(|e| io_error(format!("failed to read user's password: {e}")))?;
+
                scram::ServerSecret::parse(entry)
                    .map(AuthInfo::Scram)
                    .or_else(|| {
@@ -75,14 +80,14 @@ impl<'a> Api<'a> {
    }

    /// We don't need to wake anything locally, so we just return the connection info.
-    async fn wake_compute(&self) -> Result<DatabaseInfo> {
-        Ok(DatabaseInfo {
-            // TODO: handle that near CLI params parsing
-            host: self.endpoint.host_str().unwrap_or("localhost").to_owned(),
-            port: self.endpoint.port().unwrap_or(5432),
-            dbname: self.creds.dbname.to_owned(),
-            user: self.creds.user.to_owned(),
-            password: None,
-        })
+    pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg> {
+        let mut config = ComputeConnCfg::new();
+        config
+            .host(self.endpoint.host_str().unwrap_or("localhost"))
+            .port(self.endpoint.port().unwrap_or(5432))
+            .dbname(&self.creds.dbname)
+            .user(&self.creds.user);
+
+        Ok(config)
    }
 }
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -1,39 +1,25 @@
 //! User credentials used in authentication.

-use crate::compute;
-use crate::config::ProxyConfig;
 use crate::error::UserFacingError;
-use crate::stream::PqStream;
-use std::collections::HashMap;
 use thiserror::Error;
-use tokio::io::{AsyncRead, AsyncWrite};
+use utils::pq_proto::StartupMessageParams;

 #[derive(Debug, Error, PartialEq, Eq, Clone)]
 pub enum ClientCredsParseError {
-    #[error("Parameter `{0}` is missing in startup packet.")]
+    #[error("Parameter '{0}' is missing in startup packet.")]
    MissingKey(&'static str),

-    #[error(
-        "Project name is not specified. \
-        EITHER please upgrade the postgres client library (libpq) for SNI support \
-        OR pass the project name as a parameter: '&options=project%3D<project-name>'."
-    )]
-    MissingSNIAndProjectName,
-
    #[error("Inconsistent project name inferred from SNI ('{0}') and project option ('{1}').")]
-    InconsistentProjectNameAndSNI(String, String),
-
-    #[error("Common name is not set.")]
-    CommonNameNotSet,
+    InconsistentProjectNames(String, String),

    #[error(
        "SNI ('{1}') inconsistently formatted with respect to common name ('{0}'). \
-        SNI should be formatted as '<project-name>.<common-name>'."
+        SNI should be formatted as '<project-name>.{0}'."
    )]
-    InconsistentCommonNameAndSNI(String, String),
+    InconsistentSni(String, String),

-    #[error("Project name ('{0}') must contain only alphanumeric characters and hyphens ('-').")]
-    ProjectNameContainsIllegalChars(String),
+    #[error("Project name ('{0}') must contain only alphanumeric characters and hyphen.")]
+    MalformedProjectName(String),
 }

 impl UserFacingError for ClientCredsParseError {}
@@ -44,286 +30,171 @@ impl UserFacingError for ClientCredsParseError {}
 pub struct ClientCredentials {
    pub user: String,
    pub dbname: String,
-    pub project_name: Result<String, ClientCredsParseError>,
+    pub project: Option<String>,
 }

 impl ClientCredentials {
-    pub fn is_existing_user(&self) -> bool {
-        // This logic will likely change in the future.
-        self.user.ends_with("@zenith")
+    pub fn project(&self) -> Option<&str> {
+        self.project.as_deref()
    }
+}

+impl ClientCredentials {
    pub fn parse(
-        mut options: HashMap<String, String>,
-        sni_data: Option<&str>,
+        mut options: StartupMessageParams,
+        sni: Option<&str>,
        common_name: Option<&str>,
    ) -> Result<Self, ClientCredsParseError> {
-        let mut get_param = |key| {
-            options
-                .remove(key)
-                .ok_or(ClientCredsParseError::MissingKey(key))
-        };
+        use ClientCredsParseError::*;

+        // Some parameters are absolutely necessary, others not so much.
+        let mut get_param = |key| options.remove(key).ok_or(MissingKey(key));
+
+        // Some parameters are stored in the startup message.
        let user = get_param("user")?;
        let dbname = get_param("database")?;
-        let project_name = get_param("project").ok();
-        let project_name = get_project_name(sni_data, common_name, project_name.as_deref());
+        let project_a = get_param("project").ok();
+
+        // Project name from the SNI subdomain. `InconsistentSni` takes (common name, SNI).
+        // NOTE: we do not consider SNI if `common_name` is missing.
+        let project_b = sni
+            .zip(common_name)
+            .map(|(sni, cn)| {
+                // TODO: what if SNI is present but just a common name?
+                subdomain_from_sni(sni, cn)
+                    .ok_or_else(|| InconsistentSni(cn.to_owned(), sni.to_owned()))
+            })
+            .transpose()?;
+
+        let project = match (project_a, project_b) {
+            // Invariant: both names must match. The error takes (SNI name, option name).
+            (Some(a), Some(b)) if a != b => Some(Err(InconsistentProjectNames(b, a))),
+            (a, b) => a.or(b).map(|name| {
+                // Invariant: project name may not contain certain characters.
+                check_project_name(name).map_err(MalformedProjectName)
+            }),
+        }
+        .transpose()?;

        Ok(Self {
            user,
            dbname,
-            project_name,
+            project,
        })
    }
+}

-    /// Use credentials to authenticate the user.
-    pub async fn authenticate(
-        self,
-        config: &ProxyConfig,
-        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    ) -> super::Result<compute::NodeInfo> {
-        // This method is just a convenient facade for `handle_user`
-        super::backend::handle_user(config, client, self).await
+fn check_project_name(name: String) -> Result<String, String> {
+    if !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '-') {
+        Ok(name) // non-empty and made only of alphanumeric characters and '-'
+    } else {
+        Err(name) // hand the rejected name back so the caller can report it
    }
 }

-/// Inferring project name from sni_data.
-fn project_name_from_sni_data(
-    sni_data: &str,
-    common_name: &str,
-) -> Result<String, ClientCredsParseError> {
-    let common_name_with_dot = format!(".{common_name}");
-    // check that ".{common_name_with_dot}" is the actual suffix in sni_data
-    if !sni_data.ends_with(&common_name_with_dot) {
-        return Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
-            common_name.to_string(),
-            sni_data.to_string(),
+fn subdomain_from_sni(sni: &str, common_name: &str) -> Option<String> {
+    sni.strip_suffix(common_name)?
+        .strip_suffix('.')
+        .map(str::to_owned)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_options<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> StartupMessageParams {
+        StartupMessageParams::from(pairs.map(|(k, v)| (k.to_owned(), v.to_owned())))
+    }
+
+    #[test]
+    #[ignore = "TODO: fix how database is handled"]
+    fn parse_bare_minimum() -> anyhow::Result<()> {
+        // According to postgresql, only `user` should be required.
+        let options = make_options([("user", "john_doe")]);
+
+        // TODO: check that `creds.dbname` is None.
+        let creds = ClientCredentials::parse(options, None, None)?;
+        assert_eq!(creds.user, "john_doe");
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_missing_project() -> anyhow::Result<()> {
+        let options = make_options([("user", "john_doe"), ("database", "world")]);
+
+        let creds = ClientCredentials::parse(options, None, None)?;
+        assert_eq!(creds.user, "john_doe");
+        assert_eq!(creds.dbname, "world");
+        assert_eq!(creds.project, None);
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_project_from_sni() -> anyhow::Result<()> {
+        let options = make_options([("user", "john_doe"), ("database", "world")]);
+
+        let sni = Some("foo.localhost");
+        let common_name = Some("localhost");
+
+        let creds = ClientCredentials::parse(options, sni, common_name)?;
+        assert_eq!(creds.user, "john_doe");
+        assert_eq!(creds.dbname, "world");
+        assert_eq!(creds.project.as_deref(), Some("foo"));
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_project_from_options() -> anyhow::Result<()> {
+        let options = make_options([
+            ("user", "john_doe"),
+            ("database", "world"),
+            ("project", "bar"),
+        ]);
+
+        let creds = ClientCredentials::parse(options, None, None)?;
+        assert_eq!(creds.user, "john_doe");
+        assert_eq!(creds.dbname, "world");
+        assert_eq!(creds.project.as_deref(), Some("bar"));
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_projects_identical() -> anyhow::Result<()> {
+        let options = make_options([
+            ("user", "john_doe"),
+            ("database", "world"),
+            ("project", "baz"),
+        ]);
+
+        let sni = Some("baz.localhost");
+        let common_name = Some("localhost");
+
+        let creds = ClientCredentials::parse(options, sni, common_name)?;
+        assert_eq!(creds.user, "john_doe");
+        assert_eq!(creds.dbname, "world");
+        assert_eq!(creds.project.as_deref(), Some("baz"));
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_projects_different() {
+        let options = make_options([
+            ("user", "john_doe"),
+            ("database", "world"),
+            ("project", "first"),
+        ]);
+
+        let sni = Some("second.localhost");
+        let common_name = Some("localhost");
+
+        assert!(matches!(
+            ClientCredentials::parse(options, sni, common_name).expect_err("should fail"),
+            ClientCredsParseError::InconsistentProjectNames(_, _)
        ));
    }
-    // return sni_data without the common name suffix.
-    Ok(sni_data
-        .strip_suffix(&common_name_with_dot)
-        .unwrap()
-        .to_string())
-}
-
-#[cfg(test)]
-mod tests_for_project_name_from_sni_data {
-    use super::*;
-
-    #[test]
-    fn passing() {
-        let target_project_name = "my-project-123";
-        let common_name = "localtest.me";
-        let sni_data = format!("{target_project_name}.{common_name}");
-        assert_eq!(
-            project_name_from_sni_data(&sni_data, common_name),
-            Ok(target_project_name.to_string())
-        );
-    }
-
-    #[test]
-    fn throws_inconsistent_common_name_and_sni_data() {
-        let target_project_name = "my-project-123";
-        let common_name = "localtest.me";
-        let wrong_suffix = "wrongtest.me";
-        assert_eq!(common_name.len(), wrong_suffix.len());
-        let wrong_common_name = format!("wrong{wrong_suffix}");
-        let sni_data = format!("{target_project_name}.{wrong_common_name}");
-        assert_eq!(
-            project_name_from_sni_data(&sni_data, common_name),
-            Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
-                common_name.to_string(),
-                sni_data
-            ))
-        );
-    }
-}
-
-/// Determine project name from SNI or from project_name parameter from options argument.
-fn get_project_name(
-    sni_data: Option<&str>,
-    common_name: Option<&str>,
-    project_name: Option<&str>,
-) -> Result<String, ClientCredsParseError> {
-    // determine the project name from sni_data if it exists, otherwise from project_name.
-    let ret = match sni_data {
-        Some(sni_data) => {
-            let common_name = common_name.ok_or(ClientCredsParseError::CommonNameNotSet)?;
-            let project_name_from_sni = project_name_from_sni_data(sni_data, common_name)?;
-            // check invariant: project name from options and from sni should match
-            if let Some(project_name) = &project_name {
-                if !project_name_from_sni.eq(project_name) {
-                    return Err(ClientCredsParseError::InconsistentProjectNameAndSNI(
-                        project_name_from_sni,
-                        project_name.to_string(),
-                    ));
-                }
-            }
-            project_name_from_sni
-        }
-        None => project_name
-            .ok_or(ClientCredsParseError::MissingSNIAndProjectName)?
-            .to_string(),
-    };
-
-    // check formatting invariant: project name must contain only alphanumeric characters and hyphens.
-    if !ret.chars().all(|x: char| x.is_alphanumeric() || x == '-') {
-        return Err(ClientCredsParseError::ProjectNameContainsIllegalChars(ret));
-    }
-
-    Ok(ret)
-}
-
-#[cfg(test)]
-mod tests_for_project_name_only {
-    use super::*;
-
-    #[test]
-    fn passing_from_sni_data_only() {
-        let target_project_name = "my-project-123";
-        let common_name = "localtest.me";
-        let sni_data = format!("{target_project_name}.{common_name}");
-        assert_eq!(
-            get_project_name(Some(&sni_data), Some(common_name), None),
-            Ok(target_project_name.to_string())
-        );
-    }
-
-    #[test]
-    fn throws_project_name_contains_illegal_chars_from_sni_data_only() {
-        let project_name_prefix = "my-project";
-        let project_name_suffix = "123";
-        let common_name = "localtest.me";
-
-        for illegal_char_id in 0..256 {
-            let illegal_char = char::from_u32(illegal_char_id).unwrap();
-            if !(illegal_char.is_alphanumeric() || illegal_char == '-')
-                && illegal_char.to_string().len() == 1
-            {
-                let target_project_name =
-                    format!("{project_name_prefix}{illegal_char}{project_name_suffix}");
-                let sni_data = format!("{target_project_name}.{common_name}");
-                assert_eq!(
-                    get_project_name(Some(&sni_data), Some(common_name), None),
-                    Err(ClientCredsParseError::ProjectNameContainsIllegalChars(
-                        target_project_name
-                    ))
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn passing_from_project_name_only() {
-        let target_project_name = "my-project-123";
-        let common_names = [Some("localtest.me"), None];
-        for common_name in common_names {
-            assert_eq!(
-                get_project_name(None, common_name, Some(target_project_name)),
-                Ok(target_project_name.to_string())
-            );
-        }
-    }
-
-    #[test]
-    fn throws_project_name_contains_illegal_chars_from_project_name_only() {
-        let project_name_prefix = "my-project";
-        let project_name_suffix = "123";
-        let common_names = [Some("localtest.me"), None];
-
-        for common_name in common_names {
-            for illegal_char_id in 0..256 {
-                let illegal_char: char = char::from_u32(illegal_char_id).unwrap();
-                if !(illegal_char.is_alphanumeric() || illegal_char == '-')
-                    && illegal_char.to_string().len() == 1
-                {
-                    let target_project_name =
-                        format!("{project_name_prefix}{illegal_char}{project_name_suffix}");
-                    assert_eq!(
-                        get_project_name(None, common_name, Some(&target_project_name)),
-                        Err(ClientCredsParseError::ProjectNameContainsIllegalChars(
-                            target_project_name
-                        ))
-                    );
-                }
-            }
-        }
-    }
-
-    #[test]
-    fn passing_from_sni_data_and_project_name() {
-        let target_project_name = "my-project-123";
-        let common_name = "localtest.me";
-        let sni_data = format!("{target_project_name}.{common_name}");
-        assert_eq!(
-            get_project_name(
-                Some(&sni_data),
-                Some(common_name),
-                Some(target_project_name)
-            ),
-            Ok(target_project_name.to_string())
-        );
-    }
-
-    #[test]
-    fn throws_inconsistent_project_name_and_sni() {
-        let project_name_param = "my-project-123";
-        let wrong_project_name = "not-my-project-123";
-        let common_name = "localtest.me";
-        let sni_data = format!("{wrong_project_name}.{common_name}");
-        assert_eq!(
-            get_project_name(Some(&sni_data), Some(common_name), Some(project_name_param)),
-            Err(ClientCredsParseError::InconsistentProjectNameAndSNI(
-                wrong_project_name.to_string(),
-                project_name_param.to_string()
-            ))
-        );
-    }
-
-    #[test]
-    fn throws_common_name_not_set() {
-        let target_project_name = "my-project-123";
-        let wrong_project_name = "not-my-project-123";
-        let common_name = "localtest.me";
-        let sni_datas = [
-            Some(format!("{wrong_project_name}.{common_name}")),
-            Some(format!("{target_project_name}.{common_name}")),
-        ];
-        let project_names = [None, Some(target_project_name)];
-        for sni_data in sni_datas {
-            for project_name_param in project_names {
-                assert_eq!(
-                    get_project_name(sni_data.as_deref(), None, project_name_param),
-                    Err(ClientCredsParseError::CommonNameNotSet)
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn throws_inconsistent_common_name_and_sni_data() {
-        let target_project_name = "my-project-123";
-        let wrong_project_name = "not-my-project-123";
-        let common_name = "localtest.me";
-        let wrong_suffix = "wrongtest.me";
-        assert_eq!(common_name.len(), wrong_suffix.len());
-        let wrong_common_name = format!("wrong{wrong_suffix}");
-        let sni_datas = [
-            Some(format!("{wrong_project_name}.{wrong_common_name}")),
-            Some(format!("{target_project_name}.{wrong_common_name}")),
-        ];
-        let project_names = [None, Some(target_project_name)];
-        for project_name_param in project_names {
-            for sni_data in &sni_datas {
-                assert_eq!(
-                    get_project_name(sni_data.as_deref(), Some(common_name), project_name_param),
-                    Err(ClientCredsParseError::InconsistentCommonNameAndSNI(
-                        common_name.to_string(),
-                        sni_data.clone().unwrap().to_string()
-                    ))
-                );
-            }
-        }
-    }
 }
--- a/proxy/src/auth/flow.rs
+++ b/proxy/src/auth/flow.rs
@@ -1,8 +1,7 @@
 //! Main authentication flow.

-use super::AuthErrorImpl;
-use crate::stream::PqStream;
-use crate::{sasl, scram};
+use super::{AuthErrorImpl, PasswordHackPayload};
+use crate::{sasl, scram, stream::PqStream};
 use std::io;
 use tokio::io::{AsyncRead, AsyncWrite};
 use utils::pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be};
@@ -27,6 +26,17 @@ impl AuthMethod for Scram<'_> {
    }
 }

+/// Use an ad hoc auth flow (for clients which don't support SNI) proposed in
+/// <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.
+pub struct PasswordHack;
+
+impl AuthMethod for PasswordHack {
+    #[inline(always)]
+    fn first_message(&self) -> BeMessage<'_> {
+        Be::AuthenticationCleartextPassword
+    }
+}
+
 /// This wrapper for [`PqStream`] performs client authentication.
 #[must_use]
 pub struct AuthFlow<'a, Stream, State> {
@@ -57,13 +67,34 @@ impl<'a, S: AsyncWrite + Unpin> AuthFlow<'a, S, Begin> {
    }
 }

+impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
+    /// Perform user authentication. Raise an error in case authentication failed.
+    pub async fn authenticate(self) -> super::Result<PasswordHackPayload> {
+        let msg = self.stream.read_password_message().await?;
+        let password = msg
+            .strip_suffix(&[0])
+            .ok_or(AuthErrorImpl::MalformedPassword("missing terminator"))?;
+
+        // The so-called "password" should contain a base64-encoded json.
+        // We will use it later to route the client to their project.
+        let bytes = base64::decode(password)
+            .map_err(|_| AuthErrorImpl::MalformedPassword("bad encoding"))?;
+
+        let payload = serde_json::from_slice(&bytes)
+            .map_err(|_| AuthErrorImpl::MalformedPassword("invalid payload"))?;
+
+        Ok(payload)
+    }
+}
+
 /// Stream wrapper for handling [SCRAM](crate::scram) auth.
 impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
    /// Perform user authentication. Raise an error in case authentication failed.
    pub async fn authenticate(self) -> super::Result<scram::ScramKey> {
        // Initial client message contains the chosen auth method's name.
        let msg = self.stream.read_password_message().await?;
-        let sasl = sasl::FirstMessage::parse(&msg).ok_or(AuthErrorImpl::MalformedPassword)?;
+        let sasl = sasl::FirstMessage::parse(&msg)
+            .ok_or(AuthErrorImpl::MalformedPassword("bad sasl message"))?;

        // Currently, the only supported SASL method is SCRAM.
        if !scram::METHODS.contains(&sasl.method) {
--- a/proxy/src/auth/password_hack.rs
+++ b/proxy/src/auth/password_hack.rs
@@ -0,0 +1,102 @@
+//! Payload for ad hoc authentication method for clients that don't support SNI.
+//! See the `impl` for [`super::backend::BackendType<ClientCredentials>`].
+//! Read more: <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.
+
+use serde::{de, Deserialize, Deserializer};
+use std::fmt;
+
+#[derive(Deserialize)]
+#[serde(untagged)]
+pub enum Password {
+    /// A regular string for utf-8 encoded passwords.
+    Simple { password: String },
+
+    /// Password is base64-encoded because it may contain arbitrary byte sequences.
+    Encoded {
+        #[serde(rename = "password_", deserialize_with = "deserialize_base64")]
+        password: Vec<u8>,
+    },
+}
+
+impl AsRef<[u8]> for Password {
+    fn as_ref(&self) -> &[u8] {
+        match self {
+            Password::Simple { password } => password.as_ref(),
+            Password::Encoded { password } => password.as_ref(),
+        }
+    }
+}
+
+#[derive(Deserialize)]
+pub struct PasswordHackPayload {
+    pub project: String,
+
+    #[serde(flatten)]
+    pub password: Password,
+}
+
+fn deserialize_base64<'a, D: Deserializer<'a>>(des: D) -> Result<Vec<u8>, D::Error> {
+    // It's very tempting to replace this with
+    //
+    // ```
+    // let base64: &str = Deserialize::deserialize(des)?;
+    // base64::decode(base64).map_err(serde::de::Error::custom)
+    // ```
+    //
+    // Unfortunately, we can't always deserialize into `&str`, so we'd
+    // have to use an allocating `String` instead. Thus, visitor is better.
+    struct Visitor;
+
+    impl<'de> de::Visitor<'de> for Visitor {
+        type Value = Vec<u8>;
+
+        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+            formatter.write_str("a string")
+        }
+
+        fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
+            base64::decode(v).map_err(de::Error::custom)
+        }
+    }
+
+    des.deserialize_str(Visitor)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rstest::rstest;
+    use serde_json::json;
+
+    #[test]
+    fn parse_password() -> anyhow::Result<()> {
+        let password: Password = serde_json::from_value(json!({
+            "password": "foo",
+        }))?;
+        assert_eq!(password.as_ref(), "foo".as_bytes());
+
+        let password: Password = serde_json::from_value(json!({
+            "password_": base64::encode("foo"),
+        }))?;
+        assert_eq!(password.as_ref(), "foo".as_bytes());
+
+        Ok(())
+    }
+
+    #[rstest]
+    #[case("password", str::to_owned)]
+    #[case("password_", base64::encode)]
+    fn parse(#[case] key: &str, #[case] encode: fn(&'static str) -> String) -> anyhow::Result<()> {
+        let (password, project) = ("password", "pie-in-the-sky");
+        let payload = json!({
+            "project": project,
+            key: encode(password),
+        });
+
+        let payload: PasswordHackPayload = serde_json::from_value(payload)?;
+        assert_eq!(payload.password.as_ref(), password.as_bytes());
+        assert_eq!(payload.project, project);
+
+        Ok(())
+    }
+}
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -1,8 +1,6 @@
-use crate::auth::DatabaseInfo;
-use crate::cancellation::CancelClosure;
-use crate::error::UserFacingError;
-use std::io;
-use std::net::SocketAddr;
+use crate::{cancellation::CancelClosure, error::UserFacingError};
+use futures::TryFutureExt;
+use std::{io, net::SocketAddr};
 use thiserror::Error;
 use tokio::net::TcpStream;
 use tokio_postgres::NoTls;
@@ -21,44 +19,96 @@ pub enum ConnectionError {
    FailedToFetchPgVersion,
 }

-impl UserFacingError for ConnectionError {}
-
-/// PostgreSQL version as [`String`].
-pub type Version = String;
+impl UserFacingError for ConnectionError {
+    fn to_string_client(&self) -> String {
+        use ConnectionError::*;
+        match self {
+            // This helps us drop irrelevant library-specific prefixes.
+            // TODO: propagate severity level and other parameters.
+            Postgres(err) => match err.as_db_error() {
+                Some(err) => err.message().to_string(),
+                None => err.to_string(),
+            },
+            other => other.to_string(),
+        }
+    }
+}

 /// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
 pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;

-/// Compute node connection params.
+pub type ComputeConnCfg = tokio_postgres::Config;
+
+/// Various compute node info for establishing connection etc.
 pub struct NodeInfo {
-    pub db_info: DatabaseInfo,
-    pub scram_keys: Option<ScramKeys>,
+    /// Did we send [`utils::pq_proto::BeMessage::AuthenticationOk`]?
+    pub reported_auth_ok: bool,
+    /// Compute node connection params.
+    pub config: tokio_postgres::Config,
 }

 impl NodeInfo {
    async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
-        let host_port = (self.db_info.host.as_str(), self.db_info.port);
-        let socket = TcpStream::connect(host_port).await?;
-        let socket_addr = socket.peer_addr()?;
-        socket2::SockRef::from(&socket).set_keepalive(true)?;
+        use tokio_postgres::config::Host;

-        Ok((socket_addr, socket))
+        let connect_once = |host, port| {
+            TcpStream::connect((host, port)).and_then(|socket| async {
+                let socket_addr = socket.peer_addr()?;
+                // This prevents load balancer from severing the connection.
+                socket2::SockRef::from(&socket).set_keepalive(true)?;
+                Ok((socket_addr, socket))
+            })
+        };
+
+        // We can't reuse connection establishing logic from `tokio_postgres` here,
+        // because it has no means for extracting the underlying socket which we
+        // require for our business.
+        let mut connection_error = None;
+        let ports = self.config.get_ports();
+        for (i, host) in self.config.get_hosts().iter().enumerate() {
+            let port = ports.get(i).or_else(|| ports.get(0)).unwrap_or(&5432);
+            let host = match host {
+                Host::Tcp(host) => host.as_str(),
+                Host::Unix(_) => continue, // unix sockets are not welcome here
+            };
+
+            // TODO: maybe we should add a timeout.
+            match connect_once(host, *port).await {
+                Ok(socket) => return Ok(socket),
+                Err(err) => {
+                    // We can't throw an error here, as there might be more hosts to try.
+                    println!("failed to connect to compute `{host}:{port}`: {err}");
+                    connection_error = Some(err);
+                }
+            }
+        }
+
+        Err(connection_error.unwrap_or_else(|| {
+            io::Error::new(
+                io::ErrorKind::Other,
+                format!("couldn't connect: bad compute config: {:?}", self.config),
+            )
+        }))
    }
+}

+pub struct PostgresConnection {
+    /// Socket connected to a compute node.
+    pub stream: TcpStream,
+    /// PostgreSQL version of this instance.
+    pub version: String,
+}
+
+impl NodeInfo {
    /// Connect to a corresponding compute node.
-    pub async fn connect(self) -> Result<(TcpStream, Version, CancelClosure), ConnectionError> {
-        let (socket_addr, mut socket) = self
+    pub async fn connect(&self) -> Result<(PostgresConnection, CancelClosure), ConnectionError> {
+        let (socket_addr, mut stream) = self
            .connect_raw()
            .await
            .map_err(|_| ConnectionError::FailedToConnectToCompute)?;

-        let mut config = tokio_postgres::Config::from(self.db_info);
-        if let Some(scram_keys) = self.scram_keys {
-            config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(scram_keys));
-        }
-
        // TODO: establish a secure connection to the DB
-        let (client, conn) = config.connect_raw(&mut socket, NoTls).await?;
+        let (client, conn) = self.config.connect_raw(&mut stream, NoTls).await?;
        let version = conn
            .parameter("server_version")
            .ok_or(ConnectionError::FailedToFetchPgVersion)?
@@ -66,6 +116,8 @@ impl NodeInfo {

        let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token());

-        Ok((socket, version, cancel_closure))
+        let db = PostgresConnection { stream, version };
+
+        Ok((db, cancel_closure))
    }
 }
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,28 +1,16 @@
-use crate::url::ApiUrl;
+use crate::{auth, url::ApiUrl};
 use anyhow::{bail, ensure, Context};
 use std::{str::FromStr, sync::Arc};

-#[derive(Debug)]
-pub enum AuthBackendType {
-    /// Legacy Cloud API (V1).
-    LegacyConsole,
-    /// Authentication via a web browser.
-    Link,
-    /// Current Cloud API (V2).
-    Console,
-    /// Local mock of Cloud API (V2).
-    Postgres,
-}
-
-impl FromStr for AuthBackendType {
+impl FromStr for auth::BackendType<()> {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> anyhow::Result<Self> {
-        use AuthBackendType::*;
+        use auth::BackendType::*;
        Ok(match s {
-            "legacy" => LegacyConsole,
-            "console" => Console,
-            "postgres" => Postgres,
+            "legacy" => LegacyConsole(()),
+            "console" => Console(()),
+            "postgres" => Postgres(()),
            "link" => Link,
            _ => bail!("Invalid option `{s}` for auth method"),
        })
@@ -31,7 +19,11 @@ impl FromStr for AuthBackendType {

 pub struct ProxyConfig {
    pub tls_config: Option<TlsConfig>,
-    pub auth_backend: AuthBackendType,
+    pub auth_backend: auth::BackendType<()>,
+    pub auth_urls: AuthUrls,
+}
+
+pub struct AuthUrls {
    pub auth_endpoint: ApiUrl,
    pub auth_link_uri: ApiUrl,
 }
@@ -87,10 +79,8 @@ pub fn configure_tls(key_path: &str, cert_path: &str) -> anyhow::Result<TlsConfi
                "Failed to parse PEM object from bytes from file at '{cert_path}'."
            ))?
            .1;
-        let almost_common_name = pem.parse_x509()?.tbs_certificate.subject.to_string();
-        let expected_prefix = "CN=*.";
-        let common_name = almost_common_name.strip_prefix(expected_prefix);
-        common_name.map(str::to_string)
+        let common_name = pem.parse_x509()?.subject().to_string();
+        common_name.strip_prefix("CN=*.").map(|s| s.to_string())
    };

    Ok(TlsConfig {
--- a/proxy/src/error.rs
+++ b/proxy/src/error.rs
@@ -1,3 +1,5 @@
+use std::io;
+
 /// Marks errors that may be safely shown to a client.
 /// This trait can be seen as a specialized version of [`ToString`].
 ///
@@ -15,3 +17,8 @@ pub trait UserFacingError: ToString {
        self.to_string()
    }
 }
+
+/// Upcast (almost) any error into an opaque [`io::Error`].
+pub fn io_error(e: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> io::Error {
+    io::Error::new(io::ErrorKind::Other, e)
+}
--- a/Show More
+++ b/Show More