Compare commits

..

4 Commits

Author           SHA1        Message                                          Date
Bojan Serafimov  0705c99fdb  Try larger sleep                                 2022-08-12 09:52:40 -04:00
Bojan Serafimov  21089d5217  Wait for pid death                               2022-08-12 09:21:44 -04:00
Bojan Serafimov  bd33ea9fae  Add hacky solution                               2022-08-12 09:05:51 -04:00
Bojan Serafimov  414279726d  Reproduce pageserver.pid lock on restart issue   2022-08-12 09:01:17 -04:00
110 changed files with 1993 additions and 2882 deletions


@@ -29,12 +29,8 @@ runs:
time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
elif [ -f ${SOURCE} ]; then
time tar -cf ${ARCHIVE} --zstd ${SOURCE}
elif ! ls ${SOURCE} > /dev/null 2>&1; then
echo 2>&1 "${SOURCE} does not exist"
exit 2
else
echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
exit 3
echo 2>&1 "${SOURCE} neither directory nor file, don't know how to handle it"
fi
- name: Upload artifact


@@ -2,14 +2,30 @@
set -e
if [ -n "${DOCKER_TAG}" ]; then
# Version is DOCKER_TAG but without prefix
VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
RELEASE=${RELEASE:-false}
# look at docker hub for latest tag for neon docker image
if [ "${RELEASE}" = "true" ]; then
echo "search latest release tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
exit 1
else
TAG="release-${VERSION}"
fi
else
echo "Please set DOCKER_TAG environment variable"
exit 1
echo "search latest dev tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -E '^[0-9]+$' | sort -n | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
exit 1
else
TAG="${VERSION}"
fi
fi
echo "found ${VERSION}"
# do initial cleanup
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
@@ -17,8 +33,8 @@ mkdir neon_install
# retrieve binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet neondatabase/neon:${DOCKER_TAG}
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
docker pull --quiet neondatabase/neon:${TAG}
ID=$(docker create neondatabase/neon:${TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
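For orientation, a minimal sketch of how the reworked tag selection above is expected to be driven; the script name, the example values, and the exact split between old and new lines are assumptions, not taken from the diff:

RELEASE=true ./get_binaries.sh    # hypothetical name; picks the newest release-<N> tag on Docker Hub, so TAG=release-<N>
./get_binaries.sh                 # RELEASE defaults to false; picks the newest numeric dev tag, so TAG=<N>
# in both cases the binaries are then pulled from neondatabase/neon:${TAG} and unpacked into ./neon_install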


@@ -1,8 +1,7 @@
#!/bin/sh
# fetch params from meta-data service
# get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
@@ -15,8 +14,7 @@ cat <<EOF | tee /tmp/payload
"port": 6500,
"http_port": 7676,
"region_id": {{ console_region_id }},
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}"
"instance_id": "${INSTANCE_ID}"
}
EOF


@@ -1,4 +1,4 @@
name: Benchmarking
name: benchmarking
on:
# uncomment to run on push for debugging your PR
@@ -15,15 +15,6 @@ on:
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
bench:
# this workflow runs on a self-hosted runner
@@ -69,6 +60,7 @@ jobs:
- name: Setup cluster
env:
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
shell: bash -euxo pipefail {0}
run: |
set -e
@@ -104,9 +96,7 @@ jobs:
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set the --sparse-ordering option of the pytest-order plugin to ensure tests run in the order they appear in the file,
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 5400
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
- name: Submit result
env:
@@ -123,106 +113,3 @@ jobs:
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-compare:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: "10gb"
REMOTE_ENV: "1"
POSTGRES_DISTRIB_DIR: /usr
TEST_OUTPUT: /tmp/test_output
strategy:
fail-fast: false
matrix:
connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2817580636
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Calculate platform
id: calculate-platform
env:
CONNSTR: ${{ matrix.connstr }}
run: |
if [ "${CONNSTR}" = "BENCHMARK_CAPTEST_CONNSTR" ]; then
PLATFORM=neon-captest
elif [ "${CONNSTR}" = "BENCHMARK_RDS_CONNSTR" ]; then
PLATFORM=rds-aurora
else
echo 2>&1 "Unknown CONNSTR=${CONNSTR}. Allowed are BENCHMARK_CAPTEST_CONNSTR, and BENCHMARK_RDS_CONNSTR only"
exit 1
fi
echo "::set-output name=PLATFORM::${PLATFORM}"
- name: Install Deps
run: |
echo "deb http://apt.postgresql.org/pub/repos/apt focal-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt -y update
sudo apt install -y postgresql-14 postgresql-client-14
- name: Benchmark init
env:
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
mkdir -p perf-report-captest
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
- name: Benchmark simple-update
env:
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
- name: Benchmark select-only
env:
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-captest) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Upload logs
if: always()
uses: ./.github/actions/upload
with:
name: bench-captest-${{ steps.calculate-platform.outputs.PLATFORM }}
path: /tmp/test_output/
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}


@@ -7,6 +7,10 @@ on:
- release
pull_request:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
@@ -17,39 +21,9 @@ env:
COPT: '-Werror'
jobs:
tag:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Get build tag
run: |
echo run:$GITHUB_RUN_ID
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
echo "::set-output name=tag::$GITHUB_RUN_ID"
fi
shell: bash
id: build-tag
build-neon:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
strategy:
fail-fast: false
matrix:
@@ -61,7 +35,7 @@ jobs:
GIT_VERSION: ${{ github.sha }}
steps:
- name: Fix git ownership
- name: Fix git ownerwhip
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
@@ -80,7 +54,6 @@ jobs:
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
shell: bash -euxo pipefail {0}
# Set some environment variables used by all the steps.
#
@@ -104,7 +77,6 @@ jobs:
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
shell: bash -euxo pipefail {0}
# Don't include the ~/.cargo/registry/src directory. It contains just
# uncompressed versions of the crates in ~/.cargo/registry/cache
@@ -121,8 +93,8 @@ jobs:
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
- name: Cache postgres build
id: cache_pg
@@ -134,17 +106,14 @@ jobs:
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
shell: bash -euxo pipefail {0}
- name: Run cargo build
run: |
${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
shell: bash -euxo pipefail {0}
- name: Run cargo test
run: |
${cov_prefix} cargo test $CARGO_FLAGS
shell: bash -euxo pipefail {0}
- name: Install rust binaries
run: |
@@ -185,11 +154,9 @@ jobs:
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
shell: bash -euxo pipefail {0}
- name: Install postgres binaries
run: cp -a tmp_install /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
- name: Upload Neon artifact
uses: ./.github/actions/upload
@@ -204,9 +171,7 @@ jobs:
pg_regress-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ build-neon ]
strategy:
fail-fast: false
@@ -234,9 +199,7 @@ jobs:
other-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ build-neon ]
strategy:
fail-fast: false
@@ -267,9 +230,7 @@ jobs:
benchmarks:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ build-neon ]
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
@@ -300,9 +261,7 @@ jobs:
coverage-report:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
needs: [ other-tests, pg_regress-tests ]
strategy:
fail-fast: false
@@ -325,7 +284,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git/
target/
key: v5-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
key: v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact
uses: ./.github/actions/download
@@ -341,7 +300,6 @@ jobs:
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
shell: bash -euxo pipefail {0}
- name: Build and upload coverage report
run: |
@@ -374,13 +332,9 @@ jobs:
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
shell: bash -euxo pipefail {0}
trigger-e2e-tests:
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
@@ -415,130 +369,150 @@ jobs:
}
}"
neon-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
docker-image:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Kaniko build neon
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
compute-tools-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build neon Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
pull: true
push: true
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
docker-image-compute:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build compute tools
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
compute-node-image:
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
steps:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Kaniko build compute node
working-directory: ./vendor/postgres/
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
promote-images:
runs-on: dev
needs: [ neon-image, compute-tools-image, compute-node-image ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
strategy:
fail-fast: false
matrix:
name: [ neon, compute-tools, compute-node ]
steps:
- name: Promote image to latest
run:
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: dev
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
steps:
- name: Install Crane & ECR helper
- name: Get build tag
run: |
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
# - name: Get build tag
# run: |
# if [[ "$GITHUB_REF_NAME" == "main" ]]; then
# echo "::set-output name=tag::$(git rev-list --count HEAD)"
# elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
# echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
# else
# echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release' "
# echo "::set-output name=tag::$GITHUB_RUN_ID"
# fi
# id: build-tag
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Configure ECR login
- name: Get legacy build tag
run: |
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Pull neon image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
- name: Build compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: false
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:local
- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
- name: Push compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: true
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
- name: Pull compute node image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
- name: Configure docker login
run: |
# ECR Credential Helper & Docker Hub don't work together in config, hence reset
echo "" > /github/home/.docker/config.json
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
- name: Push neon image to Docker Hub
run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
- name: Push compute tools image to Docker Hub
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
- name: Push compute node image to Docker Hub
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
- name: Add latest tag to images
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
- name: Build compute-node Docker image
uses: docker/build-push-action@v2
with:
context: ./vendor/postgres/
build-args:
COMPUTE_TOOLS_TAG=local
push: true
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -564,16 +538,14 @@ jobs:
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
#container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ]
# We need both storage **and** compute images for deploy, because control plane
# picks the compute version based on the storage version. If it notices a fresh
# storage it may bump the compute version. And if compute image failed to build
# it may break things badly.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
@@ -584,19 +556,12 @@ jobs:
submodules: true
fetch-depth: 0
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Setup ansible
run: |
export PATH="/root/.local/bin:$PATH"
pip install --progress-bar off --user ansible boto3
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
@@ -619,16 +584,13 @@ jobs:
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ]
runs-on: [ self-hosted, Linux, k8s-runner ]
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
# to run all deploy jobs consistently.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
@@ -641,9 +603,6 @@ jobs:
submodules: true
fetch-depth: 0
- name: Add curl
run: apt update && apt install curl -y
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
@@ -656,6 +615,6 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s


@@ -101,7 +101,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git
target
key: v2-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh


@@ -19,12 +19,8 @@ concurrency:
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]
env:
TEST_OUTPUT: /tmp/test_output
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -51,7 +47,7 @@ jobs:
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
@@ -65,18 +61,9 @@ jobs:
-m "remote_cluster" \
-rA "test_runner/pg_clients"
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
# It will be fixed after switching to gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
if: failure()
id: slack
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream

.gitignore (vendored): 1 changed line

@@ -1,5 +1,4 @@
/target
/bindings/python/neon-dev-utils/target
/tmp_check
/tmp_install
/tmp_check_cli

Cargo.lock (generated): 24 changed lines

@@ -48,9 +48,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.62"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1485d4d2cc45e7b201ee3767015c96faa5904387c9d87c6efdd0fb511f12d305"
checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"
dependencies = [
"backtrace",
]
@@ -1409,17 +1409,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "integration_tests"
version = "0.1.0"
dependencies = [
"anyhow",
"pg_bin",
"tokio",
"tokio-postgres",
"utils",
]
[[package]]
name = "ipnet"
version = "2.5.0"
@@ -1989,14 +1978,6 @@ dependencies = [
"indexmap",
]
[[package]]
name = "pg_bin"
version = "0.1.0"
dependencies = [
"tokio-postgres",
"utils",
]
[[package]]
name = "phf"
version = "0.10.1"
@@ -2288,7 +2269,6 @@ dependencies = [
"anyhow",
"async-trait",
"base64",
"bstr",
"bytes",
"clap 3.2.12",
"futures",


@@ -7,13 +7,8 @@ members = [
"safekeeper",
"workspace_hack",
"neon_local",
"integration_tests",
"libs/*",
]
exclude = [
"bindings/python/neon-dev-utils",
]
[profile.release]
# This is useful for profiling and, to some extent, debug.


@@ -1,6 +1,8 @@
# Build Postgres
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS pg-build
WORKDIR /home/nonroot
FROM neondatabase/rust:1.58 AS pg-build
WORKDIR /pg
USER root
COPY vendor/postgres vendor/postgres
COPY Makefile Makefile
@@ -9,30 +11,27 @@ ENV BUILD_TYPE release
RUN set -e \
&& mold -run make -j $(nproc) -s postgres \
&& rm -rf tmp_install/build \
&& tar -C tmp_install -czf /home/nonroot/postgres_install.tar.gz .
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS build
WORKDIR /home/nonroot
FROM neondatabase/rust:1.58 AS build
ARG GIT_VERSION=local
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be part of the same RUN since the cachepot daemon is killed at the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build --release \
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
&& cachepot -s
# Build final image
@@ -41,8 +40,8 @@ FROM debian:bullseye-slim
WORKDIR /data
RUN set -e \
&& apt update \
&& apt install -y \
&& apt-get update \
&& apt-get install -y \
libreadline-dev \
libseccomp-dev \
openssl \
@@ -51,14 +50,17 @@ RUN set -e \
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
COPY --from=pg-build /home/nonroot/tmp_install/ /usr/local/
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
COPY --from=pg-build /pg/tmp_install/ /usr/local/
COPY --from=pg-build /postgres_install.tar.gz /data/
COPY docker-entrypoint.sh /docker-entrypoint.sh
VOLUME ["/data"]
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]


@@ -1,25 +1,22 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS rust-build
WORKDIR /home/nonroot
FROM neondatabase/rust:1.58 AS rust-build
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
COPY . .
RUN set -e \
&& mold -run cargo build -p compute_tools --release \
&& sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
&& cachepot -s
# Final image that only has one binary
FROM debian:bullseye-slim
FROM debian:buster-slim
COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl


@@ -1,264 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "indoc"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8"
dependencies = [
"indoc-impl",
"proc-macro-hack",
]
[[package]]
name = "indoc-impl"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
"unindent",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "libc"
version = "0.2.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5"
[[package]]
name = "lock_api"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "neon-dev-utils"
version = "0.1.0"
dependencies = [
"pyo3",
]
[[package]]
name = "once_cell"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e"
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
name = "paste"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880"
dependencies = [
"paste-impl",
"proc-macro-hack",
]
[[package]]
name = "paste-impl"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6"
dependencies = [
"proc-macro-hack",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d41d50a7271e08c7c8a54cd24af5d62f73ee3a6f6a314215281ebdec421d5752"
dependencies = [
"cfg-if",
"indoc",
"libc",
"parking_lot",
"paste",
"pyo3-build-config",
"pyo3-macros",
"unindent",
]
[[package]]
name = "pyo3-build-config"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "779239fc40b8e18bc8416d3a37d280ca9b9fb04bda54b98037bb6748595c2410"
dependencies = [
"once_cell",
]
[[package]]
name = "pyo3-macros"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b247e8c664be87998d8628e86f282c25066165f1f8dda66100c48202fdb93a"
dependencies = [
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a8c2812c412e00e641d99eeb79dd478317d981d938aa60325dfa7157b607095"
dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "smallvec"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
[[package]]
name = "syn"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
[[package]]
name = "unindent"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58ee9362deb4a96cef4d437d1ad49cffc9b9e92d202b6995674e928ce684f112"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"


@@ -1,16 +0,0 @@
[package]
name = "neon-dev-utils"
version = "0.1.0"
edition = "2021"
[lib]
name = "neon_dev_utils"
# "cdylib" is necessary to produce a shared library for Python to import from.
#
# Downstream Rust code (including code in `bin/`, `examples/`, and `tests/`) will not be able
# to `use string_sum;` unless the "rlib" or "lib" crate type is also included, e.g.:
# crate-type = ["cdylib", "rlib"]
crate-type = ["cdylib"]
[dependencies]
pyo3 = { version = "0.15.1", features = ["extension-module"] }


@@ -1,31 +0,0 @@
[[package]]
name = "maturin"
version = "0.13.2"
description = "Build and publish crates with pyo3, rust-cpython and cffi bindings as well as rust binaries as python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
[package.extras]
zig = ["ziglang (>=0.9.0,<0.10.0)"]
patchelf = ["patchelf"]
[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "4e177514d6cf74b58bcd8ca30ef300c10a833b3e6b1d809aa57337ee20efeb47"
[metadata.files]
maturin = []
tomli = []


@@ -1,15 +0,0 @@
[tool.poetry]
name = "neon-dev-utils"
version = "0.1.0"
description = "Python bindings for common neon development utils"
authors = ["Your Name <you@example.com>"]
[tool.poetry.dependencies]
python = "^3.10"
[tool.poetry.dev-dependencies]
maturin = "^0.13.2"
[build-system]
requires = ["maturin>=0.13.2", "poetry-core>=1.0.0"]
build-backend = "maturin"


@@ -1,17 +0,0 @@
use pyo3::prelude::*;
/// Formats the sum of two numbers as string.
#[pyfunction]
fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
Ok((a + b).to_string())
}
/// A Python module implemented in Rust. The name of this function must match
/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
/// import the module.
#[pymodule]
fn neon_dev_utils(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
Ok(())
}


@@ -24,7 +24,7 @@ use crate::safekeeper::SafekeeperNode;
// This data structure represents the neon_local CLI config
//
// It is deserialized from the .neon/config file, or the config file passed
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
#[serde_as]
@@ -320,7 +320,7 @@ impl LocalEnv {
if !repopath.exists() {
bail!(
"Zenith config is not found in {}. You need to run 'neon_local init' first",
"Zenith config is not found in {}. You need to run 'zenith init' first",
repopath.to_str().unwrap()
);
}
@@ -337,12 +337,12 @@ impl LocalEnv {
}
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
# and safekeeper node. It is read by the 'neon_local' command-line
# and safekeeper node. It is read by the 'zenith' command-line
# utility.
"#
.to_string();
@@ -382,7 +382,7 @@ impl LocalEnv {
}
//
// Initialize a new Neon repository
// Initialize a new Zenith repository
//
pub fn init(&mut self) -> anyhow::Result<()> {
// check if config already exists


@@ -1,4 +1,5 @@
use std::io::Write;
use std::net::TcpStream;
use std::path::PathBuf;
use std::process::Command;
use std::sync::Arc;
@@ -51,7 +52,7 @@ impl ResponseErrorMessageExt for Response {
Err(SafekeeperHttpError::Response(
match self.json::<HttpErrorBody>() {
Ok(err_body) => format!("Error: {}", err_body.msg),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
Err(_) => format!("Http error ({}) at {url}.", status.as_u16()),
},
))
}
@@ -240,23 +241,37 @@ impl SafekeeperNode {
),
}
// Wait until process is gone
for i in 0..600 {
let signal = None; // Send no signal, just get the error code
match kill(pid, signal) {
Ok(_) => (), // Process exists, keep waiting
Err(Errno::ESRCH) => {
// Process not found, we're done
println!("done!");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
};
let address = connection_address(&self.pg_connection_config);
// TODO Remove this "timeout" and handle it on caller side instead.
// Shutting down may take a long time,
// if safekeeper flushes a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
if err.kind() != io::ErrorKind::ConnectionRefused {
eprintln!("\nSafekeeper connection failed with error: {err}");
}
}
}
if tcp_stopped {
// Also check status on the HTTP port
match self.check_status() {
Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
println!("done!");
return Ok(());
}
Err(err) => {
eprintln!("\nSafekeeper status check failed with error: {err}");
return Ok(());
}
Ok(()) => {
// keep waiting
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();


@@ -1,8 +1,9 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Write};
use std::net::TcpStream;
use std::num::NonZeroU64;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::process::Command;
use std::time::Duration;
use std::{io, result, thread};
@@ -102,19 +103,23 @@ impl PageServerNode {
/// Construct libpq connection string for connecting to the pageserver.
fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
format!("postgresql://no_user:{password}@{listen_addr}/no_db")
format!("postgresql://no_user:{}@{}/no_db", password, listen_addr)
.parse()
.unwrap()
}
pub fn initialize(
pub fn init(
&self,
create_tenant: Option<ZTenantId>,
initial_timeline_id: Option<ZTimelineId>,
config_overrides: &[&str],
) -> anyhow::Result<ZTimelineId> {
let mut cmd = Command::new(self.env.pageserver_bin()?);
let id = format!("id={}", self.env.pageserver.id);
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotes etc.
let base_data_dir_param = self.env.base_data_dir.display().to_string();
let pg_distrib_dir_param =
format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
@@ -134,52 +139,67 @@ impl PageServerNode {
.collect::<Vec<_>>()
.join(",")
);
let mut args = Vec::with_capacity(20);
args.push("--init");
args.extend(["-D", &base_data_dir_param]);
args.extend(["-c", &pg_distrib_dir_param]);
args.extend(["-c", &authg_type_param]);
args.extend(["-c", &listen_http_addr_param]);
args.extend(["-c", &listen_pg_addr_param]);
args.extend(["-c", &broker_endpoints_param]);
args.extend(["-c", &id]);
let broker_etcd_prefix_param = self
.env
.etcd_broker
.broker_etcd_prefix
.as_ref()
.map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
let mut init_config_overrides = config_overrides.to_vec();
init_config_overrides.push(&id);
init_config_overrides.push(&pg_distrib_dir_param);
init_config_overrides.push(&authg_type_param);
init_config_overrides.push(&listen_http_addr_param);
init_config_overrides.push(&listen_pg_addr_param);
init_config_overrides.push(&broker_endpoints_param);
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
init_config_overrides.push(broker_etcd_prefix_param);
args.extend(["-c", broker_etcd_prefix_param]);
}
for config_override in config_overrides {
args.extend(["-c", config_override]);
}
if self.env.pageserver.auth_type != AuthType::Trust {
init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
args.extend([
"-c",
"auth_validation_public_key_path='auth_public_key.pem'",
]);
}
self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
let init_result = self
.try_init_timeline(create_tenant, initial_timeline_id)
.context("Failed to create initial tenant and timeline for pageserver");
match &init_result {
Ok(initial_timeline_id) => {
println!("Successfully initialized timeline {initial_timeline_id}")
}
Err(e) => eprintln!("{e:#}"),
let create_tenant = create_tenant.map(|id| id.to_string());
if let Some(tenant_id) = create_tenant.as_deref() {
args.extend(["--create-tenant", tenant_id])
}
self.stop(false)?;
init_result
}
fn try_init_timeline(
&self,
new_tenant_id: Option<ZTenantId>,
new_timeline_id: Option<ZTimelineId>,
) -> anyhow::Result<ZTimelineId> {
let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
let initial_timeline_info =
self.timeline_create(initial_tenant_id, new_timeline_id, None, None)?;
Ok(initial_timeline_info.timeline_id)
let initial_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
let initial_timeline_id_string = initial_timeline_id.to_string();
args.extend(["--initial-timeline-id", &initial_timeline_id_string]);
let cmd_with_args = cmd.args(args);
let init_output = fill_rust_env_vars(cmd_with_args)
.output()
.with_context(|| {
format!("failed to init pageserver with command {:?}", cmd_with_args)
})?;
if !init_output.status.success() {
bail!(
"init invocation failed, {}\nStdout: {}\nStderr: {}",
init_output.status,
String::from_utf8_lossy(&init_output.stdout),
String::from_utf8_lossy(&init_output.stderr)
);
}
// echo the captured output of the init command
println!("{}", String::from_utf8_lossy(&init_output.stdout));
Ok(initial_timeline_id)
}
pub fn repo_path(&self) -> PathBuf {
@@ -191,35 +211,15 @@ impl PageServerNode {
}
pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
self.start_node(config_overrides, &self.repo_path(), false)
}
fn start_node(
&self,
config_overrides: &[&str],
datadir: &Path,
update_config: bool,
) -> anyhow::Result<()> {
println!(
print!(
"Starting pageserver at '{}' in '{}'",
connection_address(&self.pg_connection_config),
datadir.display()
self.repo_path().display()
);
io::stdout().flush()?;
io::stdout().flush().unwrap();
let mut args = vec![
"-D",
datadir.to_str().with_context(|| {
format!(
"Datadir path '{}' cannot be represented as a unicode string",
datadir.display()
)
})?,
];
if update_config {
args.push("--update-config");
}
let repo_path = self.repo_path();
let mut args = vec!["-D", repo_path.to_str().unwrap()];
for config_override in config_overrides {
args.extend(["-c", config_override]);
@@ -231,8 +231,8 @@ impl PageServerNode {
if !filled_cmd.status()?.success() {
bail!(
"Pageserver failed to start. See console output and '{}' for details.",
datadir.join("pageserver.log").display()
"Pageserver failed to start. See '{}' for details.",
self.repo_path().join("pageserver.log").display()
);
}
@@ -241,7 +241,7 @@ impl PageServerNode {
const RETRIES: i8 = 15;
for retries in 1..RETRIES {
match self.check_status() {
Ok(()) => {
Ok(_) => {
println!("\nPageserver started");
return Ok(());
}
@@ -255,18 +255,21 @@ impl PageServerNode {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!("Pageserver not responding yet, err {err} retrying ({retries})...");
println!(
"Pageserver not responding yet, err {} retrying ({})...",
err, retries
);
}
}
PageserverHttpError::Response(msg) => {
bail!("pageserver failed to start: {msg} ")
bail!("pageserver failed to start: {} ", msg)
}
}
thread::sleep(Duration::from_secs(1));
}
}
}
bail!("pageserver failed to start in {RETRIES} seconds");
bail!("pageserver failed to start in {} seconds", RETRIES);
}
///
@@ -296,32 +299,51 @@ impl PageServerNode {
match kill(pid, sig) {
Ok(_) => (),
Err(Errno::ESRCH) => {
println!("Pageserver with pid {pid} does not exist, but a PID file was found");
println!(
"Pageserver with pid {} does not exist, but a PID file was found",
pid
);
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {pid}: {}",
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
}
// Wait until process is gone
for i in 0..600 {
let signal = None; // Send no signal, just get the error code
match kill(pid, signal) {
Ok(_) => (), // Process exists, keep waiting
Err(Errno::ESRCH) => {
// Process not found, we're done
println!("done!");
return Ok(());
}
Err(err) => bail!(
"Failed to send signal to pageserver with pid {}: {}",
pid,
err.desc()
),
};
let address = connection_address(&self.pg_connection_config);
// TODO Remove this "timeout" and handle it on caller side instead.
// Shutting down may take a long time,
// if pageserver checkpoints a lot of data
let mut tcp_stopped = false;
for i in 0..600 {
if !tcp_stopped {
if let Err(err) = TcpStream::connect(&address) {
tcp_stopped = true;
if err.kind() != io::ErrorKind::ConnectionRefused {
eprintln!("\nPageserver connection failed with error: {err}");
}
}
}
if tcp_stopped {
// Also check status on the HTTP port
match self.check_status() {
Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
println!("done!");
return Ok(());
}
Err(err) => {
eprintln!("\nPageserver status check failed with error: {err}");
return Ok(());
}
Ok(()) => {
// keep waiting
}
}
}
if i % 10 == 0 {
print!(".");
io::stdout().flush().unwrap();
@@ -329,13 +351,13 @@ impl PageServerNode {
thread::sleep(Duration::from_millis(100));
}
bail!("Failed to stop pageserver with pid {pid}");
bail!("Failed to stop pageserver with pid {}", pid);
}
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
println!("Pageserver query: '{sql}'");
println!("Pageserver query: '{}'", sql);
client.simple_query(sql).unwrap()
}
@@ -370,8 +392,9 @@ impl PageServerNode {
&self,
new_tenant_id: Option<ZTenantId>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<ZTenantId> {
self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
) -> anyhow::Result<Option<ZTenantId>> {
let tenant_id_string = self
.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
.json(&TenantCreateRequest {
new_tenant_id,
checkpoint_distance: settings
@@ -410,16 +433,18 @@ impl PageServerNode {
})
.send()?
.error_from_body()?
.json::<Option<String>>()
.with_context(|| {
format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
})?
.context("No tenant id was found in the tenant creation response")
.and_then(|tenant_id_string| {
tenant_id_string.parse().with_context(|| {
format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
.json::<Option<String>>()?;
tenant_id_string
.map(|id| {
id.parse().with_context(|| {
format!(
"Failed to parse tennat creation response as tenant id: {}",
id
)
})
})
.transpose()
}
pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
@@ -490,27 +515,22 @@ impl PageServerNode {
new_timeline_id: Option<ZTimelineId>,
ancestor_start_lsn: Option<Lsn>,
ancestor_timeline_id: Option<ZTimelineId>,
) -> anyhow::Result<TimelineInfo> {
self.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,
})
.send()?
.error_from_body()?
.json::<Option<TimelineInfo>>()
.with_context(|| {
format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
})?
.with_context(|| {
format!(
"No timeline id was found in the timeline creation response for tenant {tenant_id}"
) -> anyhow::Result<Option<TimelineInfo>> {
let timeline_info_response = self
.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)
})
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,
})
.send()?
.error_from_body()?
.json::<Option<TimelineInfo>>()?;
Ok(timeline_info_response)
}
/// Import a basebackup prepared using either:

docker-entrypoint.sh (new executable file): 24 changed lines

@@ -0,0 +1,24 @@
#!/bin/sh
set -eux
pageserver_id_param="${NODE_ID:-10}"
broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
if [ "$broker_endpoints_param" != "absent" ]; then
broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
else
broker_endpoints_param=''
fi
remote_storage_param="${REMOTE_STORAGE:-}"
if [ "$1" = 'pageserver' ]; then
if [ ! -d "/data/tenants" ]; then
echo "Initializing pageserver data directory"
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=${pageserver_id_param}" $broker_endpoints_param $remote_storage_param
fi
echo "Staring pageserver at 0.0.0.0:6400"
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
else
"$@"
fi
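A minimal sketch of running the image with this entrypoint; the image tag, broker endpoint, and volume name are placeholders rather than values from the diff. On first start (no /data/tenants yet) the entrypoint initializes the data directory before launching the pageserver:

docker run --rm \
    -e NODE_ID=10 \
    -e BROKER_ENDPOINT=http://etcd:2379 \
    -v pageserver-data:/data \
    -p 6400:6400 -p 9898:9898 \
    neondatabase/neon:latest pageserver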


@@ -1,13 +0,0 @@
[package]
name = "integration_tests"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
utils = { path = "../libs/utils" }
pg_bin = { path = "../libs/pg_bin" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
anyhow = "1.0.62"


@@ -1,36 +0,0 @@
#[cfg(test)]
mod tests {
use pg_bin::PgDatadir;
use std::path::PathBuf;
use tokio_postgres::NoTls;
#[tokio::test]
async fn test_postgres_select_1() -> anyhow::Result<()> {
// Test setup
let output = PathBuf::from("/home/bojan/tmp/");
let pg_prefix = PathBuf::from("/home/bojan/src/neondatabase/neon/tmp_install/bin/");
// Init datadir
let pg_datadir_path = PathBuf::from("/home/bojan/tmp/t1/");
let pg_datadir = PgDatadir::new_initdb(pg_datadir_path, &pg_prefix, &output, true);
// Get a postgres
let postgres = pg_datadir.spawn_postgres(pg_prefix, output);
let conn_info = postgres.admin_conn_info();
// Get client, run connection
let (client, connection) = conn_info.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
// Run "select 1"
let rows = client.query("SELECT 'hello';", &[]).await?;
let value: &str = rows[0].get(0);
assert_eq!(value, "hello");
Ok(())
}
}

View File

@@ -1 +0,0 @@
mod basic;

View File

@@ -1,10 +0,0 @@
[package]
name = "pg_bin"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
utils = { path = "../utils" }

View File

@@ -1,106 +0,0 @@
//! Utils for running postgres binaries as subprocesses.
use std::{fs::{File, remove_dir_all}, path::PathBuf, process::{Child, Command}, time::Duration};
use std::io::Write;
use utils::command_extensions::NeonCommandExtensions;
pub struct PgDatadir {
path: PathBuf
}
impl PgDatadir {
pub fn new_initdb(
path: PathBuf,
pg_prefix: &PathBuf,
command_output_dir: &PathBuf,
remove_if_exists: bool
) -> Self {
if remove_if_exists {
remove_dir_all(path.clone()).ok();
}
let status = Command::new(pg_prefix.join("initdb"))
.arg("-D")
.arg(path.clone())
.capture_to_files(command_output_dir.clone(), "initdb")
.status()
.expect("failed to get status");
assert!(status.success());
Self {
path
}
}
pub fn load_existing(path: PathBuf) -> Self {
Self {
path
}
}
pub fn path(&self) -> PathBuf {
self.path.clone()
}
pub fn spawn_postgres(self, pg_prefix: PathBuf, command_output_dir: PathBuf) -> LocalPostgres {
let port = 54729;
// Write conf
// TODO don't override existing conf
// - instead infer port from conf
let mut conf = File::create(self.path().join("postgresql.conf")).expect("failed to create file");
writeln!(&mut conf, "port = {}", port).expect("failed to write conf");
let process = Command::new(pg_prefix.join("postgres"))
.env("PGDATA", self.path())
.capture_to_files(command_output_dir, "pg")
.spawn()
.expect("postgres failed to spawn");
// Wait until ready. TODO improve this
std::thread::sleep(Duration::from_millis(300));
LocalPostgres {
datadir: self,
port: 54729,
process,
}
}
}
pub struct LocalPostgres {
datadir: PgDatadir,
port: u16,
process: Child,
}
impl LocalPostgres {
pub fn admin_conn_info(&self) -> tokio_postgres::Config {
// I don't like this, but I don't know what else to do
let whoami = Command::new("whoami").output().unwrap().stdout;
let user = String::from_utf8_lossy(&whoami);
let user = user.trim();
let mut config = tokio_postgres::Config::new();
config
.host("127.0.0.1")
.port(self.port)
.dbname("postgres")
.user(&user);
config
}
pub fn stop(mut self) -> PgDatadir {
self.process.kill().expect("failed to kill child");
PgDatadir {
path: self.datadir.path.clone()
}
}
}
impl Drop for LocalPostgres {
fn drop(&mut self) {
self.process.kill().expect("failed to kill child");
}
}

View File

@@ -44,7 +44,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=bindgen_deps.h");
println!("cargo:rerun-if-changed=pg_control_ffi.h");
// Finding the location of C headers for the Postgres server:
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
@@ -88,9 +88,9 @@ fn main() {
// the resulting bindings.
let bindings = bindgen::Builder::default()
//
// All the needed PostgreSQL headers are included from 'bindgen_deps.h'
// All the needed PostgreSQL headers are included from 'pg_control_ffi.h'
//
.header("bindgen_deps.h")
.header("pg_control_ffi.h")
//
// Tell cargo to invalidate the built crate whenever any of the
// included header files changed.

View File

@@ -23,7 +23,7 @@
//! information. You can use PostgreSQL's pg_controldata utility to view its
//! contents.
//!
use super::bindings::{ControlFileData, PG_CONTROL_FILE_SIZE};
use crate::{ControlFileData, PG_CONTROL_FILE_SIZE};
use anyhow::{bail, Result};
use bytes::{Bytes, BytesMut};

View File

@@ -7,62 +7,21 @@
// https://github.com/rust-lang/rust-bindgen/issues/1651
#![allow(deref_nullptr)]
use serde::{Deserialize, Serialize};
use utils::lsn::Lsn;
macro_rules! postgres_ffi {
($version:ident) => {
#[path = "."]
pub mod $version {
// fixme: does this have to be 'pub'?
pub mod bindings {
// bindgen generates bindings for a lot of stuff we don't need
#![allow(dead_code)]
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
use serde::{Deserialize, Serialize};
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
}
pub mod controlfile_utils;
pub mod nonrelfile_utils;
pub mod pg_constants;
pub mod relfile_utils;
pub mod waldecoder;
pub mod xlog_utils;
// Re-export some symbols from bindings
pub use bindings::DBState_DB_SHUTDOWNED;
pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
}
};
}
postgres_ffi!(v14);
// Export some widely used datatypes that are unlikely to change across Postgres versions
pub use v14::bindings::{uint32, uint64, Oid};
pub use v14::bindings::{BlockNumber, OffsetNumber};
pub use v14::bindings::{MultiXactId, TransactionId};
// Likewise for these, although the assumption that these don't change is a little more iffy.
pub use v14::bindings::{MultiXactOffset, MultiXactStatus};
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
// --with-segsize=SEGSIZE, but assume the defaults for now.
pub const BLCKSZ: u16 = 8192;
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
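// With the defaults above this works out to 1 GiB / 8192 B = 131072 blocks per relation segment file.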
pub const XLOG_BLCKSZ: usize = 8192;
// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
//
// NOTE: this is not to be confused with Neon timelines; different concept!
//
// It's a shaky assumption that it's always 1. We might import a
// PostgreSQL data directory that has gone through timeline bumps,
// for example. FIXME later.
pub const PG_TLI: u32 = 1;
pub mod controlfile_utils;
pub mod nonrelfile_utils;
pub mod pg_constants;
pub mod relfile_utils;
pub mod waldecoder;
pub mod xlog_utils;
// See TransactionIdIsNormal in transam.h
pub const fn transaction_id_is_normal(id: TransactionId) -> bool {
id > v14::pg_constants::FIRST_NORMAL_TRANSACTION_ID
id > pg_constants::FIRST_NORMAL_TRANSACTION_ID
}
// See TransactionIdPrecedes in transam.c

View File

@@ -1,12 +1,11 @@
//!
//! Common utilities for dealing with PostgreSQL non-relation files.
//!
use crate::transaction_id_precedes;
use super::pg_constants;
use crate::{pg_constants, transaction_id_precedes};
use bytes::BytesMut;
use log::*;
use super::bindings::MultiXactId;
use crate::MultiXactId;
pub fn transaction_id_set_status(xid: u32, status: u8, page: &mut BytesMut) {
trace!(

View File

@@ -7,8 +7,7 @@
//! comments on them.
//!
use super::bindings::PageHeaderData;
use crate::BLCKSZ;
use crate::PageHeaderData;
//
// From pg_tablespace_d.h
@@ -32,6 +31,11 @@ pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
// --with-segsize=SEGSIZE, but assume the defaults for now.
pub const BLCKSZ: u16 = 8192;
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
//
// From bufpage.h
//
@@ -209,6 +213,7 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
/* FIXME: pageserver should request wal_seg_size from compute node */
pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
pub const XLOG_BLCKSZ: usize = 8192;
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
pub const XLP_LONG_HEADER: u16 = 0x0002;

View File

@@ -1,11 +1,11 @@
//!
//! Common utilities for dealing with PostgreSQL relation files.
//!
use super::pg_constants;
use crate::pg_constants;
use once_cell::sync::OnceCell;
use regex::Regex;
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
pub enum FilePathError {
#[error("invalid relation fork name")]
InvalidForkName,

View File

@@ -10,7 +10,10 @@
//!
use super::pg_constants;
use super::xlog_utils::*;
use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC};
use super::XLogLongPageHeaderData;
use super::XLogPageHeaderData;
use super::XLogRecord;
use super::XLOG_PAGE_MAGIC;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use crc32c::*;
use log::*;

View File

@@ -7,33 +7,31 @@
// have been named the same as the corresponding PostgreSQL functions instead.
//
use crc32c::crc32c_append;
use super::bindings::{
CheckPoint, FullTransactionId, XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord,
XLOG_PAGE_MAGIC,
};
use super::pg_constants;
use super::pg_constants::WAL_SEGMENT_SIZE;
use crate::v14::waldecoder::WalStreamDecoder;
use crate::PG_TLI;
use crate::{uint32, uint64, Oid};
use crate::pg_constants;
use crate::CheckPoint;
use crate::FullTransactionId;
use crate::XLogLongPageHeaderData;
use crate::XLogPageHeaderData;
use crate::XLogRecord;
use crate::XLOG_PAGE_MAGIC;
use crate::pg_constants::WAL_SEGMENT_SIZE;
use anyhow::{anyhow, bail, ensure};
use byteorder::{ByteOrder, LittleEndian};
use bytes::BytesMut;
use bytes::{Buf, Bytes};
use crc32c::*;
use log::*;
use serde::Serialize;
use std::fs::File;
use std::cmp::max;
use std::cmp::min;
use std::fs::{self, File};
use std::io::prelude::*;
use std::io::ErrorKind;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use utils::bin_ser::DeserializeError;
use utils::bin_ser::SerializeError;
use utils::const_assert;
use utils::lsn::Lsn;
pub const XLOG_FNAME_LEN: usize = 24;
@@ -49,6 +47,9 @@ pub const XLOG_SIZE_OF_XLOG_RECORD: usize = std::mem::size_of::<XLogRecord>();
#[allow(clippy::identity_op)]
pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
// PG timeline is always 1, changing it doesn't have useful meaning in Zenith.
pub const PG_TLI: u32 = 1;
pub type XLogRecPtr = u64;
pub type TimeLineID = u32;
pub type TimestampTz = i64;
@@ -79,12 +80,12 @@ pub fn XLogSegNoOffsetToRecPtr(
#[allow(non_snake_case)]
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
format!(
return format!(
"{:>08X}{:>08X}{:>08X}",
tli,
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
)
);
}
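// A small worked example of the naming scheme above (an illustrative sketch, assuming the
// default 16 MiB wal_seg_size): with 16 MiB segments there are 0x1_0000_0000 / 0x100_0000 = 256
// segments per "xlog id", so segment 2 on timeline 1 lands in xlog id 0 at offset 2.
#[cfg(test)]
mod xlog_file_name_example {
    use super::*;

    #[test]
    fn segment_two_on_timeline_one() {
        assert_eq!(
            XLogFileName(1, 2, 16 * 1024 * 1024),
            "000000010000000000000002"
        );
    }
}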
#[allow(non_snake_case)]
@@ -139,93 +140,338 @@ pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
}
}
// Returns (aligned) end_lsn of the last record in data_dir with WAL segments.
// start_lsn must point to some previously known record boundary (beginning of
// the next record). If no valid record after is found, start_lsn is returned
// back.
pub fn find_end_of_wal(
/// Return the offset of the last valid record in segment `segno`, starting the
/// scan at `start_offset`. Returns `start_offset` if no records are found.
fn find_end_of_wal_segment(
data_dir: &Path,
segno: XLogSegNo,
tli: TimeLineID,
wal_seg_size: usize,
start_lsn: Lsn, // start reading WAL at this point; must point at a record boundary.
) -> anyhow::Result<Lsn> {
let mut result = start_lsn;
let mut curr_lsn = start_lsn;
start_offset: usize, // start reading at this point
) -> anyhow::Result<u32> {
// step back to the beginning of the page to read it in...
let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
let mut skipping_first_contrecord: bool = false;
let mut contlen: usize = 0;
let mut xl_crc: u32 = 0;
let mut crc: u32 = 0;
let mut rec_offs: usize = 0;
let mut buf = [0u8; XLOG_BLCKSZ];
let mut decoder = WalStreamDecoder::new(start_lsn);
let file_name = XLogFileName(tli, segno, wal_seg_size);
let mut last_valid_rec_pos: usize = start_offset; // assume a new record begins at the given start_offset
let mut file = File::open(data_dir.join(file_name.clone() + ".partial"))?;
file.seek(SeekFrom::Start(offs as u64))?;
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];
// loop over segments
loop {
let segno = curr_lsn.segment_number(wal_seg_size);
let seg_file_name = XLogFileName(PG_TLI, segno, wal_seg_size);
let seg_file_path = data_dir.join(seg_file_name);
match open_wal_segment(&seg_file_path)? {
None => {
// no more segments
info!(
"find_end_of_wal reached end at {:?}, segment {:?} doesn't exist",
result, seg_file_path
trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
while offs < wal_seg_size {
// we are at the beginning of the page; read it in
if offs % XLOG_BLCKSZ == 0 {
trace!("offs=0x{:x}: new page", offs);
let bytes_read = file.read(&mut buf)?;
if bytes_read != buf.len() {
bail!(
"failed to read {} bytes from {} at {}",
XLOG_BLCKSZ,
file_name,
offs
);
return Ok(result);
}
Some(mut segment) => {
let seg_offs = curr_lsn.segment_offset(wal_seg_size);
segment.seek(SeekFrom::Start(seg_offs as u64))?;
// loop inside segment
loop {
let bytes_read = segment.read(&mut buf)?;
if bytes_read == 0 {
break; // EOF
}
curr_lsn += bytes_read as u64;
decoder.feed_bytes(&buf[0..bytes_read]);
// advance result past all completely read records
loop {
match decoder.poll_decode() {
Ok(Some(record)) => result = record.0,
Err(e) => {
info!(
"find_end_of_wal reached end at {:?}, decode error: {:?}",
result, e
);
return Ok(result);
}
Ok(None) => break, // need more data
}
let xlp_magic = LittleEndian::read_u16(&buf[0..2]);
let xlp_info = LittleEndian::read_u16(&buf[2..4]);
let xlp_rem_len = LittleEndian::read_u32(&buf[XLP_REM_LEN_OFFS..XLP_REM_LEN_OFFS + 4]);
trace!(
" xlp_magic=0x{:x}, xlp_info=0x{:x}, xlp_rem_len={}",
xlp_magic,
xlp_info,
xlp_rem_len
);
// this is expected in current usage when valid WAL starts after page header
if xlp_magic != XLOG_PAGE_MAGIC as u16 {
trace!(
" invalid WAL file {}.partial magic {} at {:?}",
file_name,
xlp_magic,
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
);
}
if offs == 0 {
offs += XLOG_SIZE_OF_XLOG_LONG_PHD;
if (xlp_info & XLP_FIRST_IS_CONTRECORD) != 0 {
trace!(" first record is contrecord");
skipping_first_contrecord = true;
contlen = xlp_rem_len as usize;
if offs < start_offset {
// Pre-condition failed: the beginning of the segment is unexpectedly corrupted.
ensure!(start_offset - offs >= contlen,
"start_offset is in the middle of the first record (which happens to be a contrecord), \
expected to be on a record boundary. Is the beginning of the segment corrupted?");
contlen = 0;
// keep skipping_first_contrecord to avoid counting the contrecord as valid, we did not check it.
}
} else {
trace!(" first record is not contrecord");
}
} else {
offs += XLOG_SIZE_OF_XLOG_SHORT_PHD;
}
// ... and step forward again if asked
trace!(" skipped header to 0x{:x}", offs);
offs = max(offs, start_offset);
// beginning of the next record
} else if contlen == 0 {
let page_offs = offs % XLOG_BLCKSZ;
let xl_tot_len = LittleEndian::read_u32(&buf[page_offs..page_offs + 4]) as usize;
trace!("offs=0x{:x}: new record, xl_tot_len={}", offs, xl_tot_len);
if xl_tot_len == 0 {
info!(
"find_end_of_wal_segment reached zeros at {:?}, last records ends at {:?}",
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
Lsn(XLogSegNoOffsetToRecPtr(
segno,
last_valid_rec_pos as u32,
wal_seg_size
))
);
break; // zeros, reached the end
}
if skipping_first_contrecord {
skipping_first_contrecord = false;
trace!(" first contrecord has been just completed");
} else {
trace!(
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
last_valid_rec_pos,
offs
);
last_valid_rec_pos = offs;
}
offs += 4;
rec_offs = 4;
contlen = xl_tot_len - 4;
trace!(
" reading rec_hdr[0..4] <-- [0x{:x}; 0x{:x})",
page_offs,
page_offs + 4
);
rec_hdr[0..4].copy_from_slice(&buf[page_offs..page_offs + 4]);
} else {
// we're continuing a record, possibly from previous page.
let page_offs = offs % XLOG_BLCKSZ;
let pageleft = XLOG_BLCKSZ - page_offs;
// read the rest of the record, or as much as fits on this page.
let n = min(contlen, pageleft);
trace!(
"offs=0x{:x}, record continuation, pageleft={}, contlen={}",
offs,
pageleft,
contlen
);
// fill rec_hdr up to (but not including) the xl_crc field
trace!(
" rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
rec_offs,
XLOG_RECORD_CRC_OFFS,
XLOG_SIZE_OF_XLOG_RECORD
);
if rec_offs < XLOG_RECORD_CRC_OFFS {
let len = min(XLOG_RECORD_CRC_OFFS - rec_offs, n);
trace!(
" reading rec_hdr[{}..{}] <-- [0x{:x}; 0x{:x})",
rec_offs,
rec_offs + len,
page_offs,
page_offs + len
);
rec_hdr[rec_offs..rec_offs + len].copy_from_slice(&buf[page_offs..page_offs + len]);
}
if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
// All records are aligned on 8-byte boundary, so their 8-byte frames
// cannot be split between pages. As xl_crc is the last field,
// its content is always on the same page.
const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
// We should always start reading records on aligned boundaries, even in corrupt WAL,
// so if this condition fails it is likely a bug. Since the bug would be localized to
// this function, we report a failure instead of crashing.
ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
trace!(
" reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
crc_offs,
crc_offs + 4,
xl_crc
);
crc = crc32c_append(0, &buf[crc_offs + 4..page_offs + n]);
trace!(
" initializing crc: [0x{:x}; 0x{:x}); crc = 0x{:x}",
crc_offs + 4,
page_offs + n,
crc
);
} else if rec_offs > XLOG_RECORD_CRC_OFFS {
// As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
let old_crc = crc;
crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
trace!(
" appending to crc: [0x{:x}; 0x{:x}); 0x{:x} --> 0x{:x}",
page_offs,
page_offs + n,
old_crc,
crc
);
} else {
// Correct because of the way conditions are written above.
assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
// If `skipping_first_contrecord == true`, we may be reading from the middle of a record
// which started in the previous segment. Hence there is no point in validating the header.
if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
info!(
"Curiously corrupted WAL: a record stops inside the header; \
offs=0x{:x}, record continuation, pageleft={}, contlen={}",
offs, pageleft, contlen
);
break;
}
// Do nothing: we are still reading the header. It is accounted for in the CRC at the end of the record.
}
rec_offs += n;
offs += n;
contlen -= n;
if contlen == 0 {
trace!(" record completed at 0x{:x}", offs);
crc = crc32c_append(crc, &rec_hdr);
offs = (offs + 7) & !7; // pad to an 8-byte boundary
trace!(
" padded offs to 0x{:x}, crc is {:x}, expected crc is {:x}",
offs,
crc,
xl_crc
);
if skipping_first_contrecord {
// do nothing; the flag will be cleared on the next iteration, when we read a new record
trace!(" first contrecord has just been completed");
} else if crc == xl_crc {
// record is valid, advance the result to its end (with
// alignment to the next record taken into account)
trace!(
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
last_valid_rec_pos,
offs
);
last_valid_rec_pos = offs;
} else {
info!(
"CRC mismatch {} vs {} at {}",
crc, xl_crc, last_valid_rec_pos
);
break;
}
}
}
}
trace!("last_valid_rec_pos=0x{:x}", last_valid_rec_pos);
Ok(last_valid_rec_pos as u32)
}
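// The `(offs + 7) & !7` padding used above rounds an offset up to the next 8-byte
// (MAXALIGN) boundary, the alignment PostgreSQL uses for WAL records. A tiny
// illustrative check of the formula (the helper name is local to this sketch):
#[cfg(test)]
mod record_alignment_example {
    fn align8(offs: usize) -> usize {
        (offs + 7) & !7
    }

    #[test]
    fn rounds_up_to_eight_byte_boundary() {
        assert_eq!(align8(0x125), 0x128); // mid-frame offsets round up
        assert_eq!(align8(0x128), 0x128); // already aligned offsets stay put
        assert_eq!(align8(1), 8);
    }
}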
// Open .partial or full WAL segment file, if present.
fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
let mut partial_path = seg_file_path.to_owned();
partial_path.set_extension("partial");
match File::open(partial_path) {
Ok(file) => Ok(Some(file)),
Err(e) => match e.kind() {
ErrorKind::NotFound => {
// .partial not found, try full
match File::open(seg_file_path) {
Ok(file) => Ok(Some(file)),
Err(e) => match e.kind() {
ErrorKind::NotFound => Ok(None),
_ => Err(e.into()),
},
}
}
_ => Err(e.into()),
},
///
/// Scan a directory that contains PostgreSQL WAL files for the end of WAL.
/// If `precise`, returns the end LSN (essentially the next insertion point);
/// otherwise, the start of the last segment.
/// Returns (0, 0) if there is no WAL.
///
pub fn find_end_of_wal(
data_dir: &Path,
wal_seg_size: usize,
precise: bool,
start_lsn: Lsn, // start reading WAL at this point or later
) -> anyhow::Result<(XLogRecPtr, TimeLineID)> {
let mut high_segno: XLogSegNo = 0;
let mut high_tli: TimeLineID = 0;
let mut high_ispartial = false;
for entry in fs::read_dir(data_dir)?.flatten() {
let ispartial: bool;
let entry_name = entry.file_name();
let fname = entry_name
.to_str()
.ok_or_else(|| anyhow!("Invalid file name"))?;
/*
* Check if the filename looks like an xlog file, or a .partial file.
*/
if IsXLogFileName(fname) {
ispartial = false;
} else if IsPartialXLogFileName(fname) {
ispartial = true;
} else {
continue;
}
let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
if !ispartial && entry.metadata()?.len() != wal_seg_size as u64 {
continue;
}
if segno > high_segno
|| (segno == high_segno && tli > high_tli)
|| (segno == high_segno && tli == high_tli && high_ispartial && !ispartial)
{
high_segno = segno;
high_tli = tli;
high_ispartial = ispartial;
}
}
if high_segno > 0 {
let mut high_offs = 0;
/*
* Move the starting pointer to the start of the next segment, if the
* highest one we saw was completed.
*/
if !high_ispartial {
high_segno += 1;
} else if precise {
/* otherwise locate last record in last partial segment */
if start_lsn.segment_number(wal_seg_size) > high_segno {
bail!(
"provided start_lsn {:?} is beyond highest segno {:?} available",
start_lsn,
high_segno,
);
}
let start_offset = if start_lsn.segment_number(wal_seg_size) == high_segno {
start_lsn.segment_offset(wal_seg_size)
} else {
0
};
high_offs = find_end_of_wal_segment(
data_dir,
high_segno,
high_tli,
wal_seg_size,
start_offset,
)?;
}
let high_ptr = XLogSegNoOffsetToRecPtr(high_segno, high_offs, wal_seg_size);
return Ok((high_ptr, high_tli));
}
Ok((0, 0))
}
pub fn main() {
let mut data_dir = PathBuf::new();
data_dir.push(".");
let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
println!("wal_end={:?}", wal_end);
let (wal_end, tli) = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, true, Lsn(0)).unwrap();
println!(
"wal_end={:>08X}{:>08X}, tli={}",
(wal_end >> 32) as u32,
wal_end as u32,
tli
);
}
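// A sketch of interpreting the pair returned by `find_end_of_wal` (illustrative only;
// this helper is hypothetical and `wal_dir` is just a placeholder): a (0, 0) result
// means no usable WAL was found, anything else can be wrapped into an `Lsn`.
pub fn end_of_wal_as_lsn(wal_dir: &Path) -> anyhow::Result<Option<Lsn>> {
    let (end_ptr, tli) = find_end_of_wal(wal_dir, WAL_SEGMENT_SIZE, true, Lsn(0))?;
    if end_ptr == 0 && tli == 0 {
        return Ok(None); // no WAL segments in the directory
    }
    Ok(Some(Lsn(end_ptr)))
}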
impl XLogRecord {
@@ -345,93 +591,11 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
Ok(seg_buf.freeze())
}
#[repr(C)]
#[derive(Serialize)]
struct XlLogicalMessage {
db_id: Oid,
transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
prefix_size: uint64,
message_size: uint64,
}
impl XlLogicalMessage {
pub fn encode(&self) -> Bytes {
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
}
/// Create a new WAL record for a non-transactional logical message.
/// Used for creating artificial WAL for tests, as a LogicalMessage
/// record is basically a no-op.
///
/// NOTE: This leaves the xl_prev field zero. The safekeeper and
/// pageserver tolerate that, but PostgreSQL does not.
pub fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
let mut prefix_bytes: Vec<u8> = Vec::with_capacity(prefix.len() + 1);
prefix_bytes.write_all(prefix.as_bytes()).unwrap();
prefix_bytes.push(0);
let message_bytes = message.as_bytes();
let logical_message = XlLogicalMessage {
db_id: 0,
transactional: 0,
prefix_size: prefix_bytes.len() as u64,
message_size: message_bytes.len() as u64,
};
let mainrdata = logical_message.encode();
let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
// only short mainrdata is supported for now
assert!(mainrdata_len <= 255);
let mainrdata_len = mainrdata_len as u8;
let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
data.extend_from_slice(&mainrdata);
data.extend_from_slice(&prefix_bytes);
data.extend_from_slice(message_bytes);
let total_len = XLOG_SIZE_OF_XLOG_RECORD + data.len();
let mut header = XLogRecord {
xl_tot_len: total_len as u32,
xl_xid: 0,
xl_prev: 0,
xl_info: 0,
xl_rmid: 21,
__bindgen_padding_0: [0u8; 2usize],
xl_crc: 0, // crc will be calculated later
};
let header_bytes = header.encode().expect("failed to encode header");
let crc = crc32c_append(0, &data);
let crc = crc32c_append(crc, &header_bytes[0..XLOG_RECORD_CRC_OFFS]);
header.xl_crc = crc;
let mut wal: Vec<u8> = Vec::new();
wal.extend_from_slice(&header.encode().expect("failed to encode header"));
wal.extend_from_slice(&data);
// The WAL start position must be aligned to 8 bytes;
// this adds padding for the next WAL record.
const PADDING: usize = 8;
let padding_rem = wal.len() % PADDING;
if padding_rem != 0 {
wal.resize(wal.len() + PADDING - padding_rem, 0);
}
wal
}
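// An illustrative check of the two properties documented above (a sketch kept separate
// from the existing test module below): the encoded buffer is at least one record header
// long and its length is padded to an 8-byte boundary for the next record.
#[cfg(test)]
mod logical_message_padding_example {
    use super::*;

    #[test]
    fn encoded_logical_message_is_eight_byte_aligned() {
        let wal = encode_logical_message("prefix", "message");
        assert!(wal.len() >= XLOG_SIZE_OF_XLOG_RECORD);
        assert_eq!(wal.len() % 8, 0);
    }
}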
#[cfg(test)]
mod tests {
use super::*;
use regex::Regex;
use std::cmp::min;
use std::fs;
use std::{env, str::FromStr};
use utils::const_assert;
fn init_logging() {
let _ = env_logger::Builder::from_env(
@@ -442,7 +606,10 @@ mod tests {
.try_init();
}
fn test_end_of_wal<C: wal_craft::Crafter>(test_name: &str) {
fn test_end_of_wal<C: wal_craft::Crafter>(
test_name: &str,
expected_end_of_wal_non_partial: Lsn,
) {
use wal_craft::*;
// Craft some WAL
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
@@ -463,7 +630,7 @@ mod tests {
.iter()
.map(|&lsn| u64::from(lsn).into())
.collect();
let expected_end_of_wal: Lsn = u64::from(expected_end_of_wal_partial).into();
let expected_end_of_wal_partial: Lsn = u64::from(expected_end_of_wal_partial).into();
srv.kill();
// Check find_end_of_wal on the initial WAL
@@ -475,10 +642,10 @@ mod tests {
.filter(|fname| IsXLogFileName(fname))
.max()
.unwrap();
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal);
for start_lsn in intermediate_lsns
.iter()
.chain(std::iter::once(&expected_end_of_wal))
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal_partial);
for start_lsn in std::iter::once(Lsn(0))
.chain(intermediate_lsns)
.chain(std::iter::once(expected_end_of_wal_partial))
{
// Erase all WAL before `start_lsn` to ensure it's not used by `find_end_of_wal`.
// We assume that `start_lsn` is non-decreasing.
@@ -493,7 +660,7 @@ mod tests {
}
let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
if seg_start_lsn > u64::from(*start_lsn) {
if seg_start_lsn > u64::from(start_lsn) {
continue;
}
let mut f = File::options().write(true).open(file.path()).unwrap();
@@ -501,12 +668,18 @@ mod tests {
f.write_all(
&ZEROS[0..min(
WAL_SEGMENT_SIZE,
(u64::from(*start_lsn) - seg_start_lsn) as usize,
(u64::from(start_lsn) - seg_start_lsn) as usize,
)],
)
.unwrap();
}
check_end_of_wal(&cfg, &last_segment, *start_lsn, expected_end_of_wal);
check_end_of_wal(
&cfg,
&last_segment,
start_lsn,
expected_end_of_wal_non_partial,
expected_end_of_wal_partial,
);
}
}
@@ -543,15 +716,18 @@ mod tests {
cfg: &wal_craft::Conf,
last_segment: &str,
start_lsn: Lsn,
expected_end_of_wal: Lsn,
expected_end_of_wal_non_partial: Lsn,
expected_end_of_wal_partial: Lsn,
) {
// Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
// let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
// info!(
// "find_end_of_wal returned wal_end={} with non-partial WAL segment",
// wal_end
// );
// assert_eq!(wal_end, expected_end_of_wal_non_partial);
let (wal_end, tli) =
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
let wal_end = Lsn(wal_end);
info!(
"find_end_of_wal returned (wal_end={}, tli={}) with non-partial WAL segment",
wal_end, tli
);
assert_eq!(wal_end, expected_end_of_wal_non_partial);
// Rename file to partial to actually find last valid lsn, then rename it back.
fs::rename(
@@ -559,12 +735,14 @@ mod tests {
cfg.wal_dir().join(format!("{}.partial", last_segment)),
)
.unwrap();
let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
let (wal_end, tli) =
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
let wal_end = Lsn(wal_end);
info!(
"find_end_of_wal returned wal_end={} with partial WAL segment",
wal_end
"find_end_of_wal returned (wal_end={}, tli={}) with partial WAL segment",
wal_end, tli
);
assert_eq!(wal_end, expected_end_of_wal);
assert_eq!(wal_end, expected_end_of_wal_partial);
fs::rename(
cfg.wal_dir().join(format!("{}.partial", last_segment)),
cfg.wal_dir().join(last_segment),
@@ -577,7 +755,10 @@ mod tests {
#[test]
pub fn test_find_end_of_wal_simple() {
init_logging();
test_end_of_wal::<wal_craft::Simple>("test_find_end_of_wal_simple");
test_end_of_wal::<wal_craft::Simple>(
"test_find_end_of_wal_simple",
"0/2000000".parse::<Lsn>().unwrap(),
);
}
#[test]
@@ -585,14 +766,17 @@ mod tests {
init_logging();
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
"0/3000000".parse::<Lsn>().unwrap(),
);
}
#[test]
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
pub fn test_find_end_of_wal_last_crossing_segment() {
init_logging();
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
"test_find_end_of_wal_last_crossing_segment",
"0/3000000".parse::<Lsn>().unwrap(),
);
}
@@ -625,15 +809,4 @@ mod tests {
checkpoint.update_next_xid(1024);
assert_eq!(checkpoint.nextXid.value, 2048);
}
#[test]
pub fn test_encode_logical_message() {
let expected = [
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255,
38, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114,
101, 102, 105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
];
let actual = encode_logical_message("prefix", "message");
assert_eq!(expected, actual[..]);
}
}

View File

@@ -4,8 +4,8 @@ use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::v14::pg_constants::WAL_SEGMENT_SIZE;
use postgres_ffi::v14::xlog_utils::{
use postgres_ffi::pg_constants::WAL_SEGMENT_SIZE;
use postgres_ffi::xlog_utils::{
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
};
use std::cmp::Ordering;

View File

@@ -265,7 +265,7 @@ mod tests {
use serde::{Deserialize, Serialize};
use std::io::Cursor;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct ShortStruct {
a: u8,
b: u32,
@@ -286,7 +286,7 @@ mod tests {
const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct LongMsg {
pub tag: u8,
pub blockpos: u32,

View File

@@ -1,21 +0,0 @@
use std::path::PathBuf;
use std::{os::unix::prelude::CommandExt, process::Command};
use std::fs::File;
pub trait NeonCommandExtensions: CommandExt {
fn capture_to_files(&mut self, path: PathBuf, name: &str) -> &mut Command;
}
impl NeonCommandExtensions for Command {
fn capture_to_files(&mut self, path: PathBuf, name: &str) -> &mut Command {
let out_file = File::create(path.join(format!("{}.out", name)))
.expect("can't make file");
let err_file = File::create(path.join(format!("{}.err", name)))
.expect("can't make file");
// TODO touch files?
self.stdout(out_file).stderr(err_file)
}
}

View File

@@ -10,10 +10,12 @@ pub fn get_request_param<'a>(
) -> Result<&'a str, ApiError> {
match request.param(param_name) {
Some(arg) => Ok(arg),
None => Err(ApiError::BadRequest(format!(
"no {} specified in path param",
param_name
))),
None => {
return Err(ApiError::BadRequest(format!(
"no {} specified in path param",
param_name
)))
}
}
}

View File

@@ -54,9 +54,6 @@ pub mod nonblock;
// Default signal handling
pub mod signals;
// Helpers for running commands
pub mod command_extensions;
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
///
/// we have several cases:

View File

@@ -18,7 +18,7 @@ pub const XLOG_BLCKSZ: u32 = 8192;
pub struct Lsn(pub u64);
/// We tried to parse an LSN from a string, but failed
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[derive(Debug, PartialEq, thiserror::Error)]
#[error("LsnParseError")]
pub struct LsnParseError;

View File

@@ -50,7 +50,7 @@ pub trait Handler {
/// PostgresBackend protocol state.
/// XXX: The order of the constructors matters.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub enum ProtoState {
Initialization,
Encrypted,

View File

@@ -930,7 +930,7 @@ impl<'a> BeMessage<'a> {
// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ReplicationFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,

View File

@@ -9,7 +9,7 @@ use std::sync::Mutex;
use std::time::Duration;
/// An error happened while waiting for a number
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[derive(Debug, PartialEq, thiserror::Error)]
#[error("SeqWaitError")]
pub enum SeqWaitError {
/// The wait timeout was reached

View File

@@ -4,7 +4,7 @@ use serde::Deserialize;
use std::io::Read;
use utils::bin_ser::LeSer;
#[derive(Debug, PartialEq, Eq, Deserialize)]
#[derive(Debug, PartialEq, Deserialize)]
pub struct HeaderData {
magic: u16,
info: u16,

View File

@@ -30,9 +30,6 @@ static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
});
#[test]
// [false-positive](https://github.com/rust-lang/rust-clippy/issues/9274),
// we do resize the vector, so it is modified after all
#[allow(clippy::read_zero_byte_vec)]
fn ssl() {
let (mut client_sock, server_sock) = make_tcp_pair();

View File

@@ -501,10 +501,10 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
// default_tenant_id was generated by the `env.init()` call above
let initial_tenant_id = env.default_tenant_id.unwrap();
// Initialize pageserver, create initial tenant and timeline.
// Call 'pageserver init'.
let pageserver = PageServerNode::from_env(&env);
let initial_timeline_id = pageserver
.initialize(
.init(
Some(initial_tenant_id),
initial_timeline_id_arg,
&pageserver_config_overrides(init_match),
@@ -551,15 +551,25 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
.values_of("config")
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
.unwrap_or_default();
let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
println!("tenant {new_tenant_id} successfully created on the pageserver");
let new_tenant_id = pageserver
.tenant_create(initial_tenant_id, tenant_conf)?
.ok_or_else(|| {
anyhow!("Tenant with id {:?} was already created", initial_tenant_id)
})?;
println!(
"tenant {} successfully created on the pageserver",
new_tenant_id
);
// Create an initial timeline for the new tenant
let new_timeline_id = parse_timeline_id(create_match)?;
let timeline_info =
pageserver.timeline_create(new_tenant_id, new_timeline_id, None, None)?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info
let timeline = pageserver
.timeline_create(new_tenant_id, new_timeline_id, None, None)?
.context(format!(
"Failed to create initial timeline for tenant {new_tenant_id}"
))?;
let new_timeline_id = timeline.timeline_id;
let last_record_lsn = timeline
.local
.context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
.last_record_lsn;
@@ -606,18 +616,20 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let new_branch_name = create_match
.value_of("branch-name")
.ok_or_else(|| anyhow!("No branch name provided"))?;
let timeline_info = pageserver.timeline_create(tenant_id, None, None, None)?;
let new_timeline_id = timeline_info.timeline_id;
let timeline = pageserver
.timeline_create(tenant_id, None, None, None)?
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
let new_timeline_id = timeline.timeline_id;
let last_record_lsn = timeline_info
let last_record_lsn = timeline
.local
.expect("no local timeline info")
.last_record_lsn;
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
println!(
"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}",
timeline_info.timeline_id
"Created timeline '{}' at Lsn {} for tenant: {}",
timeline.timeline_id, last_record_lsn, tenant_id,
);
}
Some(("import", import_match)) => {
@@ -668,7 +680,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
let ancestor_timeline_id = env
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
.ok_or_else(|| {
anyhow!("Found no timeline id for branch name '{ancestor_branch_name}'")
anyhow!(
"Found no timeline id for branch name '{}'",
ancestor_branch_name
)
})?;
let start_lsn = branch_match
@@ -676,15 +691,12 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
.map(Lsn::from_str)
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
let timeline_info = pageserver.timeline_create(
tenant_id,
None,
start_lsn,
Some(ancestor_timeline_id),
)?;
let new_timeline_id = timeline_info.timeline_id;
let timeline = pageserver
.timeline_create(tenant_id, None, start_lsn, Some(ancestor_timeline_id))?
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
let new_timeline_id = timeline.timeline_id;
let last_record_lsn = timeline_info
let last_record_lsn = timeline
.local
.expect("no local timeline info")
.last_record_lsn;
@@ -692,11 +704,11 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
println!(
"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}. Ancestor timeline: '{ancestor_branch_name}'",
timeline_info.timeline_id
"Created timeline '{}' at Lsn {} for tenant: {}. Ancestor timeline: '{}'",
timeline.timeline_id, last_record_lsn, tenant_id, ancestor_branch_name,
);
}
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{sub_name}'"),
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
None => bail!("no tenant subcommand provided"),
}

View File

@@ -24,13 +24,8 @@ use tracing::*;
use crate::reltag::{RelTag, SlruKind};
use crate::DatadirTimeline;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::{generate_wal_segment, normalize_lsn, XLogFileName};
use postgres_ffi::v14::{CheckPoint, ControlFileData};
use postgres_ffi::TransactionId;
use postgres_ffi::PG_TLI;
use postgres_ffi::{BLCKSZ, RELSEG_SIZE};
use postgres_ffi::xlog_utils::*;
use postgres_ffi::*;
use utils::lsn::Lsn;
/// This is short-living object only for the time of tarball creation,
@@ -205,7 +200,7 @@ where
}
// Add a file for each chunk of blocks (aka segment)
let chunks = (0..nblocks).chunks(RELSEG_SIZE as usize);
let chunks = (0..nblocks).chunks(pg_constants::RELSEG_SIZE as usize);
for (seg, blocks) in chunks.into_iter().enumerate() {
let mut segment_data: Vec<u8> = vec![];
for blknum in blocks {
@@ -225,19 +220,23 @@ where
fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
let nblocks = self.timeline.get_slru_segment_size(slru, segno, self.lsn)?;
let mut slru_buf: Vec<u8> = Vec::with_capacity(nblocks as usize * BLCKSZ as usize);
let mut slru_buf: Vec<u8> =
Vec::with_capacity(nblocks as usize * pg_constants::BLCKSZ as usize);
for blknum in 0..nblocks {
let img = self
.timeline
.get_slru_page_at_lsn(slru, segno, blknum, self.lsn)?;
if slru == SlruKind::Clog {
ensure!(img.len() == BLCKSZ as usize || img.len() == BLCKSZ as usize + 8);
ensure!(
img.len() == pg_constants::BLCKSZ as usize
|| img.len() == pg_constants::BLCKSZ as usize + 8
);
} else {
ensure!(img.len() == BLCKSZ as usize);
ensure!(img.len() == pg_constants::BLCKSZ as usize);
}
slru_buf.extend_from_slice(&img[..BLCKSZ as usize]);
slru_buf.extend_from_slice(&img[..pg_constants::BLCKSZ as usize]);
}
let segname = format!("{}/{:>04X}", slru.to_str(), segno);

View File

@@ -1,6 +1,6 @@
//! Main entry point for the Page Server executable.
use std::{env, ops::ControlFlow, path::Path, str::FromStr};
use std::{env, path::Path, str::FromStr};
use tracing::*;
use anyhow::{bail, Context, Result};
@@ -13,7 +13,7 @@ use pageserver::{
config::{defaults::*, PageServerConf},
http, page_cache, page_service, profiling, tenant_mgr, thread_mgr,
thread_mgr::ThreadKind,
virtual_file, LOG_FILE_NAME,
timelines, virtual_file, LOG_FILE_NAME,
};
use utils::{
auth::JwtAuth,
@@ -24,6 +24,7 @@ use utils::{
shutdown::exit_now,
signals::{self, Signal},
tcp_listener,
zid::{ZTenantId, ZTimelineId},
};
project_git_version!(GIT_VERSION);
@@ -41,7 +42,6 @@ fn main() -> anyhow::Result<()> {
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(&*version())
.arg(
Arg::new("daemonize")
.short('d')
.long("daemonize")
@@ -52,7 +52,7 @@ fn main() -> anyhow::Result<()> {
Arg::new("init")
.long("init")
.takes_value(false)
.help("Initialize pageserver with all given config overrides"),
.help("Initialize pageserver service: creates an initial config, tenant and timeline, if specified"),
)
.arg(
Arg::new("workdir")
@@ -61,6 +61,20 @@ fn main() -> anyhow::Result<()> {
.takes_value(true)
.help("Working directory for the pageserver"),
)
.arg(
Arg::new("create-tenant")
.long("create-tenant")
.takes_value(true)
.help("Create tenant during init")
.requires("init"),
)
.arg(
Arg::new("initial-timeline-id")
.long("initial-timeline-id")
.takes_value(true)
.help("Use a specific timeline id during init and tenant creation")
.requires("create-tenant"),
)
// See `settings.md` for more details on the extra configuration parameters pageserver can process
.arg(
Arg::new("config-override")
@@ -71,9 +85,6 @@ fn main() -> anyhow::Result<()> {
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
)
.arg(Arg::new("update-config").long("update-config").takes_value(false).help(
"Update the config file when started",
))
.arg(
Arg::new("enabled-features")
.long("enabled-features")
@@ -99,6 +110,18 @@ fn main() -> anyhow::Result<()> {
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
let cfg_file_path = workdir.join("pageserver.toml");
let init = arg_matches.is_present("init");
let create_tenant = arg_matches
.value_of("create-tenant")
.map(ZTenantId::from_str)
.transpose()
.context("Failed to parse tenant id from the arguments")?;
let initial_timeline_id = arg_matches
.value_of("initial-timeline-id")
.map(ZTimelineId::from_str)
.transpose()
.context("Failed to parse timeline id from the arguments")?;
// Set CWD to workdir for non-daemon modes
env::set_current_dir(&workdir).with_context(|| {
format!(
@@ -108,86 +131,30 @@ fn main() -> anyhow::Result<()> {
})?;
let daemonize = arg_matches.is_present("daemonize");
let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
ControlFlow::Continue(conf) => conf,
ControlFlow::Break(()) => {
info!("Pageserver config init successful");
return Ok(());
}
};
let tenants_path = conf.tenants_path();
if !tenants_path.exists() {
utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
format!(
"Failed to create tenants root dir at '{}'",
tenants_path.display()
)
})?;
if init && daemonize {
bail!("--daemonize cannot be used with --init")
}
// Initialize up failpoints support
let scenario = FailScenario::setup();
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
scenario.teardown();
Ok(())
}
fn initialize_config(
cfg_file_path: &Path,
arg_matches: clap::ArgMatches,
workdir: &Path,
) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
let init = arg_matches.is_present("init");
let update_config = init || arg_matches.is_present("update-config");
let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
if init {
anyhow::bail!(
"Config file '{}' already exists, cannot init it, use --update-config to update it",
cfg_file_path.display()
);
}
// Supplement the CLI arguments with the config file
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path).with_context(|| {
format!(
"Failed to read pageserver config at '{}'",
cfg_file_path.display()
)
})?;
(
cfg_file_contents
.parse::<toml_edit::Document>()
.with_context(|| {
format!(
"Failed to parse '{}' as pageserver config",
cfg_file_path.display()
)
})?,
true,
)
} else if cfg_file_path.exists() {
anyhow::bail!(
"Config file '{}' exists but is not a regular file",
cfg_file_path.display()
);
} else {
let mut toml = if init {
// We're initializing the repo, so there's no config file yet
(
DEFAULT_CONFIG_FILE
.parse::<toml_edit::Document>()
.context("could not parse built-in config file")?,
false,
)
DEFAULT_CONFIG_FILE
.parse::<toml_edit::Document>()
.context("could not parse built-in config file")?
} else {
// Supplement the CLI arguments with the config file
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path)
.with_context(|| format!("No pageserver config at '{}'", cfg_file_path.display()))?;
cfg_file_contents
.parse::<toml_edit::Document>()
.with_context(|| {
format!(
"Failed to read '{}' as pageserver config",
cfg_file_path.display()
)
})?
};
// Process any extra options given with -c
if let Some(values) = arg_matches.values_of("config-override") {
for option_line in values {
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
@@ -198,38 +165,49 @@ fn initialize_config(
})?;
for (key, item) in doc.iter() {
if config_file_exists && update_config && key == "id" && toml.contains_key(key) {
anyhow::bail!("Pageserver config file exists at '{}' and has node id already, it cannot be overridden", cfg_file_path.display());
if key == "id" {
anyhow::ensure!(
init,
"node id can only be set during pageserver init and cannot be overridden"
);
}
toml.insert(key, item.clone());
}
}
}
debug!("Resulting toml: {toml}");
let conf = PageServerConf::parse_and_validate(&toml, workdir)
trace!("Resulting toml: {}", toml);
let conf = PageServerConf::parse_and_validate(&toml, &workdir)
.context("Failed to parse pageserver configuration")?;
if update_config {
info!("Writing pageserver config to '{}'", cfg_file_path.display());
// The configuration is all set up now. Turn it into a 'static
// that can be freely stored in structs and passed across threads
// as a ref.
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
// Initialize up failpoints support
let scenario = FailScenario::setup();
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
// Create repo and exit if init was requested
if init {
timelines::init_pageserver(conf, create_tenant, initial_timeline_id)
.context("Failed to init pageserver")?;
// write the config file
std::fs::write(&cfg_file_path, toml.to_string()).with_context(|| {
format!(
"Failed to write pageserver config to '{}'",
"Failed to initialize pageserver config at '{}'",
cfg_file_path.display()
)
})?;
info!(
"Config successfully written to '{}'",
cfg_file_path.display()
)
} else {
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
}
Ok(if init {
ControlFlow::Break(())
} else {
ControlFlow::Continue(Box::leak(Box::new(conf)))
})
scenario.teardown();
Ok(())
}
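// A condensed sketch of the `-c` override handling above (illustrative; the function and
// its parameter names are hypothetical): each override is parsed as its own toml document
// and its top-level keys are copied into the config document, last writer wins.
fn apply_config_override(
    base: &mut toml_edit::Document,
    override_line: &str,
) -> anyhow::Result<()> {
    let doc = toml_edit::Document::from_str(override_line).with_context(|| {
        format!("Option '{override_line}' could not be parsed as a toml document")
    })?;
    for (key, item) in doc.iter() {
        base.insert(key, item.clone());
    }
    Ok(())
}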
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {

View File

@@ -11,13 +11,14 @@ use super::models::{
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
TimelineCreateRequest,
};
use crate::layered_repository::{metadata::TimelineMetadata, LayeredTimeline};
use crate::layered_repository::metadata::TimelineMetadata;
use crate::pgdatadir_mapping::DatadirTimeline;
use crate::repository::{LocalTimelineState, RepositoryTimeline};
use crate::repository::{Repository, Timeline};
use crate::storage_sync;
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
use crate::tenant_config::TenantConfOpt;
use crate::TimelineImpl;
use crate::{config::PageServerConf, tenant_mgr, timelines};
use utils::{
auth::JwtAuth,
@@ -85,7 +86,7 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
// Helper functions to construct a LocalTimelineInfo struct for a timeline
fn local_timeline_info_from_loaded_timeline(
timeline: &LayeredTimeline,
timeline: &TimelineImpl,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> anyhow::Result<LocalTimelineInfo> {
@@ -160,13 +161,13 @@ fn local_timeline_info_from_unloaded_timeline(metadata: &TimelineMetadata) -> Lo
}
fn local_timeline_info_from_repo_timeline(
repo_timeline: &RepositoryTimeline<LayeredTimeline>,
repo_timeline: &RepositoryTimeline<TimelineImpl>,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> anyhow::Result<LocalTimelineInfo> {
match repo_timeline {
RepositoryTimeline::Loaded(timeline) => local_timeline_info_from_loaded_timeline(
timeline,
&*timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
),

View File

@@ -15,24 +15,13 @@ use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::walingest::WalIngest;
use crate::walrecord::DecodedWALRecord;
use postgres_ffi::v14::relfile_utils::*;
use postgres_ffi::v14::waldecoder::*;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
use postgres_ffi::relfile_utils::*;
use postgres_ffi::waldecoder::*;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::Oid;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
use utils::lsn::Lsn;
// Returns checkpoint LSN from controlfile
pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
// Read control file to extract the LSN
let controlfile_path = path.join("global").join("pg_control");
let controlfile = ControlFileData::decode(&std::fs::read(controlfile_path)?)?;
let lsn = controlfile.checkPoint;
Ok(Lsn(lsn))
}
///
/// Import all relation data pages from local disk into the repository.
///
@@ -121,8 +110,8 @@ fn import_rel<T: DatadirTimeline, Reader: Read>(
let mut buf: [u8; 8192] = [0u8; 8192];
ensure!(len % BLCKSZ as usize == 0);
let nblocks = len / BLCKSZ as usize;
ensure!(len % pg_constants::BLCKSZ as usize == 0);
let nblocks = len / pg_constants::BLCKSZ as usize;
let rel = RelTag {
spcnode: spcoid,
@@ -131,7 +120,7 @@ fn import_rel<T: DatadirTimeline, Reader: Read>(
forknum,
};
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / BLCKSZ as u32);
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
// Call put_rel_creation for every segment of the relation,
// because there is no guarantee about the order in which we are processing segments.
@@ -155,7 +144,8 @@ fn import_rel<T: DatadirTimeline, Reader: Read>(
Err(err) => match err.kind() {
std::io::ErrorKind::UnexpectedEof => {
// reached EOF. That's expected.
let relative_blknum = blknum - segno * (1024 * 1024 * 1024 / BLCKSZ as u32);
let relative_blknum =
blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
ensure!(relative_blknum == nblocks as u32, "unexpected EOF");
break;
}
@@ -194,8 +184,8 @@ fn import_slru<T: DatadirTimeline, Reader: Read>(
.to_string_lossy();
let segno = u32::from_str_radix(filename, 16)?;
ensure!(len % BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
let nblocks = len / BLCKSZ as usize;
ensure!(len % pg_constants::BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
let nblocks = len / pg_constants::BLCKSZ as usize;
ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);

View File

@@ -1,5 +1,5 @@
use crate::repository::{key_range_size, singleton_range, Key};
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants;
use std::ops::Range;
///
@@ -19,7 +19,7 @@ impl KeySpace {
///
pub fn partition(&self, target_size: u64) -> KeyPartitioning {
// Assume that each value is 8k in size.
let target_nblocks = (target_size / BLCKSZ as u64) as usize;
let target_nblocks = (target_size / pg_constants::BLCKSZ as u64) as usize;
let mut parts = Vec::new();
let mut current_part = Vec::new();

View File

@@ -59,9 +59,7 @@ mod storage_layer;
mod timeline;
use storage_layer::Layer;
use timeline::LayeredTimelineEntry;
pub use timeline::LayeredTimeline;
use timeline::{LayeredTimeline, LayeredTimelineEntry};
// re-export this function so that page_cache.rs can use it.
pub use crate::layered_repository::ephemeral_file::writeback as writeback_ephemeral_file;

View File

@@ -209,7 +209,7 @@ where
reader: R,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum VisitDirection {
Forwards,
Backwards,

View File

@@ -4,7 +4,6 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::Bytes;
use fail::fail_point;
use itertools::Itertools;
use metrics::core::{AtomicU64, GenericCounter};
use once_cell::sync::Lazy;
use tracing::*;
@@ -45,7 +44,7 @@ use crate::reltag::RelTag;
use crate::tenant_config::TenantConfOpt;
use crate::DatadirTimeline;
use postgres_ffi::v14::xlog_utils::to_pg_timestamp;
use postgres_ffi::xlog_utils::to_pg_timestamp;
use utils::{
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
@@ -224,70 +223,6 @@ impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
}
}
struct TimelineMetrics {
pub reconstruct_time_histo: Histogram,
pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
pub flush_time_histo: Histogram,
pub compact_time_histo: Histogram,
pub create_images_time_histo: Histogram,
pub init_logical_size_histo: Histogram,
pub load_layer_map_histo: Histogram,
pub last_record_gauge: IntGauge,
pub wait_lsn_time_histo: Histogram,
pub current_physical_size_gauge: UIntGauge,
}
impl TimelineMetrics {
fn new(tenant_id: &ZTenantId, timeline_id: &ZTimelineId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let reconstruct_time_histo = RECONSTRUCT_TIME
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let flush_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["layer flush", &tenant_id, &timeline_id])
.unwrap();
let compact_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["compact", &tenant_id, &timeline_id])
.unwrap();
let create_images_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["create images", &tenant_id, &timeline_id])
.unwrap();
let init_logical_size_histo = STORAGE_TIME
.get_metric_with_label_values(&["init logical size", &tenant_id, &timeline_id])
.unwrap();
let load_layer_map_histo = STORAGE_TIME
.get_metric_with_label_values(&["load layer map", &tenant_id, &timeline_id])
.unwrap();
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let wait_lsn_time_histo = WAIT_LSN_TIME
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
TimelineMetrics {
reconstruct_time_histo,
materialized_page_cache_hit_counter,
flush_time_histo,
compact_time_histo,
create_images_time_histo,
init_logical_size_histo,
load_layer_map_histo,
last_record_gauge,
wait_lsn_time_histo,
current_physical_size_gauge,
}
}
}
pub struct LayeredTimeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -334,7 +269,14 @@ pub struct LayeredTimeline {
ancestor_lsn: Lsn,
// Metrics
metrics: TimelineMetrics,
reconstruct_time_histo: Histogram,
materialized_page_cache_hit_counter: IntCounter,
flush_time_histo: Histogram,
compact_time_histo: Histogram,
create_images_time_histo: Histogram,
last_record_gauge: IntGauge,
wait_lsn_time_histo: Histogram,
current_physical_size_gauge: UIntGauge,
/// If `true`, will backup its files that appear after each checkpointing to the remote storage.
upload_layers: AtomicBool,
@@ -484,7 +426,7 @@ impl Timeline for LayeredTimeline {
"wait_lsn called by WAL receiver thread"
);
self.metrics.wait_lsn_time_histo.observe_closure_duration(
self.wait_lsn_time_histo.observe_closure_duration(
|| self.last_record_lsn
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
.with_context(|| {
@@ -526,8 +468,7 @@ impl Timeline for LayeredTimeline {
self.get_reconstruct_data(key, lsn, &mut reconstruct_state)?;
self.metrics
.reconstruct_time_histo
self.reconstruct_time_histo
.observe_closure_duration(|| self.reconstruct_value(key, lsn, reconstruct_state))
}
@@ -589,7 +530,7 @@ impl Timeline for LayeredTimeline {
}
fn get_physical_size(&self) -> u64 {
self.metrics.current_physical_size_gauge.get()
self.current_physical_size_gauge.get()
}
fn get_physical_size_non_incremental(&self) -> anyhow::Result<u64> {
@@ -663,6 +604,43 @@ impl LayeredTimeline {
walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
upload_layers: bool,
) -> LayeredTimeline {
let reconstruct_time_histo = RECONSTRUCT_TIME
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let flush_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"layer flush",
&tenant_id.to_string(),
&timeline_id.to_string(),
])
.unwrap();
let compact_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"compact",
&tenant_id.to_string(),
&timeline_id.to_string(),
])
.unwrap();
let create_images_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"create images",
&tenant_id.to_string(),
&timeline_id.to_string(),
])
.unwrap();
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let wait_lsn_time_histo = WAIT_LSN_TIME
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
.unwrap();
let mut result = LayeredTimeline {
conf,
tenant_conf,
@@ -685,7 +663,14 @@ impl LayeredTimeline {
ancestor_timeline: ancestor,
ancestor_lsn: metadata.ancestor_lsn(),
metrics: TimelineMetrics::new(&tenant_id, &timeline_id),
reconstruct_time_histo,
materialized_page_cache_hit_counter,
flush_time_histo,
compact_time_histo,
create_images_time_histo,
last_record_gauge,
wait_lsn_time_histo,
current_physical_size_gauge,
upload_layers: AtomicBool::new(upload_layers),
@@ -721,8 +706,6 @@ impl LayeredTimeline {
let mut layers = self.layers.write().unwrap();
let mut num_layers = 0;
let timer = self.metrics.load_layer_map_histo.start_timer();
// Scan timeline directory and create ImageFileName and DeltaFilename
// structs representing all files on disk
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
@@ -794,11 +777,7 @@ impl LayeredTimeline {
"loaded layer map with {} layers at {}, total physical size: {}",
num_layers, disk_consistent_lsn, total_physical_size
);
self.metrics
.current_physical_size_gauge
.set(total_physical_size);
timer.stop_and_record();
self.current_physical_size_gauge.set(total_physical_size);
Ok(())
}
@@ -829,16 +808,12 @@ impl LayeredTimeline {
}
}
let timer = self.metrics.init_logical_size_histo.start_timer();
// Have to calculate it the hard way
let last_lsn = self.get_last_record_lsn();
let logical_size = self.get_current_logical_size_non_incremental(last_lsn)?;
self.current_logical_size
.store(logical_size as isize, AtomicOrdering::SeqCst);
debug!("calculated logical size the hard way: {}", logical_size);
timer.stop_and_record();
Ok(())
}
@@ -903,7 +878,7 @@ impl LayeredTimeline {
ValueReconstructResult::Continue => {
// If we reached an earlier cached page image, we're done.
if cont_lsn == cached_lsn + 1 {
self.metrics.materialized_page_cache_hit_counter.inc_by(1);
self.materialized_page_cache_hit_counter.inc_by(1);
return Ok(());
}
if prev_lsn <= cont_lsn {
@@ -1099,7 +1074,7 @@ impl LayeredTimeline {
fn finish_write(&self, new_lsn: Lsn) {
assert!(new_lsn.is_aligned());
self.metrics.last_record_gauge.set(new_lsn.0 as i64);
self.last_record_gauge.set(new_lsn.0 as i64);
self.last_record_lsn.advance(new_lsn);
}
@@ -1203,7 +1178,7 @@ impl LayeredTimeline {
}
};
let timer = self.metrics.flush_time_histo.start_timer();
let timer = self.flush_time_histo.start_timer();
loop {
let layers = self.layers.read().unwrap();
@@ -1374,7 +1349,7 @@ impl LayeredTimeline {
// update the timeline's physical size
let sz = new_delta_path.metadata()?.len();
self.metrics.current_physical_size_gauge.add(sz);
self.current_physical_size_gauge.add(sz);
// update metrics
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
PERSISTENT_BYTES_WRITTEN.inc_by(sz);
@@ -1443,7 +1418,7 @@ impl LayeredTimeline {
}
// 3. Compact
let timer = self.metrics.compact_time_histo.start_timer();
let timer = self.compact_time_histo.start_timer();
self.compact_level0(target_file_size)?;
timer.stop_and_record();
}
@@ -1519,7 +1494,7 @@ impl LayeredTimeline {
lsn: Lsn,
force: bool,
) -> Result<HashSet<PathBuf>> {
let timer = self.metrics.create_images_time_histo.start_timer();
let timer = self.create_images_time_histo.start_timer();
let mut image_layers: Vec<ImageLayer> = Vec::new();
let mut layer_paths_to_upload = HashSet::new();
for partition in partitioning.parts.iter() {
@@ -1563,8 +1538,7 @@ impl LayeredTimeline {
let mut layers = self.layers.write().unwrap();
for l in image_layers {
self.metrics
.current_physical_size_gauge
self.current_physical_size_gauge
.add(l.path().metadata()?.len());
layers.insert_historic(Arc::new(l));
}
@@ -1814,8 +1788,7 @@ impl LayeredTimeline {
let new_delta_path = l.path();
// update the timeline's physical size
self.metrics
.current_physical_size_gauge
self.current_physical_size_gauge
.add(new_delta_path.metadata()?.len());
new_layer_paths.insert(new_delta_path);
@@ -1828,9 +1801,7 @@ impl LayeredTimeline {
drop(all_keys_iter);
for l in deltas_to_compact {
if let Some(path) = l.local_path() {
self.metrics
.current_physical_size_gauge
.sub(path.metadata()?.len());
self.current_physical_size_gauge.sub(path.metadata()?.len());
layer_paths_do_delete.insert(path);
}
l.delete()?;
@@ -2087,9 +2058,7 @@ impl LayeredTimeline {
let mut layer_paths_to_delete = HashSet::with_capacity(layers_to_remove.len());
for doomed_layer in layers_to_remove {
if let Some(path) = doomed_layer.local_path() {
self.metrics
.current_physical_size_gauge
.sub(path.metadata()?.len());
self.current_physical_size_gauge.sub(path.metadata()?.len());
layer_paths_to_delete.insert(path);
}
doomed_layer.delete()?;
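One side of the LayeredTimeline hunks above keeps each metric handle as its own struct field; the other folds them into a single TimelineMetrics struct whose handles are resolved once per (tenant, timeline) label pair, so hot paths never repeat the label lookup. A minimal sketch of the same pattern, written directly against the prometheus crate; the real code goes through the repo's own metrics wrapper, and the metric names below are made up for illustration:

use once_cell::sync::Lazy;
use prometheus::{
    register_histogram_vec, register_int_gauge_vec, Histogram, HistogramVec, IntGauge, IntGaugeVec,
};

// Label families are registered once per process.
static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "example_reconstruct_time_seconds",
        "Time spent reconstructing page images (illustrative)",
        &["tenant_id", "timeline_id"]
    )
    .unwrap()
});

static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "example_last_record_lsn",
        "Last record LSN (illustrative)",
        &["tenant_id", "timeline_id"]
    )
    .unwrap()
});

// Per-timeline handles, resolved once at construction time.
struct TimelineMetrics {
    reconstruct_time_histo: Histogram,
    last_record_gauge: IntGauge,
}

impl TimelineMetrics {
    fn new(tenant_id: &str, timeline_id: &str) -> Self {
        Self {
            reconstruct_time_histo: RECONSTRUCT_TIME
                .get_metric_with_label_values(&[tenant_id, timeline_id])
                .unwrap(),
            last_record_gauge: LAST_RECORD_LSN
                .get_metric_with_label_values(&[tenant_id, timeline_id])
                .unwrap(),
        }
    }
}

Call sites then reach every handle through one field, e.g. self.metrics.reconstruct_time_histo.observe_closure_duration(..), as the hunks above show.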

View File

@@ -28,6 +28,7 @@ use tracing::info;
use crate::thread_mgr::ThreadKind;
use metrics::{register_int_gauge_vec, IntGaugeVec};
use layered_repository::LayeredRepository;
use pgdatadir_mapping::DatadirTimeline;
/// Current storage format version
@@ -61,6 +62,9 @@ pub enum CheckpointConfig {
Forced,
}
pub type RepositoryImpl = LayeredRepository;
pub type TimelineImpl = <LayeredRepository as repository::Repository>::Timeline;
pub fn shutdown_pageserver(exit_code: i32) {
// Shut down the libpq endpoint thread. This prevents new connections from
// being accepted.

View File

@@ -83,7 +83,7 @@ pub fn get() -> &'static PageCache {
}
}
pub const PAGE_SZ: usize = postgres_ffi::BLCKSZ as usize;
pub const PAGE_SZ: usize = postgres_ffi::pg_constants::BLCKSZ as usize;
const MAX_USAGE_COUNT: u8 = 5;
///

View File

@@ -40,10 +40,9 @@ use crate::thread_mgr;
use crate::thread_mgr::ThreadKind;
use crate::CheckpointConfig;
use metrics::{register_histogram_vec, HistogramVec};
use postgres_ffi::v14::xlog_utils::to_pg_timestamp;
use postgres_ffi::xlog_utils::to_pg_timestamp;
use postgres_ffi::v14::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants;
// Wrapped in libpq CopyData
enum PagestreamFeMessage {
@@ -726,9 +725,10 @@ impl PageServerHandler {
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
let total_blocks = timeline.get_db_size(DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
let total_blocks =
timeline.get_db_size(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
let db_size = total_blocks as i64 * BLCKSZ as i64;
let db_size = total_blocks as i64 * pg_constants::BLCKSZ as i64;
Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
db_size,

View File

@@ -13,10 +13,8 @@ use crate::repository::*;
use crate::walrecord::ZenithWalRecord;
use anyhow::{bail, ensure, Result};
use bytes::{Buf, Bytes};
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::TimestampTz;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{Oid, TransactionId};
use postgres_ffi::xlog_utils::TimestampTz;
use postgres_ffi::{pg_constants, Oid, TransactionId};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::ops::Range;
@@ -299,9 +297,9 @@ pub trait DatadirTimeline: Timeline {
let clog_page =
self.get_slru_page_at_lsn(SlruKind::Clog, segno, blknum, probe_lsn)?;
if clog_page.len() == BLCKSZ as usize + 8 {
if clog_page.len() == pg_constants::BLCKSZ as usize + 8 {
let mut timestamp_bytes = [0u8; 8];
timestamp_bytes.copy_from_slice(&clog_page[BLCKSZ as usize..]);
timestamp_bytes.copy_from_slice(&clog_page[pg_constants::BLCKSZ as usize..]);
let timestamp = TimestampTz::from_be_bytes(timestamp_bytes);
if timestamp >= search_timestamp {
@@ -384,7 +382,7 @@ pub trait DatadirTimeline: Timeline {
total_size += relsize as usize;
}
}
Ok(total_size * BLCKSZ as usize)
Ok(total_size * pg_constants::BLCKSZ as usize)
}
///
@@ -914,7 +912,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
result?;
if pending_nblocks != 0 {
writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize);
writer.update_current_logical_size(pending_nblocks * pg_constants::BLCKSZ as isize);
self.pending_nblocks = 0;
}
@@ -942,7 +940,7 @@ impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
writer.finish_write(lsn);
if pending_nblocks != 0 {
writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize);
writer.update_current_logical_size(pending_nblocks * pg_constants::BLCKSZ as isize);
}
Ok(())
@@ -1016,7 +1014,7 @@ struct SlruSegmentDirectory {
segments: HashSet<u32>,
}
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; pg_constants::BLCKSZ as usize]);
// Layout of the Key address space
//
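For context on the BLCKSZ arithmetic in the hunks above (db_size, logical size, and the clog timestamp offset): the sizes are plain block counts multiplied by the Postgres block size, 8192 bytes by default. A trivial worked check, with the constant spelled out here as an assumption rather than imported from postgres_ffi:

const BLCKSZ: u64 = 8192; // default Postgres block size

fn db_size_bytes(total_blocks: u64) -> u64 {
    total_blocks * BLCKSZ
}

fn main() {
    // a 1_280-block database occupies exactly 10 MiB
    assert_eq!(db_size_bytes(1_280), 10 * 1024 * 1024);
}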

View File

@@ -2,9 +2,8 @@ use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::relfile_utils::forknumber_to_name;
use postgres_ffi::Oid;
use postgres_ffi::relfile_utils::forknumber_to_name;
use postgres_ffi::{pg_constants, Oid};
///
/// Relation data file segment id throughout the Postgres cluster.

View File

@@ -412,6 +412,7 @@ pub mod repo_harness {
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::{fs, path::PathBuf};
use crate::RepositoryImpl;
use crate::{
config::PageServerConf,
layered_repository::LayeredRepository,
@@ -507,11 +508,11 @@ pub mod repo_harness {
})
}
pub fn load(&self) -> LayeredRepository {
pub fn load(&self) -> RepositoryImpl {
self.try_load().expect("failed to load test repo")
}
pub fn try_load(&self) -> Result<LayeredRepository> {
pub fn try_load(&self) -> Result<RepositoryImpl> {
let walredo_mgr = Arc::new(TestRedoManager);
let repo = LayeredRepository::new(

View File

@@ -979,7 +979,7 @@ enum DownloadStatus {
#[derive(Debug)]
enum UploadStatus {
Uploaded,
Failed(anyhow::Error),
Failed,
Nothing,
}
@@ -1056,43 +1056,41 @@ where
let (upload_status, download_status) = tokio::join!(
async {
if let Some(upload_data) = upload_data {
let upload_retries = upload_data.retries;
match validate_task_retries(upload_retries, max_sync_errors)
match validate_task_retries(upload_data, max_sync_errors)
.instrument(info_span!("retries_validation"))
.await
{
ControlFlow::Continue(()) => {
ControlFlow::Continue(new_upload_data) => {
upload_timeline_data(
conf,
(storage.as_ref(), &index, sync_queue),
current_remote_timeline.as_ref(),
sync_id,
upload_data,
new_upload_data,
sync_start,
"upload",
)
.await
.await;
UploadStatus::Uploaded
}
ControlFlow::Break(()) => match update_remote_data(
conf,
storage.as_ref(),
&index,
sync_id,
RemoteDataUpdate::Upload {
uploaded_data: upload_data.data,
upload_failed: true,
},
)
.await
{
Ok(()) => UploadStatus::Failed(anyhow::anyhow!(
"Aborted after retries validation, current retries: {upload_retries}, max retries allowed: {max_sync_errors}"
)),
Err(e) => {
ControlFlow::Break(failed_upload_data) => {
if let Err(e) = update_remote_data(
conf,
storage.as_ref(),
&index,
sync_id,
RemoteDataUpdate::Upload {
uploaded_data: failed_upload_data.data,
upload_failed: true,
},
)
.await
{
error!("Failed to update remote timeline {sync_id}: {e:?}");
UploadStatus::Failed(e)
}
},
UploadStatus::Failed
}
}
} else {
UploadStatus::Nothing
@@ -1101,23 +1099,23 @@ where
.instrument(info_span!("upload_timeline_data")),
async {
if let Some(download_data) = download_data {
match validate_task_retries(download_data.retries, max_sync_errors)
match validate_task_retries(download_data, max_sync_errors)
.instrument(info_span!("retries_validation"))
.await
{
ControlFlow::Continue(()) => {
ControlFlow::Continue(new_download_data) => {
return download_timeline_data(
conf,
(storage.as_ref(), &index, sync_queue),
current_remote_timeline.as_ref(),
sync_id,
download_data,
new_download_data,
sync_start,
"download",
)
.await;
}
ControlFlow::Break(()) => {
ControlFlow::Break(_) => {
index
.write()
.await
@@ -1134,29 +1132,29 @@ where
if let Some(delete_data) = batch.delete {
match upload_status {
UploadStatus::Uploaded | UploadStatus::Nothing => {
match validate_task_retries(delete_data.retries, max_sync_errors)
match validate_task_retries(delete_data, max_sync_errors)
.instrument(info_span!("retries_validation"))
.await
{
ControlFlow::Continue(()) => {
ControlFlow::Continue(new_delete_data) => {
delete_timeline_data(
conf,
(storage.as_ref(), &index, sync_queue),
sync_id,
delete_data,
new_delete_data,
sync_start,
"delete",
)
.instrument(info_span!("delete_timeline_data"))
.await;
}
ControlFlow::Break(()) => {
ControlFlow::Break(failed_delete_data) => {
if let Err(e) = update_remote_data(
conf,
storage.as_ref(),
&index,
sync_id,
RemoteDataUpdate::Delete(&delete_data.data.deleted_layers),
RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
)
.await
{
@@ -1165,8 +1163,8 @@ where
}
}
}
UploadStatus::Failed(e) => {
warn!("Skipping delete task due to failed upload tasks, reenqueuing. Upload data: {:?}, delete data: {delete_data:?}. Upload failure: {e:#}", batch.upload);
UploadStatus::Failed => {
warn!("Skipping delete task due to failed upload tasks, reenqueuing");
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
}
}
@@ -1351,8 +1349,7 @@ async fn upload_timeline_data<P, S>(
new_upload_data: SyncData<LayersUpload>,
sync_start: Instant,
task_name: &str,
) -> UploadStatus
where
) where
P: Debug + Send + Sync + 'static,
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
{
@@ -1365,9 +1362,9 @@ where
)
.await
{
UploadedTimeline::FailedAndRescheduled(e) => {
UploadedTimeline::FailedAndRescheduled => {
register_sync_status(sync_id, sync_start, task_name, Some(false));
return UploadStatus::Failed(e);
return;
}
UploadedTimeline::Successful(upload_data) => upload_data,
};
@@ -1386,14 +1383,12 @@ where
{
Ok(()) => {
register_sync_status(sync_id, sync_start, task_name, Some(true));
UploadStatus::Uploaded
}
Err(e) => {
error!("Failed to update remote timeline {sync_id}: {e:?}");
uploaded_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
register_sync_status(sync_id, sync_start, task_name, Some(false));
UploadStatus::Failed(e)
}
}
}
@@ -1496,17 +1491,21 @@ where
.context("Failed to upload new index part")
}
async fn validate_task_retries(
current_attempt: u32,
async fn validate_task_retries<T>(
sync_data: SyncData<T>,
max_sync_errors: NonZeroU32,
) -> ControlFlow<(), ()> {
) -> ControlFlow<SyncData<T>, SyncData<T>> {
let current_attempt = sync_data.retries;
let max_sync_errors = max_sync_errors.get();
if current_attempt >= max_sync_errors {
return ControlFlow::Break(());
error!(
"Aborting task that failed {current_attempt} times, exceeding retries threshold of {max_sync_errors}",
);
return ControlFlow::Break(sync_data);
}
exponential_backoff(current_attempt, 1.0, 30.0).await;
ControlFlow::Continue(())
ControlFlow::Continue(sync_data)
}
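The validate_task_retries variant above that takes the whole SyncData<T> hands it back through both ControlFlow branches, so whichever branch wins owns the payload it has to act on: run the task after a backoff, or record the failure against the remote index. A standalone sketch of that shape; the SyncData struct and the backoff below are simplified stand-ins, not the crate's real definitions:

use std::num::NonZeroU32;
use std::ops::ControlFlow;
use std::time::Duration;

// Simplified stand-in for the sync queue payload.
struct SyncData<T> {
    retries: u32,
    data: T,
}

// Break returns the data so the caller can mark the remote entry as failed;
// Continue returns it so the task can run.
async fn validate_task_retries<T>(
    sync_data: SyncData<T>,
    max_sync_errors: NonZeroU32,
) -> ControlFlow<SyncData<T>, SyncData<T>> {
    if sync_data.retries >= max_sync_errors.get() {
        return ControlFlow::Break(sync_data);
    }
    // Toy backoff: one second per past attempt, capped at 30 seconds.
    let backoff = Duration::from_secs(u64::from(sync_data.retries).min(30));
    tokio::time::sleep(backoff).await;
    ControlFlow::Continue(sync_data)
}

Callers then match on the result and move the returned data into the matching branch, as the upload, download, and delete arms above do.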
fn schedule_first_sync_tasks(

View File

@@ -95,8 +95,6 @@ where
debug!("Reenqueuing failed delete task for timeline {sync_id}");
delete_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
} else {
info!("Successfully deleted all layers");
}
errored
}

View File

@@ -75,7 +75,7 @@ where
#[derive(Debug)]
pub(super) enum UploadedTimeline {
/// Upload failed due to some error, the upload task is rescheduled for another retry.
FailedAndRescheduled(anyhow::Error),
FailedAndRescheduled,
/// No issues happened during the upload, all task files were put into the remote storage.
Successful(SyncData<LayersUpload>),
}
@@ -179,7 +179,7 @@ where
})
.collect::<FuturesUnordered<_>>();
let mut errors = Vec::new();
let mut errors_happened = false;
while let Some(upload_result) = upload_tasks.next().await {
match upload_result {
Ok(uploaded_path) => {
@@ -188,13 +188,13 @@ where
}
Err(e) => match e {
UploadError::Other(e) => {
errors_happened = true;
error!("Failed to upload a layer for timeline {sync_id}: {e:?}");
errors.push(format!("{e:#}"));
}
UploadError::MissingLocalFile(source_path, e) => {
if source_path.exists() {
errors_happened = true;
error!("Failed to upload a layer for timeline {sync_id}: {e:?}");
errors.push(format!("{e:#}"));
} else {
// We have run the upload sync task, but the file we wanted to upload is gone.
// This is "fine" due to the asynchronous nature of the sync loop: it only reacts to events and might need to
@@ -217,17 +217,14 @@ where
}
}
if errors.is_empty() {
info!("Successfully uploaded all layers");
UploadedTimeline::Successful(upload_data)
} else {
if errors_happened {
debug!("Reenqueuing failed upload task for timeline {sync_id}");
upload_data.retries += 1;
sync_queue.push(sync_id, SyncTask::Upload(upload_data));
UploadedTimeline::FailedAndRescheduled(anyhow::anyhow!(
"Errors appeared during layer uploads: {:?}",
errors
))
UploadedTimeline::FailedAndRescheduled
} else {
info!("Successfully uploaded all layers");
UploadedTimeline::Successful(upload_data)
}
}
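Both sides of the hunk above agree on the failure path: when any layer upload fails, the task's retry counter is bumped and the task is pushed back onto the sync queue before FailedAndRescheduled is reported; they differ only in whether the individual errors are collected or reduced to a flag. A minimal sketch of that requeue-or-succeed shape, with a plain Vec standing in for sync_queue:

// Toy task payload; the real one carries layer paths and metadata.
struct UploadTask {
    retries: u32,
}

enum UploadOutcome {
    Successful(UploadTask),
    FailedAndRescheduled,
}

fn finish_upload(
    mut task: UploadTask,
    errors_happened: bool,
    queue: &mut Vec<UploadTask>,
) -> UploadOutcome {
    if errors_happened {
        // Requeue with an incremented attempt counter; the retries validation
        // step decides later whether to keep retrying or give up.
        task.retries += 1;
        queue.push(task);
        UploadOutcome::FailedAndRescheduled
    } else {
        UploadOutcome::Successful(task)
    }
}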

View File

@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
}

View File

@@ -3,14 +3,16 @@
use crate::config::PageServerConf;
use crate::http::models::TenantInfo;
use crate::layered_repository::{load_metadata, LayeredRepository, LayeredTimeline};
use crate::layered_repository::{load_metadata, LayeredRepository};
use crate::repository::Repository;
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
use crate::tenant_config::TenantConfOpt;
use crate::thread_mgr::ThreadKind;
use crate::timelines::CreateRepo;
use crate::walredo::PostgresRedoManager;
use crate::{thread_mgr, timelines, walreceiver};
use crate::{RepositoryImpl, TimelineImpl};
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::Entry;
@@ -94,13 +96,13 @@ mod tenants_state {
struct Tenant {
state: TenantState,
/// Contains in-memory state, including the timeline that might not yet be flushed on disk or loaded from disk.
repo: Arc<LayeredRepository>,
repo: Arc<RepositoryImpl>,
/// Timelines, located locally in the pageserver's datadir.
/// Timelines can be removed entirely by the `detach` operation only.
///
/// Local timelines have more metadata that's loaded into memory,
/// that is located in the `repo.timelines` field, [`crate::layered_repository::LayeredTimelineEntry`].
local_timelines: HashMap<ZTimelineId, Arc<LayeredTimeline>>,
local_timelines: HashMap<ZTimelineId, Arc<<RepositoryImpl as Repository>::Timeline>>,
}
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
@@ -177,7 +179,7 @@ pub enum LocalTimelineUpdate {
},
Attach {
id: ZTenantTimelineId,
datadir: Arc<LayeredTimeline>,
datadir: Arc<<RepositoryImpl as Repository>::Timeline>,
},
}
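The RepositoryImpl and TimelineImpl aliases used throughout this file's hunks let the tenant map name the concrete timeline type through the Repository trait's associated type instead of spelling out LayeredTimeline everywhere. A minimal sketch of that aliasing pattern, with invented stand-in types:

use std::sync::Arc;

trait Repository {
    type Timeline;
    fn get_timeline(&self) -> Arc<Self::Timeline>;
}

struct LayeredRepository;
struct LayeredTimeline;

impl Repository for LayeredRepository {
    type Timeline = LayeredTimeline;
    fn get_timeline(&self) -> Arc<LayeredTimeline> {
        Arc::new(LayeredTimeline)
    }
}

// The implementation is named exactly once; everything else uses the aliases.
type RepositoryImpl = LayeredRepository;
type TimelineImpl = <LayeredRepository as Repository>::Timeline;

fn load_local_timeline(repo: &RepositoryImpl) -> Arc<TimelineImpl> {
    repo.get_timeline()
}

Swapping in a different repository implementation then only touches the two alias lines.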
@@ -283,8 +285,10 @@ pub fn create_tenant_repository(
conf,
tenant_conf,
tenant_id,
wal_redo_manager,
remote_index,
CreateRepo::Real {
wal_redo_manager,
remote_index,
},
)?;
v.insert(Tenant {
state: TenantState::Idle,
@@ -365,7 +369,7 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
Ok(())
}
pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<LayeredRepository>> {
pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<RepositoryImpl>> {
let m = tenants_state::read_tenants();
let tenant = m
.get(&tenant_id)
@@ -379,7 +383,7 @@ pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result<Arc<Lay
pub fn get_local_timeline_with_load(
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
) -> anyhow::Result<Arc<LayeredTimeline>> {
) -> anyhow::Result<Arc<TimelineImpl>> {
let mut m = tenants_state::write_tenants();
let tenant = m
.get_mut(&tenant_id)
@@ -484,9 +488,9 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any
}
fn load_local_timeline(
repo: &LayeredRepository,
repo: &RepositoryImpl,
timeline_id: ZTimelineId,
) -> anyhow::Result<Arc<LayeredTimeline>> {
) -> anyhow::Result<Arc<TimelineImpl>> {
let inmem_timeline = repo.get_timeline_load(timeline_id).with_context(|| {
format!("Inmem timeline {timeline_id} not found in tenant's repository")
})?;
@@ -630,7 +634,7 @@ fn load_local_repo(
conf: &'static PageServerConf,
tenant_id: ZTenantId,
remote_index: &RemoteIndex,
) -> anyhow::Result<Arc<LayeredRepository>> {
) -> anyhow::Result<Arc<RepositoryImpl>> {
let mut m = tenants_state::write_tenants();
let tenant = m.entry(tenant_id).or_insert_with(|| {
// Set up a WAL redo manager, for applying WAL records.

View File

@@ -3,7 +3,7 @@
//
use anyhow::{bail, ensure, Context, Result};
use postgres_ffi::ControlFileData;
use std::{
fs,
path::Path,
@@ -13,21 +13,18 @@ use std::{
use tracing::*;
use utils::{
crashsafe_dir,
crashsafe_dir, logging,
lsn::Lsn,
zid::{ZTenantId, ZTimelineId},
};
use crate::import_datadir;
use crate::tenant_mgr;
use crate::{
config::PageServerConf, repository::Repository, storage_sync::index::RemoteIndex,
tenant_config::TenantConfOpt,
};
use crate::{
layered_repository::{LayeredRepository, LayeredTimeline},
walredo::WalRedoManager,
tenant_config::TenantConfOpt, RepositoryImpl, TimelineImpl,
};
use crate::{import_datadir, LOG_FILE_NAME};
use crate::{layered_repository::LayeredRepository, walredo::WalRedoManager};
use crate::{repository::Timeline, CheckpointConfig};
#[derive(Debug, Clone, Copy)]
@@ -36,13 +33,69 @@ pub struct PointInTime {
pub lsn: Lsn,
}
pub fn init_pageserver(
conf: &'static PageServerConf,
create_tenant: Option<ZTenantId>,
initial_timeline_id: Option<ZTimelineId>,
) -> anyhow::Result<()> {
// Initialize logger
// use true as the daemonize parameter because otherwise we pollute zenith cli output with several pages of info messages
let _log_file = logging::init(LOG_FILE_NAME, true)?;
crashsafe_dir::create_dir_all(conf.tenants_path())?;
if let Some(tenant_id) = create_tenant {
println!("initializing tenantid {}", tenant_id);
let repo = create_repo(conf, TenantConfOpt::default(), tenant_id, CreateRepo::Dummy)
.context("failed to create repo")?;
let new_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
bootstrap_timeline(conf, tenant_id, new_timeline_id, repo.as_ref())
.context("failed to create initial timeline")?;
println!("initial timeline {} created", new_timeline_id)
} else if initial_timeline_id.is_some() {
println!("Ignoring initial timeline parameter, due to no tenant id to create given");
}
println!("pageserver init succeeded");
Ok(())
}
pub enum CreateRepo {
Real {
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
remote_index: RemoteIndex,
},
Dummy,
}
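The CreateRepo enum above lets the init path ask for cheap stand-in dependencies (a dummy WAL redo manager and a default remote index) while normal tenant creation passes the real ones, instead of threading optional arguments through create_repo. A generic sketch of that dispatch; the trait and types here are invented for illustration:

use std::sync::Arc;

trait WalRedoManager {}

struct PostgresRedoManager; // stands in for the real dependency
struct DummyRedoManager; // cheap stand-in for init-time callers

impl WalRedoManager for PostgresRedoManager {}
impl WalRedoManager for DummyRedoManager {}

enum CreateRepo {
    Real {
        wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
    },
    Dummy,
}

fn resolve(create_repo: CreateRepo) -> Arc<dyn WalRedoManager + Send + Sync> {
    match create_repo {
        CreateRepo::Real { wal_redo_manager } => wal_redo_manager,
        // No WAL redo process is spawned for init-style callers.
        CreateRepo::Dummy => Arc::new(DummyRedoManager),
    }
}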
pub fn create_repo(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
tenant_id: ZTenantId,
wal_redo_manager: Arc<dyn WalRedoManager + Send + Sync>,
remote_index: RemoteIndex,
) -> Result<Arc<LayeredRepository>> {
create_repo: CreateRepo,
) -> Result<Arc<RepositoryImpl>> {
let (wal_redo_manager, remote_index) = match create_repo {
CreateRepo::Real {
wal_redo_manager,
remote_index,
} => (wal_redo_manager, remote_index),
CreateRepo::Dummy => {
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
// process during repository initialization.
//
// FIXME: That caused trouble, because the WAL redo manager spawned a thread that launched
// initdb in the background, and it kept running even after the "zenith init" had exited.
// In tests, we started the page server immediately after that, so that initdb was still
// running in the background, and we failed to run initdb again in the same directory. This
// has been solved for the rapid init+start case now, but the general race condition remains
// if you restart the server quickly. The WAL redo manager doesn't use a separate thread
// anymore, but I think that could still happen.
let wal_redo_manager = Arc::new(crate::walredo::DummyRedoManager {});
(wal_redo_manager as _, RemoteIndex::default())
}
};
let repo_dir = conf.tenant_path(&tenant_id);
ensure!(
!repo_dir.exists(),
@@ -69,6 +122,16 @@ pub fn create_repo(
)))
}
// Returns checkpoint LSN from controlfile
fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
// Read control file to extract the LSN
let controlfile_path = path.join("global").join("pg_control");
let controlfile = ControlFileData::decode(&fs::read(controlfile_path)?)?;
let lsn = controlfile.checkPoint;
Ok(Lsn(lsn))
}
// Create the cluster temporarily in 'initdbpath' directory inside the repository
// to get bootstrap data for timeline initialization.
//
@@ -118,7 +181,7 @@ fn bootstrap_timeline<R: Repository>(
run_initdb(conf, &initdb_path)?;
let pgdata_path = initdb_path;
let lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
// Import the contents of the data directory at the initial checkpoint
// LSN, and any WAL after that.
@@ -160,7 +223,7 @@ pub(crate) fn create_timeline(
new_timeline_id: Option<ZTimelineId>,
ancestor_timeline_id: Option<ZTimelineId>,
mut ancestor_start_lsn: Option<Lsn>,
) -> Result<Option<(ZTimelineId, Arc<LayeredTimeline>)>> {
) -> Result<Option<(ZTimelineId, Arc<TimelineImpl>)>> {
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;

View File

@@ -22,8 +22,8 @@
//! bespoken Rust code.
use anyhow::Context;
use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::nonrelfile_utils::clogpage_precedes;
use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
use postgres_ffi::{page_is_new, page_set_lsn};
use anyhow::Result;
@@ -33,12 +33,10 @@ use tracing::*;
use crate::pgdatadir_mapping::*;
use crate::reltag::{RelTag, SlruKind};
use crate::walrecord::*;
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::*;
use postgres_ffi::v14::CheckPoint;
use postgres_ffi::nonrelfile_utils::mx_offset_to_member_segment;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::TransactionId;
use postgres_ffi::BLCKSZ;
use postgres_ffi::{pg_constants, CheckPoint};
use utils::lsn::Lsn;
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
@@ -295,7 +293,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
// Extract page image from FPI record
let img_len = blk.bimg_len as usize;
let img_offs = blk.bimg_offset as usize;
let mut image = BytesMut::with_capacity(BLCKSZ as usize);
let mut image = BytesMut::with_capacity(pg_constants::BLCKSZ as usize);
image.extend_from_slice(&decoded.record[img_offs..img_offs + img_len]);
if blk.hole_length != 0 {
@@ -311,7 +309,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> {
if !page_is_new(&image) {
page_set_lsn(&mut image, lsn)
}
assert_eq!(image.len(), BLCKSZ as usize);
assert_eq!(image.len(), pg_constants::BLCKSZ as usize);
self.put_rel_page_image(modification, rel, blk.blkno, image.freeze())?;
} else {
let rec = ZenithWalRecord::Postgres {
@@ -1035,8 +1033,7 @@ mod tests {
use crate::pgdatadir_mapping::create_test_timeline;
use crate::repository::repo_harness::*;
use crate::repository::Timeline;
use postgres_ffi::v14::xlog_utils::SIZEOF_CHECKPOINT;
use postgres_ffi::RELSEG_SIZE;
use postgres_ffi::pg_constants;
/// Arbitrary relation tag, for testing.
const TESTREL_A: RelTag = RelTag {
@@ -1325,7 +1322,7 @@ mod tests {
let mut walingest = init_walingest_test(&*tline)?;
let mut lsn = 0x10;
for blknum in 0..RELSEG_SIZE + 1 {
for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
@@ -1335,22 +1332,31 @@ mod tests {
assert_current_logical_size(&*tline, Lsn(lsn));
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(lsn))?, RELSEG_SIZE + 1);
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE + 1
);
// Truncate one block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
walingest.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE)?;
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE)?;
m.commit()?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(lsn))?, RELSEG_SIZE);
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE
);
assert_current_logical_size(&*tline, Lsn(lsn));
// Truncate another block
lsn += 0x10;
let mut m = tline.begin_modification(Lsn(lsn));
walingest.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE - 1)?;
walingest.put_rel_truncation(&mut m, TESTREL_A, pg_constants::RELSEG_SIZE - 1)?;
m.commit()?;
assert_eq!(tline.get_rel_size(TESTREL_A, Lsn(lsn))?, RELSEG_SIZE - 1);
assert_eq!(
tline.get_rel_size(TESTREL_A, Lsn(lsn))?,
pg_constants::RELSEG_SIZE - 1
);
assert_current_logical_size(&*tline, Lsn(lsn));
// Truncate to 1500, and then truncate all the way down to 0, one block at a time

View File

@@ -16,9 +16,8 @@ use std::{
time::Duration,
};
use crate::{layered_repository::LayeredTimeline, repository::Timeline};
use anyhow::Context;
use chrono::{NaiveDateTime, Utc};
use chrono::{DateTime, Local, NaiveDateTime, Utc};
use etcd_broker::{
subscription_key::SubscriptionKey, subscription_value::SkTimelineInfo, BrokerSubscription,
BrokerUpdate, Client,
@@ -26,20 +25,26 @@ use etcd_broker::{
use tokio::select;
use tracing::*;
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
use crate::{
exponential_backoff,
repository::{Repository, Timeline},
DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
};
use crate::{RepositoryImpl, TimelineImpl};
use utils::{
lsn::Lsn,
pq_proto::ReplicationFeedback,
zid::{NodeId, ZTenantTimelineId},
};
use super::{walreceiver_connection::WalConnectionStatus, TaskEvent, TaskHandle};
use super::{TaskEvent, TaskHandle};
/// Spawns the loop to take care of the timeline's WAL streaming connection.
pub(super) fn spawn_connection_manager_task(
id: ZTenantTimelineId,
broker_loop_prefix: String,
mut client: Client,
local_timeline: Arc<LayeredTimeline>,
local_timeline: Arc<TimelineImpl>,
wal_connect_timeout: Duration,
lagging_wal_timeout: Duration,
max_lsn_wal_lag: NonZeroU64,
@@ -96,8 +101,6 @@ async fn connection_manager_loop_step(
info!("Subscribed for etcd timeline changes, waiting for new etcd data");
loop {
let time_until_next_retry = walreceiver_state.time_until_next_retry();
select! {
broker_connection_result = &mut broker_subscription.watcher_handle => {
cleanup_broker_connection(broker_connection_result, walreceiver_state);
@@ -111,24 +114,23 @@ async fn connection_manager_loop_step(
}
} => {
let wal_connection = walreceiver_state.wal_connection.as_mut().expect("Should have a connection, as checked by the corresponding select! guard");
match wal_connection_update {
TaskEvent::Started => {},
TaskEvent::NewEvent(status) => {
if status.has_processed_wal {
// We have advanced last_record_lsn by processing the WAL received
// from this safekeeper. This is good enough to clean unsuccessful
// retries history and allow reconnecting to this safekeeper without
// sleeping for a long time.
walreceiver_state.wal_connection_retries.remove(&wal_connection.sk_id);
}
wal_connection.status = status;
match &wal_connection_update {
TaskEvent::Started => {
wal_connection.latest_connection_update = Utc::now().naive_utc();
*walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0) += 1;
},
TaskEvent::NewEvent(replication_feedback) => {
wal_connection.latest_connection_update = DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc();
// reset connection attempts here only, the only place where both nodes
// explicitly confirm with replication feedback that they are connected to each other
walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
},
TaskEvent::End(end_result) => {
match end_result {
Ok(()) => debug!("WAL receiving task finished"),
Err(e) => warn!("WAL receiving task failed: {e}"),
};
walreceiver_state.drop_old_connection(false).await;
walreceiver_state.wal_connection = None;
},
}
},
@@ -152,8 +154,6 @@ async fn connection_manager_loop_step(
}
}
},
_ = async { tokio::time::sleep(time_until_next_retry.unwrap()).await }, if time_until_next_retry.is_some() => {}
}
// Fetch more etcd timeline updates, but limit ourselves since they may arrive quickly.
@@ -234,15 +234,11 @@ async fn subscribe_for_timeline_updates(
}
}
const WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS: f64 = 0.1;
const WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0;
const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5;
/// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible.
struct WalreceiverState {
id: ZTenantTimelineId,
/// Use pageserver data about the timeline to filter out some of the safekeepers.
local_timeline: Arc<LayeredTimeline>,
local_timeline: Arc<TimelineImpl>,
/// The timeout on the connection to safekeeper for WAL streaming.
wal_connect_timeout: Duration,
/// The timeout to use to determine when the current connection is "stale" and reconnect to the other one.
@@ -251,8 +247,7 @@ struct WalreceiverState {
max_lsn_wal_lag: NonZeroU64,
/// Current connection to safekeeper for WAL streaming.
wal_connection: Option<WalConnection>,
/// Info about retries and unsuccessful attempts to connect to safekeepers.
wal_connection_retries: HashMap<NodeId, RetryInfo>,
wal_connection_attempts: HashMap<NodeId, u32>,
/// Data about all timelines, available for connection, fetched from etcd, grouped by their corresponding safekeeper node id.
wal_stream_candidates: HashMap<NodeId, EtcdSkTimeline>,
}
@@ -260,31 +255,12 @@ struct WalreceiverState {
/// Current connection data.
#[derive(Debug)]
struct WalConnection {
/// Time when the connection was initiated.
started_at: NaiveDateTime,
/// Current safekeeper pageserver is connected to for WAL streaming.
sk_id: NodeId,
/// Status of the connection.
status: WalConnectionStatus,
/// Connection task start time or the timestamp of a latest connection message received.
latest_connection_update: NaiveDateTime,
/// WAL streaming task handle.
connection_task: TaskHandle<WalConnectionStatus>,
/// Have we discovered that other safekeeper has more recent WAL than we do?
discovered_new_wal: Option<NewCommittedWAL>,
}
/// Notion of a new committed WAL, which exists on other safekeeper.
#[derive(Debug, Clone, Copy)]
struct NewCommittedWAL {
/// LSN of the new committed WAL.
lsn: Lsn,
/// When we discovered that the new committed WAL exists on other safekeeper.
discovered_at: NaiveDateTime,
}
#[derive(Debug)]
struct RetryInfo {
next_retry_at: Option<NaiveDateTime>,
retry_duration_seconds: f64,
connection_task: TaskHandle<ReplicationFeedback>,
}
/// Data about the timeline to connect to, received from etcd.
@@ -300,7 +276,7 @@ struct EtcdSkTimeline {
impl WalreceiverState {
fn new(
id: ZTenantTimelineId,
local_timeline: Arc<LayeredTimeline>,
local_timeline: Arc<<RepositoryImpl as Repository>::Timeline>,
wal_connect_timeout: Duration,
lagging_wal_timeout: Duration,
max_lsn_wal_lag: NonZeroU64,
@@ -313,18 +289,31 @@ impl WalreceiverState {
max_lsn_wal_lag,
wal_connection: None,
wal_stream_candidates: HashMap::new(),
wal_connection_retries: HashMap::new(),
wal_connection_attempts: HashMap::new(),
}
}
/// Shuts down the current connection (if any) and immediately starts another one with the given connection string.
async fn change_connection(&mut self, new_sk_id: NodeId, new_wal_source_connstr: String) {
self.drop_old_connection(true).await;
if let Some(old_connection) = self.wal_connection.take() {
old_connection.connection_task.shutdown().await
}
let id = self.id;
let connect_timeout = self.wal_connect_timeout;
let connection_attempt = self
.wal_connection_attempts
.get(&new_sk_id)
.copied()
.unwrap_or(0);
let connection_handle = TaskHandle::spawn(move |events_sender, cancellation| {
async move {
exponential_backoff(
connection_attempt,
DEFAULT_BASE_BACKOFF_SECONDS,
DEFAULT_MAX_BACKOFF_SECONDS,
)
.await;
super::walreceiver_connection::handle_walreceiver_connection(
id,
&new_wal_source_connstr,
@@ -338,88 +327,13 @@ impl WalreceiverState {
.instrument(info_span!("walreceiver_connection", id = %id))
});
let now = Utc::now().naive_utc();
self.wal_connection = Some(WalConnection {
started_at: now,
sk_id: new_sk_id,
status: WalConnectionStatus {
is_connected: false,
has_processed_wal: false,
latest_connection_update: now,
latest_wal_update: now,
streaming_lsn: None,
commit_lsn: None,
},
latest_connection_update: Utc::now().naive_utc(),
connection_task: connection_handle,
discovered_new_wal: None,
});
}
/// Drops the current connection (if any) and updates retry timeout for the next
/// connection attempt to the same safekeeper.
async fn drop_old_connection(&mut self, needs_shutdown: bool) {
let wal_connection = match self.wal_connection.take() {
Some(wal_connection) => wal_connection,
None => return,
};
if needs_shutdown {
wal_connection.connection_task.shutdown().await;
}
let retry = self
.wal_connection_retries
.entry(wal_connection.sk_id)
.or_insert(RetryInfo {
next_retry_at: None,
retry_duration_seconds: WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS,
});
let now = Utc::now().naive_utc();
// Schedule the next retry attempt. We want to have exponential backoff for connection attempts,
// and we add backoff to the time when we started the connection attempt. If the connection
// was active for a long time, then next_retry_at will be in the past.
retry.next_retry_at =
wal_connection
.started_at
.checked_add_signed(chrono::Duration::milliseconds(
(retry.retry_duration_seconds * 1000.0) as i64,
));
if let Some(next) = &retry.next_retry_at {
if next > &now {
info!(
"Next connection retry to {:?} is at {}",
wal_connection.sk_id, next
);
}
}
let next_retry_duration =
retry.retry_duration_seconds * WALCONNECTION_RETRY_BACKOFF_MULTIPLIER;
// Clamp the next retry duration to the maximum allowed.
let next_retry_duration = next_retry_duration.min(WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS);
// Clamp the next retry duration to the minimum allowed.
let next_retry_duration = next_retry_duration.max(WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS);
retry.retry_duration_seconds = next_retry_duration;
}
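The backoff bookkeeping in drop_old_connection above boils down to: the next allowed retry time is the connection attempt's start time plus the current backoff, and the backoff itself is multiplied and clamped for the following attempt, so a connection that stayed up for a while effectively owes no wait. A small sketch of just that arithmetic, reusing the three constants defined above:

const WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS: f64 = 0.1;
const WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0;
const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5;

// Returns (delay after the attempt's start time, backoff to store for next time).
fn advance_backoff(current_backoff_seconds: f64) -> (f64, f64) {
    let delay_from_start = current_backoff_seconds;
    let next_backoff = (current_backoff_seconds * WALCONNECTION_RETRY_BACKOFF_MULTIPLIER).clamp(
        WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS,
        WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS,
    );
    (delay_from_start, next_backoff)
}

fn main() {
    // 0.1s, 0.15s, 0.225s, ... capped at 15s
    let (_, next) = advance_backoff(WALCONNECTION_RETRY_MIN_BACKOFF_SECONDS);
    assert!((next - 0.15).abs() < 1e-9);
}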
/// Returns time needed to wait to have a new candidate for WAL streaming.
fn time_until_next_retry(&self) -> Option<Duration> {
let now = Utc::now().naive_utc();
let next_retry_at = self
.wal_connection_retries
.values()
.filter_map(|retry| retry.next_retry_at)
.filter(|next_retry_at| next_retry_at > &now)
.min();
next_retry_at.and_then(|next_retry_at| (next_retry_at - now).to_std().ok())
}
/// Adds another etcd timeline into the state, if it's more recent than the one already added there for the same key.
fn register_timeline_update(&mut self, timeline_update: BrokerUpdate<SkTimelineInfo>) {
match self
@@ -447,16 +361,14 @@ impl WalreceiverState {
/// Cleans up stale etcd records and checks the rest for the new connection candidate.
/// Returns a new candidate, if the current state is absent or somewhat lagging, `None` otherwise.
/// The current rules for approving new candidates:
/// * pick a candidate different from the connected safekeeper with biggest `commit_lsn` and lowest failed connection attempts
/// * pick from the input data from etcd for currently connected safekeeper (if any)
/// * out of the rest input entries, pick one with biggest `commit_lsn` that's later than pageserver's latest Lsn for the timeline
/// * if there's no such entry, no new candidate found, abort
/// * otherwise check if the candidate is much better than the current one
///
/// To understand exact rules for determining if the candidate is better than the current one, refer to this function's implementation.
/// General rules are following:
/// * if connected safekeeper is not present, pick the candidate
/// * if we haven't received any updates for some time, pick the candidate
/// * if the candidate commit_lsn is much higher than the current one, pick the candidate
/// * if connected safekeeper stopped sending us new WAL which is available on other safekeeper, pick the candidate
/// * check the current connection time data for staleness, reconnect if stale
/// * otherwise, check if etcd updates contain currently connected safekeeper
/// * if not, that means no WAL updates happened after certain time (either none since the connection time or none since the last event after the connection)
/// Reconnect if the time exceeds the threshold.
/// * if there's one, compare its Lsn with the other candidate's, reconnect if candidate's over threshold
///
/// This way we ensure to keep up with the most up-to-date safekeeper and don't try to jump from one safekeeper to another too frequently.
/// Both thresholds are configured per tenant.
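Part of the comment above describes selection by failed connection attempts: among the applicable candidates, keep those with the fewest recorded attempts and, of those, take the one with the highest commit LSN (select_connection_candidate further down implements this over the etcd data). A compact, self-contained illustration of just that rule, with plain integers standing in for the safekeeper and LSN types:

use std::collections::HashMap;

type NodeId = u64;

// Fewest failed attempts wins; ties are broken by the largest commit LSN.
fn pick_candidate(
    candidates: &HashMap<NodeId, u64>, // node -> commit_lsn
    attempts: &HashMap<NodeId, u32>,   // node -> failed connection attempts
) -> Option<NodeId> {
    let min_attempts = candidates
        .keys()
        .map(|id| attempts.get(id).copied().unwrap_or(0))
        .min()?;
    candidates
        .iter()
        .filter(|(id, _)| attempts.get(*id).copied().unwrap_or(0) == min_attempts)
        .max_by_key(|(_, lsn)| **lsn)
        .map(|(id, _)| *id)
}

fn main() {
    let candidates = HashMap::from([(0, 2_000), (1, 1_500)]);
    let attempts = HashMap::from([(0, 3)]); // node 0 already failed three times
    assert_eq!(pick_candidate(&candidates, &attempts), Some(1));
}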
@@ -472,128 +384,53 @@ impl WalreceiverState {
let now = Utc::now().naive_utc();
if let Ok(latest_interaciton) =
(now - existing_wal_connection.status.latest_connection_update).to_std()
(now - existing_wal_connection.latest_connection_update).to_std()
{
// Drop connection if we haven't received keepalive message for a while.
if latest_interaciton > self.wal_connect_timeout {
if latest_interaciton > self.lagging_wal_timeout {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connstr: new_wal_source_connstr,
reason: ReconnectReason::NoKeepAlives {
last_keep_alive: Some(
existing_wal_connection.status.latest_connection_update,
reason: ReconnectReason::NoWalTimeout {
last_wal_interaction: Some(
existing_wal_connection.latest_connection_update,
),
check_time: now,
threshold: self.wal_connect_timeout,
threshold: self.lagging_wal_timeout,
},
});
}
}
if !existing_wal_connection.status.is_connected {
// We haven't connected yet and we shouldn't switch until connection timeout (condition above).
return None;
}
if let Some(current_commit_lsn) = existing_wal_connection.status.commit_lsn {
let new_commit_lsn = new_safekeeper_etcd_data.commit_lsn.unwrap_or(Lsn(0));
// Check if the new candidate has much more WAL than the current one.
match new_commit_lsn.0.checked_sub(current_commit_lsn.0) {
Some(new_sk_lsn_advantage) => {
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connstr: new_wal_source_connstr,
reason: ReconnectReason::LaggingWal {
current_commit_lsn,
new_commit_lsn,
threshold: self.max_lsn_wal_lag,
},
});
match self.wal_stream_candidates.get(&connected_sk_node) {
Some(current_connection_etcd_data) => {
let new_lsn = new_safekeeper_etcd_data.commit_lsn.unwrap_or(Lsn(0));
let current_lsn = current_connection_etcd_data
.timeline
.commit_lsn
.unwrap_or(Lsn(0));
match new_lsn.0.checked_sub(current_lsn.0)
{
Some(new_sk_lsn_advantage) => {
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
return Some(
NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connstr: new_wal_source_connstr,
reason: ReconnectReason::LaggingWal { current_lsn, new_lsn, threshold: self.max_lsn_wal_lag },
});
}
}
None => debug!("Best SK candidate has its commit Lsn behind the current timeline's latest consistent Lsn"),
}
}
None => debug!(
"Best SK candidate has its commit_lsn behind connected SK's commit_lsn"
),
}
}
let current_lsn = match existing_wal_connection.status.streaming_lsn {
Some(lsn) => lsn,
None => self.local_timeline.get_last_record_lsn(),
};
let current_commit_lsn = existing_wal_connection
.status
.commit_lsn
.unwrap_or(current_lsn);
let candidate_commit_lsn = new_safekeeper_etcd_data.commit_lsn.unwrap_or(Lsn(0));
// Keep discovered_new_wal only if connected safekeeper has not caught up yet.
let mut discovered_new_wal = existing_wal_connection
.discovered_new_wal
.filter(|new_wal| new_wal.lsn > current_commit_lsn);
if discovered_new_wal.is_none() {
// Check if the new candidate has more WAL than the current one.
// If the new candidate has more WAL than the current one, we consider switching to the new candidate.
discovered_new_wal = if candidate_commit_lsn > current_commit_lsn {
trace!(
"New candidate has commit_lsn {}, higher than current_commit_lsn {}",
candidate_commit_lsn,
current_commit_lsn
);
Some(NewCommittedWAL {
lsn: candidate_commit_lsn,
discovered_at: Utc::now().naive_utc(),
None => {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connstr: new_wal_source_connstr,
reason: ReconnectReason::NoEtcdDataForExistingConnection,
})
} else {
None
};
}
let waiting_for_new_lsn_since = if current_lsn < current_commit_lsn {
// Connected safekeeper has more WAL, but we haven't received updates for some time.
trace!(
"Connected safekeeper has more WAL, but we haven't received updates for {:?}. current_lsn: {}, current_commit_lsn: {}",
(now - existing_wal_connection.status.latest_wal_update).to_std(),
current_lsn,
current_commit_lsn
);
Some(existing_wal_connection.status.latest_wal_update)
} else {
discovered_new_wal.as_ref().map(|new_wal| {
// We know that new WAL is available on other safekeeper, but connected safekeeper doesn't have it.
new_wal
.discovered_at
.max(existing_wal_connection.status.latest_wal_update)
})
};
// If we haven't received any WAL updates for a while and candidate has more WAL, switch to it.
if let Some(waiting_for_new_lsn_since) = waiting_for_new_lsn_since {
if let Ok(waiting_for_new_wal) = (now - waiting_for_new_lsn_since).to_std() {
if candidate_commit_lsn > current_commit_lsn
&& waiting_for_new_wal > self.lagging_wal_timeout
{
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connstr: new_wal_source_connstr,
reason: ReconnectReason::NoWalTimeout {
current_lsn,
current_commit_lsn,
candidate_commit_lsn,
last_wal_interaction: Some(
existing_wal_connection.status.latest_wal_update,
),
check_time: now,
threshold: self.lagging_wal_timeout,
},
});
}
}
}
self.wal_connection.as_mut().unwrap().discovered_new_wal = discovered_new_wal;
}
None => {
let (new_sk_id, _, new_wal_source_connstr) =
@@ -613,36 +450,52 @@ impl WalreceiverState {
/// Optionally, omits the given node, to support gracefully switching from a healthy safekeeper to another.
///
/// The candidate that is chosen:
/// * has no pending retry cooldown
/// * has greatest commit_lsn among the ones that are left
/// * has fewest connection attempts from pageserver to safekeeper node (reset every time the WAL replication feedback is sent)
/// * has greatest data Lsn among the ones that are left
///
/// NOTE:
/// We evict timeline data received from etcd based on time passed since it was registered, along with its connection attempts values, but
/// otherwise to reset the connection attempts, a successful connection to that node is needed.
/// That won't happen until all nodes with fewer connection attempts have been connected to first, which might leave the sk node with the more advanced state ignored.
fn select_connection_candidate(
&self,
node_to_omit: Option<NodeId>,
) -> Option<(NodeId, &SkTimelineInfo, String)> {
self.applicable_connection_candidates()
let all_candidates = self
.applicable_connection_candidates()
.filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)
.collect::<Vec<_>>();
let smallest_attempts_allowed = all_candidates
.iter()
.map(|(sk_id, _, _)| {
self.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.min()?;
all_candidates
.into_iter()
.filter(|(sk_id, _, _)| {
smallest_attempts_allowed
>= self
.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.max_by_key(|(_, info, _)| info.commit_lsn)
}
/// Returns a list of safekeepers that have valid info and are ready for connection.
/// Some safekeepers are filtered by the retry cooldown.
fn applicable_connection_candidates(
&self,
) -> impl Iterator<Item = (NodeId, &SkTimelineInfo, String)> {
let now = Utc::now().naive_utc();
self.wal_stream_candidates
.iter()
.filter(|(_, info)| info.timeline.commit_lsn.is_some())
.filter(move |(sk_id, _)| {
let next_retry_at = self
.wal_connection_retries
.get(sk_id)
.and_then(|retry_info| {
retry_info.next_retry_at
});
next_retry_at.is_none() || next_retry_at.unwrap() <= now
.filter(|(_, etcd_info)| {
etcd_info.timeline.commit_lsn > Some(self.local_timeline.get_last_record_lsn())
})
.filter_map(|(sk_id, etcd_info)| {
let info = &etcd_info.timeline;
@@ -659,7 +512,6 @@ impl WalreceiverState {
})
}
/// Remove candidates which haven't sent etcd updates for a while.
fn cleanup_old_candidates(&mut self) {
let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());
@@ -678,7 +530,7 @@ impl WalreceiverState {
});
for node_id in node_ids_to_remove {
self.wal_connection_retries.remove(&node_id);
self.wal_connection_attempts.remove(&node_id);
}
}
}
@@ -694,24 +546,17 @@ struct NewWalConnectionCandidate {
#[derive(Debug, PartialEq, Eq)]
enum ReconnectReason {
NoExistingConnection,
NoEtcdDataForExistingConnection,
LaggingWal {
current_commit_lsn: Lsn,
new_commit_lsn: Lsn,
current_lsn: Lsn,
new_lsn: Lsn,
threshold: NonZeroU64,
},
NoWalTimeout {
current_lsn: Lsn,
current_commit_lsn: Lsn,
candidate_commit_lsn: Lsn,
last_wal_interaction: Option<NaiveDateTime>,
check_time: NaiveDateTime,
threshold: Duration,
},
NoKeepAlives {
last_keep_alive: Option<NaiveDateTime>,
check_time: NaiveDateTime,
threshold: Duration,
},
}
fn wal_stream_connection_string(
@@ -735,6 +580,8 @@ fn wal_stream_connection_string(
#[cfg(test)]
mod tests {
use std::time::SystemTime;
use crate::repository::{
repo_harness::{RepoHarness, TIMELINE_ID},
Repository,
@@ -811,7 +658,7 @@ mod tests {
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
etcd_version: 0,
latest_update: delay_over_threshold,
@@ -837,27 +684,22 @@ mod tests {
let connected_sk_id = NodeId(0);
let current_lsn = 100_000;
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
latest_connection_update: now,
latest_wal_update: now,
commit_lsn: Some(Lsn(current_lsn)),
streaming_lsn: Some(Lsn(current_lsn)),
};
state.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: connected_sk_id,
status: connection_status.clone(),
latest_connection_update: now,
connection_task: TaskHandle::spawn(move |sender, _| async move {
sender
.send(TaskEvent::NewEvent(connection_status.clone()))
.send(TaskEvent::NewEvent(ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: 1,
ps_applylsn: current_lsn,
ps_flushlsn: 1,
ps_replytime: SystemTime::now(),
}))
.ok();
Ok(())
}),
discovered_new_wal: None,
});
state.wal_stream_candidates = HashMap::from([
(
@@ -1068,13 +910,7 @@ mod tests {
},
),
]);
state.wal_connection_retries = HashMap::from([(
NodeId(0),
RetryInfo {
next_retry_at: now.checked_add_signed(chrono::Duration::hours(1)),
retry_duration_seconds: WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS,
},
)]);
state.wal_connection_attempts = HashMap::from([(NodeId(0), 1), (NodeId(1), 0)]);
let candidate_with_less_errors = state
.next_connection_candidate()
@@ -1082,12 +918,71 @@ mod tests {
assert_eq!(
candidate_with_less_errors.safekeeper_id,
NodeId(1),
"Should select the node with no pending retry cooldown"
"Should select the node with less connection errors"
);
Ok(())
}
#[tokio::test]
async fn connection_no_etcd_data_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("connection_no_etcd_data_candidate")?;
let mut state = dummy_state(&harness);
let now = Utc::now().naive_utc();
let current_lsn = Lsn(100_000).align();
let connected_sk_id = NodeId(0);
let other_sk_id = NodeId(connected_sk_id.0 + 1);
state.wal_connection = Some(WalConnection {
sk_id: connected_sk_id,
latest_connection_update: now,
connection_task: TaskHandle::spawn(move |sender, _| async move {
sender
.send(TaskEvent::NewEvent(ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: current_lsn.0,
ps_applylsn: 1,
ps_flushlsn: 1,
ps_replytime: SystemTime::now(),
}))
.ok();
Ok(())
}),
});
state.wal_stream_candidates = HashMap::from([(
other_sk_id,
EtcdSkTimeline {
timeline: SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(Lsn(1 + state.max_lsn_wal_lag.get())),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
etcd_version: 0,
latest_update: now,
},
)]);
let only_candidate = state
.next_connection_candidate()
.expect("Expected one candidate selected out of the only data option, but got none");
assert_eq!(only_candidate.safekeeper_id, other_sk_id);
assert_eq!(
only_candidate.reason,
ReconnectReason::NoEtcdDataForExistingConnection,
"Should select new safekeeper due to missing etcd data, even if there's an existing connection with this safekeeper"
);
assert!(only_candidate
.wal_source_connstr
.contains(DUMMY_SAFEKEEPER_CONNSTR));
Ok(())
}
#[tokio::test]
async fn lsn_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("lsn_wal_over_threshcurrent_candidate")?;
@@ -1098,26 +993,21 @@ mod tests {
let connected_sk_id = NodeId(0);
let new_lsn = Lsn(current_lsn.0 + state.max_lsn_wal_lag.get() + 1);
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
latest_connection_update: now,
latest_wal_update: now,
commit_lsn: Some(current_lsn),
streaming_lsn: Some(current_lsn),
};
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: connected_sk_id,
status: connection_status.clone(),
latest_connection_update: now,
connection_task: TaskHandle::spawn(move |sender, _| async move {
sender
.send(TaskEvent::NewEvent(connection_status.clone()))
.send(TaskEvent::NewEvent(ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: current_lsn.0,
ps_applylsn: 1,
ps_flushlsn: 1,
ps_replytime: SystemTime::now(),
}))
.ok();
Ok(())
}),
discovered_new_wal: None,
});
state.wal_stream_candidates = HashMap::from([
(
@@ -1162,8 +1052,8 @@ mod tests {
assert_eq!(
over_threshcurrent_candidate.reason,
ReconnectReason::LaggingWal {
current_commit_lsn: current_lsn,
new_commit_lsn: new_lsn,
current_lsn,
new_lsn,
threshold: state.max_lsn_wal_lag
},
"Should select bigger WAL safekeeper if it starts to lag enough"
@@ -1176,36 +1066,31 @@ mod tests {
}
#[tokio::test]
async fn timeout_connection_threshhold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_connection_threshhold_current_candidate")?;
async fn timeout_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_wal_over_threshhold_current_candidate")?;
let mut state = dummy_state(&harness);
let current_lsn = Lsn(100_000).align();
let now = Utc::now().naive_utc();
let wal_connect_timeout = chrono::Duration::from_std(state.wal_connect_timeout)?;
let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?;
let time_over_threshold =
Utc::now().naive_utc() - wal_connect_timeout - wal_connect_timeout;
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
latest_connection_update: time_over_threshold,
latest_wal_update: time_over_threshold,
commit_lsn: Some(current_lsn),
streaming_lsn: Some(current_lsn),
};
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: NodeId(1),
status: connection_status.clone(),
latest_connection_update: time_over_threshold,
connection_task: TaskHandle::spawn(move |sender, _| async move {
sender
.send(TaskEvent::NewEvent(connection_status.clone()))
.send(TaskEvent::NewEvent(ReplicationFeedback {
current_timeline_size: 1,
ps_writelsn: current_lsn.0,
ps_applylsn: 1,
ps_flushlsn: 1,
ps_replytime: SystemTime::now(),
}))
.ok();
Ok(())
}),
discovered_new_wal: None,
});
state.wal_stream_candidates = HashMap::from([(
NodeId(0),
@@ -1230,12 +1115,12 @@ mod tests {
assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(0));
match over_threshcurrent_candidate.reason {
ReconnectReason::NoKeepAlives {
last_keep_alive,
ReconnectReason::NoWalTimeout {
last_wal_interaction,
threshold,
..
} => {
assert_eq!(last_keep_alive, Some(time_over_threshold));
assert_eq!(last_wal_interaction, Some(time_over_threshold));
assert_eq!(threshold, state.lagging_wal_timeout);
}
unexpected => panic!("Unexpected reason: {unexpected:?}"),
@@ -1248,35 +1133,20 @@ mod tests {
}
#[tokio::test]
async fn timeout_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_wal_over_threshhold_current_candidate")?;
async fn timeout_connection_over_threshhold_current_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("timeout_connection_over_threshhold_current_candidate")?;
let mut state = dummy_state(&harness);
let current_lsn = Lsn(100_000).align();
let new_lsn = Lsn(100_100).align();
let now = Utc::now().naive_utc();
let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?;
let time_over_threshold =
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
let connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: true,
latest_connection_update: now,
latest_wal_update: time_over_threshold,
commit_lsn: Some(current_lsn),
streaming_lsn: Some(current_lsn),
};
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: NodeId(1),
status: connection_status,
latest_connection_update: time_over_threshold,
connection_task: TaskHandle::spawn(move |_, _| async move { Ok(()) }),
discovered_new_wal: Some(NewCommittedWAL {
discovered_at: time_over_threshold,
lsn: new_lsn,
}),
});
state.wal_stream_candidates = HashMap::from([(
NodeId(0),
@@ -1284,7 +1154,7 @@ mod tests {
timeline: SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(new_lsn),
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
@@ -1302,16 +1172,10 @@ mod tests {
assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(0));
match over_threshcurrent_candidate.reason {
ReconnectReason::NoWalTimeout {
current_lsn,
current_commit_lsn,
candidate_commit_lsn,
last_wal_interaction,
threshold,
..
} => {
assert_eq!(current_lsn, current_lsn);
assert_eq!(current_commit_lsn, current_lsn);
assert_eq!(candidate_commit_lsn, new_lsn);
assert_eq!(last_wal_interaction, Some(time_over_threshold));
assert_eq!(threshold, state.lagging_wal_timeout);
}
@@ -1338,10 +1202,10 @@ mod tests {
.expect("Failed to create an empty timeline for dummy wal connection manager"),
wal_connect_timeout: Duration::from_secs(1),
lagging_wal_timeout: Duration::from_secs(1),
max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
max_lsn_wal_lag: NonZeroU64::new(1).unwrap(),
wal_connection: None,
wal_stream_candidates: HashMap::new(),
wal_connection_retries: HashMap::new(),
wal_connection_attempts: HashMap::new(),
}
}
}


@@ -8,7 +8,6 @@ use std::{
use anyhow::{bail, ensure, Context};
use bytes::BytesMut;
use chrono::{NaiveDateTime, Utc};
use fail::fail_point;
use futures::StreamExt;
use postgres::{SimpleQueryMessage, SimpleQueryRow};
@@ -27,33 +26,15 @@ use crate::{
walingest::WalIngest,
walrecord::DecodedWALRecord,
};
use postgres_ffi::v14::waldecoder::WalStreamDecoder;
use postgres_ffi::waldecoder::WalStreamDecoder;
use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};
/// Status of the connection.
#[derive(Debug, Clone)]
pub struct WalConnectionStatus {
/// If we were able to initiate a postgres connection, this means that safekeeper process is at least running.
pub is_connected: bool,
/// Defines a healthy connection as one on which pageserver received WAL from safekeeper
/// and is able to process it in walingest without errors.
pub has_processed_wal: bool,
/// Connection establishment time or the timestamp of a latest connection message received.
pub latest_connection_update: NaiveDateTime,
/// Time of the latest WAL message received.
pub latest_wal_update: NaiveDateTime,
/// Latest WAL update contained WAL up to this LSN. Next WAL message with start from that LSN.
pub streaming_lsn: Option<Lsn>,
/// Latest commit_lsn received from the safekeeper. Can be zero if no message has been received yet.
pub commit_lsn: Option<Lsn>,
}
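
The reconnection tests earlier in this diff treat a connection as stale once its latest WAL or connection update is older than lagging_wal_timeout. A minimal sketch of such a staleness check over the status fields above, using chrono as the surrounding code does; the helper name is an editor's stand-in, not part of the diff.

use chrono::{NaiveDateTime, Utc};

// Returns true when no WAL has been received for longer than the configured timeout.
fn wal_update_is_stale(latest_wal_update: NaiveDateTime, lagging_wal_timeout: chrono::Duration) -> bool {
    Utc::now().naive_utc() - latest_wal_update > lagging_wal_timeout
}

fn main() {
    let five_secs_ago = Utc::now().naive_utc() - chrono::Duration::seconds(5);
    assert!(wal_update_is_stale(five_secs_ago, chrono::Duration::seconds(1)));
    assert!(!wal_update_is_stale(five_secs_ago, chrono::Duration::seconds(60)));
}
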
/// Open a connection to the given safekeeper and receive WAL, sending back progress
/// messages as we go.
pub async fn handle_walreceiver_connection(
id: ZTenantTimelineId,
wal_source_connstr: &str,
events_sender: &watch::Sender<TaskEvent<WalConnectionStatus>>,
events_sender: &watch::Sender<TaskEvent<ReplicationFeedback>>,
mut cancellation: watch::Receiver<()>,
connect_timeout: Duration,
) -> anyhow::Result<()> {
@@ -68,26 +49,12 @@ pub async fn handle_walreceiver_connection(
.await
.context("Timed out while waiting for walreceiver connection to open")?
.context("Failed to open walreceiver conection")?;
info!("connected!");
let mut connection_status = WalConnectionStatus {
is_connected: true,
has_processed_wal: false,
latest_connection_update: Utc::now().naive_utc(),
latest_wal_update: Utc::now().naive_utc(),
streaming_lsn: None,
commit_lsn: None,
};
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped right after connection init, aborting the connection: {e}");
return Ok(());
}
// The connection object performs the actual communication with the database,
// so spawn it off to run on its own.
let mut connection_cancellation = cancellation.clone();
tokio::spawn(
async move {
info!("connected!");
select! {
connection_result = connection => match connection_result{
Ok(()) => info!("Walreceiver db connection closed"),
@@ -117,7 +84,6 @@ pub async fn handle_walreceiver_connection(
let identify = identify_system(&mut replication_client).await?;
info!("{identify:?}");
let end_of_wal = Lsn::from(u64::from(identify.xlogpos));
let mut caught_up = false;
let ZTenantTimelineId {
@@ -125,14 +91,6 @@ pub async fn handle_walreceiver_connection(
timeline_id,
} = id;
connection_status.latest_connection_update = Utc::now().naive_utc();
connection_status.latest_wal_update = Utc::now().naive_utc();
connection_status.commit_lsn = Some(end_of_wal);
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}");
return Ok(());
}
let (repo, timeline) = tokio::task::spawn_blocking(move || {
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
.with_context(|| format!("no repository found for tenant {tenant_id}"))?;
@@ -160,7 +118,7 @@ pub async fn handle_walreceiver_connection(
// There might be some padding after the last full record, skip it.
startpoint += startpoint.calc_padding(8u32);
info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}...");
info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, server is at {end_of_wal}...");
let query = format!("START_REPLICATION PHYSICAL {startpoint}");
@@ -182,33 +140,6 @@ pub async fn handle_walreceiver_connection(
}
} {
let replication_message = replication_message?;
let now = Utc::now().naive_utc();
let last_rec_lsn_before_msg = last_rec_lsn;
// Update the connection status before processing the message. If the message processing
// fails (e.g. in walingest), we still want to know latests LSNs from the safekeeper.
match &replication_message {
ReplicationMessage::XLogData(xlog_data) => {
connection_status.latest_connection_update = now;
connection_status.commit_lsn = Some(Lsn::from(xlog_data.wal_end()));
connection_status.streaming_lsn = Some(Lsn::from(
xlog_data.wal_start() + xlog_data.data().len() as u64,
));
if !xlog_data.data().is_empty() {
connection_status.latest_wal_update = now;
}
}
ReplicationMessage::PrimaryKeepAlive(keepalive) => {
connection_status.latest_connection_update = now;
connection_status.commit_lsn = Some(Lsn::from(keepalive.wal_end()));
}
&_ => {}
};
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped, aborting the connection: {e}");
return Ok(());
}
let status_update = match replication_message {
ReplicationMessage::XLogData(xlog_data) => {
// Pass the WAL data to the decoder, and see if we can decode
@@ -267,15 +198,6 @@ pub async fn handle_walreceiver_connection(
_ => None,
};
if !connection_status.has_processed_wal && last_rec_lsn > last_rec_lsn_before_msg {
// We have successfully processed at least one WAL record.
connection_status.has_processed_wal = true;
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
warn!("Wal connection event listener dropped, aborting the connection: {e}");
return Ok(());
}
}
let timeline_to_check = Arc::clone(&timeline);
tokio::task::spawn_blocking(move || timeline_to_check.check_checkpoint_distance())
.await
@@ -335,6 +257,10 @@ pub async fn handle_walreceiver_connection(
.as_mut()
.zenith_status_update(data.len() as u64, &data)
.await?;
if let Err(e) = events_sender.send(TaskEvent::NewEvent(zenith_status_update)) {
warn!("Wal connection event listener dropped, aborting the connection: {e}");
return Ok(());
}
}
}


@@ -3,10 +3,9 @@
//!
use anyhow::Result;
use bytes::{Buf, Bytes};
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils::{TimestampTz, XLOG_SIZE_OF_XLOG_RECORD};
use postgres_ffi::v14::XLogRecord;
use postgres_ffi::BLCKSZ;
use postgres_ffi::pg_constants;
use postgres_ffi::xlog_utils::{TimestampTz, XLOG_SIZE_OF_XLOG_RECORD};
use postgres_ffi::XLogRecord;
use postgres_ffi::{BlockNumber, OffsetNumber};
use postgres_ffi::{MultiXactId, MultiXactOffset, MultiXactStatus, Oid, TransactionId};
use serde::{Deserialize, Serialize};
@@ -619,7 +618,7 @@ pub fn decode_wal_record(
blk.hole_length = 0;
}
} else {
blk.hole_length = BLCKSZ - blk.bimg_len;
blk.hole_length = pg_constants::BLCKSZ - blk.bimg_len;
}
datatotal += blk.bimg_len as u32;
blocks_total_len += blk.bimg_len as u32;
@@ -629,7 +628,9 @@ pub fn decode_wal_record(
* bimg_len < BLCKSZ if the HAS_HOLE flag is set.
*/
if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0
&& (blk.hole_offset == 0 || blk.hole_length == 0 || blk.bimg_len == BLCKSZ)
&& (blk.hole_offset == 0
|| blk.hole_length == 0
|| blk.bimg_len == pg_constants::BLCKSZ)
{
// TODO
/*
@@ -666,7 +667,7 @@ pub fn decode_wal_record(
* flag is set.
*/
if (blk.bimg_info & pg_constants::BKPIMAGE_IS_COMPRESSED == 0)
&& blk.bimg_len == BLCKSZ
&& blk.bimg_len == pg_constants::BLCKSZ
{
// TODO
/*
@@ -684,7 +685,7 @@ pub fn decode_wal_record(
*/
if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0
&& blk.bimg_info & pg_constants::BKPIMAGE_IS_COMPRESSED == 0
&& blk.bimg_len != BLCKSZ
&& blk.bimg_len != pg_constants::BLCKSZ
{
// TODO
/*


@@ -44,12 +44,11 @@ use crate::reltag::{RelTag, SlruKind};
use crate::repository::Key;
use crate::walrecord::ZenithWalRecord;
use metrics::{register_histogram, register_int_counter, Histogram, IntCounter};
use postgres_ffi::v14::nonrelfile_utils::{
mx_offset_to_flags_bitshift, mx_offset_to_flags_offset, mx_offset_to_member_offset,
transaction_id_set_status,
};
use postgres_ffi::v14::pg_constants;
use postgres_ffi::BLCKSZ;
use postgres_ffi::nonrelfile_utils::mx_offset_to_flags_bitshift;
use postgres_ffi::nonrelfile_utils::mx_offset_to_flags_offset;
use postgres_ffi::nonrelfile_utils::mx_offset_to_member_offset;
use postgres_ffi::nonrelfile_utils::transaction_id_set_status;
use postgres_ffi::pg_constants;
///
/// `RelTag` + block number (`blknum`) gives us a unique id of the page in the cluster.
@@ -83,6 +82,24 @@ pub trait WalRedoManager: Send + Sync {
) -> Result<Bytes, WalRedoError>;
}
///
/// A dummy WAL Redo Manager implementation that doesn't allow replaying
/// anything. Currently used during bootstrapping (zenith init), to create
/// a Repository object without launching the real WAL redo process.
///
pub struct DummyRedoManager {}
impl crate::walredo::WalRedoManager for DummyRedoManager {
fn request_redo(
&self,
_key: Key,
_lsn: Lsn,
_base_img: Option<Bytes>,
_records: Vec<(Lsn, ZenithWalRecord)>,
) -> Result<Bytes, WalRedoError> {
Err(WalRedoError::InvalidState)
}
}
// Metrics collected on WAL redo operations
//
// We collect the time spent in actual WAL redo ('redo'), and time waiting
@@ -418,10 +435,10 @@ impl PostgresRedoManager {
}
// Append the timestamp
if page.len() == BLCKSZ as usize + 8 {
page.truncate(BLCKSZ as usize);
if page.len() == pg_constants::BLCKSZ as usize + 8 {
page.truncate(pg_constants::BLCKSZ as usize);
}
if page.len() == BLCKSZ as usize {
if page.len() == pg_constants::BLCKSZ as usize {
page.extend_from_slice(&timestamp.to_be_bytes());
} else {
warn!(
@@ -742,7 +759,7 @@ impl PostgresRedoProcess {
// We expect the WAL redo process to respond with an 8k page image. We read it
// into this buffer.
let mut resultbuf = vec![0; BLCKSZ.into()];
let mut resultbuf = vec![0; pg_constants::BLCKSZ.into()];
let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
// Prepare for calling poll()
@@ -755,7 +772,7 @@ impl PostgresRedoProcess {
// We do three things simultaneously: send the old base image and WAL records to
// the child process's stdin, read the result from child's stdout, and forward any logging
// information that the child writes to its stderr to the page server's log.
while nresult < BLCKSZ.into() {
while nresult < pg_constants::BLCKSZ.into() {
// If we have more data to write, wake up if 'stdin' becomes writeable or
// we have data to read. Otherwise only wake up if there's data to read.
let nfds = if nwrite < writebuf.len() { 3 } else { 2 };

49 poetry.lock (generated): file diff suppressed because one or more lines are too long


@@ -7,7 +7,6 @@ edition = "2021"
anyhow = "1.0"
async-trait = "0.1"
base64 = "0.13.0"
bstr = "0.2.17"
bytes = { version = "1.0.1", features = ['serde'] }
clap = "3.0"
futures = "0.3.13"


@@ -12,7 +12,7 @@ use password_hack::PasswordHackPayload;
mod flow;
pub use flow::*;
use crate::error::UserFacingError;
use crate::{error::UserFacingError, waiters};
use std::io;
use thiserror::Error;
@@ -22,54 +22,51 @@ pub type Result<T> = std::result::Result<T, AuthError>;
/// Common authentication error.
#[derive(Debug, Error)]
pub enum AuthErrorImpl {
// This will be dropped in the future.
/// Authentication error reported by the console.
#[error(transparent)]
Legacy(#[from] backend::LegacyAuthError),
Console(#[from] backend::AuthError),
#[error(transparent)]
Link(#[from] backend::LinkAuthError),
GetAuthInfo(#[from] backend::console::ConsoleAuthError),
#[error(transparent)]
GetAuthInfo(#[from] backend::GetAuthInfoError),
#[error(transparent)]
WakeCompute(#[from] backend::WakeComputeError),
/// SASL protocol errors (includes [SCRAM](crate::scram)).
#[error(transparent)]
Sasl(#[from] crate::sasl::Error),
#[error("Unsupported authentication method: {0}")]
BadAuthMethod(Box<str>),
#[error("Malformed password message: {0}")]
MalformedPassword(&'static str),
#[error(
"Project name is not specified. \
Either please upgrade the postgres client library (libpq) for SNI support \
or pass the project name as a parameter: '&options=project%3D<project-name>'. \
See more at https://neon.tech/sni"
)]
MissingProjectName,
/// Errors produced by e.g. [`crate::stream::PqStream`].
/// Errors produced by [`crate::stream::PqStream`].
#[error(transparent)]
Io(#[from] io::Error),
}
impl AuthErrorImpl {
pub fn auth_failed(msg: impl Into<String>) -> Self {
Self::Console(backend::AuthError::auth_failed(msg))
}
}
impl From<waiters::RegisterError> for AuthErrorImpl {
fn from(e: waiters::RegisterError) -> Self {
Self::Console(backend::AuthError::from(e))
}
}
impl From<waiters::WaitError> for AuthErrorImpl {
fn from(e: waiters::WaitError) -> Self {
Self::Console(backend::AuthError::from(e))
}
}
#[derive(Debug, Error)]
#[error(transparent)]
pub struct AuthError(Box<AuthErrorImpl>);
impl AuthError {
pub fn bad_auth_method(name: impl Into<Box<str>>) -> Self {
AuthErrorImpl::BadAuthMethod(name.into()).into()
}
}
impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
fn from(e: E) -> Self {
impl<T> From<T> for AuthError
where
AuthErrorImpl: From<T>,
{
fn from(e: T) -> Self {
Self(Box::new(e.into()))
}
}
@@ -78,14 +75,10 @@ impl UserFacingError for AuthError {
fn to_string_client(&self) -> String {
use AuthErrorImpl::*;
match self.0.as_ref() {
Legacy(e) => e.to_string_client(),
Link(e) => e.to_string_client(),
Console(e) => e.to_string_client(),
GetAuthInfo(e) => e.to_string_client(),
WakeCompute(e) => e.to_string_client(),
Sasl(e) => e.to_string_client(),
BadAuthMethod(_) => self.to_string(),
MalformedPassword(_) => self.to_string(),
MissingProjectName => self.to_string(),
_ => "Internal error".to_string(),
}
}


@@ -1,13 +1,10 @@
mod link;
mod postgres;
mod link;
pub use link::LinkAuthError;
mod console;
pub use console::{GetAuthInfoError, WakeComputeError};
pub mod console;
mod legacy_console;
pub use legacy_console::LegacyAuthError;
pub use legacy_console::{AuthError, AuthErrorImpl};
use crate::{
auth::{self, AuthFlow, ClientCredentials},
@@ -86,7 +83,7 @@ impl From<DatabaseInfo> for tokio_postgres::Config {
/// * However, when we substitute `T` with [`ClientCredentials`],
/// this helps us provide the credentials only to those auth
/// backends which require them for the authentication process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BackendType<T> {
/// Legacy Cloud API (V1) + link auth.
LegacyConsole(T),


@@ -13,11 +13,21 @@ use std::future::Future;
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
const REQUEST_FAILED: &str = "Console request failed";
pub type Result<T> = std::result::Result<T, ConsoleAuthError>;
#[derive(Debug, Error)]
pub enum TransportError {
#[error("Console responded with a malformed JSON: {0}")]
pub enum ConsoleAuthError {
#[error(transparent)]
BadProjectName(#[from] auth::credentials::ClientCredsParseError),
// We shouldn't include the actual secret here.
#[error("Bad authentication secret")]
BadSecret,
#[error("Console responded with a malformed compute address: '{0}'")]
BadComputeAddress(String),
#[error("Console responded with a malformed JSON: '{0}'")]
BadResponse(#[from] serde_json::Error),
/// HTTP status (other than 200) returned by the console.
@@ -28,72 +38,19 @@ pub enum TransportError {
Io(#[from] std::io::Error),
}
impl UserFacingError for TransportError {
impl UserFacingError for ConsoleAuthError {
fn to_string_client(&self) -> String {
use TransportError::*;
use ConsoleAuthError::*;
match self {
HttpStatus(_) => self.to_string(),
_ => REQUEST_FAILED.to_owned(),
BadProjectName(e) => e.to_string_client(),
_ => "Internal error".to_string(),
}
}
}
// Helps eliminate graceless `.map_err` calls without introducing another ctor.
impl From<reqwest::Error> for TransportError {
fn from(e: reqwest::Error) -> Self {
io_error(e).into()
}
}
#[derive(Debug, Error)]
pub enum GetAuthInfoError {
// We shouldn't include the actual secret here.
#[error("Console responded with a malformed auth secret")]
BadSecret,
#[error(transparent)]
Transport(TransportError),
}
impl UserFacingError for GetAuthInfoError {
fn to_string_client(&self) -> String {
use GetAuthInfoError::*;
match self {
BadSecret => REQUEST_FAILED.to_owned(),
Transport(e) => e.to_string_client(),
}
}
}
impl<E: Into<TransportError>> From<E> for GetAuthInfoError {
fn from(e: E) -> Self {
Self::Transport(e.into())
}
}
#[derive(Debug, Error)]
pub enum WakeComputeError {
// We shouldn't show users the address even if it's broken.
#[error("Console responded with a malformed compute address: {0}")]
BadComputeAddress(String),
#[error(transparent)]
Transport(TransportError),
}
impl UserFacingError for WakeComputeError {
fn to_string_client(&self) -> String {
use WakeComputeError::*;
match self {
BadComputeAddress(_) => REQUEST_FAILED.to_owned(),
Transport(e) => e.to_string_client(),
}
}
}
impl<E: Into<TransportError>> From<E> for WakeComputeError {
fn from(e: E) -> Self {
Self::Transport(e.into())
impl From<&auth::credentials::ClientCredsParseError> for ConsoleAuthError {
fn from(e: &auth::credentials::ClientCredsParseError) -> Self {
ConsoleAuthError::BadProjectName(e.clone())
}
}
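
The comment above ("Helps eliminate graceless .map_err calls without introducing another ctor") refers to the blanket impl<E: Into<TransportError>> From<E> conversions in this hunk. A minimal self-contained sketch of the same layering pattern with stand-in error types; the file path used in main is hypothetical.

use std::io;

#[derive(Debug)]
enum Transport {
    Io(io::Error),
}

impl From<io::Error> for Transport {
    fn from(e: io::Error) -> Self {
        Transport::Io(e)
    }
}

#[derive(Debug)]
enum GetAuthInfo {
    Transport(Transport),
}

// Anything convertible into the transport error is automatically wrapped,
// so `?` can hop io::Error -> Transport -> GetAuthInfo without .map_err.
impl<E: Into<Transport>> From<E> for GetAuthInfo {
    fn from(e: E) -> Self {
        GetAuthInfo::Transport(e.into())
    }
}

fn read_secret() -> Result<String, GetAuthInfo> {
    Ok(std::fs::read_to_string("secret.txt")?)
}

fn main() {
    // "secret.txt" is a hypothetical path used only for this illustration.
    println!("{:?}", read_secret().map(|s| s.len()));
}
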
@@ -138,7 +95,7 @@ impl<'a> Api<'a> {
handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
}
async fn get_auth_info(&self) -> Result<AuthInfo, GetAuthInfoError> {
async fn get_auth_info(&self) -> Result<AuthInfo> {
let mut url = self.endpoint.clone();
url.path_segments_mut().push("proxy_get_role_secret");
url.query_pairs_mut()
@@ -148,20 +105,21 @@ impl<'a> Api<'a> {
// TODO: use a proper logger
println!("cplane request: {url}");
let resp = reqwest::get(url.into_inner()).await?;
let resp = reqwest::get(url.into_inner()).await.map_err(io_error)?;
if !resp.status().is_success() {
return Err(TransportError::HttpStatus(resp.status()).into());
return Err(ConsoleAuthError::HttpStatus(resp.status()));
}
let response: GetRoleSecretResponse = serde_json::from_str(&resp.text().await?)?;
let response: GetRoleSecretResponse =
serde_json::from_str(&resp.text().await.map_err(io_error)?)?;
scram::ServerSecret::parse(&response.role_secret)
scram::ServerSecret::parse(response.role_secret.as_str())
.map(AuthInfo::Scram)
.ok_or(GetAuthInfoError::BadSecret)
.ok_or(ConsoleAuthError::BadSecret)
}
/// Wake up the compute node and return the corresponding connection info.
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg> {
let mut url = self.endpoint.clone();
url.path_segments_mut().push("proxy_wake_compute");
url.query_pairs_mut()
@@ -170,16 +128,17 @@ impl<'a> Api<'a> {
// TODO: use a proper logger
println!("cplane request: {url}");
let resp = reqwest::get(url.into_inner()).await?;
let resp = reqwest::get(url.into_inner()).await.map_err(io_error)?;
if !resp.status().is_success() {
return Err(TransportError::HttpStatus(resp.status()).into());
return Err(ConsoleAuthError::HttpStatus(resp.status()));
}
let response: GetWakeComputeResponse = serde_json::from_str(&resp.text().await?)?;
let response: GetWakeComputeResponse =
serde_json::from_str(&resp.text().await.map_err(io_error)?)?;
// Unfortunately, ownership won't let us use `Option::ok_or` here.
let (host, port) = match parse_host_port(&response.address) {
None => return Err(WakeComputeError::BadComputeAddress(response.address)),
None => return Err(ConsoleAuthError::BadComputeAddress(response.address)),
Some(x) => x,
};
@@ -203,8 +162,8 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
) -> auth::Result<compute::NodeInfo>
where
GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
GetAuthInfo: Future<Output = Result<AuthInfo>>,
WakeCompute: Future<Output = Result<ComputeConnCfg>>,
{
let auth_info = get_auth_info(endpoint).await?;
@@ -212,7 +171,7 @@ where
let scram_keys = match auth_info {
AuthInfo::Md5(_) => {
// TODO: decide if we should support MD5 in api v2
return Err(auth::AuthError::bad_auth_method("MD5"));
return Err(auth::AuthErrorImpl::auth_failed("MD5 is not supported").into());
}
AuthInfo::Scram(secret) => {
let scram = auth::Scram(&secret);


@@ -14,7 +14,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
use utils::pq_proto::BeMessage as Be;
#[derive(Debug, Error)]
pub enum LegacyAuthError {
pub enum AuthErrorImpl {
/// Authentication error reported by the console.
#[error("Authentication failed: {0}")]
AuthFailed(String),
@@ -24,7 +24,7 @@ pub enum LegacyAuthError {
HttpStatus(reqwest::StatusCode),
#[error("Console responded with a malformed JSON: {0}")]
BadResponse(#[from] serde_json::Error),
MalformedResponse(#[from] serde_json::Error),
#[error(transparent)]
Transport(#[from] reqwest::Error),
@@ -36,10 +36,30 @@ pub enum LegacyAuthError {
WaiterWait(#[from] waiters::WaitError),
}
impl UserFacingError for LegacyAuthError {
#[derive(Debug, Error)]
#[error(transparent)]
pub struct AuthError(Box<AuthErrorImpl>);
impl AuthError {
/// Smart constructor for authentication error reported by `mgmt`.
pub fn auth_failed(msg: impl Into<String>) -> Self {
Self(Box::new(AuthErrorImpl::AuthFailed(msg.into())))
}
}
impl<T> From<T> for AuthError
where
AuthErrorImpl: From<T>,
{
fn from(e: T) -> Self {
Self(Box::new(e.into()))
}
}
impl UserFacingError for AuthError {
fn to_string_client(&self) -> String {
use LegacyAuthError::*;
match self {
use AuthErrorImpl::*;
match self.0.as_ref() {
AuthFailed(_) | HttpStatus(_) => self.to_string(),
_ => "Internal error".to_string(),
}
@@ -68,7 +88,7 @@ async fn authenticate_proxy_client(
md5_response: &str,
salt: &[u8; 4],
psql_session_id: &str,
) -> Result<DatabaseInfo, LegacyAuthError> {
) -> Result<DatabaseInfo, AuthError> {
let mut url = auth_endpoint.clone();
url.query_pairs_mut()
.append_pair("login", &creds.user)
@@ -82,17 +102,17 @@ async fn authenticate_proxy_client(
// TODO: leverage `reqwest::Client` to reuse connections
let resp = reqwest::get(url).await?;
if !resp.status().is_success() {
return Err(LegacyAuthError::HttpStatus(resp.status()));
return Err(AuthErrorImpl::HttpStatus(resp.status()).into());
}
let auth_info = serde_json::from_str(resp.text().await?.as_str())?;
let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text().await?.as_str())?;
println!("got auth info: {:?}", auth_info);
use ProxyAuthResponse::*;
let db_info = match auth_info {
Ready { conn_info } => conn_info,
Error { error } => return Err(LegacyAuthError::AuthFailed(error)),
NotReady { .. } => waiter.await?.map_err(LegacyAuthError::AuthFailed)?,
Error { error } => return Err(AuthErrorImpl::AuthFailed(error).into()),
NotReady { .. } => waiter.await?.map_err(AuthErrorImpl::AuthFailed)?,
};
Ok(db_info)
@@ -104,7 +124,7 @@ async fn handle_existing_user(
auth_endpoint: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
creds: &ClientCredentials,
) -> auth::Result<compute::NodeInfo> {
) -> Result<compute::NodeInfo, auth::AuthError> {
let psql_session_id = super::link::new_psql_session_id();
let md5_salt = rand::random();


@@ -1,34 +1,7 @@
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
use thiserror::Error;
use crate::{auth, compute, stream::PqStream};
use tokio::io::{AsyncRead, AsyncWrite};
use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};
#[derive(Debug, Error)]
pub enum LinkAuthError {
/// Authentication error reported by the console.
#[error("Authentication failed: {0}")]
AuthFailed(String),
#[error(transparent)]
WaiterRegister(#[from] waiters::RegisterError),
#[error(transparent)]
WaiterWait(#[from] waiters::WaitError),
#[error(transparent)]
Io(#[from] std::io::Error),
}
impl UserFacingError for LinkAuthError {
fn to_string_client(&self) -> String {
use LinkAuthError::*;
match self {
AuthFailed(_) => self.to_string(),
_ => "Internal error".to_string(),
}
}
}
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
format!(
concat![
@@ -61,7 +34,7 @@ pub async fn handle_user(
.await?;
// Wait for web console response (see `mgmt`)
waiter.await?.map_err(LinkAuthError::AuthFailed)
waiter.await?.map_err(auth::AuthErrorImpl::auth_failed)
})
.await?;


@@ -3,7 +3,7 @@
use crate::{
auth::{
self,
backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
backend::console::{self, AuthInfo, Result},
ClientCredentials,
},
compute::{self, ComputeConnCfg},
@@ -20,13 +20,6 @@ pub(super) struct Api<'a> {
creds: &'a ClientCredentials,
}
// Helps eliminate graceless `.map_err` calls without introducing another ctor.
impl From<tokio_postgres::Error> for TransportError {
fn from(e: tokio_postgres::Error) -> Self {
io_error(e).into()
}
}
impl<'a> Api<'a> {
/// Construct an API object containing the auth parameters.
pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Self {
@@ -43,16 +36,21 @@ impl<'a> Api<'a> {
}
/// This implementation fetches the auth info from a local postgres instance.
async fn get_auth_info(&self) -> Result<AuthInfo, GetAuthInfoError> {
async fn get_auth_info(&self) -> Result<AuthInfo> {
// Perhaps we could persist this connection, but then we'd have to
// write more code for reopening it if it got closed, which doesn't
// seem worth it.
let (client, connection) =
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls)
.await
.map_err(io_error)?;
tokio::spawn(connection);
let query = "select rolpassword from pg_catalog.pg_authid where rolname = $1";
let rows = client.query(query, &[&self.creds.user]).await?;
let rows = client
.query(query, &[&self.creds.user])
.await
.map_err(io_error)?;
match &rows[..] {
// We can't get a secret if there's no such user.
@@ -76,13 +74,13 @@ impl<'a> Api<'a> {
}))
})
// Putting the secret into this message is a security hazard!
.ok_or(GetAuthInfoError::BadSecret)
.ok_or(console::ConsoleAuthError::BadSecret)
}
}
}
/// We don't need to wake anything locally, so we just return the connection info.
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg> {
let mut config = ComputeConnCfg::new();
config
.host(self.endpoint.host_str().unwrap_or("localhost"))


@@ -75,12 +75,13 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
.strip_suffix(&[0])
.ok_or(AuthErrorImpl::MalformedPassword("missing terminator"))?;
let payload = PasswordHackPayload::parse(password)
// If we ended up here and the payload is malformed, it means that
// the user neither enabled SNI nor resorted to any other method
// for passing the project name we rely on. We should show them
// the most helpful error message and point to the documentation.
.ok_or(AuthErrorImpl::MissingProjectName)?;
// The so-called "password" should contain a base64-encoded json.
// We will use it later to route the client to their project.
let bytes = base64::decode(password)
.map_err(|_| AuthErrorImpl::MalformedPassword("bad encoding"))?;
let payload = serde_json::from_slice(&bytes)
.map_err(|_| AuthErrorImpl::MalformedPassword("invalid payload"))?;
Ok(payload)
}
@@ -97,7 +98,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
// Currently, the only supported SASL method is SCRAM.
if !scram::METHODS.contains(&sasl.method) {
return Err(super::AuthError::bad_auth_method(sasl.method));
return Err(AuthErrorImpl::auth_failed("method not supported").into());
}
let secret = self.state.0;


@@ -1,46 +1,102 @@
//! Payload for ad hoc authentication method for clients that don't support SNI.
//! See the `impl` for [`super::backend::BackendType<ClientCredentials>`].
//! Read more: <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.
//! UPDATE (Mon Aug 8 13:20:34 UTC 2022): the payload format has been simplified.
use bstr::ByteSlice;
use serde::{de, Deserialize, Deserializer};
use std::fmt;
pub struct PasswordHackPayload {
pub project: String,
pub password: Vec<u8>,
#[derive(Deserialize)]
#[serde(untagged)]
pub enum Password {
/// A regular string for utf-8 encoded passwords.
Simple { password: String },
/// Password is base64-encoded because it may contain arbitrary byte sequences.
Encoded {
#[serde(rename = "password_", deserialize_with = "deserialize_base64")]
password: Vec<u8>,
},
}
impl PasswordHackPayload {
pub fn parse(bytes: &[u8]) -> Option<Self> {
// The format is `project=<utf-8>;<password-bytes>`.
let mut iter = bytes.strip_prefix(b"project=")?.splitn_str(2, ";");
let project = iter.next()?.to_str().ok()?.to_owned();
let password = iter.next()?.to_owned();
Some(Self { project, password })
impl AsRef<[u8]> for Password {
fn as_ref(&self) -> &[u8] {
match self {
Password::Simple { password } => password.as_ref(),
Password::Encoded { password } => password.as_ref(),
}
}
}
#[derive(Deserialize)]
pub struct PasswordHackPayload {
pub project: String,
#[serde(flatten)]
pub password: Password,
}
fn deserialize_base64<'a, D: Deserializer<'a>>(des: D) -> Result<Vec<u8>, D::Error> {
// It's very tempting to replace this with
//
// ```
// let base64: &str = Deserialize::deserialize(des)?;
// base64::decode(base64).map_err(serde::de::Error::custom)
// ```
//
// Unfortunately, we can't always deserialize into `&str`, so we'd
// have to use an allocating `String` instead. Thus, visitor is better.
struct Visitor;
impl<'de> de::Visitor<'de> for Visitor {
type Value = Vec<u8>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a string")
}
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
base64::decode(v).map_err(de::Error::custom)
}
}
des.deserialize_str(Visitor)
}
#[cfg(test)]
mod tests {
use super::*;
use rstest::rstest;
use serde_json::json;
#[test]
fn parse_password_hack_payload() {
let bytes = b"";
assert!(PasswordHackPayload::parse(bytes).is_none());
fn parse_password() -> anyhow::Result<()> {
let password: Password = serde_json::from_value(json!({
"password": "foo",
}))?;
assert_eq!(password.as_ref(), "foo".as_bytes());
let bytes = b"project=";
assert!(PasswordHackPayload::parse(bytes).is_none());
let password: Password = serde_json::from_value(json!({
"password_": base64::encode("foo"),
}))?;
assert_eq!(password.as_ref(), "foo".as_bytes());
let bytes = b"project=;";
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
assert_eq!(payload.project, "");
assert_eq!(payload.password, b"");
Ok(())
}
let bytes = b"project=foobar;pass;word";
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
assert_eq!(payload.project, "foobar");
assert_eq!(payload.password, b"pass;word");
#[rstest]
#[case("password", str::to_owned)]
#[case("password_", base64::encode)]
fn parse(#[case] key: &str, #[case] encode: fn(&'static str) -> String) -> anyhow::Result<()> {
let (password, project) = ("password", "pie-in-the-sky");
let payload = json!({
"project": project,
key: encode(password),
});
let payload: PasswordHackPayload = serde_json::from_value(payload)?;
assert_eq!(payload.password.as_ref(), password.as_bytes());
assert_eq!(payload.project, project);
Ok(())
}
}


@@ -65,17 +65,8 @@ impl NodeInfo {
// require for our business.
let mut connection_error = None;
let ports = self.config.get_ports();
let hosts = self.config.get_hosts();
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
if ports.len() > 1 && ports.len() != hosts.len() {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("couldn't connect: bad compute config, ports and hosts entries' count does not match: {:?}", self.config),
));
}
for (i, host) in hosts.iter().enumerate() {
let port = ports.get(i).or_else(|| ports.first()).unwrap_or(&5432);
for (i, host) in self.config.get_hosts().iter().enumerate() {
let port = ports.get(i).or_else(|| ports.get(0)).unwrap_or(&5432);
let host = match host {
Host::Tcp(host) => host.as_str(),
Host::Unix(_) => continue, // unix sockets are not welcome here
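
The loop above picks a port for each host by index, falling back to the single configured port and finally to 5432. A small isolated sketch of that lookup, mirroring the .first() variant shown above; the helper name is an editor's stand-in.

fn port_for_host(i: usize, ports: &[u16]) -> u16 {
    // Per-index port if present, otherwise the single configured port, otherwise 5432.
    *ports.get(i).or_else(|| ports.first()).unwrap_or(&5432)
}

fn main() {
    assert_eq!(port_for_host(0, &[]), 5432);
    assert_eq!(port_for_host(1, &[1234]), 1234);
    assert_eq!(port_for_host(1, &[1234, 5678]), 5678);
}
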


@@ -14,7 +14,7 @@ pub const SCRAM_RAW_NONCE_LEN: usize = 18;
fn validate_sasl_extensions<'a>(parts: impl Iterator<Item = &'a str>) -> Option<()> {
for mut chars in parts.map(|s| s.chars()) {
let attr = chars.next()?;
if !('a'..='z').contains(&attr) && !('A'..='Z').contains(&attr) {
if !('a'..'z').contains(&attr) && !('A'..'Z').contains(&attr) {
return None;
}
let eq = chars.next()?;
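
One side of this hunk uses the half-open ranges 'a'..'z' and 'A'..'Z', which exclude 'z' and 'Z'; the inclusive forms accept the full alphabet. A short illustration of the difference (editor's example, not part of the diff):

fn main() {
    // Half-open ranges exclude their upper bound, inclusive ranges do not.
    assert!(!('a'..'z').contains(&'z'));
    assert!(('a'..='z').contains(&'z'));
    assert!(!('A'..'Z').contains(&'Z'));
    assert!(('A'..='Z').contains(&'Z'));
}
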


@@ -26,8 +26,6 @@ pytest-lazy-fixture = "^0.6.3"
prometheus-client = "^0.14.1"
pytest-timeout = "^2.1.0"
Werkzeug = "2.1.2"
pytest-order = "^1.0.1"
neon-dev-utils = {path = "./bindings/python/neon-dev-utils"}
[tool.poetry.dev-dependencies]
yapf = "==0.31.0"


@@ -40,7 +40,7 @@ struct SafeKeeperStateV1 {
wal_start_lsn: Lsn,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfoV2 {
/// Postgres server version
pub pg_version: u32,
@@ -70,7 +70,7 @@ pub struct SafeKeeperStateV2 {
pub wal_start_lsn: Lsn,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfoV3 {
/// Postgres server version
pub pg_version: u32,


@@ -9,7 +9,7 @@ use crate::timeline::{Timeline, TimelineTools};
use crate::SafeKeeperConf;
use anyhow::{bail, Context, Result};
use postgres_ffi::PG_TLI;
use postgres_ffi::xlog_utils::PG_TLI;
use regex::Regex;
use std::str::FromStr;
use std::sync::Arc;
@@ -90,10 +90,7 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()> {
let cmd = parse_cmd(query_string)?;
info!(
"got query {:?} in timeline {:?}",
query_string, self.ztimelineid
);
info!("got query {:?}", query_string);
let create = !(matches!(cmd, SafekeeperPostgresCommand::StartReplication { .. })
|| matches!(cmd, SafekeeperPostgresCommand::IdentifySystem));
@@ -109,17 +106,23 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
}
match cmd {
SafekeeperPostgresCommand::StartWalPush => ReceiveWalConn::new(pgb)
.run(self)
.context("failed to run ReceiveWalConn"),
SafekeeperPostgresCommand::StartReplication { start_lsn } => ReplicationConn::new(pgb)
.run(self, pgb, start_lsn)
.context("failed to run ReplicationConn"),
SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb),
SafekeeperPostgresCommand::JSONCtrl { ref cmd } => handle_json_ctrl(self, pgb, cmd),
SafekeeperPostgresCommand::StartWalPush => {
ReceiveWalConn::new(pgb)
.run(self)
.context("failed to run ReceiveWalConn")?;
}
SafekeeperPostgresCommand::StartReplication { start_lsn } => {
ReplicationConn::new(pgb)
.run(self, pgb, start_lsn)
.context("failed to run ReplicationConn")?;
}
SafekeeperPostgresCommand::IdentifySystem => {
self.handle_identify_system(pgb)?;
}
SafekeeperPostgresCommand::JSONCtrl { ref cmd } => {
handle_json_ctrl(self, pgb, cmd)?;
}
}
.context(format!("timeline {timelineid}"))?;
Ok(())
}
}
@@ -150,15 +153,8 @@ impl SafekeeperPostgresHandler {
/// Handle IDENTIFY_SYSTEM replication command
///
fn handle_identify_system(&mut self, pgb: &mut PostgresBackend) -> Result<()> {
let lsn = if self.is_walproposer_recovery() {
// walproposer should get all local WAL until flush_lsn
self.timeline.get().get_end_of_wal()
} else {
// other clients shouldn't get any uncommitted WAL
self.timeline.get().get_state().0.commit_lsn
}
.to_string();
let start_pos = self.timeline.get().get_end_of_wal();
let lsn = start_pos.to_string();
let sysid = self
.timeline
.get()
@@ -207,11 +203,4 @@ impl SafekeeperPostgresHandler {
.write_message(&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM"))?;
Ok(())
}
/// Returns true if current connection is a replication connection, originating
/// from a walproposer recovery function. This connection gets a special handling:
/// safekeeper must stream all local WAL till the flush_lsn, whether committed or not.
pub fn is_walproposer_recovery(&self) -> bool {
self.appname == Some("wal_proposer_recovery".to_string())
}
}


@@ -7,7 +7,8 @@
//!
use anyhow::Result;
use bytes::Bytes;
use bytes::{BufMut, Bytes, BytesMut};
use crc32c::crc32c_append;
use serde::{Deserialize, Serialize};
use tracing::*;
@@ -18,8 +19,9 @@ use crate::safekeeper::{
};
use crate::safekeeper::{SafeKeeperState, Term, TermHistory, TermSwitchEntry};
use crate::timeline::TimelineTools;
use postgres_ffi::v14::pg_constants;
use postgres_ffi::v14::xlog_utils;
use postgres_ffi::pg_constants;
use postgres_ffi::xlog_utils;
use postgres_ffi::{uint32, uint64, Oid, XLogRecord};
use utils::{
lsn::Lsn,
postgres_backend::PostgresBackend,
@@ -142,7 +144,7 @@ fn append_logical_message(
spg: &mut SafekeeperPostgresHandler,
msg: &AppendLogicalMessage,
) -> Result<InsertedWAL> {
let wal_data = xlog_utils::encode_logical_message(&msg.lm_prefix, &msg.lm_message);
let wal_data = encode_logical_message(&msg.lm_prefix, &msg.lm_message);
let sk_state = spg.timeline.get().get_state().1;
let begin_lsn = msg.begin_lsn;
@@ -180,3 +182,90 @@ fn append_logical_message(
append_response,
})
}
#[repr(C)]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct XlLogicalMessage {
db_id: Oid,
transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
prefix_size: uint64,
message_size: uint64,
}
impl XlLogicalMessage {
pub fn encode(&self) -> Bytes {
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
}
/// Create new WAL record for non-transactional logical message.
/// Used for creating artificial WAL for tests, as LogicalMessage
/// record is basically no-op.
fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
let mut prefix_bytes = BytesMut::with_capacity(prefix.len() + 1);
prefix_bytes.put(prefix.as_bytes());
prefix_bytes.put_u8(0);
let message_bytes = message.as_bytes();
let logical_message = XlLogicalMessage {
db_id: 0,
transactional: 0,
prefix_size: prefix_bytes.len() as u64,
message_size: message_bytes.len() as u64,
};
let mainrdata = logical_message.encode();
let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
// only short mainrdata is supported for now
assert!(mainrdata_len <= 255);
let mainrdata_len = mainrdata_len as u8;
let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
data.extend_from_slice(&mainrdata);
data.extend_from_slice(&prefix_bytes);
data.extend_from_slice(message_bytes);
let total_len = xlog_utils::XLOG_SIZE_OF_XLOG_RECORD + data.len();
let mut header = XLogRecord {
xl_tot_len: total_len as u32,
xl_xid: 0,
xl_prev: 0,
xl_info: 0,
xl_rmid: 21,
__bindgen_padding_0: [0u8; 2usize],
xl_crc: 0, // crc will be calculated later
};
let header_bytes = header.encode().expect("failed to encode header");
let crc = crc32c_append(0, &data);
let crc = crc32c_append(crc, &header_bytes[0..xlog_utils::XLOG_RECORD_CRC_OFFS]);
header.xl_crc = crc;
let mut wal: Vec<u8> = Vec::new();
wal.extend_from_slice(&header.encode().expect("failed to encode header"));
wal.extend_from_slice(&data);
// WAL start position must be aligned at 8 bytes,
// this will add padding for the next WAL record.
const PADDING: usize = 8;
let padding_rem = wal.len() % PADDING;
if padding_rem != 0 {
wal.resize(wal.len() + PADDING - padding_rem, 0);
}
wal
}
#[test]
fn test_encode_logical_message() {
let expected = [
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255, 38,
0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114, 101, 102,
105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
];
let actual = encode_logical_message("prefix", "message");
assert_eq!(expected, actual[..]);
}
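
encode_logical_message above pads the WAL record so that the next record starts on an 8-byte boundary. A minimal sketch of that padding rule on its own; the helper name is hypothetical.

fn pad_to_8(len: usize) -> usize {
    const PADDING: usize = 8;
    let rem = len % PADDING;
    if rem == 0 {
        len
    } else {
        len + PADDING - rem
    }
}

fn main() {
    assert_eq!(pad_to_8(64), 64); // the 64-byte expected record in the test above needs no padding
    assert_eq!(pad_to_8(65), 72);
}
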


@@ -7,7 +7,7 @@ use metrics::{
proto::MetricFamily,
Gauge, IntGaugeVec,
};
use postgres_ffi::v14::xlog_utils::XLogSegNo;
use postgres_ffi::xlog_utils::XLogSegNo;
use utils::{lsn::Lsn, zid::ZTenantTimelineId};
use crate::{


@@ -5,7 +5,9 @@ use byteorder::{LittleEndian, ReadBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use etcd_broker::subscription_value::SkTimelineInfo;
use postgres_ffi::v14::xlog_utils::{TimeLineID, XLogSegNo, MAX_SEND_SIZE};
use postgres_ffi::xlog_utils::TimeLineID;
use postgres_ffi::xlog_utils::XLogSegNo;
use serde::{Deserialize, Serialize};
use std::cmp::max;
use std::cmp::min;
@@ -17,6 +19,7 @@ use crate::control_file;
use crate::send_wal::HotStandbyFeedback;
use crate::wal_storage;
use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
use utils::{
bin_ser::LeSer,
lsn::Lsn,
@@ -124,7 +127,7 @@ impl AcceptorState {
/// Information about Postgres. Safekeeper gets it once and then verifies
/// all further connections from computes match.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ServerInfo {
/// Postgres server version
pub pg_version: u32,


@@ -6,7 +6,7 @@ use crate::timeline::{ReplicaState, Timeline, TimelineTools};
use crate::wal_storage::WalReader;
use anyhow::{bail, Context, Result};
use postgres_ffi::v14::xlog_utils::{get_current_timestamp, TimestampTz, MAX_SEND_SIZE};
use postgres_ffi::xlog_utils::{get_current_timestamp, TimestampTz, MAX_SEND_SIZE};
use bytes::Bytes;
use serde::{Deserialize, Serialize};
@@ -36,7 +36,7 @@ const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
type FullTransactionId = u64;
/// Hot standby feedback received from replica
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct HotStandbyFeedback {
pub ts: TimestampTz,
pub xmin: FullTransactionId,
@@ -170,7 +170,6 @@ impl ReplicationConn {
// spawn the background thread which receives HotStandbyFeedback messages.
let bg_timeline = Arc::clone(spg.timeline.get());
let bg_stream_in = self.stream_in.take().unwrap();
let bg_timeline_id = spg.ztimelineid.unwrap();
let state = ReplicaState::new();
// This replica_id is used below to check if it's time to stop replication.
@@ -189,8 +188,6 @@ impl ReplicationConn {
let _ = thread::Builder::new()
.name("HotStandbyFeedback thread".into())
.spawn(move || {
let _enter =
info_span!("HotStandbyFeedback thread", timeline = %bg_timeline_id).entered();
if let Err(err) = Self::background_thread(bg_stream_in, bg_replica_guard) {
error!("Replication background thread failed: {}", err);
}
@@ -201,12 +198,13 @@ impl ReplicationConn {
.build()?;
runtime.block_on(async move {
let (inmem_state, persisted_state) = spg.timeline.get().get_state();
let (_, persisted_state) = spg.timeline.get().get_state();
// add persisted_state.timeline_start_lsn == Lsn(0) check
if persisted_state.server.wal_seg_size == 0 {
bail!("Cannot start replication before connecting to walproposer");
}
let wal_end = spg.timeline.get().get_end_of_wal();
// Walproposer gets special handling: safekeeper must give proposer all
// local WAL till the end, whether committed or not (walproposer will
// hang otherwise). That's because walproposer runs the consensus and
@@ -216,8 +214,8 @@ impl ReplicationConn {
// another compute rises which collects majority and starts fixing log
// on this safekeeper itself. That's ok as (old) proposer will never be
// able to commit such WAL.
let stop_pos: Option<Lsn> = if spg.is_walproposer_recovery() {
let wal_end = spg.timeline.get().get_end_of_wal();
let stop_pos: Option<Lsn> = if spg.appname == Some("wal_proposer_recovery".to_string())
{
Some(wal_end)
} else {
None
@@ -228,7 +226,7 @@ impl ReplicationConn {
// switch to copy
pgb.write_message(&BeMessage::CopyBothResponse)?;
let mut end_pos = stop_pos.unwrap_or(inmem_state.commit_lsn);
let mut end_pos = Lsn(0);
let mut wal_reader = WalReader::new(
spg.conf.timeline_dir(&spg.timeline.get().zttid),


@@ -4,9 +4,8 @@
use anyhow::{bail, Context, Result};
use etcd_broker::subscription_value::SkTimelineInfo;
use once_cell::sync::Lazy;
use postgres_ffi::v14::xlog_utils::XLogSegNo;
use postgres_ffi::xlog_utils::XLogSegNo;
use serde::Serialize;
use tokio::sync::watch;


@@ -11,8 +11,7 @@ use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use postgres_ffi::v14::xlog_utils::{XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr};
use postgres_ffi::PG_TLI;
use postgres_ffi::xlog_utils::{XLogFileName, XLogSegNo, XLogSegNoOffsetToRecPtr, PG_TLI};
use remote_storage::{GenericRemoteStorage, RemoteStorage};
use tokio::fs::File;
use tokio::runtime::Builder;


@@ -13,10 +13,9 @@ use std::pin::Pin;
use tokio::io::AsyncRead;
use once_cell::sync::Lazy;
use postgres_ffi::v14::xlog_utils::{
find_end_of_wal, IsPartialXLogFileName, IsXLogFileName, XLogFromFileName, XLogSegNo,
use postgres_ffi::xlog_utils::{
find_end_of_wal, IsPartialXLogFileName, IsXLogFileName, XLogFromFileName, XLogSegNo, PG_TLI,
};
use postgres_ffi::PG_TLI;
use std::cmp::min;
use std::fs::{self, remove_file, File, OpenOptions};
@@ -31,10 +30,9 @@ use crate::safekeeper::SafeKeeperState;
use crate::wal_backup::read_object;
use crate::SafeKeeperConf;
use postgres_ffi::v14::xlog_utils::XLogFileName;
use postgres_ffi::XLOG_BLCKSZ;
use postgres_ffi::xlog_utils::{XLogFileName, XLOG_BLCKSZ};
use postgres_ffi::v14::waldecoder::WalStreamDecoder;
use postgres_ffi::waldecoder::WalStreamDecoder;
use metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECONDS_BUCKETS};
@@ -334,7 +332,7 @@ impl Storage for PhysicalStorage {
self.write_lsn = if state.commit_lsn == Lsn(0) {
Lsn(0)
} else {
find_end_of_wal(&self.timeline_dir, wal_seg_size, state.commit_lsn)?
Lsn(find_end_of_wal(&self.timeline_dir, wal_seg_size, true, state.commit_lsn)?.0)
};
self.write_record_lsn = self.write_lsn;


@@ -18,10 +18,6 @@ exclude = ^vendor/
# some tests don't typecheck when this flag is set
check_untyped_defs = false
# Help mypy find imports when running against list of individual files.
# Without this line it would behave differently when executed on the entire project.
mypy_path = $MYPY_CONFIG_FILE_DIR:$MYPY_CONFIG_FILE_DIR/test_runner
disallow_incomplete_defs = false
disallow_untyped_calls = false
disallow_untyped_decorators = false


@@ -1,5 +0,0 @@
from neon_dev_utils import sum_as_string
def test_neon_dev_utils():
assert sum_as_string(2, 3) == "5"


@@ -1,11 +1,7 @@
from typing import Optional
from uuid import uuid4, UUID
import pytest
import pathlib
import os
import subprocess
from fixtures.utils import lsn_from_hex
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
NeonEnv,
@@ -13,43 +9,16 @@ from fixtures.neon_fixtures import (
NeonPageserverHttpClient,
NeonPageserverApiException,
wait_until,
neon_binpath,
pg_distrib_dir,
)
# test that we cannot override node id after init
def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
repo_dir = neon_simple_env.repo_dir
pageserver_config = repo_dir / 'pageserver.toml'
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
run_pageserver = lambda args: subprocess.run([str(pageserver_bin), '-D', str(repo_dir), *args],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# remove initial config
pageserver_config.unlink()
bad_init = run_pageserver(['--init', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert bad_init.returncode == 1, 'pageserver should not be able to init new config without the node id'
assert "missing id" in bad_init.stderr
assert not pageserver_config.exists(), 'config file should not be created after init error'
completed_init = run_pageserver(
['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert completed_init.returncode == 0, 'pageserver should be able to create a new config with the node id given'
assert pageserver_config.exists(), 'config file should be created successfully'
bad_reinit = run_pageserver(
['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert bad_reinit.returncode == 1, 'pageserver should not be able to init new config without the node id'
assert "already exists, cannot init it" in bad_reinit.stderr
bad_update = run_pageserver(['--update-config', '-c', 'id = 3'])
assert bad_update.returncode == 1, 'pageserver should not allow updating node id'
assert "has node id already, it cannot be overridden" in bad_update.stderr
# test that we cannot override node id
def test_pageserver_init_node_id(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init()
with pytest.raises(
Exception,
match="node id can only be set during pageserver init and cannot be overridden"):
env.pageserver.start(overrides=['--pageserver-config-override=id=10'])
def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):


@@ -2,6 +2,16 @@ from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
# Test that the pageserver fixture is implemented correctly, allowing quick restarts.
# This is a regression test, see https://github.com/neondatabase/neon/issues/2247
def test_fixture_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
for i in range(3):
env.pageserver.stop()
env.pageserver.start()
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):


@@ -1,5 +1,6 @@
import pytest
import psycopg2
import json
import base64
def test_proxy_select_1(static_proxy):
@@ -12,14 +13,22 @@ def test_password_hack(static_proxy):
static_proxy.safe_psql(f"create role {user} with login password '{password}'",
options='project=irrelevant')
# Note the format of `magic`!
magic = f"project=irrelevant;{password}"
def encode(s: str) -> str:
return base64.b64encode(s.encode('utf-8')).decode('utf-8')
magic = encode(json.dumps({
'project': 'irrelevant',
'password': password,
}))
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
# Must also check that invalid magic won't be accepted.
with pytest.raises(psycopg2.errors.OperationalError):
magic = "broken"
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
magic = encode(json.dumps({
'project': 'irrelevant',
'password_': encode(password),
}))
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
# Pass extra options to the server.


@@ -1,17 +0,0 @@
"""Tests for the code in test fixtures"""
from fixtures.neon_fixtures import NeonEnvBuilder
# Test that pageserver and safekeeper can restart quickly.
# This is a regression test, see https://github.com/neondatabase/neon/issues/2247
def test_fixture_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
for i in range(3):
env.pageserver.stop()
env.pageserver.start()
for i in range(3):
env.safekeepers[0].stop()
env.safekeepers[0].start()

Some files were not shown because too many files have changed in this diff.