Update docs/rfcs/017-user-management.md

Co-authored-by: Anna Stepanyan <stepa6ka@gmail.com>
User management RFC
2026-02-04 11:10:37 +00:00 · 2022-07-15 13:30:23 +03:00 · 2022-07-15 13:19:02 +03:00
345 changed files with 16743 additions and 31399 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -0,0 +1,369 @@
+version: 2.1
+
+executors:
+  neon-xlarge-executor:
+    resource_class: xlarge
+    docker:
+      # NB: when changed, do not forget to update rust image tag in all Dockerfiles
+      - image: neondatabase/rust:1.58
+  neon-executor:
+    docker:
+      - image: neondatabase/rust:1.58
+
+jobs:
+  # A job to build postgres
+  build-postgres:
+    executor: neon-xlarge-executor
+    parameters:
+      build_type:
+        type: enum
+        enum: ["debug", "release"]
+    environment:
+      BUILD_TYPE: << parameters.build_type >>
+    steps:
+        # Checkout the git repo (circleci doesn't have a flag to enable submodules here)
+      - checkout
+
+        # Grab the postgres git revision to build a cache key.
+        # Append makefile as it could change the way postgres is built.
+        # Note this works even though the submodule hasn't been checked out yet.
+      - run:
+          name: Get postgres cache key
+          command: |
+              git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
+              cat Makefile >> /tmp/cache-key-postgres
+
+      - restore_cache:
+          name: Restore postgres cache
+          keys:
+            # Restore ONLY if the rev key matches exactly
+            - v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
+
+        # Build postgres if the restore_cache didn't find a build.
+        # `make` can't figure out whether the cache is valid, since
+        # it only compares file timestamps.
+      - run:
+          name: build postgres
+          command: |
+            if [ ! -e tmp_install/bin/postgres ]; then
+              # "depth 1" saves some time by not cloning the whole repo
+              git submodule update --init --depth 1
+              # bail out on any warnings
+              COPT='-Werror' mold -run make postgres -j$(nproc)
+            fi
+
+      - save_cache:
+          name: Save postgres cache
+          key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
+          paths:
+            - tmp_install
+
+  # A job to build Neon rust code
+  build-neon:
+    executor: neon-xlarge-executor
+    parameters:
+      build_type:
+        type: enum
+        enum: ["debug", "release"]
+    environment:
+      BUILD_TYPE: << parameters.build_type >>
+    steps:
+        # Checkout the git repo (without submodules)
+      - checkout
+
+        # Grab the postgres git revision to build a cache key.
+        # Append makefile as it could change the way postgres is built.
+        # Note this works even though the submodule hasn't been checked out yet.
+      - run:
+          name: Get postgres cache key
+          command: |
+            git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
+            cat Makefile >> /tmp/cache-key-postgres
+
+
+      - restore_cache:
+          name: Restore postgres cache
+          keys:
+            # Restore ONLY if the rev key matches exactly
+            - v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
+
+      - restore_cache:
+          name: Restore rust cache
+          keys:
+            # Require an exact match. While an out of date cache might speed up the build,
+            # there's no way to clean out old packages, so the cache grows every time something
+            # changes.
+            - v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
+
+        # Build the rust code, including test binaries
+      - run:
+          name: Rust build << parameters.build_type >>
+          command: |
+            if [[ $BUILD_TYPE == "debug" ]]; then
+              CARGO_FLAGS=
+            elif [[ $BUILD_TYPE == "release" ]]; then
+              CARGO_FLAGS="--release --features profiling"
+            fi
+
+            export CARGO_INCREMENTAL=0
+            export CACHEPOT_BUCKET=zenith-rust-cachepot
+            export RUSTC_WRAPPER=""
+            export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
+            export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
+            mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
+            cachepot -s
+
+      - save_cache:
+          name: Save rust cache
+          key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
+          paths:
+            - ~/.cargo/registry
+            - ~/.cargo/git
+            - target
+
+        # Run rust unit tests
+      - run:
+          name: cargo test
+          command: |
+            if [[ $BUILD_TYPE == "debug" ]]; then
+              CARGO_FLAGS=
+            elif [[ $BUILD_TYPE == "release" ]]; then
+              CARGO_FLAGS=--release
+            fi
+
+            cargo test $CARGO_FLAGS
+
+        # Install the rust binaries, for use by test jobs
+      - run:
+          name: Install rust binaries
+          command: |
+            binaries=$(
+              cargo metadata --format-version=1 --no-deps |
+              jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
+            )
+
+            mkdir -p /tmp/zenith/bin
+            mkdir -p /tmp/zenith/test_bin
+            mkdir -p /tmp/zenith/etc
+
+            # Install target binaries
+            for bin in $binaries; do
+              SRC=target/$BUILD_TYPE/$bin
+              DST=/tmp/zenith/bin/$bin
+              cp $SRC $DST
+            done
+
+        # Install the postgres binaries, for use by test jobs
+      - run:
+          name: Install postgres binaries
+          command: |
+            cp -a tmp_install /tmp/zenith/pg_install
+
+      # Save rust binaries for other jobs in the workflow
+      - persist_to_workspace:
+          root: /tmp/zenith
+          paths:
+            - "*"
+
+  check-codestyle-python:
+    executor: neon-executor
+    steps:
+      - checkout
+      - restore_cache:
+          keys:
+            - v2-python-deps-{{ checksum "poetry.lock" }}
+      - run:
+          name: Install deps
+          command: ./scripts/pysync
+      - save_cache:
+          key: v2-python-deps-{{ checksum "poetry.lock" }}
+          paths:
+            - /home/circleci/.cache/pypoetry/virtualenvs
+      - run:
+          name: Print versions
+          when: always
+          command: |
+              poetry run python --version
+              poetry show
+      - run:
+          name: Run yapf to ensure code format
+          when: always
+          command: poetry run yapf --recursive --diff .
+      - run:
+          name: Run mypy to check types
+          when: always
+          command: poetry run mypy .
+
+  run-pytest:
+    executor: neon-executor
+    parameters:
+      # pytest args to specify the tests to run.
+      #
+      # This can be a test file name, e.g. 'test_pgbench.py', or a subdirectory,
+      # or '-k foobar' to run tests containing string 'foobar'. See pytest man page
+      # section SPECIFYING TESTS / SELECTING TESTS for details.
+      #
+      # Select the type of Rust build. Must be "release" or "debug".
+      build_type:
+        type: string
+        default: "debug"
+      # This parameter is required, to prevent the mistake of running all tests in one job.
+      test_selection:
+        type: string
+        default: ""
+      # Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
+      extra_params:
+        type: string
+        default: ""
+      needs_postgres_source:
+        type: boolean
+        default: false
+      run_in_parallel:
+        type: boolean
+        default: true
+      save_perf_report:
+        type: boolean
+        default: false
+    environment:
+      BUILD_TYPE: << parameters.build_type >>
+    steps:
+      - attach_workspace:
+          at: /tmp/zenith
+      - checkout
+      - when:
+          condition: << parameters.needs_postgres_source >>
+          steps:
+            - run: git submodule update --init --depth 1
+      - restore_cache:
+          keys:
+            - v2-python-deps-{{ checksum "poetry.lock" }}
+      - run:
+          name: Install deps
+          command: ./scripts/pysync
+      - save_cache:
+          key: v2-python-deps-{{ checksum "poetry.lock" }}
+          paths:
+            - /home/circleci/.cache/pypoetry/virtualenvs
+      - run:
+          name: Run pytest
+          # pytest doesn't output test logs in real time, so CI job may fail with
+          # `Too long with no output` error, if a test is running for a long time.
+          # In that case, tests should have internal timeouts that are less than
+          # no_output_timeout, specified here.
+          no_output_timeout: 10m
+          environment:
+            - NEON_BIN: /tmp/zenith/bin
+            - POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
+            - TEST_OUTPUT: /tmp/test_output
+            # this variable will be embedded in perf test report
+            # and is needed to distinguish different environments
+            - PLATFORM: zenith-local-ci
+          command: |
+            PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
+            rm -rf "$PERF_REPORT_DIR"
+            # Check the raw parameter: the "test_runner/"-prefixed path below is never empty.
+            if [ -z "<< parameters.test_selection >>" ]; then
+              echo "test_selection must be set"
+              exit 1
+            fi
+            TEST_SELECTION="test_runner/<< parameters.test_selection >>"
+            EXTRA_PARAMS="<< parameters.extra_params >>"
+            if << parameters.run_in_parallel >>; then
+              EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
+            fi
+            if << parameters.save_perf_report >>; then
+              if [[ $CIRCLE_BRANCH == "main" ]]; then
+                mkdir -p "$PERF_REPORT_DIR"
+                EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
+              fi
+            fi
+
+            export GITHUB_SHA=$CIRCLE_SHA1
+
+            # Run the tests.
+            #
+            # The junit.xml file allows CircleCI to display more fine-grained test information
+            # in its "Tests" tab in the results page.
+            # --verbose prints name of each test (helpful when there are
+            # multiple tests in one file)
+            # -rA prints summary in the end
+            # -n4 uses four processes to run tests via pytest-xdist
+            # -s is deliberately not passed: it would stop pytest from capturing output, and
+            # since tests run in parallel the logs of different tests would be mixed together
+            ./scripts/pytest \
+              --junitxml=$TEST_OUTPUT/junit.xml \
+              --tb=short \
+              --verbose \
+              -m "not remote_cluster" \
+              -rA $TEST_SELECTION $EXTRA_PARAMS
+
+            if << parameters.save_perf_report >>; then
+              if [[ $CIRCLE_BRANCH == "main" ]]; then
+                export REPORT_FROM="$PERF_REPORT_DIR"
+                export REPORT_TO=local
+                scripts/generate_and_push_perf_report.sh
+              fi
+            fi
+      - run:
+          # CircleCI artifacts are preserved one file at a time, so skipping
+          # this step isn't a good idea. If you want to extract the
+          # pageserver state, perhaps a tarball would be a better idea.
+          name: Delete all data but logs
+          when: always
+          command: |
+            du -sh /tmp/test_output/*
+            find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
+            du -sh /tmp/test_output/*
+      - store_artifacts:
+          path: /tmp/test_output
+      # The store_test_results step tells CircleCI where to find the junit.xml file.
+      - store_test_results:
+          path: /tmp/test_output
+      # Save data (if any)
+      - persist_to_workspace:
+          root: /tmp/zenith
+          paths:
+            - "*"
+
+workflows:
+  build_and_test:
+    jobs:
+      - check-codestyle-python
+      - build-postgres:
+          name: build-postgres-<< matrix.build_type >>
+          matrix:
+            parameters:
+              build_type: ["debug", "release"]
+      - build-neon:
+          name: build-neon-<< matrix.build_type >>
+          matrix:
+            parameters:
+              build_type: ["debug", "release"]
+          requires:
+            - build-postgres-<< matrix.build_type >>
+      - run-pytest:
+          name: pg_regress-tests-<< matrix.build_type >>
+          matrix:
+            parameters:
+              build_type: ["debug", "release"]
+          test_selection: batch_pg_regress
+          needs_postgres_source: true
+          requires:
+            - build-neon-<< matrix.build_type >>
+      - run-pytest:
+          name: other-tests-<< matrix.build_type >>
+          matrix:
+            parameters:
+              build_type: ["debug", "release"]
+          test_selection: batch_others
+          requires:
+            - build-neon-<< matrix.build_type >>
+      - run-pytest:
+          name: benchmarks
+          context: PERF_TEST_RESULT_CONNSTR
+          build_type: release
+          test_selection: performance
+          run_in_parallel: false
+          save_perf_report: true
+          requires:
+            - build-neon-release
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,20 +1,18 @@
-*
+**/.git/
+**/__pycache__
+**/.pytest_cache

-!rust-toolchain.toml
-!Cargo.toml
-!Cargo.lock
-!Makefile
+.git
+target
+tmp_check
+tmp_install
+tmp_check_cli
+test_output
+.vscode
+.neon
+integration_tests/.neon
+.mypy_cache
+
+Dockerfile
+.dockerignore

-!.cargo/
-!.config/
-!control_plane/
-!compute_tools/
-!libs/
-!pageserver/
-!pgxn/
-!proxy/
-!safekeeper/
-!vendor/postgres-v14/
-!vendor/postgres-v15/
-!workspace_hack/
-!neon_local/
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1 +0,0 @@
-4c2bb43775947775401cbb9d774823c5723a91f8
--- a/.github/ISSUE_TEMPLATE/bug-template.md
+++ b/.github/ISSUE_TEMPLATE/bug-template.md
@@ -1,23 +0,0 @@
---
-name: Bug Template
-about: Used for describing bugs
-title: ''
-labels: t/bug
-assignees: ''
-
---
-
-## Steps to reproduce
-
-
-## Expected result
-
-
-## Actual result
-
-
-## Environment
-
-
-## Logs, links
- 
--- a/.github/ISSUE_TEMPLATE/epic-template.md
+++ b/.github/ISSUE_TEMPLATE/epic-template.md
@@ -1,25 +0,0 @@
---
-name: Epic Template
-about: A set of related tasks contributing towards specific outcome, comprising of
-  more than 1 week of work.
-title: 'Epic: '
-labels: t/Epic
-assignees: ''
-
---
-
-## Motivation
-
-
-## DoD
-
-
-## Implementation ideas
-
-
-## Tasks
- [ ]
-
-
-## Other related tasks and Epics
- 
--- a/.github/PULL_REQUEST_TEMPLATE/release-pr.md
+++ b/.github/PULL_REQUEST_TEMPLATE/release-pr.md
@@ -1,20 +0,0 @@
-## Release 202Y-MM-DD
-
-**NB: this PR must be merged only by 'Create a merge commit'!**
-
-### Checklist when preparing for release
- [ ] Read or refresh [the release flow guide](https://github.com/neondatabase/cloud/wiki/Release:-general-flow)
- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers?
- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan?
-
-<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
-
-### Checklist after release
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/120/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
- [ ] Check [cloud SLO dashboard](https://observer.zenith.tech/d/_oWcBMJ7k/cloud-slos?orgId=1)
- [ ] Check [compute startup metrics dashboard](https://observer.zenith.tech/d/5OkYJEmVz/compute-startup-time)
-
-<!-- List everything that should be done **after** release, any admin UI configuration / Grafana dashboard / alert changes / setting changes / etc -->
--- a/.github/actions/allure-report/action.yml
+++ b/.github/actions/allure-report/action.yml
@@ -1,221 +0,0 @@
-name: 'Create Allure report'
-description: 'Create and publish Allure report'
-
-inputs:
-  action:
-    desctiption: 'generate or store'
-    required: true
-  build_type:
-    description: '`build_type` from run-python-test-set action'
-    required: true
-  test_selection:
-    description: '`test_selector` from run-python-test-set action'
-    required: false
-outputs:
-  report-url:
-    description: 'Allure report URL'
-    value: ${{ steps.generate-report.outputs.report-url }}
-
-runs:
-  using: "composite"
-  steps:
-    - name: Validate input parameters
-      shell: bash -euxo pipefail {0}
-      run: |
-        if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
-          echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
-          exit 1
-        fi
-
-        if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
-          echo 2>&1 "inputs.test_selection must be set for 'store' action"
-          exit 2
-        fi
-
-    - name: Calculate key
-      id: calculate-key
-      shell: bash -euxo pipefail {0}
-      run: |
-        # TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
-
-        pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
-        if [ "${pr_number}" != "null" ]; then
-          key=pr-${pr_number}
-        elif [ "${GITHUB_REF}" = "refs/heads/main" ]; then
-          # Shortcut for a special branch
-          key=main
-        else
-          key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
-        fi
-        echo "::set-output name=KEY::${key}"
-
-    - uses: actions/setup-java@v3
-      if: ${{ inputs.action == 'generate' }}
-      with:
-        distribution: 'temurin'
-        java-version: '17'
-
-    - name: Install Allure
-      if: ${{ inputs.action == 'generate' }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        if ! which allure; then
-          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
-          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
-          echo "${ALLURE_ZIP_MD5}  ${ALLURE_ZIP}" | md5sum -c
-          unzip -q ${ALLURE_ZIP}
-          echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
-          rm -f ${ALLURE_ZIP}
-        fi
-      env:
-        ALLURE_VERSION: 2.19.0
-        ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464
-
-    - name: Upload Allure results
-      if: ${{ inputs.action == 'store' }}
-      env:
-        REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
-        RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
-        TEST_OUTPUT: /tmp/test_output
-        BUCKET: neon-github-public-dev
-      shell: bash -euxo pipefail {0}
-      run: |
-        # Add metadata
-        cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
-          {
-            "name": "GitHub Actions",
-            "type": "github",
-            "url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
-            "buildOrder": ${GITHUB_RUN_ID},
-            "buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
-            "buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
-            "reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
-            "reportName": "Allure Report"
-          }
-        EOF
-        cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
-          TEST_SELECTION=${{ inputs.test_selection }}
-          BUILD_TYPE=${{ inputs.build_type }}
-        EOF
-
-        ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
-        ZSTD_NBTHREADS=0
-
-        tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
-        aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
-
-    # Potentially we could have several running build for the same key (for example for the main branch),  so we use improvised lock for this
-    - name: Acquire Allure lock
-      if: ${{ inputs.action == 'generate' }}
-      shell: bash -euxo pipefail {0}
-      env:
-        LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
-        BUCKET: neon-github-public-dev
-      run: |
-        LOCK_TIMEOUT=300 # seconds
-
-        for _ in $(seq 1 5); do
-          for i in $(seq 1 ${LOCK_TIMEOUT}); do
-            LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
-            # `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
-            if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
-              break
-            fi
-            sleep 1
-          done
-          echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" > lock.txt
-          aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
-
-          # A double-check that exactly WE have acquired the lock
-          aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
-          if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
-            break
-          fi
-        done
-
-    - name: Generate and publish final Allure report
-      if: ${{ inputs.action == 'generate' }}
-      id: generate-report
-      env:
-        REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
-        RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }}
-        TEST_OUTPUT: /tmp/test_output
-        BUCKET: neon-github-public-dev
-      shell: bash -euxo pipefail {0}
-      run: |
-        # Get previously uploaded data for this run
-        ZSTD_NBTHREADS=0
-
-        s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output  '.Contents[].Key')
-        if [ -z "$s3_filepaths" ]; then
-          # There's no previously uploaded data for this run
-          exit 0
-        fi
-        for s3_filepath in ${s3_filepaths}; do
-          aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
-
-          archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
-          mkdir -p ${archive%.tar.zst}
-          tar -xf ${archive} -C ${archive%.tar.zst}
-          rm -f ${archive}
-        done
-
-        # Get history trend
-        aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
-
-        # Generate report
-        allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
-
-        # Replace a logo link with a redirect to the latest version of the report
-        sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
-
-        # Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
-        aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
-        aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
-
-        REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
-
-        # Generate redirect
-        cat <<EOF > ./index.html
-          <!DOCTYPE html>
-
-          <meta charset="utf-8">
-          <title>Redirecting to ${REPORT_URL}</title>
-          <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
-        EOF
-        aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
-
-        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
-        echo "::set-output name=report-url::${REPORT_URL}"
-
-    - name: Release Allure lock
-      if: ${{ inputs.action == 'generate' && always() }}
-      shell: bash -euxo pipefail {0}
-      env:
-        LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt
-        BUCKET: neon-github-public-dev
-      run: |
-        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
-
-        if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then
-          aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
-        fi
-
-    - uses: actions/github-script@v6
-      if: ${{ inputs.action == 'generate' && always() }}
-      env:
-        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
-        BUILD_TYPE: ${{ inputs.build_type }}
-        SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-      with:
-        script: |
-          const { REPORT_URL, BUILD_TYPE, SHA } = process.env
-
-          await github.rest.repos.createCommitStatus({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            sha: `${SHA}`,
-            state: 'success',
-            target_url: `${REPORT_URL}`,
-            context: `Allure report / ${BUILD_TYPE}`,
-          })
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -1,56 +0,0 @@
-name: "Download an artifact"
-description: "Custom download action"
-inputs:
-  name:
-    description: "Artifact name"
-    required: true
-  path:
-    description: "A directory to put artifact into"
-    default: "."
-    required: false
-  skip-if-does-not-exist:
-    description: "Allow to skip if file doesn't exist, fail otherwise"
-    default: false
-    required: false
-
-runs:
-  using: "composite"
-  steps:
-    - name: Download artifact
-      id: download-artifact
-      shell: bash -euxo pipefail {0}
-      env:
-        TARGET: ${{ inputs.path }}
-        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
-        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
-      run: |
-        BUCKET=neon-github-public-dev
-        PREFIX=artifacts/${GITHUB_RUN_ID}
-        FILENAME=$(basename $ARCHIVE)
-
-        S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
-        if [ -z "${S3_KEY}" ]; then
-          if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
-            echo '::set-output name=SKIPPED::true'
-            exit 0
-          else
-            echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME} nor its version from previous attempts exist"
-            exit 1
-          fi
-        fi
-
-        echo '::set-output name=SKIPPED::false'
-
-        mkdir -p $(dirname $ARCHIVE)
-        time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}
-
-    - name: Extract artifact
-      if: ${{ steps.download-artifact.outputs.SKIPPED == 'false' }}
-      shell: bash -euxo pipefail {0}
-      env:
-        TARGET: ${{ inputs.path }}
-        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
-      run: |
-        mkdir -p ${TARGET}
-        time tar -xf ${ARCHIVE} -C ${TARGET}
-        rm -f ${ARCHIVE}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -3,7 +3,10 @@ description: 'Runs a Neon python test set, performing all the required preparati

 inputs:
  build_type:
-    description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug", or "remote" for the remote cluster'
+    description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
+    required: true
+  rust_toolchain:
+    description: 'Rust toolchain version to fetch the caches'
    required: true
  test_selection:
    description: 'A python test suite to run'
@@ -21,39 +24,25 @@ inputs:
    required: false
    default: 'true'
  save_perf_report:
-    description: 'Whether to upload the performance report, if true PERF_TEST_RESULT_CONNSTR env variable should be set'
+    description: 'Whether to upload the performance report'
    required: false
    default: 'false'
-  run_with_real_s3:
-    description: 'Whether to pass real s3 credentials to the test suite'
-    required: false
-    default: 'false'
-  real_s3_bucket:
-    description: 'Bucket name for real s3 tests'
-    required: false
-    default: ''
-  real_s3_region:
-    description: 'Region name for real s3 tests'
-    required: false
-    default: ''
-  real_s3_access_key_id:
-    description: 'Access key id'
-    required: false
-    default: ''
-  real_s3_secret_access_key:
-    description: 'Secret access key'
-    required: false
-    default: ''

 runs:
  using: "composite"
  steps:
-    - name: Get Neon artifact
-      if: inputs.build_type != 'remote'
-      uses: ./.github/actions/download
+    - name: Get Neon artifact for restoration
+      uses: actions/download-artifact@v3
      with:
-        name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
-        path: /tmp/neon
+        name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
+        path: ./neon-artifact/
+
+    - name: Extract Neon artifact
+      shell: bash -ex {0}
+      run: |
+        mkdir -p /tmp/neon/
+        tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
+        rm -rf ./neon-artifact/

    - name: Checkout
      if: inputs.needs_postgres_source == 'true'
@@ -70,27 +59,19 @@ runs:
        key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}

    - name: Install Python deps
-      shell: bash -euxo pipefail {0}
+      shell: bash -ex {0}
      run: ./scripts/pysync

    - name: Run pytest
      env:
        NEON_BIN: /tmp/neon/bin
+        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
        TEST_OUTPUT: /tmp/test_output
-        BUILD_TYPE: ${{ inputs.build_type }}
-        AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
-        AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
-      shell: bash -euxo pipefail {0}
+        # this variable will be embedded in perf test report
+        # and is needed to distinguish different environments
+        PLATFORM: github-actions-selfhosted
+      shell: bash -ex {0}
      run: |
-        # PLATFORM will be embedded in the perf test report
-        # and it is needed to distinguish different environments
-        export PLATFORM=${PLATFORM:-github-actions-selfhosted}
-        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install/v14}
-
-        if [ "${BUILD_TYPE}" = "remote" ]; then
-          export REMOTE_ENV=1
-        fi
-
        PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
        rm -rf $PERF_REPORT_DIR

@@ -103,14 +84,6 @@ runs:
        if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
          EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
        fi
-
-        if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
-          echo "REAL S3 ENABLED"
-          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
-          export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
-          export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
-        fi
-
        if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            mkdir -p "$PERF_REPORT_DIR"
@@ -122,18 +95,11 @@ runs:
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
          cov_prefix=()
-        else
-          cov_prefix=()
-        fi
-
-        # Wake up the cluster if we use remote neon instance
-        if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
-          ${POSTGRES_DISTRIB_DIR}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
        fi

        # Run the tests.
        #
-        # The junit.xml file allows CI tools to display more fine-grained test information
+        # The junit.xml file allows the CI system to display more fine-grained test information
        # in its "Tests" tab in the results page.
        # --verbose prints name of each test (helpful when there are
        # multiple tests in one file)
@@ -141,26 +107,34 @@ runs:
        # -n4 uses four processes to run tests via pytest-xdist
        # -s is not used to prevent pytest from capturing output, because tests are running
        # in parallel and logs are mixed between different tests
-        mkdir -p $TEST_OUTPUT/allure/results
        "${cov_prefix[@]}" ./scripts/pytest \
          --junitxml=$TEST_OUTPUT/junit.xml \
-          --alluredir=$TEST_OUTPUT/allure/results \
          --tb=short \
          --verbose \
+          -m "not remote_cluster" \
          -rA $TEST_SELECTION $EXTRA_PARAMS

        if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            export REPORT_FROM="$PERF_REPORT_DIR"
-            export REPORT_TO="$PLATFORM"
+            export REPORT_TO=local
            scripts/generate_and_push_perf_report.sh
          fi
        fi

-    - name: Create Allure report
+    - name: Delete all data but logs
+      shell: bash -ex {0}
      if: always()
-      uses: ./.github/actions/allure-report
+      run: |
+        du -sh /tmp/test_output/*
+        find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
+        du -sh /tmp/test_output/*
+
+    - name: Upload python test logs
+      if: always()
+      uses: actions/upload-artifact@v3
      with:
-        action: store
-        build_type: ${{ inputs.build_type }}
-        test_selection: ${{ inputs.test_selection }}
+        retention-days: 7
+        if-no-files-found: error
+        name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
+        path: /tmp/test_output/
--- a/.github/actions/save-coverage-data/action.yml
+++ b/.github/actions/save-coverage-data/action.yml
@@ -5,18 +5,13 @@ runs:
  using: "composite"
  steps:
    - name: Merge coverage data
-      shell: bash -euxo pipefail {0}
+      shell: bash -ex {0}
      run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge

-    - name: Download previous coverage data into the same directory
-      uses: ./.github/actions/download
-      with:
-        name: coverage-data-artifact
-        path: /tmp/coverage
-        skip-if-does-not-exist: true # skip if there's no previous coverage to download
-
    - name: Upload coverage data
-      uses: ./.github/actions/upload
+      uses: actions/upload-artifact@v3
      with:
+        retention-days: 7
+        if-no-files-found: error
        name: coverage-data-artifact
-        path: /tmp/coverage
+        path: /tmp/coverage/
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -1,55 +0,0 @@
-name: "Upload an artifact"
-description: "Custom upload action"
-inputs:
-  name:
-    description: "Artifact name"
-    required: true
-  path:
-    description: "A directory or file to upload"
-    required: true
-
-runs:
-  using: "composite"
-  steps:
-    - name: Prepare artifact
-      shell: bash -euxo pipefail {0}
-      env:
-        SOURCE: ${{ inputs.path }}
-        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
-      run: |
-        mkdir -p $(dirname $ARCHIVE)
-
-        if [ -f ${ARCHIVE} ]; then
-          echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before"
-          exit 1
-        fi
-
-        ZSTD_NBTHREADS=0
-        if [ -d  ${SOURCE} ]; then
-          time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
-        elif [ -f ${SOURCE} ]; then
-          time tar -cf ${ARCHIVE} --zstd ${SOURCE}
-        elif ! ls ${SOURCE} > /dev/null 2>&1; then
-          echo 2>&1 "${SOURCE} does not exist"
-          exit 2
-        else
-          echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
-          exit 3
-        fi
-
-    - name: Upload artifact
-      shell: bash -euxo pipefail {0}
-      env:
-        SOURCE: ${{ inputs.path }}
-        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
-      run: |
-        BUCKET=neon-github-public-dev
-        PREFIX=artifacts/${GITHUB_RUN_ID}
-        FILENAME=$(basename $ARCHIVE)
-
-        FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)
-
-        time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}
-
-        # Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
-        echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}
--- a/.github/ansible/get_binaries.sh
+++ b/.github/ansible/get_binaries.sh
@@ -2,14 +2,30 @@

 set -e

-if [ -n "${DOCKER_TAG}" ]; then
-  # Verson is DOCKER_TAG but without prefix
-  VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
+RELEASE=${RELEASE:-false}
+
+# look at docker hub for latest tag for neon docker image
+if [ "${RELEASE}" = "true" ]; then
+    echo "search latest release tag"
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
+    if [ -z "${VERSION}" ]; then
+        echo "no any docker tags found, exiting..."
+        exit 1
+    else
+        TAG="release-${VERSION}"
+    fi
 else
-  echo "Please set DOCKER_TAG environment variable"
-  exit 1
+    echo "search latest dev tag"
+    VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -E '^[0-9]+$' | sort -n | tail -1)
+    if [ -z "${VERSION}" ]; then
+        echo "no any docker tags found, exiting..."
+        exit 1
+    else
+        TAG="${VERSION}"
+    fi
 fi

+echo "found ${VERSION}"

 # do initial cleanup
 rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
@@ -17,8 +33,8 @@ mkdir neon_install

 # retrieve binaries from docker image
 echo "getting binaries from docker image"
-docker pull --quiet neondatabase/neon:${DOCKER_TAG}
-ID=$(docker create neondatabase/neon:${DOCKER_TAG})
+docker pull --quiet neondatabase/neon:${TAG}
+ID=$(docker create neondatabase/neon:${TAG})
 docker cp ${ID}:/data/postgres_install.tar.gz .
 tar -xzf postgres_install.tar.gz -C neon_install
 docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
--- a/.github/ansible/production.hosts
+++ b/.github/ansible/production.hosts
@@ -17,4 +17,4 @@ env_name = prod-1
 console_mgmt_base_url = http://console-release.local
 bucket_name           = zenith-storage-oregon
 bucket_region         = us-west-2
-etcd_endpoints        = zenith-1-etcd.local:2379
+etcd_endpoints        = etcd-release.local:2379
--- a/.github/ansible/scripts/init_safekeeper.sh
+++ b/.github/ansible/scripts/init_safekeeper.sh
@@ -1,8 +1,7 @@
 #!/bin/sh

-# fetch params from meta-data service
+# get instance id from meta-data service
 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)

 # store fqdn hostname in var
 HOST=$(hostname -f)
@@ -13,10 +12,10 @@ cat <<EOF | tee /tmp/payload
  "version": 1,
  "host": "${HOST}",
  "port": 6500,
-  "http_port": 7676,
  "region_id": {{ console_region_id }},
  "instance_id": "${INSTANCE_ID}",
-  "availability_zone_id": "${AZ_ID}"
+  "http_host": "${HOST}",
+  "http_port": 7676
 }
 EOF

--- a/.github/ansible/staging.hosts
+++ b/.github/ansible/staging.hosts
@@ -17,4 +17,4 @@ env_name = us-stage
 console_mgmt_base_url = http://console-staging.local
 bucket_name           = zenith-staging-storage-us-east-1
 bucket_region         = us-east-1
-etcd_endpoints        = zenith-us-stage-etcd.local:2379
+etcd_endpoints        = etcd-staging.local:2379
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -1,4 +1,4 @@
-name: Benchmarking
+name: benchmarking

 on:
  # uncomment to run on push for debugging your PR
@@ -11,19 +11,10 @@ on:
    #          │ │ ┌───────────── day of the month (1 - 31)
    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron:  '36 4 * * *' # run once a day, timezone is utc
+    - cron:  '36 7 * * *' # run once a day, timezone is utc

  workflow_dispatch: # adds ability to run this manually

-defaults:
-  run:
-    shell: bash -euxo pipefail {0}
-
-concurrency:
-  # Allow only one workflow per any non-`main` branch.
-  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
-  cancel-in-progress: true
-
 jobs:
  bench:
    # this workflow runs on self hosteed runner
@@ -69,6 +60,7 @@ jobs:
    - name: Setup cluster
      env:
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
+      shell: bash
      run: |
        set -e

@@ -104,9 +96,7 @@ jobs:
        # since it might generate duplicates when calling ingest_perf_test_result.py
        rm -rf perf-report-staging
        mkdir -p perf-report-staging
-        # Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
-        # it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
-        ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400
+        ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600

    - name: Submit result
      env:
@@ -114,113 +104,3 @@ jobs:
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
      run: |
        REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
-
-    - name: Post to a Slack channel
-      if: ${{ github.event.schedule && failure() }}
-      uses: slackapi/slack-github-action@v1
-      with:
-        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-      env:
-        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-
-  pgbench-compare:
-    env:
-      TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
-      TEST_PG_BENCH_SCALES_MATRIX: "10gb"
-      POSTGRES_DISTRIB_DIR: /usr
-      TEST_OUTPUT: /tmp/test_output
-      BUILD_TYPE: remote
-
-    strategy:
-      fail-fast: false
-      matrix:
-        connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ]
-
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
-      options: --init
-
-    timeout-minutes: 360 # 6h
-
-    steps:
-    - uses: actions/checkout@v3
-
-    - name: Calculate platform
-      id: calculate-platform
-      env:
-        CONNSTR: ${{ matrix.connstr }}
-      run: |
-        if [ "${CONNSTR}" = "BENCHMARK_CAPTEST_CONNSTR" ]; then
-          PLATFORM=neon-captest
-        elif [ "${CONNSTR}" = "BENCHMARK_RDS_CONNSTR" ]; then
-          PLATFORM=rds-aurora
-        else
-          echo 2>&1 "Unknown CONNSTR=${CONNSTR}. Allowed are BENCHMARK_CAPTEST_CONNSTR, and BENCHMARK_RDS_CONNSTR only"
-          exit 1
-        fi
-
-        echo "::set-output name=PLATFORM::${PLATFORM}"
-
-    - name: Install Deps
-      run: |
-        sudo apt -y update
-        sudo apt install -y postgresql-14
-
-    - name: Benchmark init
-      uses: ./.github/actions/run-python-test-set
-      with:
-        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance
-        run_in_parallel: false
-        save_perf_report: true
-        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
-      env:
-        PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
-        BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
-        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
-        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-
-    - name: Benchmark simple-update
-      uses: ./.github/actions/run-python-test-set
-      with:
-        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance
-        run_in_parallel: false
-        save_perf_report: true
-        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
-      env:
-        PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
-        BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
-        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
-        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-
-    - name: Benchmark simple-update
-      uses: ./.github/actions/run-python-test-set
-      with:
-        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance
-        run_in_parallel: false
-        save_perf_report: true
-        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
-      env:
-        PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
-        BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
-        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
-        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-
-    - name: Create Allure report
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
-
-    - name: Post to a Slack channel
-      if: ${{ github.event.schedule && failure() }}
-      uses: slackapi/slack-github-action@v1
-      with:
-        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-      env:
-        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1,12 +1,15 @@
-name: Test and Deploy
+name: Test

 on:
  push:
    branches:
-      - main
-      - release
+    - main
  pull_request:

+defaults:
+  run:
+    shell: bash -ex {0}
+
 concurrency:
  # Allow only one workflow per any non-`main` branch.
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
@@ -17,163 +20,143 @@ env:
  COPT: '-Werror'

 jobs:
-  tag:
-    runs-on: dev
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
-    outputs:
-      build-tag: ${{steps.build-tag.outputs.tag}}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-
-      - name: Get build tag
-        run: |
-          echo run:$GITHUB_RUN_ID
-          echo ref:$GITHUB_REF_NAME
-          echo rev:$(git rev-list --count HEAD)
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            echo "::set-output name=tag::$(git rev-list --count HEAD)"
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
-            echo "::set-output name=tag::$GITHUB_RUN_ID"
-          fi
-        shell: bash
-        id: build-tag
-
-  build-neon:
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+  build-postgres:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
    strategy:
      fail-fast: false
      matrix:
        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]

    env:
      BUILD_TYPE: ${{ matrix.build_type }}
-      GIT_VERSION: ${{ github.sha }}
-
    steps:
-      - name: Fix git ownership
-        run: |
-          # Workaround for `fatal: detected dubious ownership in repository at ...`
-          #
-          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
-          #   Ref https://github.com/actions/checkout/issues/785
-          #
-          git config --global --add safe.directory ${{ github.workspace }}
-          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-
      - name: Checkout
        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 1

-      - name: Set pg 14 revision for caching
-        id: pg_v14_rev
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
-        shell: bash -euxo pipefail {0}
+      - name: Set pg revision for caching
+        id: pg_ver
+        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)

-      - name: Set pg 15 revision for caching
-        id: pg_v15_rev
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
-        shell: bash -euxo pipefail {0}
+      - name: Cache postgres build
+        id: cache_pg
+        uses: actions/cache@v3
+        with:
+          path: tmp_install/
+          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

-      # Set some environment variables used by all the steps.
-      #
-      # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
-      #   It also includes --features, if any
-      #
-      # CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
-      #   because "cargo metadata" doesn't accept --release or --debug options
-      #
-      - name: Set env variables
+      - name: Build postgres
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: mold -run make postgres -j$(nproc)
+
+      # actions/cache@v3 does not allow concurrently using the same cache across job steps, so hand the build over as an uploaded artifact instead
+      - name: Prepare postgres artifact
+        run: tar -C tmp_install/ -czf ./pg.tgz .
+      - name: Upload postgres artifact
+        uses: actions/upload-artifact@v3
+        with:
+          retention-days: 7
+          if-no-files-found: error
+          name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
+          path: ./pg.tgz
+
+
+  build-neon:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-postgres ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
+
+    env:
+      BUILD_TYPE: ${{ matrix.build_type }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Get postgres artifact for restoration
+        uses: actions/download-artifact@v3
+        with:
+          name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
+          path: ./postgres-artifact/
+      - name: Extract postgres artifact
        run: |
-          if [[ $BUILD_TYPE == "debug" ]]; then
-            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
-            CARGO_FEATURES=""
-            CARGO_FLAGS="--locked --timings"
-          elif [[ $BUILD_TYPE == "release" ]]; then
-            cov_prefix=""
-            CARGO_FEATURES="--features profiling"
-            CARGO_FLAGS="--locked --timings --release $CARGO_FEATURES"
-          fi
-          echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
-          echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
-          echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
-        shell: bash -euxo pipefail {0}
+          mkdir ./tmp_install/
+          tar -xf ./postgres-artifact/pg.tgz -C ./tmp_install/
+          rm -rf ./postgres-artifact/

-      # Don't include the ~/.cargo/registry/src directory. It contains just
-      # uncompressed versions of the crates in ~/.cargo/registry/cache
-      # directory, and it's faster to let 'cargo' to rebuild it from the
-      # compressed crates.
      - name: Cache cargo deps
        id: cache_cargo
        uses: actions/cache@v3
        with:
          path: |
            ~/.cargo/registry/
-            !~/.cargo/registry/src
            ~/.cargo/git/
            target/
          # Fall back to older versions of the key, if no cache for current Cargo.lock was found
          key: |
-            v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
-            v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-
-
-      - name: Cache postgres v14 build
-        id: cache_pg_14
-        uses: actions/cache@v3
-        with:
-          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Cache postgres v15 build
-        id: cache_pg_15
-        uses: actions/cache@v3
-        with:
-          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Build postgres v14
-        if: steps.cache_pg_14.outputs.cache-hit != 'true'
-        run: mold -run make postgres-v14 -j$(nproc)
-        shell: bash -euxo pipefail {0}
-
-      - name: Build postgres v15
-        if: steps.cache_pg_15.outputs.cache-hit != 'true'
-        run: mold -run make postgres-v15 -j$(nproc)
-        shell: bash -euxo pipefail {0}
-
-      - name: Build neon extensions
-        run: mold -run make neon-pg-ext -j$(nproc)
-        shell: bash -euxo pipefail {0}
+            v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
+            v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-

      - name: Run cargo build
        run: |
-          ${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
-        shell: bash -euxo pipefail {0}
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+            CARGO_FLAGS=
+          elif [[ $BUILD_TYPE == "release" ]]; then
+            cov_prefix=()
+            CARGO_FLAGS="--release --features profiling"
+          fi
+
+          "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests

      - name: Run cargo test
        run: |
-          ${cov_prefix} cargo test $CARGO_FLAGS
-        shell: bash -euxo pipefail {0}
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+            CARGO_FLAGS=
+          elif [[ $BUILD_TYPE == "release" ]]; then
+            cov_prefix=()
+            CARGO_FLAGS=--release
+          fi
+
+          "${cov_prefix[@]}" cargo test $CARGO_FLAGS

      - name: Install rust binaries
        run: |
-          # Install target binaries
-          mkdir -p /tmp/neon/bin/
+          if [[ $BUILD_TYPE == "debug" ]]; then
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+          elif [[ $BUILD_TYPE == "release" ]]; then
+            cov_prefix=()
+          fi
+
          binaries=$(
-            ${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
+            "${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
            jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
          )
+
+          test_exe_paths=$(
+            "${cov_prefix[@]}" cargo test --message-format=json --no-run |
+            jq -r '.executable | select(. != null)'
+          )
+
+          mkdir -p /tmp/neon/bin/
+          mkdir -p /tmp/neon/test_bin/
+          mkdir -p /tmp/neon/etc/
+
+          # Keep bloated coverage data files away from the rest of the artifact
+          mkdir -p /tmp/coverage/
+
+          # Install target binaries
          for bin in $binaries; do
            SRC=target/$BUILD_TYPE/$bin
            DST=/tmp/neon/bin/$bin
@@ -182,15 +165,9 @@ jobs:

          # Install test executables and write list of all binaries (for code coverage)
          if [[ $BUILD_TYPE == "debug" ]]; then
-            # Keep bloated coverage data files away from the rest of the artifact
-            mkdir -p /tmp/coverage/
-
-            mkdir -p /tmp/neon/test_bin/
-
-            test_exe_paths=$(
-              ${cov_prefix} cargo test $CARGO_FLAGS --message-format=json --no-run |
-              jq -r '.executable | select(. != null)'
-            )
+            for bin in $binaries; do
+              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
+            done
            for bin in $test_exe_paths; do
              SRC=$bin
              DST=/tmp/neon/test_bin/$(basename $bin)
@@ -200,49 +177,36 @@ jobs:
              strip "$SRC" -o "$DST"
              echo "$DST" >> /tmp/coverage/binaries.list
            done
-
-            for bin in $binaries; do
-              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
-            done
          fi
-        shell: bash -euxo pipefail {0}

      - name: Install postgres binaries
-        run: cp -a pg_install /tmp/neon/pg_install
-        shell: bash -euxo pipefail {0}
+        run: cp -a tmp_install /tmp/neon/pg_install

-      - name: Upload Neon artifact
-        uses: ./.github/actions/upload
-        with:
-          name: neon-${{ runner.os }}-${{ matrix.build_type }}-artifact
-          path: /tmp/neon
+      - name: Prepare neon artifact
+        run: tar -C /tmp/neon/ -czf ./neon.tgz .

-      - name: Prepare cargo build timing stats for storing
-        run: |
-          mkdir -p "/tmp/neon/cargo-timings/$BUILD_TYPE/"
-          cp -r ./target/cargo-timings/* "/tmp/neon/cargo-timings/$BUILD_TYPE/"
-        shell: bash -euxo pipefail {0}
-      - name: Upload cargo build stats
-        uses: ./.github/actions/upload
+      - name: Upload neon binaries
+        uses: actions/upload-artifact@v3
        with:
-          name: neon-${{ runner.os }}-${{ matrix.build_type }}-build-stats
-          path: /tmp/neon/cargo-timings/
+          retention-days: 7
+          if-no-files-found: error
+          name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
+          path: ./neon.tgz

      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug'
        uses: ./.github/actions/save-coverage-data

-  regress-tests:
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+
+  pg_regress-tests:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
    needs: [ build-neon ]
    strategy:
      fail-fast: false
      matrix:
        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -250,33 +214,52 @@ jobs:
          submodules: true
          fetch-depth: 2

-      - name: Pytest regression tests
+      - name: Pytest regress tests
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: ${{ matrix.build_type }}
-          test_selection: regress
+          rust_toolchain: ${{ matrix.rust_toolchain }}
+          test_selection: batch_pg_regress
          needs_postgres_source: true
-          run_with_real_s3: true
-          real_s3_bucket: ci-tests-s3
-          real_s3_region: us-west-2
-          real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
-          real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
+
+      - name: Merge and upload coverage data
+        if: matrix.build_type == 'debug'
+        uses: ./.github/actions/save-coverage-data
+
+  other-tests:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-neon ]
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
+        rust_toolchain: [ 1.58 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 2
+
+      - name: Pytest other tests
+        uses: ./.github/actions/run-python-test-set
+        with:
+          build_type: ${{ matrix.build_type }}
+          rust_toolchain: ${{ matrix.rust_toolchain }}
+          test_selection: batch_others

      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug'
        uses: ./.github/actions/save-coverage-data

  benchmarks:
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+    runs-on: [ self-hosted, Linux, k8s-runner ]
    needs: [ build-neon ]
-    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
    strategy:
      fail-fast: false
      matrix:
        build_type: [ release ]
+        rust_toolchain: [ 1.58 ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -288,6 +271,7 @@ jobs:
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: ${{ matrix.build_type }}
+          rust_toolchain: ${{ matrix.rust_toolchain }}
          test_selection: performance
          run_in_parallel: false
          save_perf_report: true
@@ -297,56 +281,14 @@ jobs:
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones

-  merge-allure-report:
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
-    needs: [ regress-tests, benchmarks ]
-    if: always()
-    strategy:
-      fail-fast: false
-      matrix:
-        build_type: [ debug, release ]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: false
-
-      - name: Create Allure report
-        id: create-allure-report
-        uses: ./.github/actions/allure-report
-        with:
-          action: generate
-          build_type: ${{ matrix.build_type }}
-
-      - name: Store Allure test stat in the DB
-        env:
-          BUILD_TYPE: ${{ matrix.build_type }}
-          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-          REPORT_URL: ${{ steps.create-allure-report.outputs.report-url }}
-          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
-        shell: bash -euxo pipefail {0}
-        run: |
-          curl --fail --output suites.json ${REPORT_URL%/index.html}/data/suites.json
-          ./scripts/pysync
-
-          # Workaround for https://github.com/neondatabase/cloud/issues/2188
-          psql "$TEST_RESULT_CONNSTR" -c "SELECT 1;" || sleep 10
-
-          DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
-
  coverage-report:
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
-    needs: [ regress-tests ]
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ other-tests, pg_regress-tests ]
    strategy:
      fail-fast: false
      matrix:
        build_type: [ debug ]
+        rust_toolchain: [ '1.58' ]  # quoted: an unquoted version is a YAML float (1.60 would load as 1.6)
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -360,26 +302,30 @@ jobs:
        with:
          path: |
            ~/.cargo/registry/
-            !~/.cargo/registry/src
            ~/.cargo/git/
            target/
-          key: v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+          key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}

-      - name: Get Neon artifact
-        uses: ./.github/actions/download
+      - name: Get Neon artifact for restoration
+        uses: actions/download-artifact@v3
        with:
-          name: neon-${{ runner.os }}-${{ matrix.build_type }}-artifact
-          path: /tmp/neon
+          name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
+          path: ./neon-artifact/

-      - name: Get coverage artifact
-        uses: ./.github/actions/download
+      - name: Extract Neon artifact
+        run: |
+          mkdir -p /tmp/neon/
+          tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
+          rm -rf ./neon-artifact/
+
+      - name: Restore coverage data
+        uses: actions/download-artifact@v3
        with:
          name: coverage-data-artifact
-          path: /tmp/coverage
+          path: /tmp/coverage/

      - name: Merge coverage data
        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
-        shell: bash -euxo pipefail {0}

      - name: Build and upload coverage report
        run: |
@@ -412,209 +358,187 @@ jobs:
              \"description\": \"Coverage report is ready\",
              \"target_url\": \"$REPORT_URL\"
            }"
-        shell: bash -euxo pipefail {0}

  trigger-e2e-tests:
-    runs-on: dev
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
-      options: --init
-    needs: [ build-neon ]
-    steps:
-      - name: Set PR's status to pending and request a remote CI test
-        run: |
-          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
-          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ build-neon ]  # gated on the build job only
+    steps:
+      - name: Set PR's status to pending and request a remote CI test
+        run: |
+          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
+          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}

-          REMOTE_REPO="${{ github.repository_owner }}/cloud"
+          REMOTE_REPO="${{ github.repository_owner }}/cloud"

-          curl -f -X POST \
-          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-          -H "Accept: application/vnd.github.v3+json" \
-          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-          --data \
-            "{
-              \"state\": \"pending\",
-              \"context\": \"neon-cloud-e2e\",
-              \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
-            }"
+          curl -f -X POST \
+          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
+          -H "Accept: application/vnd.github.v3+json" \
+          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+          --data \
+            "{
+              \"state\": \"pending\",
+              \"context\": \"neon-cloud-e2e\",
+              \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
+            }"

-          curl -f -X POST \
-          https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-          -H "Accept: application/vnd.github.v3+json" \
-          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-          --data \
-            "{
-              \"ref\": \"main\",
-              \"inputs\": {
-                \"ci_job_name\": \"neon-cloud-e2e\",
-                \"commit_hash\": \"$COMMIT_SHA\",
-                \"remote_repo\": \"${{ github.repository }}\"
-              }
-            }"
-
-  neon-image:
-    runs-on: dev
-    container: gcr.io/kaniko-project/executor:v1.9.0-debug
+          curl -f -X POST \
+          https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
+          -H "Accept: application/vnd.github.v3+json" \
+          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
+          --data \
+            "{
+              \"ref\": \"main\",
+              \"inputs\": {
+                \"ci_job_name\": \"neon-cloud-e2e\",
+                \"commit_hash\": \"$COMMIT_SHA\",
+                \"remote_repo\": \"${{ github.repository }}\"
+              }
+            }"

+  docker-image:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ pg_regress-tests, other-tests ]
+    if: |
+      (github.ref_name == 'main' || github.ref_name == 'release') &&
+      github.event_name != 'workflow_dispatch'
+    outputs:
+      build-tag: ${{steps.build-tag.outputs.tag}}
    steps:
      - name: Checkout
-        uses: actions/checkout@v1 # v3 won't work with kaniko
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 0

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - name: Kaniko build neon
-        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+        with:
+          driver: docker

-  compute-tools-image:
-    runs-on: dev
-    container: gcr.io/kaniko-project/executor:v1.9.0-debug
+      - name: Get build tag
+        run: |
+          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            echo "::set-output name=tag::$(git rev-list --count HEAD)"
+          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+            echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
+          else
+            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
+            exit 1
+          fi
+        id: build-tag

+      - name: Get legacy build tag
+        run: |
+          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            echo "::set-output name=tag::latest"
+          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+            echo "::set-output name=tag::release"
+          else
+            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
+            exit 1
+          fi
+        id: legacy-build-tag  # read back as steps.legacy-build-tag.outputs.tag when tagging the pushed image
+
+      - name: Build neon Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          build-args: |
+            GIT_VERSION="${{github.sha}}"
+            AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
+            AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
+          pull: true
+          push: true
+          tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
+
+  docker-image-compute:
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    needs: [ pg_regress-tests, other-tests ]
+    if: |
+      (github.ref_name == 'main' || github.ref_name == 'release') &&
+      github.event_name != 'workflow_dispatch'
+    outputs:
+      build-tag: ${{steps.build-tag.outputs.tag}}
    steps:
      - name: Checkout
-        uses: actions/checkout@v1 # v3 won't work with kaniko
-
-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
-
-      - name: Kaniko build compute tools
-        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
-
-  compute-node-image:
-    runs-on: dev
-    container: gcr.io/kaniko-project/executor:v1.9.0-debug
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v1 # v3 won't work with kaniko
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 0

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
-
-        # compute-node uses postgres 14, which is default now
-        # cloud repo depends on this image name, thus duplicating it
-        # remove compute-node when cloud repo is updated
-      - name: Kaniko build compute node with extensions v14 (compatibility)
-        run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
-
-  compute-node-image-v14:
-    runs-on: dev
-    container: gcr.io/kaniko-project/executor:v1.9.0-debug
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v1 # v3 won't work with kaniko
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
        with:
-          submodules: true
-          fetch-depth: 0
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
-
-      - name: Kaniko build compute node with extensions v14
-        run: /kaniko/executor --skip-unused-stages  --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache  --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:$GITHUB_RUN_ID
-
-
-  compute-node-image-v15:
-    runs-on: dev
-    container: gcr.io/kaniko-project/executor:v1.9.0-debug
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v1 # v3 won't work with kaniko
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
        with:
-          submodules: true
-          fetch-depth: 0
+          driver: docker

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
-
-      - name: Kaniko build compute node with extensions v15
-        run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:$GITHUB_RUN_ID
-
-  promote-images:
-    runs-on: dev
-    needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-tools-image ]
-    if: github.event_name != 'workflow_dispatch'
-    container: amazon/aws-cli
-    strategy:
-      fail-fast: false
-      matrix:
-        # compute-node uses postgres 14, which is default now
-        # cloud repo depends on this image name, thus duplicating it
-        # remove compute-node when cloud repo is updated
-        name: [ neon, compute-node, compute-node-v14, compute-tools ]
-
-    steps:
-      - name: Promote image to latest
-        run:
-          MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
-
-  push-docker-hub:
-    runs-on: dev
-    needs: [ promote-images, tag ]
-    container: golang:1.19-bullseye
-
-    steps:
-      - name: Install Crane & ECR helper
+      - name: Get build tag
        run: |
-          go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
-          go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
+          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            echo "::set-output name=tag::$(git rev-list --count HEAD)"
+          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+            echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
+          else
+            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
+            exit 1
+          fi
+        id: build-tag

-      - name: Configure ECR login
+      - name: Get legacy build tag
        run: |
-          mkdir /github/home/.docker/
-          echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
+          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            echo "::set-output name=tag::latest"
+          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+            echo "::set-output name=tag::release"
+          else
+            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
+            exit 1
+          fi
+        id: legacy-build-tag  # read back as steps.legacy-build-tag.outputs.tag by the push steps below

-      - name: Pull neon image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
+      - name: Build compute-tools Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          build-args: |
+            GIT_VERSION="${{github.sha}}"
+            AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
+            AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
+          push: false
+          file: Dockerfile.compute-tools
+          tags: neondatabase/compute-tools:local

-      - name: Pull compute tools image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
+      - name: Push compute-tools Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          build-args: |
+            GIT_VERSION="${{github.sha}}"
+            AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
+            AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
+          push: true
+          file: Dockerfile.compute-tools
+          tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}

-      - name: Pull compute node image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
-
-      - name: Pull compute node v14 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest compute-node-v14
-
-      - name: Pull rust image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust
-
-      - name: Configure docker login
-        run: |
-          # ECR Credential Helper & Docker Hub don't work together in config, hence reset
-          echo "" > /github/home/.docker/config.json
-          crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
-
-      - name: Push neon image to Docker Hub
-        run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
-
-      - name: Push compute tools image to Docker Hub
-        run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
-
-      - name: Push compute node image to Docker Hub
-        run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
-
-      - name: Push compute node v14 image to Docker Hub
-        run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}
-
-      - name: Push rust image to Docker Hub
-        run: crane push rust neondatabase/rust:pinned
-
-      - name: Add latest tag to images
-        if: |
-          (github.ref_name == 'main' || github.ref_name == 'release') &&
-          github.event_name != 'workflow_dispatch'
-        run: |
-          crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
-          crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
-          crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
-          crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
+      - name: Build compute-node Docker image
+        uses: docker/build-push-action@v2
+        with:
+          context: ./vendor/postgres/
+          build-args: |  # literal block, one arg per line; a plain scalar would fold added args together
+            COMPUTE_TOOLS_TAG=local
+          push: true
+          tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}

  calculate-deploy-targets:
    runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -640,16 +564,14 @@ jobs:

  deploy:
    runs-on: [ self-hosted, Linux, k8s-runner ]
-    #container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
-    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
-    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
-    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    # We need both storage **and** compute images for deploy, because control plane
+    # picks the compute version based on the storage version. If it notices a fresh
+    # storage it may bump the compute version. And if compute image failed to build
+    # it may break things badly.
+    needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
    if: |
      (github.ref_name == 'main' || github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'
-    defaults:
-      run:
-        shell: bash
    strategy:
      matrix:
        include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
@@ -660,19 +582,12 @@ jobs:
          submodules: true
          fetch-depth: 0

-      - name: Setup python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.10'
-
      - name: Setup ansible
        run: |
-          export PATH="/root/.local/bin:$PATH"
          pip install --progress-bar off --user ansible boto3

      - name: Redeploy
        run: |
-          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
          cd "$(pwd)/.github/ansible"

          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
@@ -695,16 +610,13 @@ jobs:
          rm -f neon_install.tar.gz .neon_current_version

  deploy-proxy:
-    runs-on: dev
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
-    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
-    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    runs-on: [ self-hosted, Linux, k8s-runner ]
+    # Compute image isn't strictly required for proxy deploy, but let's still wait for it
+    # to run all deploy jobs consistently.
+    needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
    if: |
      (github.ref_name == 'main' || github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'
-    defaults:
-      run:
-        shell: bash
    strategy:
      matrix:
        include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
@@ -717,9 +629,6 @@ jobs:
          submodules: true
          fetch-depth: 0

-      - name: Add curl
-        run: apt update && apt install curl -y
-
      - name: Store kubeconfig file
        run: |
          echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
@@ -732,6 +641,6 @@ jobs:

      - name: Re-deploy proxy
        run: |
-          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
+          DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -8,7 +8,7 @@ on:

 defaults:
  run:
-    shell: bash -euxo pipefail {0}
+    shell: bash -ex {0}

 concurrency:
  # Allow only one workflow per any non-`main` branch.
@@ -17,23 +17,18 @@ concurrency:

 env:
  RUST_BACKTRACE: 1
-  COPT: '-Werror'

 jobs:
  check-codestyle-rust:
    strategy:
      fail-fast: false
      matrix:
-        # XXX: both OSes have rustup
-        #   * https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md#rust-tools
-        #   * https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md#rust-tools
-        # this is all we need to install our toolchain later via rust-toolchain.toml
-        # so don't install any toolchain explicitly.
+        # If we want to duplicate this job for different
+        # Rust toolchains (e.g. nightly or 1.37.0), add them here.
+        rust_toolchain: ['1.58']  # quoted: an unquoted version is a YAML float (1.60 would load as 1.6)
        os: [ubuntu-latest, macos-latest]
-        # To support several Postgres versions, add them here.
-        postgres_version: [v14, v15]
-    timeout-minutes: 60
-    name: check codestyle rust and postgres
+    timeout-minutes: 50
+    name: run regression test suite
    runs-on: ${{ matrix.os }}

    steps:
@@ -43,6 +38,14 @@ jobs:
          submodules: true
          fetch-depth: 2

+      - name: Install rust toolchain ${{ matrix.rust_toolchain }}
+        uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: ${{ matrix.rust_toolchain }}
+          components: rustfmt, clippy
+          override: true
+
      - name: Check formatting
        run: cargo fmt --all -- --check

@@ -58,14 +61,14 @@ jobs:

      - name: Set pg revision for caching
        id: pg_ver
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-${{matrix.postgres_version}})
+        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)

-      - name: Cache postgres ${{matrix.postgres_version}} build
+      - name: Cache postgres build
        id: cache_pg
-        uses: actions/cache@v3
+        uses: actions/cache@v2
        with:
          path: |
-            pg_install/${{matrix.postgres_version}}
+            tmp_install/
          key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}

      - name: Set extra env for macOS
@@ -78,36 +81,32 @@ jobs:
        if: steps.cache_pg.outputs.cache-hit != 'true'
        run: make postgres

-      - name: Build neon extensions
-        run: make neon-pg-ext
-
      # Plain configure output can contain weird errors like 'error: C compiler cannot create executables'
      # and the real cause will be inside config.log
      - name: Print configure logs in case of failure
        if: failure()
        continue-on-error: true
        run: |
-          echo '' && echo '=== Postgres ${{matrix.postgres_version}} config.log ===' && echo ''
-          cat pg_install/build/${{matrix.postgres_version}}/config.log
-          echo '' && echo '=== Postgres ${{matrix.postgres_version}} configure.log ===' && echo ''
-          cat pg_install/build/${{matrix.postgres_version}}/configure.log
+          echo '' && echo '=== config.log ===' && echo ''
+          cat tmp_install/build/config.log
+          echo '' && echo '=== configure.log ===' && echo ''
+          cat tmp_install/build/configure.log

      - name: Cache cargo deps
        id: cache_cargo
-        uses: actions/cache@v3
+        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo/registry
-            !~/.cargo/registry/src
            ~/.cargo/git
            target
-          key: v4-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
+          key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}

      - name: Run cargo clippy
        run: ./run_clippy.sh

      - name: Ensure all project builds
-        run: cargo build --locked --all --all-targets
+        run: cargo build --all --all-targets

  check-codestyle-python:
    runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -128,14 +127,8 @@ jobs:
      - name: Install Python deps
        run: ./scripts/pysync

-      - name: Run isort to ensure code format
-        run: poetry run isort --diff --check .
-
-      - name: Run black to ensure code format
-        run: poetry run black --diff --check .
-
-      - name: Run flake8 to ensure code format
-        run: poetry run flake8 .
+      - name: Run yapf to ensure code format
+        run: poetry run yapf --recursive --diff .

      - name: Run mypy to check types
        run: poetry run mypy .
--- a/.github/workflows/notifications.yml
+++ b/.github/workflows/notifications.yml
@@ -0,0 +1,45 @@
+name: Send Notifications
+
+on:
+  push:
+    branches: [ main ]
+
+jobs:
+  send-notifications:
+    timeout-minutes: 30
+    name: send commit notifications
+    runs-on: ubuntu-latest
+
+    steps:
+
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          submodules: true
+          fetch-depth: 2
+
+      - name: Form variables for notification message
+        id: git_info_grab
+        run: |
+          git_stat=$(git show --stat=50)
+          git_stat="${git_stat//'%'/'%25'}"
+          git_stat="${git_stat//$'\n'/'%0A'}"
+          git_stat="${git_stat//$'\r'/'%0D'}"
+          git_stat="${git_stat// / }" # space -> 'Space En', as github tends to eat ordinary spaces
+          echo "::set-output name=git_stat::$git_stat"
+          echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
+          echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})"
+
+      - name: Send notification
+        uses: appleboy/telegram-action@master
+        with:
+          to: ${{ secrets.TELEGRAM_TO }}
+          token: ${{ secrets.TELEGRAM_TOKEN }}
+          format: markdown
+          args: |
+            *@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }})
+
+            ```
+            ${{ steps.git_info_grab.outputs.git_stat }}
+            ```
+
--- a/.github/workflows/pg_clients.yml
+++ b/.github/workflows/pg_clients.yml
@@ -19,12 +19,8 @@ concurrency:

 jobs:
  test-postgres-client-libs:
-    # TODO: switch to gen2 runner, requires docker
    runs-on: [ ubuntu-latest ]

-    env:
-      TEST_OUTPUT: /tmp/test_output
-
    steps:
    - name: Checkout
      uses: actions/checkout@v3
@@ -44,16 +40,16 @@ jobs:
        key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}

    - name: Install Python deps
-      shell: bash -euxo pipefail {0}
+      shell: bash -ex {0}
      run: ./scripts/pysync

    - name: Run pytest
      env:
        REMOTE_ENV: 1
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
-
-        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install/v14
-      shell: bash -euxo pipefail {0}
+        TEST_OUTPUT: /tmp/test_output
+        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      shell: bash -ex {0}
      run: |
        # Test framework expects we have psql binary;
        # but since we don't really need it in this test, let's mock it
@@ -65,18 +61,9 @@ jobs:
          -m "remote_cluster" \
          -rA "test_runner/pg_clients"

-    # We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
-    # It will be fixed after switching to gen2 runner
-    - name: Upload python test logs
-      if: always()
-      uses: actions/upload-artifact@v3
-      with:
-        retention-days: 7
-        name: python-test-pg_clients-${{ runner.os }}-stage-logs
-        path: ${{ env.TEST_OUTPUT }}
-
    - name: Post to a Slack channel
-      if: ${{ github.event.schedule && failure() }}
+      if: failure()
+      id: slack
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
-/pg_install
 /target
 /tmp_check
+/tmp_install
 /tmp_check_cli
 __pycache__/
 test_output/
@@ -15,6 +15,3 @@ test_output/

 *.key
 *.crt
-*.o
-*.so
-*.Po
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,8 +1,4 @@
-[submodule "vendor/postgres-v14"]
-	path = vendor/postgres-v14
-	url = https://github.com/neondatabase/postgres.git
+[submodule "vendor/postgres"]
+	path = vendor/postgres
+	url = https://github.com/zenithdb/postgres
 	branch = main
-[submodule "vendor/postgres-v15"]
-	path = vendor/postgres-v15
-	url = https://github.com/neondatabase/postgres.git
-	branch = REL_15_STABLE_neon
--- a/.yapfignore
+++ b/.yapfignore
@@ -0,0 +1,10 @@
+# This file is only read when `yapf` is run from this directory.
+# Hence we only list top-level directories here to avoid confusion.
+# See source code for the exact file format: https://github.com/google/yapf/blob/c6077954245bc3add82dafd853a1c7305a6ebd20/yapf/yapflib/file_resources.py#L40-L43
+vendor/
+target/
+tmp_install/
+__pycache__/
+test_output/
+.neon/
+.git/
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,15 +11,17 @@ than it was before.

 ## Submitting changes

-1. Get at least one +1 on your PR before you push.
+1. Make a PR for every change.
+
+   Even seemingly trivial patches can break things in surprising ways.
+Use of common sense is OK. If you're only fixing a typo in a comment,
+it's probably fine to just push it. But if in doubt, open a PR.
+
+2. Get at least one +1 on your PR before you push.

   For simple patches, it will only take a minute for someone to review
 it.

-2. Don't force push small changes after making the PR ready for review.
-Doing so will force readers to re-read your entire PR, which will delay
-the review process.
-
 3. Always keep the CI green.

   Do not push, if the CI failed on your PR. Even if you think it's not
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -48,9 +48,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.59"
+version = "1.0.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c91f1f46651137be86f3a2b9a8359f9ab421d04d941c62b5982e1ca21113adf9"
+checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"
 dependencies = [
 "backtrace",
 ]
@@ -77,7 +77,7 @@ dependencies = [
 "num-traits",
 "rusticata-macros",
 "thiserror",
- "time 0.3.12",
+ "time 0.3.11",
 ]

 [[package]]
@@ -126,9 +126,9 @@ dependencies = [

 [[package]]
 name = "async-trait"
-version = "0.1.57"
+version = "0.1.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f"
+checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -154,9 +154,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "axum"
-version = "0.5.13"
+version = "0.5.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b9496f0c1d1afb7a2af4338bbe1d969cddfead41d87a9fb3aaa6d0bbc7af648"
+checksum = "d16705af05732b7d3258ec0f7b73c03a658a28925e050d8852d5b568ee8bcf4e"
 dependencies = [
 "async-trait",
 "axum-core",
@@ -166,7 +166,7 @@ dependencies = [
 "http",
 "http-body",
 "hyper",
- "itoa 1.0.3",
+ "itoa 1.0.2",
 "matchit",
 "memchr",
 "mime",
@@ -298,9 +298,9 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"

 [[package]]
 name = "bytemuck"
-version = "1.11.0"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5377c8865e74a160d21f29c2d40669f53286db6eab59b88540cbb12ffc8b835"
+checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a"

 [[package]]
 name = "byteorder"
@@ -310,13 +310,22 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"

 [[package]]
 name = "bytes"
-version = "1.2.1"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
+checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
 dependencies = [
 "serde",
 ]

+[[package]]
+name = "cast"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
+dependencies = [
+ "rustc_version",
+]
+
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -386,9 +395,9 @@ dependencies = [

 [[package]]
 name = "clap"
-version = "3.2.16"
+version = "3.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3dbbb6653e7c55cc8595ad3e1f7be8f32aba4eb7ff7f0fd1163d4f3d137c0a9"
+checksum = "ab8b79fe3946ceb4a0b1c080b4018992b8d27e9ff363644c1c9b6387c854614d"
 dependencies = [
 "atty",
 "bitflags",
@@ -455,9 +464,10 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "chrono",
- "clap 3.2.16",
+ "clap 3.2.12",
 "env_logger",
 "hyper",
+ "libc",
 "log",
 "postgres",
 "regex",
@@ -495,11 +505,8 @@ name = "control_plane"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "clap 3.2.16",
- "comfy-table",
- "git-version",
+ "lazy_static",
 "nix",
- "once_cell",
 "pageserver",
 "postgres",
 "regex",
@@ -510,6 +517,7 @@ dependencies = [
 "tar",
 "thiserror",
 "toml",
+ "url",
 "utils",
 "workspace_hack",
 ]
@@ -573,7 +581,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f"
 dependencies = [
 "atty",
- "cast",
+ "cast 0.3.0",
 "clap 2.34.0",
 "criterion-plot",
 "csv",
@@ -594,19 +602,19 @@ dependencies = [

 [[package]]
 name = "criterion-plot"
-version = "0.4.5"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
+checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
 dependencies = [
- "cast",
+ "cast 0.2.7",
 "itertools",
 ]

 [[package]]
 name = "crossbeam-channel"
-version = "0.5.6"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
+checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c"
 dependencies = [
 "cfg-if",
 "crossbeam-utils",
@@ -614,9 +622,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-deque"
-version = "0.8.2"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
+checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
 dependencies = [
 "cfg-if",
 "crossbeam-epoch",
@@ -625,9 +633,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-epoch"
-version = "0.9.10"
+version = "0.9.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1"
+checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
 dependencies = [
 "autocfg",
 "cfg-if",
@@ -639,9 +647,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.11"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
+checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83"
 dependencies = [
 "cfg-if",
 "once_cell",
@@ -674,9 +682,9 @@ dependencies = [

 [[package]]
 name = "crypto-common"
-version = "0.1.6"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+checksum = "2ccfd8c0ee4cce11e45b3fd6f9d5e69e0cc62912aa6a0cb1bf4617b0eba5a12f"
 dependencies = [
 "generic-array",
 "typenum",
@@ -920,9 +928,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"

 [[package]]
 name = "fastrand"
-version = "1.8.0"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
+checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
 dependencies = [
 "instant",
 ]
@@ -1089,9 +1097,9 @@ dependencies = [

 [[package]]
 name = "generic-array"
-version = "0.14.6"
+version = "0.14.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
+checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
 dependencies = [
 "typenum",
 "version_check",
@@ -1110,9 +1118,9 @@ dependencies = [

 [[package]]
 name = "gimli"
-version = "0.26.2"
+version = "0.26.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d"
+checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"

 [[package]]
 name = "git-version"
@@ -1169,13 +1177,19 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"

 [[package]]
 name = "hashbrown"
-version = "0.12.3"
+version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
 dependencies = [
 "ahash",
 ]

+[[package]]
+name = "hashbrown"
+version = "0.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022"
+
 [[package]]
 name = "heck"
 version = "0.3.3"
@@ -1242,7 +1256,7 @@ checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
 dependencies = [
 "bytes",
 "fnv",
- "itoa 1.0.3",
+ "itoa 1.0.2",
 ]

 [[package]]
@@ -1305,7 +1319,7 @@ dependencies = [
 "http-body",
 "httparse",
 "httpdate",
- "itoa 1.0.3",
+ "itoa 1.0.2",
 "pin-project-lite",
 "socket2",
 "tokio",
@@ -1376,7 +1390,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
 dependencies = [
 "autocfg",
- "hashbrown",
+ "hashbrown 0.12.2",
 ]

 [[package]]
@@ -1388,7 +1402,7 @@ dependencies = [
 "ahash",
 "atty",
 "indexmap",
- "itoa 1.0.3",
+ "itoa 1.0.2",
 "lazy_static",
 "log",
 "num-format",
@@ -1429,15 +1443,15 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"

 [[package]]
 name = "itoa"
-version = "1.0.3"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
+checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"

 [[package]]
 name = "js-sys"
-version = "0.3.59"
+version = "0.3.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2"
+checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27"
 dependencies = [
 "wasm-bindgen",
 ]
@@ -1479,9 +1493,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"

 [[package]]
 name = "libc"
-version = "0.2.127"
+version = "0.2.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b"
+checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"

 [[package]]
 name = "libloading"
@@ -1588,6 +1602,7 @@ dependencies = [
 name = "metrics"
 version = "0.1.0"
 dependencies = [
+ "lazy_static",
 "libc",
 "once_cell",
 "prometheus",
@@ -1651,6 +1666,24 @@ dependencies = [
 "tempfile",
 ]

+[[package]]
+name = "neon_local"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "clap 3.2.12",
+ "comfy-table",
+ "control_plane",
+ "git-version",
+ "pageserver",
+ "postgres",
+ "postgres_ffi",
+ "safekeeper",
+ "serde_json",
+ "utils",
+ "workspace_hack",
+]
+
 [[package]]
 name = "nix"
 version = "0.23.1"
@@ -1822,21 +1855,19 @@ dependencies = [

 [[package]]
 name = "os_str_bytes"
-version = "6.2.0"
+version = "6.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"
+checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"

 [[package]]
 name = "pageserver"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "async-stream",
- "async-trait",
 "byteorder",
 "bytes",
 "chrono",
- "clap 3.2.16",
+ "clap 3.2.12",
 "close_fds",
 "const_format",
 "crc32c",
@@ -1852,6 +1883,7 @@ dependencies = [
 "humantime-serde",
 "hyper",
 "itertools",
+ "lazy_static",
 "metrics",
 "nix",
 "once_cell",
@@ -1873,7 +1905,7 @@ dependencies = [
 "thiserror",
 "tokio",
 "tokio-postgres",
- "tokio-util",
+ "tokio-stream",
 "toml_edit",
 "tracing",
 "url",
@@ -2094,12 +2126,13 @@ dependencies = [
 "bindgen",
 "byteorder",
 "bytes",
+ "chrono",
 "crc32c",
 "env_logger",
 "hex",
+ "lazy_static",
 "log",
 "memoffset",
- "once_cell",
 "postgres",
 "rand",
 "regex",
@@ -2137,9 +2170,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"

 [[package]]
 name = "prettyplease"
-version = "0.1.18"
+version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "697ae720ee02011f439e0701db107ffe2916d83f718342d65d7f8bf7b8a5fee9"
+checksum = "da6ffbe862780245013cb1c0a48c4e44b7d665548088f91f6b90876d0625e4c2"
 dependencies = [
 "proc-macro2",
 "syn",
@@ -2153,9 +2186,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"

 [[package]]
 name = "proc-macro2"
-version = "1.0.43"
+version = "1.0.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
+checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
 dependencies = [
 "unicode-ident",
 ]
@@ -2251,19 +2284,17 @@ dependencies = [
 "anyhow",
 "async-trait",
 "base64",
- "bstr",
 "bytes",
- "clap 3.2.16",
+ "clap 3.2.12",
 "futures",
 "git-version",
- "hashbrown",
+ "hashbrown 0.11.2",
 "hex",
 "hmac 0.12.1",
 "hyper",
- "itertools",
+ "lazy_static",
 "md5",
 "metrics",
- "once_cell",
 "parking_lot 0.12.1",
 "pin-project-lite",
 "rand",
@@ -2272,7 +2303,7 @@ dependencies = [
 "routerify",
 "rstest",
 "rustls",
- "rustls-pemfile",
+ "rustls-pemfile 0.2.1",
 "scopeguard",
 "serde",
 "serde_json",
@@ -2299,10 +2330,19 @@ dependencies = [
 ]

 [[package]]
-name = "quote"
-version = "1.0.21"
+name = "quickcheck"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
+checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
+dependencies = [
+ "rand",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
 dependencies = [
 "proc-macro2",
 ]
@@ -2385,9 +2425,9 @@ dependencies = [

 [[package]]
 name = "redox_syscall"
-version = "0.2.16"
+version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
 dependencies = [
 "bitflags",
 ]
@@ -2482,7 +2522,7 @@ dependencies = [
 "percent-encoding",
 "pin-project-lite",
 "rustls",
- "rustls-pemfile",
+ "rustls-pemfile 1.0.0",
 "serde",
 "serde_json",
 "serde_urlencoded",
@@ -2673,9 +2713,18 @@ dependencies = [

 [[package]]
 name = "rustls-pemfile"
-version = "1.0.1"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55"
+checksum = "5eebeaeb360c87bfb72e84abdb3447159c0eaececf1bef2aecd65a8be949d1c9"
+dependencies = [
+ "base64",
+]
+
+[[package]]
+name = "rustls-pemfile"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7522c9de787ff061458fe9a829dc790a3f5b22dc571694fc5883f448b94d9a9"
 dependencies = [
 "base64",
 ]
@@ -2691,15 +2740,15 @@ dependencies = [

 [[package]]
 name = "rustversion"
-version = "1.0.9"
+version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8"
+checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"

 [[package]]
 name = "ryu"
-version = "1.0.11"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
+checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"

 [[package]]
 name = "safekeeper"
@@ -2709,16 +2758,18 @@ dependencies = [
 "async-trait",
 "byteorder",
 "bytes",
- "clap 3.2.16",
+ "clap 3.2.12",
 "const_format",
 "crc32c",
 "daemonize",
 "etcd_broker",
 "fs2",
+ "futures",
 "git-version",
 "hex",
 "humantime",
 "hyper",
+ "lazy_static",
 "metrics",
 "once_cell",
 "postgres",
@@ -2733,10 +2784,12 @@ dependencies = [
 "tempfile",
 "tokio",
 "tokio-postgres",
+ "tokio-util",
 "toml_edit",
 "tracing",
 "url",
 "utils",
+ "walkdir",
 "workspace_hack",
 ]

@@ -2800,15 +2853,15 @@ dependencies = [

 [[package]]
 name = "semver"
-version = "1.0.13"
+version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93f6841e709003d68bb2deee8c343572bf446003ec20a583e76f7b15cebf3711"
+checksum = "a2333e6df6d6598f2b1974829f853c2b4c5f4a6e503c10af918081aa6f8564e1"

 [[package]]
 name = "serde"
-version = "1.0.142"
+version = "1.0.139"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e590c437916fb6b221e1d00df6e3294f3fccd70ca7e92541c475d6ed6ef5fee2"
+checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6"
 dependencies = [
 "serde_derive",
 ]
@@ -2825,9 +2878,9 @@ dependencies = [

 [[package]]
 name = "serde_derive"
-version = "1.0.142"
+version = "1.0.139"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34b5b8d809babe02f538c2cfec6f2c1ed10804c0e5a6a041a049a4f5588ccc2e"
+checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -2836,11 +2889,11 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.83"
+version = "1.0.82"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7"
+checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7"
 dependencies = [
- "itoa 1.0.3",
+ "itoa 1.0.2",
 "ryu",
 "serde",
 ]
@@ -2852,7 +2905,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
 dependencies = [
 "form_urlencoded",
- "itoa 1.0.3",
+ "itoa 1.0.2",
 "ryu",
 "serde",
 ]
@@ -2957,7 +3010,7 @@ dependencies = [
 "num-bigint",
 "num-traits",
 "thiserror",
- "time 0.3.12",
+ "time 0.3.11",
 ]

 [[package]]
@@ -2968,12 +3021,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"

 [[package]]
 name = "slab"
-version = "0.4.7"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
-dependencies = [
- "autocfg",
-]
+checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"

 [[package]]
 name = "smallvec"
@@ -3081,9 +3131,9 @@ dependencies = [

 [[package]]
 name = "syn"
-version = "1.0.99"
+version = "1.0.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
+checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -3159,18 +3209,18 @@ checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"

 [[package]]
 name = "thiserror"
-version = "1.0.32"
+version = "1.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994"
+checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a"
 dependencies = [
 "thiserror-impl",
 ]

 [[package]]
 name = "thiserror-impl"
-version = "1.0.32"
+version = "1.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21"
+checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -3199,14 +3249,14 @@ dependencies = [

 [[package]]
 name = "time"
-version = "0.3.12"
+version = "0.3.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74b7cc93fc23ba97fde84f7eea56c55d1ba183f495c6715defdfc7b9cb8c870f"
+checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217"
 dependencies = [
- "itoa 1.0.3",
- "js-sys",
+ "itoa 1.0.2",
 "libc",
 "num_threads",
+ "quickcheck",
 "time-macros",
 ]

@@ -3243,9 +3293,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"

 [[package]]
 name = "tokio"
-version = "1.20.1"
+version = "1.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a8325f63a7d4774dd041e363b2409ed1c5cbbd0f867795e661df066b2b0a581"
+checksum = "57aec3cfa4c296db7255446efb4928a6be304b431a806216105542a67b6ca82e"
 dependencies = [
 "autocfg",
 "bytes",
@@ -3484,9 +3534,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"

 [[package]]
 name = "tracing"
-version = "0.1.36"
+version = "0.1.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307"
+checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09"
 dependencies = [
 "cfg-if",
 "log",
@@ -3508,11 +3558,11 @@ dependencies = [

 [[package]]
 name = "tracing-core"
-version = "0.1.29"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7"
+checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f"
 dependencies = [
- "once_cell",
+ "lazy_static",
 "valuable",
 ]

@@ -3575,9 +3625,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"

 [[package]]
 name = "unicode-ident"
-version = "1.0.3"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
+checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"

 [[package]]
 name = "unicode-normalization"
@@ -3629,7 +3679,6 @@ name = "utils"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "async-trait",
 "bincode",
 "byteorder",
 "bytes",
@@ -3639,16 +3688,16 @@ dependencies = [
 "hex-literal",
 "hyper",
 "jsonwebtoken",
+ "lazy_static",
 "metrics",
 "nix",
- "once_cell",
 "pin-project-lite",
 "postgres",
 "postgres-protocol",
 "rand",
 "routerify",
 "rustls",
- "rustls-pemfile",
+ "rustls-pemfile 0.2.1",
 "rustls-split",
 "serde",
 "serde_json",
@@ -3657,7 +3706,6 @@ dependencies = [
 "tempfile",
 "thiserror",
 "tokio",
- "tokio-rustls",
 "tracing",
 "tracing-subscriber",
 "workspace_hack",
@@ -3698,7 +3746,7 @@ name = "wal_craft"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "clap 3.2.16",
+ "clap 3.2.12",
 "env_logger",
 "log",
 "once_cell",
@@ -3742,9 +3790,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

 [[package]]
 name = "wasm-bindgen"
-version = "0.2.82"
+version = "0.2.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d"
+checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994"
 dependencies = [
 "cfg-if",
 "wasm-bindgen-macro",
@@ -3752,13 +3800,13 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.82"
+version = "0.2.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f"
+checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a"
 dependencies = [
 "bumpalo",
+ "lazy_static",
 "log",
- "once_cell",
 "proc-macro2",
 "quote",
 "syn",
@@ -3767,9 +3815,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.32"
+version = "0.4.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa76fb221a1f8acddf5b54ace85912606980ad661ac7a503b4570ffd3a624dad"
+checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f"
 dependencies = [
 "cfg-if",
 "js-sys",
@@ -3779,9 +3827,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.82"
+version = "0.2.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602"
+checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa"
 dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
@@ -3789,9 +3837,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.82"
+version = "0.2.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da"
+checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -3802,15 +3850,15 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.82"
+version = "0.2.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a"
+checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be"

 [[package]]
 name = "web-sys"
-version = "0.3.59"
+version = "0.3.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1"
+checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90"
 dependencies = [
 "js-sys",
 "wasm-bindgen",
@@ -3935,7 +3983,6 @@ version = "0.1.0"
 dependencies = [
 "ahash",
 "anyhow",
- "bstr",
 "bytes",
 "chrono",
 "clap 2.34.0",
@@ -3945,7 +3992,7 @@ dependencies = [
 "futures-task",
 "futures-util",
 "generic-array",
- "hashbrown",
+ "hashbrown 0.11.2",
 "hex",
 "hyper",
 "indexmap",
@@ -3960,12 +4007,11 @@ dependencies = [
 "prost",
 "rand",
 "regex",
- "regex-automata",
 "regex-syntax",
 "scopeguard",
 "serde",
 "syn",
- "time 0.3.12",
+ "time 0.3.11",
 "tokio",
 "tokio-util",
 "tracing",
@@ -3987,7 +4033,7 @@ dependencies = [
 "oid-registry",
 "rusticata-macros",
 "thiserror",
- "time 0.3.12",
+ "time 0.3.11",
 ]

 [[package]]
@@ -4016,6 +4062,6 @@ dependencies = [

 [[package]]
 name = "zeroize"
-version = "1.5.7"
+version = "1.5.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f"
+checksum = "20b578acffd8516a6c3f2a1bdefc1ec37e547bb4e0fb8b6b01a4cafc886b4442"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ members = [
    "proxy",
    "safekeeper",
    "workspace_hack",
+    "neon_local",
    "libs/*",
 ]

@@ -14,59 +15,6 @@ members = [
 # Besides, debug info should not affect the performance.
 debug = true

-[profile.release-line-debug]
-inherits = "release"
-debug = 1 # true = 2 = all symbols, 1 = line only
-[profile.release-line-debug-lto]
-inherits = "release"
-debug = 1 # true = 2 = all symbols, 1 = line only
-lto = true
-
-[profile.release-line-debug-size]
-inherits = "release"
-debug = 1 # true = 2 = all symbols, 1 = line only
-opt-level = "s"
-[profile.release-line-debug-zize]
-inherits = "release"
-debug = 1 # true = 2 = all symbols, 1 = line only
-opt-level = "z"
-[profile.release-line-debug-size-lto]
-inherits = "release"
-debug = 1 # true = 2 = all symbols, 1 = line only
-opt-level = "s"
-lto = true
-[profile.release-line-debug-zize-lto]
-inherits = "release"
-debug = 1 # true = 2 = all symbols, 1 = line only
-opt-level = "z"
-lto = true
-
-[profile.release-no-debug]
-inherits = "release"
-debug = false # true = 2 = all symbols, 1 = line only
-
-[profile.release-no-debug-size]
-inherits = "release"
-debug = false # true = 2 = all symbols, 1 = line only
-opt-level = "s"
-[profile.release-no-debug-zize]
-inherits = "release"
-debug = false # true = 2 = all symbols, 1 = line only
-opt-level = "z"
-
-[profile.release-no-debug-size-lto]
-inherits = "release"
-debug = false # true = 2 = all symbols, 1 = line only
-opt-level = "s"
-lto = true
-
-[profile.release-no-debug-zize-lto]
-inherits = "release"
-debug = false # true = 2 = all symbols, 1 = line only
-opt-level = "z"
-lto = true
-
-
 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
 [patch.crates-io]
--- a/76
+++ b/76
@@ -1,50 +1,33 @@
-### Creates a storage Docker image with postgres, pageserver, safekeeper and proxy binaries.
-### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
-### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
-### inside this image in the real deployments.
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
-ARG IMAGE=rust
-ARG TAG=pinned
-
 # Build Postgres
-FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
-WORKDIR /home/nonroot
+FROM neondatabase/rust:1.58 AS pg-build
+WORKDIR /pg

-COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
-COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
-COPY --chown=nonroot pgxn pgxn
-COPY --chown=nonroot Makefile Makefile
+USER root
+
+COPY vendor/postgres vendor/postgres
+COPY Makefile Makefile

 ENV BUILD_TYPE release
 RUN set -e \
-    && mold -run make -j $(nproc) -s neon-pg-ext \
-    && rm -rf pg_install/v14/build \
-    && rm -rf pg_install/v15/build \
-    && tar -C pg_install/v14 -czf /home/nonroot/postgres_install.tar.gz .
+    && mold -run make -j $(nproc) -s postgres \
+    && rm -rf tmp_install/build \
+    && tar -C tmp_install -czf /postgres_install.tar.gz .

 # Build zenith binaries
-FROM $REPOSITORY/$IMAGE:$TAG AS build
-WORKDIR /home/nonroot
+FROM neondatabase/rust:1.58 AS build
 ARG GIT_VERSION=local

-# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
-# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
-# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
-ARG RUSTC_WRAPPER=cachepot
-ENV AWS_REGION=eu-central-1
-ENV CACHEPOT_S3_KEY_PREFIX=cachepot
-ARG CACHEPOT_BUCKET=neon-github-dev
-#ARG AWS_ACCESS_KEY_ID
-#ARG AWS_SECRET_ACCESS_KEY
+ARG CACHEPOT_BUCKET=zenith-rust-cachepot
+ARG AWS_ACCESS_KEY_ID
+ARG AWS_SECRET_ACCESS_KEY

-COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
-COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
+COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
 COPY . .

 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-&& mold -run cargo build --locked --release \
+    && sudo -E "PATH=$PATH" mold -run cargo build --release \
    && cachepot -s

 # Build final image
@@ -53,8 +36,8 @@ FROM debian:bullseye-slim
 WORKDIR /data

 RUN set -e \
-    && apt update \
-    && apt install -y \
+    && apt-get update \
+    && apt-get install -y \
        libreadline-dev \
        libseccomp-dev \
        openssl \
@@ -63,26 +46,17 @@ RUN set -e \
    && useradd -d /data zenith \
    && chown -R zenith:zenith /data

-COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/pageserver /usr/local/bin
-COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/safekeeper /usr/local/bin
-COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/proxy      /usr/local/bin
+COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
+COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
+COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy      /usr/local/bin

-# v14 is default for now
-COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/
-COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
+COPY --from=pg-build /pg/tmp_install/         /usr/local/
+COPY --from=pg-build /postgres_install.tar.gz /data/

-# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
-# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
-RUN mkdir -p /data/.neon/ && chown -R zenith:zenith /data/.neon/ \
-    && /usr/local/bin/pageserver -D /data/.neon/ --init \
-       -c "id=1234" \
-       -c "broker_endpoints=['http://etcd:2379']" \
-       -c "pg_distrib_dir='/usr/local'" \
-       -c "listen_pg_addr='0.0.0.0:6400'" \
-       -c "listen_http_addr='0.0.0.0:9898'"
+COPY docker-entrypoint.sh /docker-entrypoint.sh

 VOLUME ["/data"]
 USER zenith
 EXPOSE 6400
-EXPOSE 9898
-CMD ["/bin/bash"]
+ENTRYPOINT ["/docker-entrypoint.sh"]
+CMD ["pageserver"]
--- a/Dockerfile.compute-node-v14
+++ b/Dockerfile.compute-node-v14
@@ -1,167 +0,0 @@
-ARG TAG=pinned
-# apparently, ARGs don't get replaced in RUN commands in kaniko
-# ARG POSTGIS_VERSION=3.3.0
-# ARG PLV8_VERSION=3.1.4
-# ARG PG_VERSION=v14
-
-#
-# Layer "build-deps"
-#
-FROM debian:bullseye-slim AS build-deps
-RUN apt update &&  \
-    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
-    libcurl4-openssl-dev libossp-uuid-dev
-
-#
-# Layer "pg-build"
-# Build Postgres from the neon postgres repository.
-#
-FROM build-deps AS pg-build
-COPY vendor/postgres-v14 postgres
-RUN cd postgres && \
-    ./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
-    # Install headers
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
-
-#
-# Layer "postgis-build"
-# Build PostGIS from the upstream PostGIS mirror.
-#
-# PostGIS compiles against neon postgres sources without changes. Perhaps we
-# could even use the upstream binaries, compiled against vanilla Postgres, but
-# it would require some investigation to check that it works, and also keeps
-# working in the future. So for now, we compile our own binaries.
-FROM build-deps AS postgis-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN apt update && \
-    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget
-
-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
-    tar xvzf postgis-3.3.0.tar.gz && \
-    cd postgis-3.3.0 && \
-    ./autogen.sh && \
-    export PATH="/usr/local/pgsql/bin:$PATH" && \
-    ./configure && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    cd extensions/postgis && \
-    make clean && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
-
-#
-# Layer "plv8-build"
-# Build plv8
-#
-FROM build-deps AS plv8-build
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN apt update && \
-    apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev
-
-# https://github.com/plv8/plv8/issues/475
-# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
-RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update && \
-    apt install -y --no-install-recommends -t testing binutils
-
-RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
-    tar xvzf v3.1.4.tar.gz && \
-    cd plv8-3.1.4 && \
-    export PATH="/usr/local/pgsql/bin:$PATH" && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    rm -rf /plv8-* && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
-
-#
-# Layer "neon-pg-ext-build"
-# compile neon extensions
-#
-FROM build-deps AS neon-pg-ext-build
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY pgxn/ pgxn/
-
-RUN make -j $(getconf _NPROCESSORS_ONLN) \
-        PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-        -C pgxn/neon \
-        -s install
-
-# Compile and run the Neon-specific `compute_ctl` binary
-FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
-USER nonroot
-# Copy entire project to get Cargo.* files with proper dependencies for the whole project
-COPY --chown=nonroot . .
-RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
-
-#
-# Clean up postgres folder before inclusion
-#
-FROM neon-pg-ext-build AS postgres-cleanup-layer
-COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
-
-# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
-RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
-
-# Remove headers that we won't need anymore - we've completed installation of all extensions
-RUN rm -r /usr/local/pgsql/include
-
-# Remove now-useless PGXS src infrastructure
-RUN rm -r /usr/local/pgsql/lib/pgxs/src
-
-# Remove static postgresql libraries - all compilation is finished, so we
-# can now remove these files - they must be included in other binaries by now
-# if they were to be used by other libraries.
-RUN rm /usr/local/pgsql/lib/lib*.a
-
-#
-# Final layer
-# Put it all together into the final image
-#
-FROM debian:bullseye-slim
-# Add user postgres
-RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
-    echo "postgres:test_console_pass" | chpasswd && \
-    mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
-    chown -R postgres:postgres /var/db/postgres && \
-    chmod 0750 /var/db/postgres/compute && \
-    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
-
-# TODO: Check if we can make the extension setup more modular versus a linear build
-# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc#
-COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
-COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
-
-# Install:
-# libreadline8 for psql
-# libossp-uuid16 for extension ossp-uuid
-# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
-# GLIBC 2.34 for plv8.
-#     Debian bullseye provides GLIBC 2.31, so we install the library from testing
-#
-# Lastly, link compute_ctl into zenith_ctl while we're at it,
-# so that we don't need to put this in another layer.
-RUN apt update &&  \
-    apt install --no-install-recommends -y \
-        libreadline8 \
-        libossp-uuid16 \
-        libgeos-c1v5 \
-        libgdal28 \
-        libproj19 \
-        libprotobuf-c1 && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
-    echo "Installing GLIBC 2.34" && \
-    echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update && \
-    apt install -y --no-install-recommends -t testing libc6 && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
-    ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
-
-USER postgres
-ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/Dockerfile.compute-node-v15
+++ b/Dockerfile.compute-node-v15
@@ -1,172 +0,0 @@
-#
-# This file is identical to the Dockerfile.compute-node-v14 file
-# except for the version of Postgres that is built.
-#
-
-ARG TAG=pinned
-# apparently, ARGs don't get replaced in RUN commands in kaniko
-# ARG POSTGIS_VERSION=3.3.0
-# ARG PLV8_VERSION=3.1.4
-# ARG PG_VERSION=v15
-
-#
-# Layer "build-deps"
-#
-FROM debian:bullseye-slim AS build-deps
-RUN apt update &&  \
-    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
-    libcurl4-openssl-dev libossp-uuid-dev
-
-#
-# Layer "pg-build"
-# Build Postgres from the neon postgres repository.
-#
-FROM build-deps AS pg-build
-COPY vendor/postgres-v15 postgres
-RUN cd postgres && \
-    ./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
-    # Install headers
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
-
-#
-# Layer "postgis-build"
-# Build PostGIS from the upstream PostGIS mirror.
-#
-# PostGIS compiles against neon postgres sources without changes. Perhaps we
-# could even use the upstream binaries, compiled against vanilla Postgres, but
-# it would require some investigation to check that it works, and also keeps
-# working in the future. So for now, we compile our own binaries.
-FROM build-deps AS postgis-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN apt update && \
-    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget
-
-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
-    tar xvzf postgis-3.3.0.tar.gz && \
-    cd postgis-3.3.0 && \
-    ./autogen.sh && \
-    export PATH="/usr/local/pgsql/bin:$PATH" && \
-    ./configure && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    cd extensions/postgis && \
-    make clean && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
-
-#
-# Layer "plv8-build"
-# Build plv8
-#
-FROM build-deps AS plv8-build
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN apt update && \
-    apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev
-
-# https://github.com/plv8/plv8/issues/475
-# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
-RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update && \
-    apt install -y --no-install-recommends -t testing binutils
-
-RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
-    tar xvzf v3.1.4.tar.gz && \
-    cd plv8-3.1.4 && \
-    export PATH="/usr/local/pgsql/bin:$PATH" && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    rm -rf /plv8-* && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control
-
-#
-# Layer "neon-pg-ext-build"
-# compile neon extensions
-#
-FROM build-deps AS neon-pg-ext-build
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY pgxn/ pgxn/
-
-RUN make -j $(getconf _NPROCESSORS_ONLN) \
-        PG_CONFIG=/usr/local/pgsql/bin/pg_config \
-        -C pgxn/neon \
-        -s install
-
-# Compile and run the Neon-specific `compute_ctl` binary
-FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools
-USER nonroot
-# Copy entire project to get Cargo.* files with proper dependencies for the whole project
-COPY --chown=nonroot . .
-RUN cd compute_tools && cargo build --locked --profile release-line-debug-size-lto
-
-#
-# Clean up postgres folder before inclusion
-#
-FROM neon-pg-ext-build AS postgres-cleanup-layer
-COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
-
-# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
-RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
-
-# Remove headers that we won't need anymore - we've completed installation of all extensions
-RUN rm -r /usr/local/pgsql/include
-
-# Remove now-useless PGXS src infrastructure
-RUN rm -r /usr/local/pgsql/lib/pgxs/src
-
-# Remove static postgresql libraries - all compilation is finished, so we
-# can now remove these files - they must be included in other binaries by now
-# if they were to be used by other libraries.
-RUN rm /usr/local/pgsql/lib/lib*.a
-
-#
-# Final layer
-# Put it all together into the final image
-#
-FROM debian:bullseye-slim
-# Add user postgres
-RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
-    echo "postgres:test_console_pass" | chpasswd && \
-    mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
-    chown -R postgres:postgres /var/db/postgres && \
-    chmod 0750 /var/db/postgres/compute && \
-    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
-
-# TODO: Check if we can make the extension setup more modular versus a linear build
-# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc#
-COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
-COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
-
-# Install:
-# libreadline8 for psql
-# libossp-uuid16 for extension ossp-uuid
-# libgeos, libgdal, libproj and libprotobuf-c1 for PostGIS
-# GLIBC 2.34 for plv8.
-#     Debian bullseye provides GLIBC 2.31, so we install the library from testing
-#
-# Lastly, link compute_ctl into zenith_ctl while we're at it,
-# so that we don't need to put this in another layer.
-RUN apt update &&  \
-    apt install --no-install-recommends -y \
-        libreadline8 \
-        libossp-uuid16 \
-        libgeos-c1v5 \
-        libgdal28 \
-        libproj19 \
-        libprotobuf-c1 && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
-    echo "Installing GLIBC 2.34" && \
-    echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update && \
-    apt install -y --no-install-recommends -t testing libc6 && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
-    ln /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
-
-USER postgres
-ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/Dockerfile.compute-node.legacy
+++ b/Dockerfile.compute-node.legacy
@@ -1,88 +0,0 @@
-#
-# Legacy version of the Dockerfile for the compute node.
-# Used by e2e CI. Building Dockerfile.compute-node will take
-# unreasonable ammount of time without v2 runners.
-#
-# TODO: remove once cloud repo CI is moved to v2 runners.
-#
-
-
-# Allow specifiyng different compute-tools tag and image repo, so we are
-# able to use different images
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
-ARG IMAGE=compute-tools
-ARG TAG=latest
-
-#
-# Image with pre-built tools
-#
-FROM $REPOSITORY/$IMAGE:$TAG AS compute-deps
-# Only to get ready compute_ctl binary as deppendency
-
-#
-# Image with Postgres build deps
-#
-FROM debian:bullseye-slim AS build-deps
-
-RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
-                                          libcurl4-openssl-dev libossp-uuid-dev
-
-#
-# Image with built Postgres
-#
-FROM build-deps AS pg-build
-
-# Add user postgres
-RUN adduser postgres
-RUN mkdir /pg && chown postgres:postgres /pg
-
-# Copy source files
-# version 14 is default for now
-COPY ./vendor/postgres-v14 /pg/
-COPY ./pgxn /pg/
-
-# Build and install Postgres locally
-RUN mkdir /pg/compute_build && cd /pg/compute_build && \
-    ../configure CFLAGS='-O2 -g3' --prefix=$(pwd)/postgres_bin --enable-debug --with-uuid=ossp && \
-    # Install main binaries and contribs
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
-    # Install headers
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install
-
-# Install neon contrib
-RUN make MAKELEVEL=0 PG_CONFIG=/pg/compute_build/postgres_bin/bin/pg_config -j $(getconf _NPROCESSORS_ONLN) -C /pg/neon install
-
-USER postgres
-WORKDIR /pg
-
-#
-# Final compute node image to be exported
-#
-FROM debian:bullseye-slim
-
-# libreadline-dev is required to run psql
-RUN apt-get update && apt-get -yq install libreadline-dev libossp-uuid-dev
-
-# Add user postgres
-RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
-    echo "postgres:test_console_pass" | chpasswd && \
-    mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
-    chown -R postgres:postgres /var/db/postgres && \
-    chmod 0750 /var/db/postgres/compute
-
-# Copy ready Postgres binaries
-COPY --from=pg-build /pg/compute_build/postgres_bin /usr/local
-
-# Copy binaries from compute-tools
-COPY --from=compute-deps /usr/local/bin/compute_ctl /usr/local/bin/compute_ctl
-
-# XXX: temporary symlink for compatibility with old control-plane
-RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl
-
-# Add postgres shared objects to the search path
-RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
-
-USER postgres
-
-ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,29 +1,18 @@
 # First transient image to build compute_tools binaries
-# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
-ARG IMAGE=rust
-ARG TAG=pinned
+# NB: keep in sync with rust image version in .circleci/config.yml
+FROM neondatabase/rust:1.58 AS rust-build

-FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
-WORKDIR /home/nonroot
-
-# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
-# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
-# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
-ARG RUSTC_WRAPPER=cachepot
-ENV AWS_REGION=eu-central-1
-ENV CACHEPOT_S3_KEY_PREFIX=cachepot
-ARG CACHEPOT_BUCKET=neon-github-dev
-#ARG AWS_ACCESS_KEY_ID
-#ARG AWS_SECRET_ACCESS_KEY
+ARG CACHEPOT_BUCKET=zenith-rust-cachepot
+ARG AWS_ACCESS_KEY_ID
+ARG AWS_SECRET_ACCESS_KEY

 COPY . .

 RUN set -e \
-    && mold -run cargo build -p compute_tools --locked --release \
+    && sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
    && cachepot -s

 # Final image that only has one binary
-FROM debian:bullseye-slim
+FROM debian:buster-slim

-COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl
+COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
--- a/175
+++ b/175
@@ -1,7 +1,15 @@
 ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

-# Where to install Postgres, default is ./pg_install, maybe useful for package managers
-POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
+# Where to install Postgres, default is ./tmp_install, maybe useful for package managers
+POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/tmp_install
+
+# Seccomp BPF is only available for Linux
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Linux)
+	SECCOMP = --with-libseccomp
+else
+	SECCOMP =
+endif

 #
 # We differentiate between release / debug build types using the BUILD_TYPE
@@ -20,18 +28,10 @@ else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif

-# Seccomp BPF is only available for Linux
-UNAME_S := $(shell uname -s)
-ifeq ($(UNAME_S),Linux)
-	PG_CONFIGURE_OPTS += --with-libseccomp
-endif
-
 # macOS with brew-installed openssl requires explicit paths
-# It can be configured with OPENSSL_PREFIX variable
 UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Darwin)
-    OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
-    PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
+    PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
 endif

 # Choose whether we should be silent or verbose
@@ -46,139 +46,64 @@ CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
 CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1

 #
-# Top level Makefile to build Neon and PostgreSQL
+# Top level Makefile to build Zenith and PostgreSQL
 #
 .PHONY: all
-all: neon postgres neon-pg-ext
+all: zenith postgres

-### Neon Rust bits
+### Zenith Rust bits
 #
 # The 'postgres_ffi' depends on the Postgres headers.
-.PHONY: neon
-neon: postgres-v14-headers postgres-v15-headers
-	+@echo "Compiling Neon"
+.PHONY: zenith
+zenith: postgres-headers
+	+@echo "Compiling Zenith"
 	$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)

 ### PostgreSQL parts
-# The rules are duplicated for Postgres v14 and 15. We may want to refactor
-# to avoid the duplication in the future, but it's tolerable for now.
-#
-$(POSTGRES_INSTALL_DIR)/build/v14/config.status:
-	+@echo "Configuring Postgres v14 build"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
-	(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
-	$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
+$(POSTGRES_INSTALL_DIR)/build/config.status:
+	+@echo "Configuring postgres build"
+	mkdir -p $(POSTGRES_INSTALL_DIR)/build
+	(cd $(POSTGRES_INSTALL_DIR)/build && \
+	$(ROOT_PROJECT_DIR)/vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
 		$(PG_CONFIGURE_OPTS) \
-		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
+		$(SECCOMP) \
+		--prefix=$(abspath $(POSTGRES_INSTALL_DIR)) > configure.log)

-$(POSTGRES_INSTALL_DIR)/build/v15/config.status:
-	+@echo "Configuring Postgres v15 build"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
-	(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
-	$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
-		$(PG_CONFIGURE_OPTS) \
-		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
+# nicer alias for running 'configure'
+.PHONY: postgres-configure
+postgres-configure: $(POSTGRES_INSTALL_DIR)/build/config.status

-# nicer alias to run 'configure'
-.PHONY: postgres-v14-configure
-postgres-v14-configure: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
+# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/include
+.PHONY: postgres-headers
+postgres-headers: postgres-configure
+	+@echo "Installing PostgreSQL headers"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/include MAKELEVEL=0 install

-.PHONY: postgres-v15-configure
-postgres-v15-configure: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
+# Compile and install PostgreSQL and contrib/neon
+.PHONY: postgres
+postgres: postgres-configure \
+		  postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+	+@echo "Compiling PostgreSQL"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 install
+	+@echo "Compiling contrib/neon"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon install
+	+@echo "Compiling contrib/neon_test_utils"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon_test_utils install
+	+@echo "Compiling pg_buffercache"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache install
+	+@echo "Compiling pageinspect"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect install

-# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
-.PHONY: postgres-v14-headers
-postgres-v14-headers: postgres-v14-configure
-	+@echo "Installing PostgreSQL v14 headers"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/include MAKELEVEL=0 install

-.PHONY: postgres-v15-headers
-postgres-v15-headers: postgres-v15-configure
-	+@echo "Installing PostgreSQL v15 headers"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/include MAKELEVEL=0 install
-
-# Compile and install PostgreSQL
-.PHONY: postgres-v14
-postgres-v14: postgres-v14-configure \
-		  postgres-v14-headers # to prevent `make install` conflicts with neon's `postgres-headers`
-	+@echo "Compiling PostgreSQL v14"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
-	+@echo "Compiling libpq v14"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
-	+@echo "Compiling pg_buffercache v14"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
-	+@echo "Compiling pageinspect v14"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pageinspect install
-
-.PHONY: postgres-v15
-postgres-v15: postgres-v15-configure \
-		  postgres-v15-headers # to prevent `make install` conflicts with neon's `postgres-headers`
-	+@echo "Compiling PostgreSQL v15"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
-	+@echo "Compiling libpq v15"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
-	+@echo "Compiling pg_buffercache v15"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
-	+@echo "Compiling pageinspect v15"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pageinspect install
-
-# shorthand to build all Postgres versions
-postgres: postgres-v14 postgres-v15
-
-.PHONY: postgres-v14-clean
-postgres-v14-clean:
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 clean
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache clean
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pageinspect clean
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq clean
-
-.PHONY: postgres-v15-clean
-postgres-v15-clean:
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 clean
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache clean
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pageinspect clean
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq clean
-
-neon-pg-ext-v14: postgres-v14
-	+@echo "Compiling neon v14"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-v14
-	(cd $(POSTGRES_INSTALL_DIR)/build/neon-v14 && \
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
-	+@echo "Compiling neon_test_utils" v14
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14
-	(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v14 && \
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v14/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install)
-
-neon-pg-ext-v15: postgres-v15
-	+@echo "Compiling neon v15"
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-v15
-	(cd $(POSTGRES_INSTALL_DIR)/build/neon-v15 && \
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install)
-	+@echo "Compiling neon_test_utils" v15
-	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15
-	(cd $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-v15 && \
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install)
-
-.PHONY: neon-pg-ext-clean
-	$(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon clean
-	$(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils clean
-
-neon-pg-ext: neon-pg-ext-v14 neon-pg-ext-v15
-postgres-headers: postgres-v14-headers postgres-v15-headers
-postgres-clean: postgres-v14-clean postgres-v15-clean
+.PHONY: postgres-clean
+postgres-clean:
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 clean

 # This doesn't remove the effects of 'configure'.
 .PHONY: clean
 clean:
-	cd $(POSTGRES_INSTALL_DIR)/build/v14 && $(MAKE) clean
-	cd $(POSTGRES_INSTALL_DIR)/build/v15 && $(MAKE) clean
+	cd $(POSTGRES_INSTALL_DIR)/build && $(MAKE) clean
 	$(CARGO_CMD_PREFIX) cargo clean
-	cd pgxn/neon && $(MAKE) clean
-	cd pgxn/neon_test_utils && $(MAKE) clean

 # This removes everything
 .PHONY: distclean
--- a/4
+++ b/4
@@ -1,5 +1,5 @@
 Neon
 Copyright 2022 Neon Inc.

-The PostgreSQL submodules in vendor/postgres-v14 and vendor/postgres-v15 are licensed under the
-PostgreSQL license. See vendor/postgres-v14/COPYRIGHT and vendor/postgres-v15/COPYRIGHT.
+The PostgreSQL submodule in vendor/postgres is licensed under the
+PostgreSQL license. See vendor/postgres/COPYRIGHT.
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Neon

-Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
+Neon is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.

 The project used to be called "Zenith". Many of the commands and code comments
 still refer to "zenith", but we are in the process of renaming things.
@@ -12,32 +12,32 @@ Alternatively, compile and run the project [locally](#running-local-installation

 ## Architecture overview

-A Neon installation consists of compute nodes and a Neon storage engine.
+A Neon installation consists of compute nodes and Neon storage engine.

-Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
+Compute nodes are stateless PostgreSQL nodes, backed by the Neon storage engine.

-The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
+Neon storage engine consists of two major components:
+- Pageserver. Scalable storage backend for compute nodes.
+- WAL service. The service that receives WAL from the compute node and ensures that it is stored durably.

 Pageserver consists of:
 - Repository - Neon storage implementation.
 - WAL receiver - service that receives WAL from WAL service and stores it in the repository.
 - Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request
+- WAL redo - service that builds pages from base images and WAL records on Page service request.

 ## Running local installation


 #### Installing dependencies on Linux
-1. Install build dependencies and other applicable packages
+1. Install build dependencies and other useful packages

-* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
+* On Ubuntu or Debian this set of packages should be sufficient to build the code:
 ```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
 libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
 ```
-* On Fedora, these packages are needed:
+* On Fedora these packages are needed:
 ```bash
 dnf install flex bison readline-devel zlib-devel openssl-devel \
  libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
@@ -69,18 +69,7 @@ brew install libpq
 brew link --force libpq
 ```

-#### Rustc version
-
-The project uses [rust toolchain file](./rust-toolchain.toml) to define the version it's built with in CI for testing and local builds.
-
-This file is automatically picked up by [`rustup`](https://rust-lang.github.io/rustup/overrides.html#the-toolchain-file) that installs (if absent) and uses the toolchain version pinned in the file.
-
-rustup users who want to build with another toolchain can use [`rustup override`](https://rust-lang.github.io/rustup/overrides.html#directory-overrides) command to set a specific toolchain for the project's directory.
-
-non-rustup users most probably are not getting the same toolchain automatically from the file, so are responsible to manually verify their toolchain matches the version in the file.
-Newer rustc versions most probably will work fine, yet older ones might not be supported due to some new features used by the project or the crates.
-
-#### Building on Linux
+#### Building on Linux and OSX

 1. Build neon and patched postgres
 ```
@@ -89,37 +78,21 @@ Newer rustc versions most probably will work fine, yet older ones might not be s
 git clone --recursive https://github.com/neondatabase/neon.git
 cd neon

-# The preferred and default is to make a debug build. This will create a
-# demonstrably slower build than a release build. For a release build,
-# use "BUILD_TYPE=release make -j`nproc`"
+# The preferred and default is to make a debug build. This will create a
+# demonstrably slower build than a release build. For a release build,
+# use "BUILD_TYPE=release make -j`nproc`"

 make -j`nproc`
 ```

-#### Building on OSX
-
-1. Build neon and patched postgres
-```
-# Note: The path to the neon sources can not contain a space.
-
-git clone --recursive https://github.com/neondatabase/neon.git
-cd neon
-
-# The preferred and default is to make a debug build. This will create a
-# demonstrably slower build than a release build. For a release build,
-# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"
-
-make -j`sysctl -n hw.logicalcpu`
-```
-
-#### Dependency installation notes
-To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
+#### Dependency installation notes
+To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry](https://python-poetry.org/)) in the project directory.
+Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.


-#### Running neon database
+#### Running neon database
 1. Start pageserver and postgres on top of it (should be called from repo root):
 ```sh
 # Create repository in .neon with proper paths to binaries and data
@@ -150,7 +123,7 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=pos
 main  127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
 ```

-2. Now, it is possible to connect to postgres and run some queries:
+2. Now it is possible to connect to postgres and run some queries:
 ```text
 > psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# CREATE TABLE t(key int primary key, value text);
@@ -208,19 +181,17 @@ postgres=# select * from t;
 (1 row)
 ```

-4. If you want to run tests afterward (see below), you must stop all the running of the pageserver, safekeeper, and postgres instances
-   you have just started. You can terminate them all with one command:
+4. If you want to run tests afterwards (see below), you have to stop all the running pageserver, safekeeper and postgres instances
+   you have just started. You can stop them all with one command:
 ```sh
 > ./target/debug/neon_local stop
 ```

 ## Running tests

-Ensure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).
-
 ```sh
 git clone --recursive https://github.com/neondatabase/neon.git
-make # builds also postgres and installs it to ./pg_install
+make # builds also postgres and installs it to ./tmp_install
 ./scripts/pytest
 ```

@@ -234,8 +205,8 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d

 ### Postgres-specific terms

-Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
-The same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
+Due to Neon's very close relation with PostgreSQL internals, there are numerous specific terms used.
+The same applies to certain spellings: e.g. we use MB to denote 1024 * 1024 bytes; while MiB would be technically more correct, it's inconsistent with what the PostgreSQL code and its documentation use.

 To get more familiar with this aspect, refer to:

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
+libc = "0.2"
 anyhow = "1.0"
 chrono = "0.4"
 clap = "3.0"
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -157,7 +157,7 @@ fn main() -> Result<()> {
            exit(code)
        }
        Err(error) => {
-            error!("could not start the compute node: {:?}", error);
+            error!("could not start the compute node: {}", error);

            let mut state = compute.state.write().unwrap();
            state.error = Some(format!("{:?}", error));
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -178,7 +178,6 @@ impl ComputeNode {
            .args(&["--sync-safekeepers"])
            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
            .spawn()
            .expect("postgres --sync-safekeepers failed to start");

@@ -188,13 +187,10 @@ impl ComputeNode {
        let sync_output = sync_handle
            .wait_with_output()
            .expect("postgres --sync-safekeepers failed");
-
        if !sync_output.status.success() {
            anyhow::bail!(
-                "postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}",
+                "postgres --sync-safekeepers exited with non-zero status: {}",
                sync_output.status,
-                String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
-                String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"),
            );
        }

--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -62,16 +62,9 @@ impl GenericOption {
    /// Represent `GenericOption` as configuration option.
    pub fn to_pg_setting(&self) -> String {
        if let Some(val) = &self.value {
-            let name = match self.name.as_str() {
-                "safekeepers" => "neon.safekeepers",
-                "wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
-                "wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout",
-                it => it,
-            };
-
            match self.vartype.as_ref() {
-                "string" => format!("{} = '{}'", name, val),
-                _ => format!("{} = {}", name, val),
+                "string" => format!("{} = '{}'", self.name, val),
+                _ => format!("{} = {}", self.name, val),
            }
        } else {
            self.name.to_owned()
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,7 +1,8 @@
 use std::path::Path;

-use anyhow::Result;
+use anyhow::{anyhow, Result};
 use log::{info, log_enabled, warn, Level};
+use postgres::error::SqlState;
 use postgres::{Client, NoTls};
 use serde::Deserialize;

@@ -394,34 +395,20 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {

        // This will only change ownership on the schema itself, not the objects
        // inside it. Without it owner of the `public` schema will be `cloud_admin`
-        // and database owner cannot do anything with it. SQL procedure ensures
-        // that it won't error out if schema `public` doesn't exist.
-        let alter_query = format!(
-            "DO $$\n\
-                DECLARE\n\
-                    schema_owner TEXT;\n\
-                BEGIN\n\
-                    IF EXISTS(\n\
-                        SELECT nspname\n\
-                        FROM pg_catalog.pg_namespace\n\
-                        WHERE nspname = 'public'\n\
-                    )\n\
-                    THEN\n\
-                        SELECT nspowner::regrole::text\n\
-                            FROM pg_catalog.pg_namespace\n\
-                            WHERE nspname = 'public'\n\
-                            INTO schema_owner;\n\
-                \n\
-                        IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\
-                        THEN\n\
-                            ALTER SCHEMA public OWNER TO {};\n\
-                        END IF;\n\
-                    END IF;\n\
-                END\n\
-            $$;",
-            db.owner.quote()
-        );
-        db_client.simple_query(&alter_query)?;
+        // and database owner cannot do anything with it.
+        let alter_query = format!("ALTER SCHEMA public OWNER TO {}", db.owner.quote());
+        let res = db_client.simple_query(&alter_query);
+
+        if let Err(e) = res {
+            if e.code() == Some(&SqlState::INVALID_SCHEMA_NAME) {
+                // This is OK, the db just doesn't have a `public` schema.
+                // Probably the user dropped it manually.
+                info!("no 'public' schema found in the database {}", db.name);
+            } else {
+                // Something different happened, propagate the error
+                return Err(anyhow!(e));
+            }
+        }
    }

    Ok(())
--- a/compute_tools/tests/cluster_spec.json
+++ b/compute_tools/tests/cluster_spec.json
@@ -85,7 +85,7 @@
                "vartype": "bool"
            },
            {
-                "name": "neon.safekeepers",
+                "name": "safekeepers",
                "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
                "vartype": "string"
            },
@@ -181,6 +181,7 @@
            }
        ]
    },
+
    "delta_operations": [
        {
            "action": "delete_db",
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -28,7 +28,7 @@ mod pg_helpers_tests {

        assert_eq!(
            spec.cluster.settings.as_pg_settings(),
-            "fsync = off\nwal_level = replica\nhot_standby = on\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
+            "fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
        );
    }

--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -4,19 +4,17 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
-clap = "3.0"
-comfy-table = "5.0.1"
-git-version = "0.3.5"
 tar = "0.4.38"
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 serde = { version = "1.0", features = ["derive"] }
 serde_with = "1.12.0"
 toml = "0.5"
-once_cell = "1.13.0"
+lazy_static = "1.4"
 regex = "1"
 anyhow = "1.0"
 thiserror = "1"
 nix = "0.23"
+url = "2.2.2"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

 pageserver = { path = "../pageserver" }
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -150,7 +150,7 @@ impl PostgresNode {
        let port: u16 = conf.parse_field("port", &context)?;
        let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
        let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
-        let uses_wal_proposer = conf.get("neon.safekeepers").is_some();
+        let uses_wal_proposer = conf.get("safekeepers").is_some();

        // parse recovery_target_lsn, if any
        let recovery_target_lsn: Option<Lsn> =
@@ -341,7 +341,7 @@ impl PostgresNode {
                .map(|sk| format!("localhost:{}", sk.pg_port))
                .collect::<Vec<String>>()
                .join(",");
-            conf.append("neon.safekeepers", &safekeepers);
+            conf.append("safekeepers", &safekeepers);
        } else {
            // We only use setup without safekeepers for tests,
            // and don't care about data durability on pageserver,
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -30,14 +30,14 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_stdout_file =
        fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
            format!(
-                "Failed to create etcd stout file in directory {}",
+                "Failed to create ectd stout file in directory {}",
                etcd_data_dir.display()
            )
        })?;
    let etcd_stderr_file =
        fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
            format!(
-                "Failed to create etcd stderr file in directory {}",
+                "Failed to create ectd stderr file in directory {}",
                etcd_data_dir.display()
            )
        })?;
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -51,11 +51,7 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
 }

 fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
-    for env_key in [
-        "AWS_ACCESS_KEY_ID",
-        "AWS_SECRET_ACCESS_KEY",
-        "AWS_SESSION_TOKEN",
-    ] {
+    for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
        if let Ok(value) = std::env::var(env_key) {
            cmd = cmd.env(env_key, value);
        }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -24,7 +24,7 @@ use crate::safekeeper::SafekeeperNode;
 // This data structures represents neon_local CLI config
 //
 // It is deserialized from the .neon/config file, or the config file passed
-// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
+// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
 // an example.
 //
 #[serde_as]
@@ -289,13 +289,13 @@ impl LocalEnv {
        let mut env: LocalEnv = toml::from_str(toml)?;

        // Find postgres binaries.
-        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install/v14".
+        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "tmp_install".
        if env.pg_distrib_dir == Path::new("") {
            if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
                env.pg_distrib_dir = postgres_bin.into();
            } else {
                let cwd = env::current_dir()?;
-                env.pg_distrib_dir = cwd.join("pg_install/v14")
+                env.pg_distrib_dir = cwd.join("tmp_install")
            }
        }

@@ -320,7 +320,7 @@ impl LocalEnv {

        if !repopath.exists() {
            bail!(
-                "Zenith config is not found in {}. You need to run 'neon_local init' first",
+                "Zenith config is not found in {}. You need to run 'zenith init' first",
                repopath.to_str().unwrap()
            );
        }
@@ -337,12 +337,12 @@ impl LocalEnv {
    }

    pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
-        // Currently, the user first passes a config file with 'neon_local init --config=<path>'
+        // Currently, the user first passes a config file with 'zenith init --config=<path>'
        // We read that in, in `create_config`, and fill any missing defaults. Then it's saved
        // to .neon/config. TODO: We lose any formatting and comments along the way, which is
        // a bit sad.
        let mut conf_content = r#"# This file describes a locale deployment of the page server
-# and safekeeeper node. It is read by the 'neon_local' command-line
+# and safekeeeper node. It is read by the 'zenith' command-line
 # utility.
 "#
        .to_string();
@@ -382,7 +382,7 @@ impl LocalEnv {
    }

    //
-    // Initialize a new Neon repository
+    // Initialize a new Zenith repository
    //
    pub fn init(&mut self) -> anyhow::Result<()> {
        // check if config already exists
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -5,7 +5,7 @@
 /// enough to extract a few settings we need in Zenith, assuming you don't do
 /// funny stuff like include-directives or funny escaping.
 use anyhow::{bail, Context, Result};
-use once_cell::sync::Lazy;
+use lazy_static::lazy_static;
 use regex::Regex;
 use std::collections::HashMap;
 use std::fmt;
@@ -19,7 +19,9 @@ pub struct PostgresConf {
    hash: HashMap<String, String>,
 }

-static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
+lazy_static! {
+    static ref CONF_LINE_RE: Regex = Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap();
+}

 impl PostgresConf {
    pub fn new() -> PostgresConf {
@@ -137,10 +139,10 @@ fn escape_str(s: &str) -> String {
    //
    // This regex is a bit more conservative than the rules in guc-file.l, so we quote some
    // strings that PostgreSQL would accept without quoting, but that's OK.
-
-    static UNQUOTED_RE: Lazy<Regex> =
-        Lazy::new(|| Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap());
-
+    lazy_static! {
+        static ref UNQUOTED_RE: Regex =
+            Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap();
+    }
    if UNQUOTED_RE.is_match(s) {
        s.to_string()
    } else {
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -1,4 +1,5 @@
 use std::io::Write;
+use std::net::TcpStream;
 use std::path::PathBuf;
 use std::process::Command;
 use std::sync::Arc;
@@ -46,12 +47,12 @@ impl ResponseErrorMessageExt for Response {
            return Ok(self);
        }

-        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
+        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
        let url = self.url().to_owned();
        Err(SafekeeperHttpError::Response(
            match self.json::<HttpErrorBody>() {
                Ok(err_body) => format!("Error: {}", err_body.msg),
-                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
+                Err(_) => format!("Http error ({}) at {url}.", status.as_u16()),
            },
        ))
    }
@@ -240,28 +241,40 @@ impl SafekeeperNode {
            ),
        }

-        // Wait until process is gone
-        for i in 0..600 {
-            let signal = None; // Send no signal, just get the error code
-            match kill(pid, signal) {
-                Ok(_) => (), // Process exists, keep waiting
-                Err(Errno::ESRCH) => {
-                    // Process not found, we're done
-                    println!("done!");
-                    return Ok(());
-                }
-                Err(err) => bail!(
-                    "Failed to send signal to pageserver with pid {}: {}",
-                    pid,
-                    err.desc()
-                ),
-            };
+        let address = connection_address(&self.pg_connection_config);

-            if i % 10 == 0 {
-                print!(".");
-                io::stdout().flush().unwrap();
+        // TODO Remove this "timeout" and handle it on caller side instead.
+        // Shutting down may take a long time,
+        // if safekeeper flushes a lot of data
+        let mut tcp_stopped = false;
+        for _ in 0..100 {
+            if !tcp_stopped {
+                if let Err(err) = TcpStream::connect(&address) {
+                    tcp_stopped = true;
+                    if err.kind() != io::ErrorKind::ConnectionRefused {
+                        eprintln!("\nSafekeeper connection failed with error: {err}");
+                    }
+                }
            }
-            thread::sleep(Duration::from_millis(100));
+            if tcp_stopped {
+                // Also check status on the HTTP port
+                match self.check_status() {
+                    Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
+                        println!("done!");
+                        return Ok(());
+                    }
+                    Err(err) => {
+                        eprintln!("\nSafekeeper status check failed with error: {err}");
+                        return Ok(());
+                    }
+                    Ok(()) => {
+                        // keep waiting
+                    }
+                }
+            }
+            print!(".");
+            io::stdout().flush().unwrap();
+            thread::sleep(Duration::from_secs(1));
        }

        bail!("Failed to stop safekeeper with pid {}", pid);
@@ -291,9 +304,10 @@ impl SafekeeperNode {
        Ok(self
            .http_request(
                Method::POST,
-                format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
+                format!("{}/{}", self.http_base_url, "timeline"),
            )
            .json(&TimelineCreateRequest {
+                tenant_id,
                timeline_id,
                peer_ids,
            })
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -1,8 +1,9 @@
 use std::collections::HashMap;
 use std::fs::File;
 use std::io::{BufReader, Write};
+use std::net::TcpStream;
 use std::num::NonZeroU64;
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 use std::process::Command;
 use std::time::Duration;
 use std::{io, result, thread};
@@ -11,9 +12,9 @@ use anyhow::{bail, Context};
 use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
-use pageserver::http::models::{
-    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
-};
+use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
+use pageserver::tenant_mgr::TenantInfo;
+use pageserver::timelines::TimelineInfo;
 use postgres::{Config, NoTls};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
@@ -57,7 +58,7 @@ impl ResponseErrorMessageExt for Response {
            return Ok(self);
        }

-        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
+        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
        let url = self.url().to_owned();
        Err(PageserverHttpError::Response(
            match self.json::<HttpErrorBody>() {
@@ -102,19 +103,23 @@ impl PageServerNode {

    /// Construct libpq connection string for connecting to the pageserver.
    fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
-        format!("postgresql://no_user:{password}@{listen_addr}/no_db")
+        format!("postgresql://no_user:{}@{}/no_db", password, listen_addr)
            .parse()
            .unwrap()
    }

-    pub fn initialize(
+    pub fn init(
        &self,
        create_tenant: Option<ZTenantId>,
        initial_timeline_id: Option<ZTimelineId>,
        config_overrides: &[&str],
    ) -> anyhow::Result<ZTimelineId> {
+        let mut cmd = Command::new(self.env.pageserver_bin()?);
+
        let id = format!("id={}", self.env.pageserver.id);
+
        // FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
+        let base_data_dir_param = self.env.base_data_dir.display().to_string();
        let pg_distrib_dir_param =
            format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
        let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
@@ -134,52 +139,67 @@ impl PageServerNode {
                .collect::<Vec<_>>()
                .join(",")
        );
+        let mut args = Vec::with_capacity(20);
+
+        args.push("--init");
+        args.extend(["-D", &base_data_dir_param]);
+        args.extend(["-c", &pg_distrib_dir_param]);
+        args.extend(["-c", &authg_type_param]);
+        args.extend(["-c", &listen_http_addr_param]);
+        args.extend(["-c", &listen_pg_addr_param]);
+        args.extend(["-c", &broker_endpoints_param]);
+        args.extend(["-c", &id]);
+
        let broker_etcd_prefix_param = self
            .env
            .etcd_broker
            .broker_etcd_prefix
            .as_ref()
            .map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
-
-        let mut init_config_overrides = config_overrides.to_vec();
-        init_config_overrides.push(&id);
-        init_config_overrides.push(&pg_distrib_dir_param);
-        init_config_overrides.push(&authg_type_param);
-        init_config_overrides.push(&listen_http_addr_param);
-        init_config_overrides.push(&listen_pg_addr_param);
-        init_config_overrides.push(&broker_endpoints_param);
-
        if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
-            init_config_overrides.push(broker_etcd_prefix_param);
+            args.extend(["-c", broker_etcd_prefix_param]);
+        }
+
+        for config_override in config_overrides {
+            args.extend(["-c", config_override]);
        }

        if self.env.pageserver.auth_type != AuthType::Trust {
-            init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
+            args.extend([
+                "-c",
+                "auth_validation_public_key_path='auth_public_key.pem'",
+            ]);
        }

-        self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
-        let init_result = self
-            .try_init_timeline(create_tenant, initial_timeline_id)
-            .context("Failed to create initial tenant and timeline for pageserver");
-        match &init_result {
-            Ok(initial_timeline_id) => {
-                println!("Successfully initialized timeline {initial_timeline_id}")
-            }
-            Err(e) => eprintln!("{e:#}"),
+        let create_tenant = create_tenant.map(|id| id.to_string());
+        if let Some(tenant_id) = create_tenant.as_deref() {
+            args.extend(["--create-tenant", tenant_id])
        }
-        self.stop(false)?;
-        init_result
-    }

-    fn try_init_timeline(
-        &self,
-        new_tenant_id: Option<ZTenantId>,
-        new_timeline_id: Option<ZTimelineId>,
-    ) -> anyhow::Result<ZTimelineId> {
-        let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
-        let initial_timeline_info =
-            self.timeline_create(initial_tenant_id, new_timeline_id, None, None)?;
-        Ok(initial_timeline_info.timeline_id)
+        let initial_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
+        let initial_timeline_id_string = initial_timeline_id.to_string();
+        args.extend(["--initial-timeline-id", &initial_timeline_id_string]);
+
+        let cmd_with_args = cmd.args(args);
+        let init_output = fill_rust_env_vars(cmd_with_args)
+            .output()
+            .with_context(|| {
+                format!("failed to init pageserver with command {:?}", cmd_with_args)
+            })?;
+
+        if !init_output.status.success() {
+            bail!(
+                "init invocation failed, {}\nStdout: {}\nStderr: {}",
+                init_output.status,
+                String::from_utf8_lossy(&init_output.stdout),
+                String::from_utf8_lossy(&init_output.stderr)
+            );
+        }
+
+        // echo the captured output of the init command
+        println!("{}", String::from_utf8_lossy(&init_output.stdout));
+
+        Ok(initial_timeline_id)
    }

    pub fn repo_path(&self) -> PathBuf {
@@ -191,35 +211,15 @@ impl PageServerNode {
    }

    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
-        self.start_node(config_overrides, &self.repo_path(), false)
-    }
-
-    fn start_node(
-        &self,
-        config_overrides: &[&str],
-        datadir: &Path,
-        update_config: bool,
-    ) -> anyhow::Result<()> {
-        println!(
+        print!(
            "Starting pageserver at '{}' in '{}'",
            connection_address(&self.pg_connection_config),
-            datadir.display()
+            self.repo_path().display()
        );
-        io::stdout().flush()?;
+        io::stdout().flush().unwrap();

-        let mut args = vec![
-            "-D",
-            datadir.to_str().with_context(|| {
-                format!(
-                    "Datadir path '{}' cannot be represented as a unicode string",
-                    datadir.display()
-                )
-            })?,
-        ];
-
-        if update_config {
-            args.push("--update-config");
-        }
+        let repo_path = self.repo_path();
+        let mut args = vec!["-D", repo_path.to_str().unwrap()];

        for config_override in config_overrides {
            args.extend(["-c", config_override]);
@@ -231,8 +231,8 @@ impl PageServerNode {

        if !filled_cmd.status()?.success() {
            bail!(
-                "Pageserver failed to start. See console output and '{}' for details.",
-                datadir.join("pageserver.log").display()
+                "Pageserver failed to start. See '{}' for details.",
+                self.repo_path().join("pageserver.log").display()
            );
        }

@@ -241,7 +241,7 @@ impl PageServerNode {
        const RETRIES: i8 = 15;
        for retries in 1..RETRIES {
            match self.check_status() {
-                Ok(()) => {
+                Ok(_) => {
                    println!("\nPageserver started");
                    return Ok(());
                }
@@ -255,18 +255,21 @@ impl PageServerNode {
                                if retries == 5 {
                                    println!() // put a line break after dots for second message
                                }
-                                println!("Pageserver not responding yet, err {err} retrying ({retries})...");
+                                println!(
+                                    "Pageserver not responding yet, err {} retrying ({})...",
+                                    err, retries
+                                );
                            }
                        }
                        PageserverHttpError::Response(msg) => {
-                            bail!("pageserver failed to start: {msg} ")
+                            bail!("pageserver failed to start: {} ", msg)
                        }
                    }
                    thread::sleep(Duration::from_secs(1));
                }
            }
        }
-        bail!("pageserver failed to start in {RETRIES} seconds");
+        bail!("pageserver failed to start in {} seconds", RETRIES);
    }

    ///
@@ -296,46 +299,63 @@ impl PageServerNode {
        match kill(pid, sig) {
            Ok(_) => (),
            Err(Errno::ESRCH) => {
-                println!("Pageserver with pid {pid} does not exist, but a PID file was found");
+                println!(
+                    "Pageserver with pid {} does not exist, but a PID file was found",
+                    pid
+                );
                return Ok(());
            }
            Err(err) => bail!(
-                "Failed to send signal to pageserver with pid {pid}: {}",
+                "Failed to send signal to pageserver with pid {}: {}",
+                pid,
                err.desc()
            ),
        }

-        // Wait until process is gone
-        for i in 0..600 {
-            let signal = None; // Send no signal, just get the error code
-            match kill(pid, signal) {
-                Ok(_) => (), // Process exists, keep waiting
-                Err(Errno::ESRCH) => {
-                    // Process not found, we're done
-                    println!("done!");
-                    return Ok(());
-                }
-                Err(err) => bail!(
-                    "Failed to send signal to pageserver with pid {}: {}",
-                    pid,
-                    err.desc()
-                ),
-            };
+        let address = connection_address(&self.pg_connection_config);

-            if i % 10 == 0 {
-                print!(".");
-                io::stdout().flush().unwrap();
+        // TODO Remove this "timeout" and handle it on caller side instead.
+        // Shutting down may take a long time,
+        // if pageserver checkpoints a lot of data
+        let mut tcp_stopped = false;
+        for _ in 0..100 {
+            if !tcp_stopped {
+                if let Err(err) = TcpStream::connect(&address) {
+                    tcp_stopped = true;
+                    if err.kind() != io::ErrorKind::ConnectionRefused {
+                        eprintln!("\nPageserver connection failed with error: {err}");
+                    }
+                }
            }
-            thread::sleep(Duration::from_millis(100));
+            if tcp_stopped {
+                // Also check status on the HTTP port
+
+                match self.check_status() {
+                    Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
+                        println!("done!");
+                        return Ok(());
+                    }
+                    Err(err) => {
+                        eprintln!("\nPageserver status check failed with error: {err}");
+                        return Ok(());
+                    }
+                    Ok(()) => {
+                        // keep waiting
+                    }
+                }
+            }
+            print!(".");
+            io::stdout().flush().unwrap();
+            thread::sleep(Duration::from_secs(1));
        }

-        bail!("Failed to stop pageserver with pid {pid}");
+        bail!("Failed to stop pageserver with pid {}", pid);
    }

    pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
        let mut client = self.pg_connection_config.connect(NoTls).unwrap();

-        println!("Pageserver query: '{sql}'");
+        println!("Pageserver query: '{}'", sql);
        client.simple_query(sql).unwrap()
    }

@@ -370,15 +390,15 @@ impl PageServerNode {
        &self,
        new_tenant_id: Option<ZTenantId>,
        settings: HashMap<&str, &str>,
-    ) -> anyhow::Result<ZTenantId> {
-        self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
+    ) -> anyhow::Result<Option<ZTenantId>> {
+        let tenant_id_string = self
+            .http_request(Method::POST, format!("{}/tenant", self.http_base_url))
            .json(&TenantCreateRequest {
                new_tenant_id,
                checkpoint_distance: settings
                    .get("checkpoint_distance")
                    .map(|x| x.parse::<u64>())
                    .transpose()?,
-                checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
                compaction_target_size: settings
                    .get("compaction_target_size")
                    .map(|x| x.parse::<u64>())
@@ -410,16 +430,18 @@ impl PageServerNode {
            })
            .send()?
            .error_from_body()?
-            .json::<Option<String>>()
-            .with_context(|| {
-                format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
-            })?
-            .context("No tenant id was found in the tenant creation response")
-            .and_then(|tenant_id_string| {
-                tenant_id_string.parse().with_context(|| {
-                    format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
+            .json::<Option<String>>()?;
+
+        tenant_id_string
+            .map(|id| {
+                id.parse().with_context(|| {
+                    format!(
+                        "Failed to parse tennat creation response as tenant id: {}",
+                        id
+                    )
                })
            })
+            .transpose()
    }

    pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
@@ -431,7 +453,6 @@ impl PageServerNode {
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'checkpoint_distance' as an integer")?,
-                checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
                compaction_target_size: settings
                    .get("compaction_target_size")
                    .map(|x| x.parse::<u64>())
@@ -490,27 +511,22 @@ impl PageServerNode {
        new_timeline_id: Option<ZTimelineId>,
        ancestor_start_lsn: Option<Lsn>,
        ancestor_timeline_id: Option<ZTimelineId>,
-    ) -> anyhow::Result<TimelineInfo> {
-        self.http_request(
-            Method::POST,
-            format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
-        )
-        .json(&TimelineCreateRequest {
-            new_timeline_id,
-            ancestor_start_lsn,
-            ancestor_timeline_id,
-        })
-        .send()?
-        .error_from_body()?
-        .json::<Option<TimelineInfo>>()
-        .with_context(|| {
-            format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
-        })?
-        .with_context(|| {
-            format!(
-                "No timeline id was found in the timeline creation response for tenant {tenant_id}"
+    ) -> anyhow::Result<Option<TimelineInfo>> {
+        let timeline_info_response = self
+            .http_request(
+                Method::POST,
+                format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
            )
-        })
+            .json(&TimelineCreateRequest {
+                new_timeline_id,
+                ancestor_start_lsn,
+                ancestor_timeline_id,
+            })
+            .send()?
+            .error_from_body()?
+            .json::<Option<TimelineInfo>>()?;
+
+        Ok(timeline_info_response)
    }

    /// Import a basebackup prepared using either:
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+set -eux  # -e: exit on error, -u: unset variables are errors, -x: trace commands
+
+broker_endpoints_param="${BROKER_ENDPOINT:-absent}"  # optional, read from the environment
+if [ "$broker_endpoints_param" != "absent" ]; then
+    broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
+else
+    broker_endpoints_param=''  # expands to no arguments when used unquoted below
+fi
+
+if [ "$1" = 'pageserver' ]; then
+    if [ ! -d "/data/tenants" ]; then  # no tenants directory yet: run --init first
+        echo "Initializing pageserver data directory"
+        pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
+    fi
+    echo "Starting pageserver at 0.0.0.0:6400"
+    pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
+else
+    "$@"  # any other first argument: run the given command line unchanged
+fi
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1 +0,0 @@
-book
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,14 @@
+# Zenith documentation
+
+## Table of contents
+
+- [authentication.md](authentication.md) — pageserver JWT authentication.
+- [docker.md](docker.md) — Docker images and building pipeline.
+- [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
+- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
+- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
+- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
+- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
+- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
+- [safekeeper/README.md](/safekeeper/README.md) — WAL service overview.
+- [core_changes.md](core_changes.md) — Description of Zenith changes in Postgres core.
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -1,82 +0,0 @@
-# Summary
-
-[Introduction]()
- [Separation of Compute and Storage](./separation-compute-storage.md)
-
-# Architecture
-
- [Compute]()
-  - [WAL proposer]()
-  - [WAL Backpressure]()
-  - [Postgres changes](./core_changes.md)
-
- [Pageserver](./pageserver.md)
-    - [Services](./pageserver-services.md)
-    - [Thread management](./pageserver-thread-mgmt.md)
-    - [WAL Redo](./pageserver-walredo.md)
-    - [Page cache](./pageserver-pagecache.md)
-    - [Storage](./pageserver-storage.md)
-        - [Datadir mapping]()
-        - [Layer files]()
-        - [Branching]()
-        - [Garbage collection]()
-    - [Cloud Storage]()
-    - [Processing a GetPage request](./pageserver-processing-getpage.md)
-    - [Processing WAL](./pageserver-processing-wal.md)
-	- [Management API]()
-	- [Tenant Rebalancing]()
-
- [WAL Service](walservice.md)
-  - [Consensus protocol](safekeeper-protocol.md)
-  - [Management API]()
-  - [Rebalancing]()
-
- [Control Plane]()
-
- [Proxy]()
-
- [Source view](./sourcetree.md)
-  - [docker.md](./docker.md) — Docker images and building pipeline.
-  - [Error handling and logging]()
-  - [Testing]()
-    - [Unit testing]()
-    - [Integration testing]()
-    - [Benchmarks]()
-
-
- [Glossary](./glossary.md)
-
-# Uncategorized
-
- [authentication.md](./authentication.md)
- [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [settings.md](./settings.md)
-#FIXME: move these under sourcetree.md
-#- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)
-#- [test_runner/README.md](/test_runner/README.md)
-
-
-# RFCs
-
- [RFCs](./rfcs/README.md)
-
- [002-storage](rfcs/002-storage.md)
- [003-laptop-cli](rfcs/003-laptop-cli.md)
- [004-durability](rfcs/004-durability.md)
- [005-zenith_local](rfcs/005-zenith_local.md)
- [006-laptop-cli-v2-CLI](rfcs/006-laptop-cli-v2-CLI.md)
- [006-laptop-cli-v2-repository-structure](rfcs/006-laptop-cli-v2-repository-structure.md)
- [007-serverless-on-laptop](rfcs/007-serverless-on-laptop.md)
- [008-push-pull](rfcs/008-push-pull.md)
- [009-snapshot-first-storage-cli](rfcs/009-snapshot-first-storage-cli.md)
- [009-snapshot-first-storage](rfcs/009-snapshot-first-storage.md)
- [009-snapshot-first-storage-pitr](rfcs/009-snapshot-first-storage-pitr.md)
- [010-storage_details](rfcs/010-storage_details.md)
- [011-retention-policy](rfcs/011-retention-policy.md)
- [012-background-tasks](rfcs/012-background-tasks.md)
- [013-term-history](rfcs/013-term-history.md)
- [014-safekeepers-gossip](rfcs/014-safekeepers-gossip.md)
- [014-storage-lsm](rfcs/014-storage-lsm.md)
- [015-storage-messaging](rfcs/015-storage-messaging.md)
- [016-connection-routing](rfcs/016-connection-routing.md)
- [cluster-size-limits](rfcs/cluster-size-limits.md)
--- a/docs/book.toml
+++ b/docs/book.toml
@@ -1,5 +0,0 @@
-[book]
-language = "en"
-multilingual = false
-src = "."
-title = "Neon architecture"
--- a/docs/core_changes.md
+++ b/docs/core_changes.md
@@ -1,519 +1,202 @@
-# Postgres core changes
+1. Add t_cid to XLOG record
+- Why?
+  The cmin/cmax on a heap page is a real bummer. I don't see any other way to fix that than bite the bullet and modify the WAL-logging routine to include the cmin/cmax.

-This lists all the changes that have been made to the PostgreSQL
-source tree, as a somewhat logical set of patches. The long-term goal
-is to eliminate all these changes, by submitting patches to upstream
-and refactoring code into extensions, so that you can run unmodified
-PostgreSQL against Neon storage.
+  To recap, the problem is that the XLOG_HEAP_INSERT record does not include the command id of the inserted row. The same applies to deletion/update. So in the primary, a row is inserted with current xmin + cmin. But in the replica, the cmin is always set to 1. That works, because the command id is only relevant to the inserting transaction itself. After commit/abort, no one cares about it anymore.

-In Neon, we run PostgreSQL in the compute nodes, but we also run a special WAL redo process in the
-page server. We currently use the same binary for both, with --wal-redo runtime flag to launch it in
-the WAL redo mode. Some PostgreSQL changes are needed in the compute node, while others are just for
-the WAL redo process.
+- Alternatives?
+  I don't know

-In addition to core PostgreSQL changes, there is a Neon extension in contrib/neon, to hook into the
-smgr interface. Once all the core changes have been submitted to upstream or eliminated some other
-way, the extension could live outside the postgres repository and build against vanilla PostgreSQL.
+2. Add PD_WAL_LOGGED.
+- Why?
+  Postgres sometimes writes data to the page before it is WAL-logged. If such a page is swapped out, we will lose this change. The problem is currently solved by setting the PD_WAL_LOGGED bit in the page header. When a page without this bit set is written to the SMGR, it is forced to be written to the WAL as an FPI using the log_newpage_copy() function.

-Below is a list of all the PostgreSQL source code changes, categorized into changes needed for
-compute, and changes needed for the WAL redo process:
+  There was a wrong assumption that it can happen only during construction of some exotic indexes (like gist). It is not true. The same situation can happen with COPY, VACUUM and when record hint bits are set.

-# Changes for Compute node
+- Discussion:
+  https://discord.com/channels/869525774699462656/882681420986851359

-## Add t_cid to heap WAL records
+- Alternatives:
+  Do not store this flag in the page header, but associate this bit with the shared buffer. Logically it is more correct, but in practice we will get no advantages: neither in space nor in CPU overhead.

-```
- src/backend/access/heap/heapam.c                            |   26 +-
- src/include/access/heapam_xlog.h                            |    6 +-
-```

-We have added a new t_cid field to heap WAL records. This changes the WAL record format, making Neon WAL format incompatible with vanilla PostgreSQL!
+3. XLogReadBufferForRedo does not always load and pin the requested buffer. So we need to add extra checks that the buffer is really pinned. Also do not use BufferGetBlockNumber for a buffer returned by XLogReadBufferForRedo.
+- Why?
+  XLogReadBufferForRedo is not pinning pages which are not requested by wal-redo. It is specific only for wal-redo Postgres.

-### Problem we're trying to solve
+- Alternatives?
+  No

-The problem is that the XLOG_HEAP_INSERT record does not include the command id of the inserted row. And same with deletion/update. So in the primary, a row is inserted with current xmin + cmin. But in the replica, the cmin is always set to 1. That works in PostgreSQL, because the command id is only relevant to the inserting transaction itself. After commit/abort, no one cares about it anymore. But with Neon, we rely on WAL replay to reconstruct the page, even while the original transaction is still running.

-### How to get rid of the patch
+4. Eliminate reporting of some warnings related to hint bits, for example
+"page is not marked all-visible but visibility map bit is set in relation".
+- Why?
+  Hint bit may be not WAL logged.

-Bite the bullet and submit the patch to PostgreSQL, to add the t_cid to the WAL records. It makes the WAL records larger, which could make this unpopular in the PostgreSQL community. However, it might simplify some logical decoding code; Andres Freund briefly mentioned in PGCon 2022 discussion on Heikki's Neon presentation that logical decoding currently needs to jump through some hoops to reconstruct the same information.
+- Alternative?
+  Always wal log any page changes.


-### Alternatives
-Perhaps we could write an extra WAL record with the t_cid information, when a page is evicted that contains rows that were touched a transaction that's still running. However, that seems very complicated.
+5. Maintain last written LSN.
+- Why?
+  When compute node requests page from page server, we need to specify LSN. Ideally it should be LSN
+  of the WAL record performing the last update of this page. But we do not know it, because we do not have the page.
+  We can use current WAL flush position, but in this case there is high probability that page server
+  will be blocked until this piece of WAL is delivered.
+  As better approximation we can keep max LSN of written page. It will be better to take in account LSNs only of evicted pages,
+  but SMGR API doesn't provide such knowledge.

-## ginfast.c
+- Alternatives?
+  Maintain map of LSNs of evicted pages.

-```
-diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
-index e0d9940946..2d964c02e9 100644
--- a/src/backend/access/gin/ginfast.c
-+++ b/src/backend/access/gin/ginfast.c
-@@ -285,6 +285,17 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
-                memset(&sublist, 0, sizeof(GinMetaPageData));
-                makeSublist(index, collector->tuples, collector->ntuples, &sublist);
- 
-+               if (metadata->head != InvalidBlockNumber)
-+               {
-+                       /*
-+                        * ZENITH: Get buffer before XLogBeginInsert() to avoid recursive call
-+                        * of XLogBeginInsert(). Reading a new buffer might evict a dirty page from
-+                        * the buffer cache, and if that page happens to be an FSM or VM page, zenith_write()
-+                        * will try to WAL-log an image of the page.
-+                        */
-+                       buffer = ReadBuffer(index, metadata->tail);
-+               }
-+
-                if (needWal)
-                        XLogBeginInsert();
- 
-@@ -316,7 +327,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
-                        data.prevTail = metadata->tail;
-                        data.newRightlink = sublist.head;
- 
-                       buffer = ReadBuffer(index, metadata->tail);
-                        LockBuffer(buffer, GIN_EXCLUSIVE);
-                        page = BufferGetPage(buffer);
-```

-The problem is explained in the comment above
+6. Launching Postgres without WAL.
+- Why?
+  According to Zenith architecture compute node is stateless. So when we are launching
+  compute node, we need to provide some dummy PG_DATADIR. Relation pages
+  can be requested on demand from page server. But Postgres still needs some non-relational data:
+  control and configuration files, SLRUs,...
+  It is currently implemented  using basebackup (do not mix with pg_basebackup) which is created
+  by pageserver. It includes in this tarball config/control files, SLRUs and required directories.
+  Since pageserver does not have the original (non-scattered) WAL segments, it includes in
+  this tarball dummy WAL segment which contains only SHUTDOWN_CHECKPOINT record at the beginning of segment,
+  which redo field points to the end of wal. It allows to load checkpoint record in more or less
+  standard way with minimal changes of Postgres, but then some special handling is needed,
+  including restoring previous record position from zenith.signal file.
+  Also we have to correctly initialize header of last WAL page (pointed by checkpoint.redo)
+  to pass checks performed by XLogReader.

-### How to get rid of the patch
+- Alternatives?
+  We may not include fake WAL segment in tarball at all and modify xlog.c to load checkpoint record
+  in special way. But it may only increase number of changes in xlog.c

-Can we stop WAL-logging FSM or VM pages? Or delay the WAL logging until we're out of the critical
-section or something.
+7. Add redo_read_buffer_filter callback to XLogReadBufferForRedoExtended
+- Why?
+  We need a way in wal-redo Postgres to ignore pages which are not requested by pageserver.
+  So wal-redo Postgres reconstructs only requested page and for all other returns BLK_DONE
+  which means that recovery for them is not needed.

-Maybe some bigger rewrite of FSM and VM would help to avoid WAL-logging FSM and VM page images?
+- Alternatives?
+  No

+8. Enforce WAL logging of sequence updates.
+- Why?
+  For performance reasons Postgres doesn't want to log each fetching of a value from a sequence,
+  so we pre-log a few fetches in advance. In the event of crash we can lose
+  (skip over) as many values as we pre-logged.
+  But it doesn't work with Zenith because page with sequence value can be evicted from buffer cache
+  and we will get a gap in sequence values even without crash.

-## Mark index builds that use buffer manager without logging explicitly
+- Alternatives:
+  Do not try to preserve sequential order but avoid performance penalty.

-```
- src/backend/access/gin/gininsert.c                          |    7 +
- src/backend/access/gist/gistbuild.c                         |   15 +-
- src/backend/access/spgist/spginsert.c                       |    8 +-

-also some changes in src/backend/storage/smgr/smgr.c
-```
+9. Treat unlogged tables as normal (permanent) tables.
+- Why?
+  Unlogged tables are not transient, so they have to survive node restart (unlike temporary tables).
+  But as far as compute node is stateless, we need to persist their data to storage node.
+  And it can only be done through the WAL.

-When a GIN index is built, for example, it is built by inserting the entries into the index more or
-less normally, but without WAL-logging anything. After the index has been built, we iterate through
-all pages and write them to the WAL. That doesn't work for Neon, because if a page is not WAL-logged
-and is evicted from the buffer cache, it is lost. We have an check to catch that in the Neon
-extension. To fix that, we've added a few functions to track explicitly when we're performing such
-an operation: `smgr_start_unlogged_build`, `smgr_finish_unlogged_build_phase_1` and
-`smgr_end_unlogged_build`.
-
-
-### How to get rid of the patch
-
-I think it would make sense to be more explicit about that in PostgreSQL too. So extract these
-changes to a patch and post to pgsql-hackers.
+- Alternatives?
+  * Store unlogged tables locally (violates requirement of stateless compute nodes).
+  * Prohibit unlogged tables at all.


-## Track last-written page LSN
+10. Support start Postgres in wal-redo mode
+- Why?
+  To be able to apply WAL record and reconstruct pages at page server.

-```
- src/backend/commands/dbcommands.c                           |   17 +-
+- Alternatives?
+  * Rewrite redo handlers in Rust
+  * Do not reconstruct pages at page server at all and do it at compute node.

-Also one call to SetLastWrittenPageLSN() in spginsert.c, maybe elsewhere too
-```

-Whenever a page is evicted from the buffer cache, we remember its LSN, so that we can use the same
-LSN in the GetPage@LSN request when reading the page back from the page server. The value is
-conservative: it would be correct to always use the last-inserted LSN, but it would be slow because
-then the page server would need to wait for the recent WAL to be streamed and processed, before
-responding to any GetPage@LSN request.
+11. WAL proposer
+- Why?
+  WAL proposer is communicating with safekeeper and ensures WAL durability by quorum writes.
+  It is currently implemented as patch to standard WAL sender.

-The last-written page LSN is mostly tracked in the smgrwrite() function, without core code changes,
-but there are a few exceptions where we've had to add explicit calls to the Neon-specific
-SetLastWrittenPageLSN() function.
+- Alternatives?
+  Can be moved to extension if some extra callbacks will be added to wal sender code.

-There's an open PR to track the LSN in a more-fine grained fashion:
-https://github.com/neondatabase/postgres/pull/177

-PostgreSQL v15 introduces a new method to do CREATE DATABASE that WAL-logs the database instead of
-relying copying files and checkpoint. With that method, we probably won't need any special handling.
-The old method is still available, though.
+12. Secure Computing BPF API wrapper.
+- Why?
+  Pageserver delegates complex WAL decoding duties to Postgres,
+  which means that the latter might fall victim to carefully designed
+  malicious WAL records and start doing harmful things to the system.
+  To prevent this, it has been decided to limit possible interactions
+  with the outside world using the Secure Computing BPF mode.
+
+- Alternatives:
+  * Rewrite redo handlers in Rust.
+  * Add more checks to guarantee correctness of WAL records.
+  * Move seccomp.c to extension
+  * Many other discussed approaches to neutralize incorrect WAL records vulnerabilities.
+
+
+13. Callbacks for replica feedbacks
+- Why?
+  Allowing walproposer to interact with walsender code.
+
+- Alternatives
+  Copy walsender code to walproposer.
+
+
+14. Support multiple SMGR implementations.
+- Why?
+  Postgres provides abstract API for storage manager but it has only one implementation
+  and provides no way to replace it with custom storage manager.
+
+- Alternatives?
+  None.
+
+
+15. Calculate database size as sum of all database relations.
+- Why?
+  Postgres is calculating database size by traversing data directory
+  but as far as Zenith compute node is stateless we can not do it.
+
+- Alternatives?
+  Send this request directly to pageserver and calculate real (physical) size
+  of Zenith representation of database/timeline, rather than sum logical size of all relations.

-### How to get rid of the patch

-Wait until v15?
+-----------------------------------------------
+Not currently committed but proposed:

+1. Disable ring buffer buffer manager strategies
+- Why?
+  Postgres tries to avoid cache flushing by bulk operations (copy, seqscan, vacuum,...).
+  Even if there is free space in buffer cache, pages may be evicted.
+  Negative effect of it can be somehow compensated by file system cache, but in case of Zenith
+  cost of requesting page from page server is much higher.

-## Cache relation sizes
+- Alternatives?
+  Instead of just prohibiting ring buffer we may try to implement more flexible eviction policy,
+  for example copy evicted page from ring buffer to some other buffer if there is free space
+  in buffer cache.

-The Neon extension contains a little cache for smgrnblocks() and smgrexists() calls, to avoid going
-to the page server every time. It might be useful to cache those in PostgreSQL, maybe in the
-relcache? (I think we do cache nblocks in relcache already, check why that's not good enough for
-Neon)
+2. Disable marking page as dirty when hint bits are set.
+- Why?
+  Postgres has to modify page twice: first time when some tuple is updated and second time when
+  hint bits are set. WAL-logging hint bit updates requires FPI, which significantly increases the size of WAL.

+- Alternatives?
+  Add special WAL record for setting page hints.

-## Misc change in vacuumlazy.c
+3. Prefetching
+- Why?
+  As far as pages in Zenith are loaded on demand, to reduce node startup time
+  and also speedup some massive queries we need some mechanism for bulk loading to
+  reduce page request round-trip overhead.

-```
-index 8aab6e324e..c684c4fbee 100644
--- a/src/backend/access/heap/vacuumlazy.c
-+++ b/src/backend/access/heap/vacuumlazy.c
-@@ -1487,7 +1487,10 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
-                else if (all_visible_according_to_vm && !PageIsAllVisible(page)
-                                 && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
-                {
-                       elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
-+                       /* ZENITH-XXX: all visible hint is not wal-logged
-+                        * FIXME: Replay visibilitymap changes in pageserver
-+                        */
-+                       elog(DEBUG1, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
-                                 vacrel->relname, blkno);
-                        visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
-                                                                VISIBILITYMAP_VALID_BITS);
-```
+  Currently Postgres is supporting prefetching only for bitmap scan.
+  In Zenith we also use prefetch for sequential and index scan. For sequential scan we prefetch
+  some number of following pages. For index scan we prefetch pages of heap relation addressed by TIDs.

-
-Is this still needed? If that WARNING happens, it looks like potential corruption that we should
-fix!
-
-
-## Use buffer manager when extending VM or FSM
-
-```
- src/backend/storage/freespace/freespace.c                   |   14 +-
- src/backend/access/heap/visibilitymap.c                     |   15 +-
-
-diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
-index e198df65d8..addfe93eac 100644
--- a/src/backend/access/heap/visibilitymap.c
-+++ b/src/backend/access/heap/visibilitymap.c
-@@ -652,10 +652,19 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
-        /* Now extend the file */
-        while (vm_nblocks_now < vm_nblocks)
-        {
-               PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
-+               /*
-+                * ZENITH: Initialize VM pages through buffer cache to prevent loading
-+                * them from pageserver.
-+                */
-+               Buffer  buffer = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, P_NEW,
-+                                                                                       RBM_ZERO_AND_LOCK, NULL);
-+               Page    page = BufferGetPage(buffer);
-+
-+               PageInit((Page) page, BLCKSZ, 0);
-+               PageSetChecksumInplace(page, vm_nblocks_now);
-+               MarkBufferDirty(buffer);
-+               UnlockReleaseBuffer(buffer);
- 
-               smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
-                                  pg.data, false);
-                vm_nblocks_now++;
-        }
-```
-
-### Problem we're trying to solve
-
-???
-
-### How to get rid of the patch
-
-Maybe this would be a reasonable change in PostgreSQL too?
-
-
-## Allow startup without reading checkpoint record
-
-In Neon, the compute node is stateless. So when we are launching compute node, we need to provide
-some dummy PG_DATADIR. Relation pages can be requested on demand from page server. But Postgres
-still needs some non-relational data: control and configuration files, SLRUs,...  It is currently
-implemented using basebackup (do not mix with pg_basebackup) which is created by pageserver. It
-includes in this tarball config/control files, SLRUs and required directories.
-
-As pageserver does not have the original WAL segments, the basebackup tarball includes an empty WAL
-segment to bootstrap the WAL writing, but it doesn't contain the checkpoint record.  There are some
-changes in xlog.c, to allow starting the compute node without reading the last checkpoint record
-from WAL.
-
-This includes code to read the `zenith.signal` file, which tells the startup code the LSN to start
-at. When the `zenith.signal` file is present, the startup uses that LSN instead of the last
-checkpoint's LSN. The system is known to be consistent at that LSN, without any WAL redo.
-
-
-### How to get rid of the patch
-
-???
-
-
-### Alternatives
-
-Include a fake checkpoint record in the tarball. Creating fake WAL is a bit risky, though; I'm
-afraid it might accidentally get streamed to the safekeepers and overwrite or corrupt the real WAL.
-
-## Disable sequence caching
-
-```
-diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
-index 0415df9ccb..9f9db3c8bc 100644
--- a/src/backend/commands/sequence.c
-+++ b/src/backend/commands/sequence.c
-@@ -53,7 +53,9 @@
-  * so we pre-log a few fetches in advance. In the event of
-  * crash we can lose (skip over) as many values as we pre-logged.
-  */
-#define SEQ_LOG_VALS   32
-+/* Zenith XXX: to ensure sequence order of sequence in Zenith we need to WAL log each sequence update. */
-+/* #define SEQ_LOG_VALS        32 */
-+#define SEQ_LOG_VALS   0
-```
-
-For performance reasons, Postgres doesn't want to log each fetch of a value from a sequence, so
-it pre-logs a few fetches in advance. In the event of crash we can lose (skip over) as many values
-as we pre-logged. But with Neon, because page with sequence value can be evicted from buffer cache,
-we can get a gap in sequence values even without crash.
-
-### How to get rid of the patch
-
-Maybe we can just remove it, and accept the gaps. Or add some special handling for sequence
-relations in the Neon extension, to WAL log the sequence page when it's about to be evicted. It
-would be weird if the sequence moved backwards though, think of PITR.
-
-Or add a GUC for the amount to pre-log to PostgreSQL, and force it to 1 in Neon.
-
-
-## Walproposer
-
-```
- src/Makefile                                                |    1 +
- src/backend/replication/libpqwalproposer/Makefile           |   37 +
- src/backend/replication/libpqwalproposer/libpqwalproposer.c |  416 ++++++++++++
- src/backend/postmaster/bgworker.c                           |    4 +
- src/backend/postmaster/postmaster.c                         |    6 +
- src/backend/replication/Makefile                            |    4 +-
- src/backend/replication/walproposer.c                       | 2350 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- src/backend/replication/walproposer_utils.c                 |  402 +++++++++++
- src/backend/replication/walreceiver.c                       |    7 +
- src/backend/replication/walsender.c                         |  320 ++++++---
- src/backend/storage/ipc/ipci.c                              |    6 +
- src/include/replication/walproposer.h                       |  565 ++++++++++++++++
-```
-
-WAL proposer is communicating with safekeeper and ensures WAL durability by quorum writes.  It is
-currently implemented as patch to standard WAL sender.
-
-### How to get rid of the patch
-
-Refactor into an extension. Submit hooks or APIs into upstream if necessary.
-
-@MMeent did some work on this already: https://github.com/neondatabase/postgres/pull/96
-
-## Ignore unexpected data beyond EOF in bufmgr.c
-
-```
-@@ -922,11 +928,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
-                 */
-                bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
-                if (!PageIsNew((Page) bufBlock))
-                       ereport(ERROR,
-+               {
-+                        // XXX-ZENITH
-+                        MemSet((char *) bufBlock, 0, BLCKSZ);
-+                        ereport(DEBUG1,
-                                        (errmsg("unexpected data beyond EOF in block %u of relation %s",
-                                                        blockNum, relpath(smgr->smgr_rnode, forkNum)),
-                                         errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
-
-+               }
-                /*
-                 * We *must* do smgrextend before succeeding, else the page will not
-                 * be reserved by the kernel, and the next P_NEW call will decide to
-```
-
-PostgreSQL is a bit sloppy with extending relations. Usually, the relation is extended with zeros
-first, then the page is filled, and finally the new page WAL-logged. But if multiple backends extend
-a relation at the same time, the pages can be WAL-logged in different order.
-
-I'm not sure what scenario exactly required this change in Neon, though.
-
-### How to get rid of the patch
-
-Submit patches to pgsql-hackers, to tighten up the WAL-logging around relation extension. It's a bit
-confusing even in PostgreSQL. Maybe WAL log the intention to extend first, then extend the relation,
-and finally WAL-log that the extension succeeded.
-
-## Make smgr interface available to extensions
-
-```
- src/backend/storage/smgr/smgr.c                             |  203 +++---
- src/include/storage/smgr.h                                  |   72 +-
-```
-
-### How to get rid of the patch
-
-Submit to upstream. This could be useful for the Disk Encryption patches too, or for compression.
-
-
-## Added relpersistence argument to smgropen()
-
-```
- src/backend/access/heap/heapam_handler.c                    |    2 +-
- src/backend/catalog/storage.c                               |   10 +-
- src/backend/commands/tablecmds.c                            |    2 +-
- src/backend/storage/smgr/md.c                               |    4 +-
- src/include/utils/rel.h                                     |    3 +-
-```
-
-Neon needs to treat unlogged relations differently from others, so the smgrread(), smgrwrite() etc.
-implementations need to know the 'relpersistence' of the relation. To get that information where
-it's needed, we added the 'relpersistence' field to smgropen().
-
-### How to get rid of the patch
-
-Maybe 'relpersistence' would be useful in PostgreSQL for debugging purposes? Or simply for the
-benefit of extensions like Neon. Should consider this in the patch to make smgr API usable to
-extensions.
-
-## Alternatives
-
-Currently in Neon, unlogged tables live on local disk in the compute node, and are wiped away on
-compute node restart. One alternative would be to instead WAL-log even unlogged tables, essentially
-ignoring the UNLOGGED option. Or prohibit UNLOGGED tables completely. But would we still need the
-relpersistence argument to handle index builds? See item on "Mark index builds that use buffer
-manager without logging explicitly".
-
-## Use smgr and dbsize_hook for size calculations
-
-```
- src/backend/utils/adt/dbsize.c                              |   61 +-
-```
-
-In PostgreSQL, the rel and db-size functions scan the data directory directly. That won't work in Neon.
-
-### How to get rid of the patch
-
-Send patch to PostgreSQL, to use smgr API functions for relation size calculation instead. Maybe as
-part of the general smgr API patch.
-
-
-
-# WAL redo process changes
-
-Pageserver delegates complex WAL decoding duties to Postgres, which means that the latter might fall
-victim to carefully designed malicious WAL records and start doing harmful things to the system.  To
-prevent this, the redo functions are executed in a separate process that is sandboxed with Linux
-Secure Computing mode (see seccomp(2) man page).
-
-As an alternative to having a separate WAL redo process, we could rewrite all redo handlers in Rust.
-This is infeasible, however: it would take a lot of effort to rewrite them, ensure that you've done
-the rewrite correctly, and once you've done that, it would be a lot of ongoing maintenance effort to
-keep the rewritten code in sync over time, across new PostgreSQL versions. That's why we want to
-leverage PostgreSQL code.
-
-Another alternative would be to harden all the PostgreSQL WAL redo functions so that it would be
-safe to call them directly from Rust code, without needing the security sandbox. That's not feasible
-for similar reasons as rewriting them in Rust.
-
-
-## Don't replay change in XLogReadBufferForRedo that are not for the target page we're replaying
-
-```
- src/backend/access/gin/ginxlog.c                            |   19 +-
-
-Also some changes in xlog.c and xlogutils.c
-
-Example:
-
-@@ -415,21 +416,27 @@ ginRedoSplit(XLogReaderState *record)
-        if (!isLeaf)
-                ginRedoClearIncompleteSplit(record, 3);
- 
-       if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
-+       action = XLogReadBufferForRedo(record, 0, &lbuffer);
-+       if (action != BLK_RESTORED && action != BLK_DONE)
-                elog(ERROR, "GIN split record did not contain a full-page image of left page");
-```
-
-### Problem we're trying to solve
-
-In PostgreSQL, if a WAL redo function calls XLogReadBufferForRedo() for a page that has a full-page
-image, it always succeeds. However, the Neon WAL redo process is only concerned about replaying changes
-to a single page, so replaying any changes for other pages is a waste of cycles. We have modified
-XLogReadBufferForRedo() to return BLK_DONE for all other pages, to avoid the overhead. That is
-unexpected by code like the above.
-
-### How to get rid of the patch
-
-Submit the changes to upstream, hope the community accepts them. There's no harm to PostgreSQL from
-these changes, although it doesn't have any benefit either.
-
-To make these changes useful to upstream PostgreSQL, we could implement a feature to look ahead the
-WAL, and detect truncated relations. Even in PostgreSQL, it is a waste of cycles to replay changes
-to pages that are later truncated away, so we could have XLogReadBufferForRedo() return BLK_DONE or
-BLK_NOTFOUND for pages that are known to be truncated away later in the WAL stream.
-
-### Alternatives
-
-Maybe we could revert this optimization, and restore pages other than the target page too.
-
-## Add predefined_sysidentifier flag to initdb
-
-```
- src/backend/bootstrap/bootstrap.c                           |   13 +-
- src/bin/initdb/initdb.c                                     |    4 +
-
-And some changes in xlog.c
-```
-
-This is used to help with restoring a database when you have all the WAL, all the way back to
-initdb, but no backup. You can reconstruct the missing backup by running initdb again, with the same
-sysidentifier.
-
-
-### How to get rid of the patch
-
-Ignore it. This is only needed for disaster recovery, so once we've eliminated all other Postgres
-patches, we can just keep it around as a patch or as separate branch in a repo.
-
-
-# Not currently committed but proposed
-
-## Disable ring buffer buffer manager strategies
-
-### Why?
-
-Postgres tries to avoid cache flushing by bulk operations (copy, seqscan, vacuum,...).
-Even if there is free space in the buffer cache, pages may be evicted.
-Negative effect of it can be somehow compensated by file system cache, but in Neon,
-cost of requesting page from page server is much higher.
-
-### Alternatives?
-
-Instead of just prohibiting ring buffer we may try to implement more flexible eviction policy,
-for example copy evicted page from ring buffer to some other buffer if there is free space
-in buffer cache.
-
-## Disable marking page as dirty when hint bits are set.
-
-### Why?
-
-Postgres has to modify page twice: first time when some tuple is updated and second time when
-hint bits are set. WAL-logging hint bit updates requires an FPI, which significantly increases the size of WAL.
-
-### Alternatives?
-
-Add special WAL record for setting page hints.
-
-## Prefetching
-
-### Why?
-
-As far as pages in Neon are loaded on demand, to reduce node startup time
-and also speedup some massive queries we need some mechanism for bulk loading to
-reduce page request round-trip overhead.
-
-Currently Postgres is supporting prefetching only for bitmap scan.
-In Neon we should also use prefetch for sequential and index scans, because the OS is not doing it for us.
-For sequential scan we could prefetch some number of following pages. For index scan we could prefetch pages
-of heap relation addressed by TIDs.
-
-## Prewarming
-
-### Why?
-
-Short downtime (or, in other words, fast compute node restart time) is one of the key features of Zenith.
-But the overhead of the request-response round-trip for loading pages on demand can make warm-up of a started node quite slow.
-We can capture the state of the compute node's buffer cache and send a bulk request for these pages at startup.
+4. Prewarming.
+- Why?
+  Short downtime (or, in other words, fast compute node restart time) is one of the key features of Zenith.
+  But the overhead of the request-response round-trip for loading pages on demand can make warm-up of a started node quite slow.
+  We can capture the state of the compute node's buffer cache and send a bulk request for these pages at startup.
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -75,7 +75,7 @@ layer's Segment and range of LSNs.
 There are two kinds of layers, in-memory and on-disk layers. In-memory
 layers are used to ingest incoming WAL, and provide fast access
 to the recent page versions. On-disk layers are stored as files on disk, and
-are immutable. See [pageserver-storage.md](./pageserver-storage.md) for more.
+are immutable. See pageserver/src/layered_repository/README.md for more.

 ### Layer file (on-disk layer)

@@ -92,7 +92,6 @@ The layer map tracks what layers exist in a timeline.
 ### Layered repository

 Neon repository implementation that keeps data in layers.
-
 ### LSN

 The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.
@@ -112,7 +111,7 @@ PostgreSQL LSNs and functions to monitor them:
 * `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
 [source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):

-Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md) for more information.
+Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
 * `CommitLSN`: position in WAL confirmed by quorum safekeepers.
 * `RestartLSN`: position in WAL confirmed by all safekeepers.
 * `FlushLSN`: part of WAL persisted to the disk by safekeeper.
@@ -126,26 +125,6 @@ TODO: use this name consistently in remote storage code. Now `disk_consistent_ls
 * `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created)

 TODO: add table that describes mapping between PostgreSQL (compute), safekeeper and pageserver LSNs.
-
-### Logical size
-
-The pageserver tracks the "logical size" of a timeline. It is the
-total size of all relations in all Postgres databases on the
-timeline. It includes all user and system tables, including their FSM
-and VM forks. But it does not include SLRUs, twophase files or any
-other such data or metadata that lives outside relations.
-
-The logical size is calculated by the pageserver, and is sent to
-PostgreSQL via feedback messages to the safekeepers. PostgreSQL uses
-the logical size to enforce the size limit in the free tier. The
-logical size is also shown to users in the web console.
-
-The logical size is not affected by branches or the physical layout of
-layer files in the pageserver. If you have a database with 1 GB
-logical size and you create a branch of it, both branches will have 1
-GB logical size, even though the branch is copy-on-write and won't
-consume any extra physical disk space until you make changes to it.
-
 ### Page (block)

 The basic structure used to store relation data. All pages are of the same size.
--- a/docs/pageserver-page-service.md
+++ b/docs/pageserver-page-service.md
@@ -1,9 +0,0 @@
-# Page Service
-
-The Page Service listens for GetPage@LSN requests from the Compute Nodes,
-and responds with pages from the repository. On each GetPage@LSN request,
-it calls into the Repository function
-
-A separate thread is spawned for each incoming connection to the page
-service. The page service uses the libpq protocol to communicate with
-the client. The client is a Compute Postgres instance.
--- a/docs/pageserver-pagecache.md
+++ b/docs/pageserver-pagecache.md
@@ -1,8 +0,0 @@
-# Page cache
-
-TODO:
-
- shared across tenants
- store pages from layer files
- store pages from "in-memory layer"
- store materialized pages
--- a/docs/pageserver-processing-getpage.md
+++ b/docs/pageserver-processing-getpage.md
@@ -1,4 +0,0 @@
-# Processing a GetPage request
-
-TODO:
- sequence diagram that shows how a GetPage@LSN request is processed
--- a/docs/pageserver-processing-wal.md
+++ b/docs/pageserver-processing-wal.md
@@ -1,5 +0,0 @@
-# Processing WAL
-
-TODO:
- diagram that shows how incoming WAL is processed
- explain durability, what is fsync'd when, disk_consistent_lsn
--- a/docs/pageserver-thread-mgmt.md
+++ b/docs/pageserver-thread-mgmt.md
@@ -1,39 +0,0 @@
-## Thread management
-
-The pageserver uses Tokio for handling concurrency. Everything runs in
-Tokio tasks, although some parts are written in blocking style and use
-spawn_blocking().
-
-Each Tokio task is tracked by the `task_mgr` module. It maintains a
-registry of tasks, and which tenant or timeline they are operating
-on.
-
-### Handling shutdown
-
-When a tenant or timeline is deleted, we need to shut down all tasks
-operating on it, before deleting the data on disk. There's a function,
-`shutdown_tasks`, to request all tasks of a particular tenant or
-timeline to shutdown. It will also wait for them to finish.
-
-A task registered in the task registry can check if it has been
-requested to shut down, by calling `is_shutdown_requested()`. There's
-also a `shutdown_watcher()` Future that can be used with `tokio::select!`
-or similar, to wake up on shutdown.
-
-
-### Sync vs async
-
-We use async to wait for incoming data on network connections, and to
-perform other long-running operations. For example, each WAL receiver
-connection is handled by a tokio Task. Once a piece of WAL has been
-received from the network, the task calls the blocking functions in
-the Repository to process the WAL.
-
-The core storage code in `layered_repository/` is synchronous, with
-blocking locks and I/O calls. The current model is that we consider
-disk I/Os to be short enough that we perform them while running in a
-Tokio task. If that becomes a problem, we should use `spawn_blocking`
-before entering the synchronous parts of the code, or switch to using
-tokio I/O functions.
-
-Be very careful when mixing sync and async code!
--- a/docs/pageserver-walredo.md
+++ b/docs/pageserver-walredo.md
@@ -1,77 +0,0 @@
-# WAL Redo
-
-To reconstruct a particular page version from an image of the page and
-some WAL records, the pageserver needs to replay the WAL records. This
-happens on-demand, when a GetPage@LSN request comes in, or as part of
-background jobs that reorganize data for faster access.
-
-It's important that data cannot leak from one tenant to another, and
-that a corrupt WAL record on one timeline doesn't affect other tenants
-or timelines.
-
-## Multi-tenant security
-
-If you have direct access to the WAL directory, or if you have
-superuser access to a running PostgreSQL server, it's easy to
-construct a malicious or corrupt WAL record that causes the WAL redo
-functions to crash, or to execute arbitrary code. That is not a
-security problem for PostgreSQL; if you have superuser access, you
-have full access to the system anyway.
-
-The Neon pageserver, however, is multi-tenant. It needs to execute WAL
-belonging to different tenants in the same system, and malicious WAL
-in one tenant must not affect other tenants.
-
-A separate WAL redo process is launched for each tenant, and the
-process uses the seccomp(2) system call to restrict its access to the
-bare minimum needed to replay WAL records. The process does not have
-access to the filesystem or network. It can only communicate with the
-parent pageserver process through a pipe.
-
-If an attacker creates a malicious WAL record and injects it into the
-WAL stream of a timeline, he can take control of the WAL redo process
-in the pageserver. However, the WAL redo process cannot access the
-rest of the system. And because there is a separate WAL redo process
-for each tenant, the hijacked WAL redo process can only see WAL and
-data belonging to the same tenant, which the attacker would have
-access to anyway.
-
-## WAL-redo process communication
-
-The WAL redo process runs the 'postgres' executable, launched with a
-Neon-specific command-line option to put it into WAL-redo process
-mode.  The pageserver controls the lifetime of the WAL redo processes,
-launching them as needed. If a tenant is detached from the pageserver,
-any WAL redo processes for that tenant are killed.
-
-The pageserver communicates with each WAL redo process over its
-stdin/stdout/stderr. It works in request-response model with a simple
-custom protocol, described in walredo.rs. To replay a set of WAL
-records for a page, the pageserver sends the "before" image of the
-page and the WAL records over 'stdin', followed by a command to
-perform the replay. The WAL redo process responds with an "after"
-image of the page.
-
-## Special handling of some records
-
-Some WAL record types are handled directly in the pageserver, by
-bespoke Rust code, and are not sent over to the WAL redo process.
-This includes SLRU-related WAL records, like commit records. SLRUs
-don't use the standard Postgres buffer manager, so dealing with them
-in the Neon WAL redo mode would require quite a few changes to
-Postgres code and special handling in the protocol anyway.
-
-Some record types that include a full-page-image (e.g. XLOG_FPI) are
-also handled specially when incoming WAL is processed already, and are
-stored as page images rather than WAL records.
-
-
-## Records that modify multiple pages
-
-Some Postgres WAL records modify multiple pages. Such WAL records are
-duplicated, so that a copy is stored for each affected page. This is
-somewhat wasteful, but because most WAL records only affect one page,
-the overhead is acceptable.
-
-The WAL redo always happens for one particular page. If the WAL record
-contains changes to other pages, they are ignored.
--- a/docs/pageserver.md
+++ b/docs/pageserver.md
@@ -1,11 +0,0 @@
-# Page server architecture
-
-The Page Server has a few different duties:
-
- Respond to GetPage@LSN requests from the Compute Nodes
- Receive WAL from WAL safekeeper, and store it
- Upload data to S3 to make it durable, download files from S3 as needed
-
-S3 is the main fault-tolerant storage of all data, as there are no Page Server
-replicas. We use a separate fault-tolerant WAL service to reduce latency. It
-keeps track of WAL records which are not synced to S3 yet.
--- a/docs/rfcs/017-user-management.md
+++ b/docs/rfcs/017-user-management.md
@@ -0,0 +1,80 @@
+# Postgres user and database management
+
+We've accumulated a bunch of problems with our approach to role and database management, namely:
+
+1. we don't allow role and database creation from Postgres, and users are complaining about that
+2. fine-grained role management is not possible from either Postgres or the console
+3. web_access and @user are different roles, which creates object access problems in some cases
+
+Right now, we do store users and databases both in console and Postgres, and there are two main reasons for
+that:
+
+* we want to be able to authenticate users in proxy against the console without Postgres involvement. Otherwise,
+malicious brute force attempts will wake up Postgres (expensive) and may exhaust the Postgres connection pool (denial of service).
+* it is handy when we can render console UI without waking up compute (e.g., show database list)
+
+Storing the same information in two systems is a form of replication. And in the current scheme
+the console is primary, and Postgres catalog is a replica.
+
+This RFC proposes to address problems 1. and 2. by making Postgres a source of truth for roles/databases and
+only caching this info in the console. So using the replication analogy, now the Postgres catalog will be primary, and
+the console will be a replica. Problem 3 is a bit different and could be addressed by ditching the web_access
+user and using, e.g., JWT auth for the @username user so that we do not introduce a new user (JWT is needed
+since we don't know the user's password).
+
+This RFC doesn't talk about giving root access to the database, which is blocked by a secure runtime setup.
+
+## Overview
+
+* Add `/tenant/$tenant/branch/$branch/refresh_catalog` endpoint to console management API which asks `/get_catalog` and updates cached roles/databases info.
+* Whenever a user edits the list of databases or users, Postgres signals `compute_ctl` to call `/<...>/refresh_catalog` in the console
+* Add a password strength check in our extension
+
+## Postgres behavior
+
+Default user role (@username) should have `CREATE ROLE`, `CREATE DB`, and `BYPASSRLS` privileges. We expose Postgres port
+to the open internet, so we need to check password strength. We can use the `passwordcheck` extension or do the same
+from our extension.
+
+Whenever a user edits a list of databases or users, Postgres sends SIGHUP to `compute_ctl`. `compute_ctl` should write PID to `compute_ctl.pid` file.
+
+
+## Compute_ctl behavior
+
+Upon receiving the `SIGHUP` signal, `compute_ctl` should call `/tenant/$tenant/branch/$branch/refresh_catalog` to inform the console about changes in the database. The console will circle back and load the data from `/get_catalog` on compute (see the next section on why this approach is used instead of a direct PUT/PATCH to the console). In the case of `/refresh_catalog` failure, we should retry it N times.
+
+Also `compute_ctl` listens for http `/get_catalog` and returns list of databases and users upon request:
+```
+/get_catalog: -> {
+    databases: [{
+        name: "db1",
+        owner: "jack"
+    }],
+    roles: [{
+        name: "jack",
+        rolepassword: "SCRAM-SHA-256..."
+    }]
+}
+```
+
+## Console behavior
+
+Whenever the console receives `/refresh_catalog` on the management API it goes to compute and asks for `/get_catalog`. I suggest using this way instead of accepting a list of databases/roles directly to the console endpoint for the following reasons:
+
+* we, anyway, will need console originated call to compute's `/get_catalog` after historical branch creation
+* If an intruder gains access to some other `/tenant/$tenant/.../refresh_catalog` he won't be able to change the roles list and will just force an unnecessary reload.
+
+`/refresh_catalog` returns HTTP 200 OK on success.
+
+We should have a button in the admin UI to manually force `/refresh_catalog` in case of data desync.
+
+# Scalability
+
+On my laptop, I can create 4200 roles per second. That corresponds to 363 million roles per day. So `/get_catalog` can become expensive, and our roles database can snowball. While we can address `/get_catalog` size by fetching only the latest changes (e.g., maintain an audit table and have the console drain it), it is still not nice that a single tenant can blow up a multi-tenant console database. I would instead propose to limit the number of databases and roles by some big number like 1000 and bump this limit if somebody asks for it with a legit use case.
+
+
+# QA:
+
+- Why implement `/get_catalog` instead of sending an SQL query from the console to the compute?
+
+- So far, we do not allow remote superuser access to Postgres, and exposing only endpoints with fixed queries beneath them reduces the attack surface.
--- a/docs/separation-compute-storage.md
+++ b/docs/separation-compute-storage.md
@@ -1,8 +0,0 @@
-# Separation of Compute and Storage
-
-TODO:
-
- Read path
- Write path
- Durability model
- API auth
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -15,7 +15,7 @@ listen_pg_addr = '127.0.0.1:64000'
 listen_http_addr = '127.0.0.1:9898'

 checkpoint_distance = '268435456' # in bytes
-checkpoint_timeout = '10m'
+checkpoint_period = '1 s'

 gc_period = '100 s'
 gc_horizon = '67108864'
@@ -46,7 +46,7 @@ Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#ta

 All values can be passed as an argument to the pageserver binary, using the `-c` parameter and specified as a valid TOML string. All tables should be passed in the inline form.

-Example: `${PAGESERVER_BIN} -c "checkpoint_timeout = '10 m'" -c "remote_storage={local_path='/some/local/path/'}"`
+Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage={local_path='/some/local/path/'}"`

 Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.

@@ -82,14 +82,6 @@ S3.

 The unit is # of bytes.

-#### checkpoint_timeout
-
-Apart from `checkpoint_distance`, open layer flushing is also triggered
-`checkpoint_timeout` after the last flush. This makes WAL eventually uploaded to
-s3 when activity is stopped.
-
-The default is 10m.
-
 #### compaction_period

 Every `compaction_period` seconds, the page server checks if
@@ -157,7 +149,7 @@ for other files and for sockets for incoming connections.
 A directory with Postgres installation to use during pageserver activities.
 Inside that dir, a `bin/postgres` binary should be present.

-The default distrib dir is `./pg_install/`.
+The default distrib dir is `./tmp_install/`.

 #### workdir (-D)

--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -28,7 +28,7 @@ The pageserver has a few different duties:
 - Receive WAL from the WAL service and decode it.
 - Replay WAL that's applicable to the chunks that the Page Server maintains

-For more detailed info, see [pageserver-services.md](./pageserver-services.md)
+For more detailed info, see [/pageserver/README](/pageserver/README.md)

 `/proxy`:

@@ -40,15 +40,15 @@ and create new databases and accounts (control plane API in our case).

 Integration tests, written in Python using the `pytest` framework.

-`/vendor/postgres-v14`:
+`/vendor/postgres`:

 PostgreSQL source tree, with the modifications needed for Neon.

-`/pgxn/neon`:
+`/vendor/postgres/contrib/neon`:

 PostgreSQL extension that implements storage manager API and network communications with remote page server.

-`/pgxn/neon_test_utils`:
+`/vendor/postgres/contrib/neon_test_utils`:

 PostgreSQL extension that contains functions needed for testing and debugging.

@@ -57,7 +57,7 @@ PostgreSQL extension that contains functions needed for testing and debugging.
 The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
 It acts as a holding area and redistribution center for recently generated WAL.

-For more detailed info, see [walservice.md](./walservice.md)
+For more detailed info, see [/safekeeper/README](/safekeeper/README.md)

 `/workspace_hack`:
 The workspace_hack crate exists only to pin down some dependencies.
@@ -112,13 +112,11 @@ Run `poetry shell` to activate the virtual environment.
 Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`.

 ### Obligatory checks
-We force code formatting via `black`, `isort` and type hints via `mypy`.
-Run the following commands in the repository's root (next to `pyproject.toml`):
+We enforce code formatting via `yapf` and type hints via `mypy`.
+Run the following commands in the repository's root (next to `setup.cfg`):

 ```bash
-poetry run isort .  # Imports are reformatted
-poetry run black .  # All code is reformatted
-poetry run flake8 .  # Python linter
+poetry run yapf -ri .  # All code is reformatted
 poetry run mypy .  # Ensure there are no typing errors
 ```

@@ -127,7 +125,7 @@ Otherwise it will not find its configuration.

 Also consider:

-* Running `pycodestyle` (or a linter of your choice) and fixing possible defects, if any.
+* Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any.
 * Adding more type hints to your code to avoid `Any`.

 ### Changing dependencies
--- a/libs/etcd_broker/Cargo.toml
+++ b/libs/etcd_broker/Cargo.toml
@@ -9,7 +9,7 @@
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_with = "1.12.0"
- once_cell = "1.13.0"
+ once_cell = "1.8.0"

 utils = { path = "../utils" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/metrics/Cargo.toml
+++ b/libs/metrics/Cargo.toml
@@ -6,5 +6,6 @@ edition = "2021"
 [dependencies]
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
 libc = "0.2"
-once_cell = "1.13.0"
+lazy_static = "1.4"
+once_cell = "1.8.0"
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -2,10 +2,7 @@
 //! make sure that we use the same dep version everywhere.
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
-use once_cell::sync::Lazy;
-use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
-pub use prometheus::opts;
-pub use prometheus::register;
+use lazy_static::lazy_static;
 pub use prometheus::{core, default_registry, proto};
 pub use prometheus::{exponential_buckets, linear_buckets};
 pub use prometheus::{register_gauge, Gauge};
@@ -21,17 +18,6 @@ pub use prometheus::{Encoder, TextEncoder};
 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};

-pub type UIntGauge = GenericGauge<AtomicU64>;
-pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
-
-#[macro_export]
-macro_rules! register_uint_gauge_vec {
-    ($NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{
-        let gauge_vec = UIntGaugeVec::new($crate::opts!($NAME, $HELP), $LABELS_NAMES).unwrap();
-        $crate::register(Box::new(gauge_vec.clone())).map(|_| gauge_vec)
-    }};
-}
-
 /// Gathers all Prometheus metrics and records the I/O stats just before that.
 ///
 /// Metrics gathering is a relatively simple and standalone operation, so
@@ -41,22 +27,19 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
    prometheus::gather()
 }

-static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
-    register_int_gauge_vec!(
+lazy_static! {
+    static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
        "libmetrics_disk_io_bytes_total",
        "Bytes written and read from disk, grouped by the operation (read|write)",
        &["io_operation"]
    )
-    .expect("Failed to register disk i/o bytes int gauge vec")
-});
-
-static MAXRSS_KB: Lazy<IntGauge> = Lazy::new(|| {
-    register_int_gauge!(
+    .expect("Failed to register disk i/o bytes int gauge vec");
+    static ref MAXRSS_KB: IntGauge = register_int_gauge!(
        "libmetrics_maxrss_kb",
        "Memory usage (Maximum Resident Set Size)"
    )
-    .expect("Failed to register maxrss_kb int gauge")
-});
+    .expect("Failed to register maxrss_kb int gauge");
+}

 pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
    0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
--- a/libs/metrics/src/wrappers.rs
+++ b/libs/metrics/src/wrappers.rs
@@ -10,13 +10,13 @@ use std::io::{Read, Result, Write};
 /// # use std::io::{Result, Read};
 /// # use metrics::{register_int_counter, IntCounter};
 /// # use metrics::CountedReader;
-/// # use once_cell::sync::Lazy;
 /// #
-/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
+/// # lazy_static::lazy_static! {
+/// #     static ref INT_COUNTER: IntCounter = register_int_counter!(
 /// #         "int_counter",
 /// #         "let's count something!"
-/// #     ).unwrap()
-/// # });
+/// #     ).unwrap();
+/// # }
 /// #
 /// fn do_some_reads(stream: impl Read, count: usize) -> Result<Vec<u8>> {
 ///     let mut reader = CountedReader::new(stream, |cnt| {
@@ -85,13 +85,13 @@ impl<T: Read> Read for CountedReader<'_, T> {
 /// # use std::io::{Result, Write};
 /// # use metrics::{register_int_counter, IntCounter};
 /// # use metrics::CountedWriter;
-/// # use once_cell::sync::Lazy;
 /// #
-/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
+/// # lazy_static::lazy_static! {
+/// #     static ref INT_COUNTER: IntCounter = register_int_counter!(
 /// #         "int_counter",
 /// #         "let's count something!"
-/// #     ).unwrap()
-/// # });
+/// #     ).unwrap();
+/// # }
 /// #
 /// fn do_some_writes(stream: impl Write, payload: &[u8]) -> Result<()> {
 ///     let mut writer = CountedWriter::new(stream, |cnt| {
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
+chrono = "0.4.19"
 rand = "0.8.3"
 regex = "1.4.5"
 bytes = "1.0.1"
@@ -11,7 +12,7 @@ byteorder = "1.4.3"
 anyhow = "1.0"
 crc32c = "0.6.0"
 hex = "0.4.3"
-once_cell = "1.13.0"
+lazy_static = "1.4"
 log = "0.4.14"
 memoffset = "0.6.2"
 thiserror = "1.0"
--- a/libs/postgres_ffi/README.md
+++ b/libs/postgres_ffi/README.md
@@ -9,11 +9,9 @@ should be auto-generated too, but that's a TODO.

 The PostgreSQL on-disk file format is not portable across different
 CPU architectures and operating systems. It is also subject to change
-in each major PostgreSQL version. Currently, this module supports
-PostgreSQL v14 and v15: bindings and code that depends on them are version-specific.
-This code is organized in modules: `postgres_ffi::v14` and `postgres_ffi::v15`
-Version independend code is explicitly exported into shared `postgres_ffi`.
-
+in each major PostgreSQL version. Currently, this module is based on
+PostgreSQL v14, but in the future we will probably need a separate
+copy for each PostgreSQL version.

 TODO: Currently, there is also some code that deals with WAL records
 in pageserver/src/waldecoder.rs.  That should be moved into this
--- a/libs/postgres_ffi/build.rs
+++ b/libs/postgres_ffi/build.rs
@@ -44,102 +44,91 @@ impl ParseCallbacks for PostgresFfiCallbacks {

 fn main() {
    // Tell cargo to invalidate the built crate whenever the wrapper changes
-    println!("cargo:rerun-if-changed=bindgen_deps.h");
+    println!("cargo:rerun-if-changed=pg_control_ffi.h");

    // Finding the location of C headers for the Postgres server:
-    // - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`
-    // - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`
-    let pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
-        postgres_install_dir.into()
+    // - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
+    // - if there's a `bin/pg_config` file, use it to get the server include directory; otherwise use `<project_root>/tmp_install/include/postgresql/server`
+    let mut pg_install_dir: PathBuf;
+    if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
+        pg_install_dir = postgres_install_dir.into();
    } else {
-        PathBuf::from("pg_install")
-    };
+        pg_install_dir = PathBuf::from("tmp_install")
+    }

-    for pg_version in &["v14", "v15"] {
-        let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
-        if pg_install_dir_versioned.is_relative() {
-            let cwd = env::current_dir().unwrap();
-            pg_install_dir_versioned = cwd.join("..").join("..").join(pg_install_dir_versioned);
+    if pg_install_dir.is_relative() {
+        let cwd = env::current_dir().unwrap();
+        pg_install_dir = cwd.join("..").join("..").join(pg_install_dir);
+    }
+
+    let pg_config_bin = pg_install_dir.join("bin").join("pg_config");
+    let inc_server_path: String = if pg_config_bin.exists() {
+        let output = Command::new(pg_config_bin)
+            .arg("--includedir-server")
+            .output()
+            .expect("failed to execute `pg_config --includedir-server`");
+
+        if !output.status.success() {
+            panic!("`pg_config --includedir-server` failed")
        }

-        let pg_config_bin = pg_install_dir_versioned
-            .join(pg_version)
-            .join("bin")
-            .join("pg_config");
-        let inc_server_path: String = if pg_config_bin.exists() {
-            let output = Command::new(pg_config_bin)
-                .arg("--includedir-server")
-                .output()
-                .expect("failed to execute `pg_config --includedir-server`");
+        String::from_utf8(output.stdout).unwrap().trim_end().into()
+    } else {
+        pg_install_dir
+            .join("include")
+            .join("postgresql")
+            .join("server")
+            .into_os_string()
+            .into_string()
+            .unwrap()
+    };

-            if !output.status.success() {
-                panic!("`pg_config --includedir-server` failed")
-            }
+    // The bindgen::Builder is the main entry point
+    // to bindgen, and lets you build up options for
+    // the resulting bindings.
+    let bindings = bindgen::Builder::default()
+        //
+        // All the needed PostgreSQL headers are included from 'pg_control_ffi.h'
+        //
+        .header("pg_control_ffi.h")
+        //
+        // Tell cargo to invalidate the built crate whenever any of the
+        // included header files change.
+        //
+        .parse_callbacks(Box::new(PostgresFfiCallbacks))
+        //
+        // These are the types and constants that we want to generate bindings for
+        //
+        .allowlist_type("BlockNumber")
+        .allowlist_type("OffsetNumber")
+        .allowlist_type("MultiXactId")
+        .allowlist_type("MultiXactOffset")
+        .allowlist_type("MultiXactStatus")
+        .allowlist_type("ControlFileData")
+        .allowlist_type("CheckPoint")
+        .allowlist_type("FullTransactionId")
+        .allowlist_type("XLogRecord")
+        .allowlist_type("XLogPageHeaderData")
+        .allowlist_type("XLogLongPageHeaderData")
+        .allowlist_var("XLOG_PAGE_MAGIC")
+        .allowlist_var("PG_CONTROL_FILE_SIZE")
+        .allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
+        .allowlist_type("PageHeaderData")
+        .allowlist_type("DBState")
+        // Because structs are used for serialization, tell bindgen to emit
+        // explicit padding fields.
+        .explicit_padding(true)
+        //
+        .clang_arg(format!("-I{inc_server_path}"))
+        //
+        // Finish the builder and generate the bindings.
+        //
+        .generate()
+        .expect("Unable to generate bindings");

-            String::from_utf8(output.stdout).unwrap().trim_end().into()
-        } else {
-            pg_install_dir_versioned
-                .join("include")
-                .join("postgresql")
-                .join("server")
-                .into_os_string()
-                .into_string()
-                .unwrap()
-        };
-
-        // The bindgen::Builder is the main entry point
-        // to bindgen, and lets you build up options for
-        // the resulting bindings.
-        let bindings = bindgen::Builder::default()
-            //
-            // All the needed PostgreSQL headers are included from 'bindgen_deps.h'
-            //
-            .header("bindgen_deps.h")
-            //
-            // Tell cargo to invalidate the built crate whenever any of the
-            // included header files changed.
-            //
-            .parse_callbacks(Box::new(PostgresFfiCallbacks))
-            //
-            // These are the types and constants that we want to generate bindings for
-            //
-            .allowlist_type("BlockNumber")
-            .allowlist_type("OffsetNumber")
-            .allowlist_type("XLogRecPtr")
-            .allowlist_type("XLogSegNo")
-            .allowlist_type("TimeLineID")
-            .allowlist_type("TimestampTz")
-            .allowlist_type("MultiXactId")
-            .allowlist_type("MultiXactOffset")
-            .allowlist_type("MultiXactStatus")
-            .allowlist_type("ControlFileData")
-            .allowlist_type("CheckPoint")
-            .allowlist_type("FullTransactionId")
-            .allowlist_type("XLogRecord")
-            .allowlist_type("XLogPageHeaderData")
-            .allowlist_type("XLogLongPageHeaderData")
-            .allowlist_var("XLOG_PAGE_MAGIC")
-            .allowlist_var("PG_CONTROL_FILE_SIZE")
-            .allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
-            .allowlist_type("PageHeaderData")
-            .allowlist_type("DBState")
-            // Because structs are used for serialization, tell bindgen to emit
-            // explicit padding fields.
-            .explicit_padding(true)
-            //
-            .clang_arg(format!("-I{inc_server_path}"))
-            //
-            // Finish the builder and generate the bindings.
-            //
-            .generate()
-            .expect("Unable to generate bindings");
-
-        // Write the bindings to the $OUT_DIR/bindings_$pg_version.rs file.
-        let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
-        let filename = format!("bindings_{pg_version}.rs");
-
-        bindings
-            .write_to_file(out_path.join(filename))
-            .expect("Couldn't write bindings!");
-    }
+    // Write the bindings to the $OUT_DIR/bindings.rs file.
+    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
+    bindings
+        .write_to_file(out_path.join("bindings.rs"))
+        .expect("Couldn't write bindings!");
 }
--- a/libs/postgres_ffi/pg_control_ffi.h
+++ b/libs/postgres_ffi/pg_control_ffi.h
--- a/libs/postgres_ffi/src/controlfile_utils.rs
+++ b/libs/postgres_ffi/src/controlfile_utils.rs
@@ -23,7 +23,7 @@
 //! information. You can use PostgreSQL's pg_controldata utility to view its
 //! contents.
 //!
-use super::bindings::{ControlFileData, PG_CONTROL_FILE_SIZE};
+use crate::{ControlFileData, PG_CONTROL_FILE_SIZE};

 use anyhow::{bail, Result};
 use bytes::{Bytes, BytesMut};
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -7,74 +7,21 @@
 // https://github.com/rust-lang/rust-bindgen/issues/1651
 #![allow(deref_nullptr)]

+use serde::{Deserialize, Serialize};
 use utils::lsn::Lsn;

-macro_rules! postgres_ffi {
-    ($version:ident) => {
-        #[path = "."]
-        pub mod $version {
-            pub mod bindings {
-                // bindgen generates bindings for a lot of stuff we don't need
-                #![allow(dead_code)]
+include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

-                use serde::{Deserialize, Serialize};
-                include!(concat!(
-                    env!("OUT_DIR"),
-                    "/bindings_",
-                    stringify!($version),
-                    ".rs"
-                ));
-            }
-            pub mod controlfile_utils;
-            pub mod nonrelfile_utils;
-            pub mod pg_constants;
-            pub mod relfile_utils;
-            pub mod waldecoder;
-            pub mod xlog_utils;
-
-            pub const PG_MAJORVERSION: &str = stringify!($version);
-
-            // Re-export some symbols from bindings
-            pub use bindings::DBState_DB_SHUTDOWNED;
-            pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
-        }
-    };
-}
-
-postgres_ffi!(v14);
-postgres_ffi!(v15);
-
-// Export some widely used datatypes that are unlikely to change across Postgres versions
-pub use v14::bindings::{uint32, uint64, Oid};
-pub use v14::bindings::{BlockNumber, OffsetNumber};
-pub use v14::bindings::{MultiXactId, TransactionId};
-pub use v14::bindings::{TimeLineID, TimestampTz, XLogRecPtr, XLogSegNo};
-
-// Likewise for these, although the assumption that these don't change is a little more iffy.
-pub use v14::bindings::{MultiXactOffset, MultiXactStatus};
-pub use v14::xlog_utils::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};
-
-// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
-// --with-segsize=SEGSIZE, but assume the defaults for now.
-pub const BLCKSZ: u16 = 8192;
-pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
-pub const XLOG_BLCKSZ: usize = 8192;
-pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
-
-pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;
-
-// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
-//
-// NOTE: this is not to be confused with Neon timelines; different concept!
-//
-// It's a shaky assumption, that it's always 1. We might import a
-// PostgreSQL data directory that has gone through timeline bumps,
-// for example. FIXME later.
-pub const PG_TLI: u32 = 1;
+pub mod controlfile_utils;
+pub mod nonrelfile_utils;
+pub mod pg_constants;
+pub mod relfile_utils;
+pub mod waldecoder;
+pub mod xlog_utils;

 //  See TransactionIdIsNormal in transam.h
 pub const fn transaction_id_is_normal(id: TransactionId) -> bool {
-    id > v14::pg_constants::FIRST_NORMAL_TRANSACTION_ID
+    id > pg_constants::FIRST_NORMAL_TRANSACTION_ID
 }

 // See TransactionIdPrecedes in transam.c
--- a/libs/postgres_ffi/src/nonrelfile_utils.rs
+++ b/libs/postgres_ffi/src/nonrelfile_utils.rs
@@ -1,12 +1,11 @@
 //!
 //! Common utilities for dealing with PostgreSQL non-relation files.
 //!
-use super::pg_constants;
-use crate::transaction_id_precedes;
+use crate::{pg_constants, transaction_id_precedes};
 use bytes::BytesMut;
 use log::*;

-use super::bindings::MultiXactId;
+use crate::MultiXactId;

 pub fn transaction_id_set_status(xid: u32, status: u8, page: &mut BytesMut) {
    trace!(
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -7,8 +7,7 @@
 //! comments on them.
 //!

-use super::bindings::{PageHeaderData, XLogRecord};
-use crate::BLCKSZ;
+use crate::PageHeaderData;

 //
 // From pg_tablespace_d.h
@@ -32,6 +31,11 @@ pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
 pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
 pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;

+// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
+// --with-segsize=SEGSIZE, but assume the defaults for now.
+pub const BLCKSZ: u16 = 8192;
+pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
+
 //
 // From bufpage.h
 //
@@ -176,7 +180,7 @@ pub const XLOG_DBASE_DROP: u8 = 0x10;
 pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
 pub const XLOG_TBLSPC_DROP: u8 = 0x10;

-pub const SIZEOF_XLOGRECORD: u32 = std::mem::size_of::<XLogRecord>() as u32;
+pub const SIZEOF_XLOGRECORD: u32 = 24;

 //
 // from xlogrecord.h
@@ -206,10 +210,16 @@ pub const INVALID_TRANSACTION_ID: u32 = 0;
 pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
 pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;

+/* FIXME: pageserver should request wal_seg_size from compute node */
+pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
+
+pub const XLOG_BLCKSZ: usize = 8192;
 pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
 pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
 pub const XLP_LONG_HEADER: u16 = 0x0002;

+pub const PG_MAJORVERSION: &str = "14";
+
 // List of subdirectories inside pgdata.
 // Copied from src/bin/initdb/initdb.c
 pub const PGDATA_SUBDIRS: [&str; 22] = [
--- a/libs/postgres_ffi/src/relfile_utils.rs
+++ b/libs/postgres_ffi/src/relfile_utils.rs
@@ -1,11 +1,11 @@
 //!
 //! Common utilities for dealing with PostgreSQL relation files.
 //!
-use super::pg_constants;
-use once_cell::sync::OnceCell;
+use crate::pg_constants;
+use lazy_static::lazy_static;
 use regex::Regex;

-#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
+#[derive(Debug, Clone, thiserror::Error, PartialEq)]
 pub enum FilePathError {
    #[error("invalid relation fork name")]
    InvalidForkName,
@@ -54,14 +54,11 @@ pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
 /// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
 ///
 pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
-    static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
-    RELFILE_RE.get_or_init(|| {
-        Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
-    });
-
+    lazy_static! {
+        static ref RELFILE_RE: Regex =
+            Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
+    }
    let caps = RELFILE_RE
-        .get()
-        .unwrap()
        .captures(fname)
        .ok_or(FilePathError::InvalidFileName)?;

--- a/libs/postgres_ffi/src/waldecoder.rs
+++ b/libs/postgres_ffi/src/waldecoder.rs
@@ -8,32 +8,29 @@
 //! to look deeper into the WAL records to also understand which blocks they modify, the code
 //! for that is in pageserver/src/walrecord.rs
 //!
-use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC};
+use super::pg_constants;
 use super::xlog_utils::*;
-use crate::WAL_SEGMENT_SIZE;
+use super::XLogLongPageHeaderData;
+use super::XLogPageHeaderData;
+use super::XLogRecord;
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use crc32c::*;
 use log::*;
 use std::cmp::min;
-use std::num::NonZeroU32;
 use thiserror::Error;
 use utils::lsn::Lsn;

-enum State {
-    WaitingForRecord,
-    ReassemblingRecord {
-        recordbuf: BytesMut,
-        contlen: NonZeroU32,
-    },
-    SkippingEverything {
-        skip_until_lsn: Lsn,
-    },
-}
-
 pub struct WalStreamDecoder {
    lsn: Lsn,
+
+    startlsn: Lsn, // LSN where this record starts
+    contlen: u32,
+    padlen: u32,
+
    inputbuf: BytesMut,
-    state: State,
+
+    /// buffer used to reassemble records that cross page boundaries.
+    recordbuf: BytesMut,
 }

 #[derive(Error, Debug, Clone)]
@@ -51,8 +48,13 @@ impl WalStreamDecoder {
    pub fn new(lsn: Lsn) -> WalStreamDecoder {
        WalStreamDecoder {
            lsn,
+
+            startlsn: Lsn(0),
+            contlen: 0,
+            padlen: 0,
+
            inputbuf: BytesMut::new(),
-            state: State::WaitingForRecord,
+            recordbuf: BytesMut::new(),
        }
    }

@@ -65,58 +67,6 @@ impl WalStreamDecoder {
        self.inputbuf.extend_from_slice(buf);
    }

-    fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError> {
-        let validate_impl = || {
-            if hdr.xlp_magic != XLOG_PAGE_MAGIC as u16 {
-                return Err(format!(
-                    "invalid xlog page header: xlp_magic={}, expected {}",
-                    hdr.xlp_magic, XLOG_PAGE_MAGIC
-                ));
-            }
-            if hdr.xlp_pageaddr != self.lsn.0 {
-                return Err(format!(
-                    "invalid xlog page header: xlp_pageaddr={}, expected {}",
-                    hdr.xlp_pageaddr, self.lsn
-                ));
-            }
-            match self.state {
-                State::WaitingForRecord => {
-                    if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {
-                        return Err(
-                            "invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD".into(),
-                        );
-                    }
-                    if hdr.xlp_rem_len != 0 {
-                        return Err(format!(
-                            "invalid xlog page header: xlp_rem_len={}, but it's not a contrecord",
-                            hdr.xlp_rem_len
-                        ));
-                    }
-                }
-                State::ReassemblingRecord { contlen, .. } => {
-                    if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD == 0 {
-                        return Err(
-                            "invalid xlog page header: XLP_FIRST_IS_CONTRECORD expected, not found"
-                                .into(),
-                        );
-                    }
-                    if hdr.xlp_rem_len != contlen.get() {
-                        return Err(format!(
-                            "invalid xlog page header: xlp_rem_len={}, expected {}",
-                            hdr.xlp_rem_len,
-                            contlen.get()
-                        ));
-                    }
-                }
-                State::SkippingEverything { .. } => {
-                    panic!("Should not be validating page header in the SkippingEverything state");
-                }
-            };
-            Ok(())
-        };
-        validate_impl().map_err(|msg| WalDecodeError { msg, lsn: self.lsn })
-    }
-
    /// Attempt to decode another WAL record from the input that has been fed to the
    /// decoder so far.
    ///
@@ -126,122 +76,128 @@ impl WalStreamDecoder {
    ///     Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
    ///
    pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
+        let recordbuf;
+
        // Run state machine that validates page headers, and reassembles records
        // that cross page boundaries.
        loop {
            // parse and verify page boundaries as we go
-            // However, we may have to skip some page headers if we're processing the XLOG_SWITCH record or skipping padding for whatever reason.
-            match self.state {
-                State::WaitingForRecord | State::ReassemblingRecord { .. } => {
-                    if self.lsn.segment_offset(WAL_SEGMENT_SIZE) == 0 {
-                        // parse long header
-
-                        if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
-                            return Ok(None);
-                        }
-
-                        let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(
-                            |e| WalDecodeError {
-                                msg: format!("long header deserialization failed {}", e),
-                                lsn: self.lsn,
-                            },
-                        )?;
-
-                        self.validate_page_header(&hdr.std)?;
-
-                        self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
-                    } else if self.lsn.block_offset() == 0 {
-                        if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
-                            return Ok(None);
-                        }
-
-                        let hdr =
-                            XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
-                                WalDecodeError {
-                                    msg: format!("header deserialization failed {}", e),
-                                    lsn: self.lsn,
-                                }
-                            })?;
-
-                        self.validate_page_header(&hdr)?;
-
-                        self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
-                    }
+            if self.padlen > 0 {
+                // We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
+                if self.inputbuf.remaining() < self.padlen as usize {
+                    return Ok(None);
                }
-                State::SkippingEverything { .. } => {}
-            }
-            // now read page contents
-            match &mut self.state {
-                State::WaitingForRecord => {
-                    // need to have at least the xl_tot_len field
-                    if self.inputbuf.remaining() < 4 {
-                        return Ok(None);
-                    }

-                    // peek xl_tot_len at the beginning of the record.
-                    // FIXME: assumes little-endian
-                    let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
-                    if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
-                        return Err(WalDecodeError {
-                            msg: format!("invalid xl_tot_len {}", xl_tot_len),
-                            lsn: self.lsn,
-                        });
-                    }
-                    // Fast path for the common case that the whole record fits on the page.
-                    let pageleft = self.lsn.remaining_in_block() as u32;
-                    if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
-                        self.lsn += xl_tot_len as u64;
-                        let recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
-                        return Ok(Some(self.complete_record(recordbuf)?));
-                    } else {
-                        // Need to assemble the record from pieces. Remember the size of the
-                        // record, and loop back. On next iterations, we will reach the branch
-                        // below, and copy the part of the record that was on this or next page(s)
-                        // to 'recordbuf'.  Subsequent iterations will skip page headers, and
-                        // append the continuations from the next pages to 'recordbuf'.
-                        self.state = State::ReassemblingRecord {
-                            recordbuf: BytesMut::with_capacity(xl_tot_len as usize),
-                            contlen: NonZeroU32::new(xl_tot_len).unwrap(),
-                        }
-                    }
+                // skip padding
+                self.inputbuf.advance(self.padlen as usize);
+                self.lsn += self.padlen as u64;
+                self.padlen = 0;
+            } else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
+                // parse long header
+
+                if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
+                    return Ok(None);
                }
-                State::ReassemblingRecord { recordbuf, contlen } => {
-                    // we're continuing a record, possibly from previous page.
-                    let pageleft = self.lsn.remaining_in_block() as u32;

-                    // read the rest of the record, or as much as fits on this page.
-                    let n = min(contlen.get(), pageleft) as usize;
-
-                    if self.inputbuf.remaining() < n {
-                        return Ok(None);
+                let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
+                    WalDecodeError {
+                        msg: format!("long header deserialization failed {}", e),
+                        lsn: self.lsn,
                    }
+                })?;

-                    recordbuf.put(self.inputbuf.split_to(n));
-                    self.lsn += n as u64;
-                    *contlen = match NonZeroU32::new(contlen.get() - n as u32) {
-                        Some(x) => x,
-                        None => {
-                            // The record is now complete.
-                            let recordbuf = std::mem::replace(recordbuf, BytesMut::new()).freeze();
-                            return Ok(Some(self.complete_record(recordbuf)?));
-                        }
-                    }
+                if hdr.std.xlp_pageaddr != self.lsn.0 {
+                    return Err(WalDecodeError {
+                        msg: "invalid xlog segment header".into(),
+                        lsn: self.lsn,
+                    });
                }
-                State::SkippingEverything { skip_until_lsn } => {
-                    assert!(*skip_until_lsn >= self.lsn);
-                    let n = skip_until_lsn.0 - self.lsn.0;
-                    if self.inputbuf.remaining() < n as usize {
-                        return Ok(None);
-                    }
-                    self.inputbuf.advance(n as usize);
-                    self.lsn += n;
-                    self.state = State::WaitingForRecord;
+                // TODO: verify the remaining fields in the header
+
+                self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
+                continue;
+            } else if self.lsn.block_offset() == 0 {
+                if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
+                    return Ok(None);
                }
+
+                let hdr = XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
+                    WalDecodeError {
+                        msg: format!("header deserialization failed {}", e),
+                        lsn: self.lsn,
+                    }
+                })?;
+
+                if hdr.xlp_pageaddr != self.lsn.0 {
+                    return Err(WalDecodeError {
+                        msg: "invalid xlog page header".into(),
+                        lsn: self.lsn,
+                    });
+                }
+                // TODO: verify the remaining fields in the header
+
+                self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
+                continue;
+            } else if self.contlen == 0 {
+                assert!(self.recordbuf.is_empty());
+
+                // need to have at least the xl_tot_len field
+                if self.inputbuf.remaining() < 4 {
+                    return Ok(None);
+                }
+
+                // peek xl_tot_len at the beginning of the record.
+                // FIXME: assumes little-endian
+                self.startlsn = self.lsn;
+                let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
+                if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
+                    return Err(WalDecodeError {
+                        msg: format!("invalid xl_tot_len {}", xl_tot_len),
+                        lsn: self.lsn,
+                    });
+                }
+
+                // Fast path for the common case that the whole record fits on the page.
+                let pageleft = self.lsn.remaining_in_block() as u32;
+                if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
+                    // Take the record from the 'inputbuf', and validate it.
+                    recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
+                    self.lsn += xl_tot_len as u64;
+                    break;
+                } else {
+                    // Need to assemble the record from pieces. Remember the size of the
+                    // record, and loop back. On next iteration, we will reach the 'else'
+                    // branch below, and copy the part of the record that was on this page
+                    // to 'recordbuf'.  Subsequent iterations will skip page headers, and
+                    // append the continuations from the next pages to 'recordbuf'.
+                    self.recordbuf.reserve(xl_tot_len as usize);
+                    self.contlen = xl_tot_len;
+                    continue;
+                }
+            } else {
+                // we're continuing a record, possibly from previous page.
+                let pageleft = self.lsn.remaining_in_block() as u32;
+
+                // read the rest of the record, or as much as fits on this page.
+                let n = min(self.contlen, pageleft) as usize;
+
+                if self.inputbuf.remaining() < n {
+                    return Ok(None);
+                }
+
+                self.recordbuf.put(self.inputbuf.split_to(n));
+                self.lsn += n as u64;
+                self.contlen -= n as u32;
+
+                if self.contlen == 0 {
+                    // The record is now complete.
+                    recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new()).freeze();
+                    break;
+                }
+                continue;
            }
        }
-    }

-    fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError> {
        // We now have a record in the 'recordbuf' local variable.
        let xlogrec =
            XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]).map_err(|e| {
@@ -263,20 +219,18 @@ impl WalStreamDecoder {

        // XLOG_SWITCH records are special. If we see one, we need to skip
        // to the next WAL segment.
-        let next_lsn = if xlogrec.is_xlog_switch_record() {
+        if xlogrec.is_xlog_switch_record() {
            trace!("saw xlog switch record at {}", self.lsn);
-            self.lsn + self.lsn.calc_padding(WAL_SEGMENT_SIZE as u64)
+            self.padlen = self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
        } else {
            // Pad to an 8-byte boundary
-            self.lsn.align()
-        };
-        self.state = State::SkippingEverything {
-            skip_until_lsn: next_lsn,
-        };
+            self.padlen = self.lsn.calc_padding(8u32) as u32;
+        }

        // We should return LSN of the next record, not the last byte of this record or
        // the byte immediately after. Note that this handles both XLOG_SWITCH and usual
        // records, the former "spans" until the next WAL segment (see test_xlog_switch).
-        Ok((next_lsn, recordbuf))
+        let result = (self.lsn + self.padlen as u64, recordbuf);
+        Ok(Some(result))
    }
 }
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -7,39 +7,38 @@
 // have been named the same as the corresponding PostgreSQL functions instead.
 //

-use crc32c::crc32c_append;
-
-use super::bindings::{
-    CheckPoint, FullTransactionId, TimeLineID, TimestampTz, XLogLongPageHeaderData,
-    XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,
-};
-use super::pg_constants;
-use super::waldecoder::WalStreamDecoder;
-use crate::PG_TLI;
-use crate::{uint32, uint64, Oid};
-use crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
+use crate::pg_constants;
+use crate::CheckPoint;
+use crate::FullTransactionId;
+use crate::XLogLongPageHeaderData;
+use crate::XLogPageHeaderData;
+use crate::XLogRecord;
+use crate::XLOG_PAGE_MAGIC;

+use anyhow::{bail, ensure};
+use byteorder::{ByteOrder, LittleEndian};
 use bytes::BytesMut;
 use bytes::{Buf, Bytes};
-
+use crc32c::*;
 use log::*;
-
-use serde::Serialize;
-use std::fs::File;
+use std::cmp::max;
+use std::cmp::min;
+use std::fs::{self, File};
 use std::io::prelude::*;
-use std::io::ErrorKind;
 use std::io::SeekFrom;
 use std::path::{Path, PathBuf};
 use std::time::SystemTime;
 use utils::bin_ser::DeserializeError;
 use utils::bin_ser::SerializeError;
-
+use utils::const_assert;
 use utils::lsn::Lsn;

 pub const XLOG_FNAME_LEN: usize = 24;
+pub const XLOG_BLCKSZ: usize = 8192;
 pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
 pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
 pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
+pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;

 pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = std::mem::size_of::<XLogPageHeaderData>();
 pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = std::mem::size_of::<XLogLongPageHeaderData>();
@@ -47,6 +46,14 @@ pub const XLOG_SIZE_OF_XLOG_RECORD: usize = std::mem::size_of::<XLogRecord>();
 #[allow(clippy::identity_op)]
 pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;

+// PG timeline is always 1, changing it doesn't have useful meaning in Zenith.
+pub const PG_TLI: u32 = 1;
+
+pub type XLogRecPtr = u64;
+pub type TimeLineID = u32;
+pub type TimestampTz = i64;
+pub type XLogSegNo = u64;
+
 /// Interval of checkpointing metadata file. We should store metadata file to enforce
 /// predicate that checkpoint.nextXid is larger than any XID in WAL.
 /// But flushing checkpoint file for each transaction seems to be too expensive,
@@ -72,12 +79,12 @@ pub fn XLogSegNoOffsetToRecPtr(

 #[allow(non_snake_case)]
 pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
-    format!(
+    return format!(
        "{:>08X}{:>08X}{:>08X}",
        tli,
        logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
        logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
-    )
+    );
 }

 #[allow(non_snake_case)]
@@ -132,93 +139,336 @@ pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
    }
 }

-// Returns (aligned) end_lsn of the last record in data_dir with WAL segments.
-// start_lsn must point to some previously known record boundary (beginning of
-// the next record). If no valid record after is found, start_lsn is returned
-// back.
-pub fn find_end_of_wal(
+/// Return the offset of the end of the last valid record in segment `segno`,
+/// scanning from `start_offset`. Returns `start_offset` if no valid record is found.
+fn find_end_of_wal_segment(
    data_dir: &Path,
+    segno: XLogSegNo,
+    tli: TimeLineID,
    wal_seg_size: usize,
-    start_lsn: Lsn, // start reading WAL at this point; must point at record start_lsn.
-) -> anyhow::Result<Lsn> {
-    let mut result = start_lsn;
-    let mut curr_lsn = start_lsn;
+    start_offset: usize, // start reading at this point
+) -> anyhow::Result<u32> {
+    // step back to the beginning of the page to read it in...
+    let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
+    let mut skipping_first_contrecord: bool = false;
+    let mut contlen: usize = 0;
+    let mut xl_crc: u32 = 0;
+    let mut crc: u32 = 0;
+    let mut rec_offs: usize = 0;
    let mut buf = [0u8; XLOG_BLCKSZ];
-    let mut decoder = WalStreamDecoder::new(start_lsn);
+    let file_name = XLogFileName(tli, segno, wal_seg_size);
+    let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
+    let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
+    file.seek(SeekFrom::Start(offs as u64))?;
+    // xl_crc is the last field in XLogRecord, will not be read into rec_hdr
+    const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
+    let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];

-    // loop over segments
-    loop {
-        let segno = curr_lsn.segment_number(wal_seg_size);
-        let seg_file_name = XLogFileName(PG_TLI, segno, wal_seg_size);
-        let seg_file_path = data_dir.join(seg_file_name);
-        match open_wal_segment(&seg_file_path)? {
-            None => {
-                // no more segments
-                info!(
-                    "find_end_of_wal reached end at {:?}, segment {:?} doesn't exist",
-                    result, seg_file_path
+    trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
+    while offs < wal_seg_size {
+        // we are at the beginning of the page; read it in
+        if offs % XLOG_BLCKSZ == 0 {
+            trace!("offs=0x{:x}: new page", offs);
+            let bytes_read = file.read(&mut buf)?;
+            if bytes_read != buf.len() {
+                bail!(
+                    "failed to read {} bytes from {} at {}",
+                    XLOG_BLCKSZ,
+                    file_name,
+                    offs
                );
-                return Ok(result);
            }
-            Some(mut segment) => {
-                let seg_offs = curr_lsn.segment_offset(wal_seg_size);
-                segment.seek(SeekFrom::Start(seg_offs as u64))?;
-                // loop inside segment
-                loop {
-                    let bytes_read = segment.read(&mut buf)?;
-                    if bytes_read == 0 {
-                        break; // EOF
-                    }
-                    curr_lsn += bytes_read as u64;
-                    decoder.feed_bytes(&buf[0..bytes_read]);

-                    // advance result past all completely read records
-                    loop {
-                        match decoder.poll_decode() {
-                            Ok(Some(record)) => result = record.0,
-                            Err(e) => {
-                                info!(
-                                    "find_end_of_wal reached end at {:?}, decode error: {:?}",
-                                    result, e
-                                );
-                                return Ok(result);
-                            }
-                            Ok(None) => break, // need more data
-                        }
+            let xlp_magic = LittleEndian::read_u16(&buf[0..2]);
+            let xlp_info = LittleEndian::read_u16(&buf[2..4]);
+            let xlp_rem_len = LittleEndian::read_u32(&buf[XLP_REM_LEN_OFFS..XLP_REM_LEN_OFFS + 4]);
+            trace!(
+                "  xlp_magic=0x{:x}, xlp_info=0x{:x}, xlp_rem_len={}",
+                xlp_magic,
+                xlp_info,
+                xlp_rem_len
+            );
+            // this is expected in current usage when valid WAL starts after page header
+            if xlp_magic != XLOG_PAGE_MAGIC as u16 {
+                trace!(
+                    "  invalid WAL file {}.partial magic {} at {:?}",
+                    file_name,
+                    xlp_magic,
+                    Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
+                );
+            }
+            if offs == 0 {
+                offs += XLOG_SIZE_OF_XLOG_LONG_PHD;
+                if (xlp_info & XLP_FIRST_IS_CONTRECORD) != 0 {
+                    trace!("  first record is contrecord");
+                    skipping_first_contrecord = true;
+                    contlen = xlp_rem_len as usize;
+                    if offs < start_offset {
+                        // Pre-condition failed: the beginning of the segment is unexpectedly corrupted.
+                        ensure!(start_offset - offs >= contlen,
+                            "start_offset is in the middle of the first record (which happens to be a contrecord), \
+                             expected to be on a record boundary. Is beginning of the segment corrupted?");
+                        contlen = 0;
+                        // keep skipping_first_contrecord to avoid counting the contrecord as valid, we did not check it.
                    }
+                } else {
+                    trace!("  first record is not contrecord");
+                }
+            } else {
+                offs += XLOG_SIZE_OF_XLOG_SHORT_PHD;
+            }
+            // ... and step forward again if asked
+            trace!("  skipped header to 0x{:x}", offs);
+            offs = max(offs, start_offset);
+        // beginning of the next record
+        } else if contlen == 0 {
+            let page_offs = offs % XLOG_BLCKSZ;
+            let xl_tot_len = LittleEndian::read_u32(&buf[page_offs..page_offs + 4]) as usize;
+            trace!("offs=0x{:x}: new record, xl_tot_len={}", offs, xl_tot_len);
+            if xl_tot_len == 0 {
+                info!(
+                    "find_end_of_wal_segment reached zeros at {:?}, last records ends at {:?}",
+                    Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
+                    Lsn(XLogSegNoOffsetToRecPtr(
+                        segno,
+                        last_valid_rec_pos as u32,
+                        wal_seg_size
+                    ))
+                );
+                break; // zeros, reached the end
+            }
+            if skipping_first_contrecord {
+                skipping_first_contrecord = false;
+                trace!("  first contrecord has been just completed");
+            } else {
+                trace!(
+                    "  updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
+                    last_valid_rec_pos,
+                    offs
+                );
+                last_valid_rec_pos = offs;
+            }
+            offs += 4;
+            rec_offs = 4;
+            contlen = xl_tot_len - 4;
+            trace!(
+                "  reading rec_hdr[0..4] <-- [0x{:x}; 0x{:x})",
+                page_offs,
+                page_offs + 4
+            );
+            rec_hdr[0..4].copy_from_slice(&buf[page_offs..page_offs + 4]);
+        } else {
+            // we're continuing a record, possibly from previous page.
+            let page_offs = offs % XLOG_BLCKSZ;
+            let pageleft = XLOG_BLCKSZ - page_offs;
+
+            // read the rest of the record, or as much as fits on this page.
+            let n = min(contlen, pageleft);
+            trace!(
+                "offs=0x{:x}, record continuation, pageleft={}, contlen={}",
+                offs,
+                pageleft,
+                contlen
+            );
+            // fill rec_hdr header up to (but not including) xl_crc field
+            trace!(
+                "  rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
+                rec_offs,
+                XLOG_RECORD_CRC_OFFS,
+                XLOG_SIZE_OF_XLOG_RECORD
+            );
+            if rec_offs < XLOG_RECORD_CRC_OFFS {
+                let len = min(XLOG_RECORD_CRC_OFFS - rec_offs, n);
+                trace!(
+                    "  reading rec_hdr[{}..{}] <-- [0x{:x}; 0x{:x})",
+                    rec_offs,
+                    rec_offs + len,
+                    page_offs,
+                    page_offs + len
+                );
+                rec_hdr[rec_offs..rec_offs + len].copy_from_slice(&buf[page_offs..page_offs + len]);
+            }
+            if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
+                let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
+                // All records are aligned on 8-byte boundary, so their 8-byte frames
+                // cannot be split between pages. As xl_crc is the last field,
+                // its content is always on the same page.
+                const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
+                // We should always start reading aligned records even in incorrect WALs so if
+                // the condition is false it is likely a bug. However, it is localized somewhere
+                // in this function, hence we do not crash and just report failure instead.
+                ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
+                xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
+                trace!(
+                    "  reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
+                    crc_offs,
+                    crc_offs + 4,
+                    xl_crc
+                );
+                crc = crc32c_append(0, &buf[crc_offs + 4..page_offs + n]);
+                trace!(
+                    "  initializing crc: [0x{:x}; 0x{:x}); crc = 0x{:x}",
+                    crc_offs + 4,
+                    page_offs + n,
+                    crc
+                );
+            } else if rec_offs > XLOG_RECORD_CRC_OFFS {
+                // As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
+                ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
+                let old_crc = crc;
+                crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
+                trace!(
+                    "  appending to crc: [0x{:x}; 0x{:x}); 0x{:x} --> 0x{:x}",
+                    page_offs,
+                    page_offs + n,
+                    old_crc,
+                    crc
+                );
+            } else {
+                // Correct because of the way conditions are written above.
+                assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
+                // If `skipping_first_contrecord == true`, we may be reading from the middle of a record
+                // that started in the previous segment. Hence there is no point in validating the header.
+                if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
+                    info!(
+                        "Curiously corrupted WAL: a record stops inside the header; \
+                             offs=0x{:x}, record continuation, pageleft={}, contlen={}",
+                        offs, pageleft, contlen
+                    );
+                    break;
+                }
+                // Do nothing: we are still reading the header. It is folded into the CRC at the end of the record.
+            }
+            rec_offs += n;
+            offs += n;
+            contlen -= n;
+
+            if contlen == 0 {
+                trace!("  record completed at 0x{:x}", offs);
+                crc = crc32c_append(crc, &rec_hdr);
+                offs = (offs + 7) & !7; // pad to an 8-byte boundary
+                trace!(
+                    "  padded offs to 0x{:x}, crc is {:x}, expected crc is {:x}",
+                    offs,
+                    crc,
+                    xl_crc
+                );
+                if skipping_first_contrecord {
+                    // do nothing, the flag will go down on next iteration when we're reading new record
+                    trace!("  first conrecord has been just completed");
+                } else if crc == xl_crc {
+                    // record is valid, advance the result to its end (with
+                    // alignment to the next record taken into account)
+                    trace!(
+                        "  updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
+                        last_valid_rec_pos,
+                        offs
+                    );
+                    last_valid_rec_pos = offs;
+                } else {
+                    info!(
+                        "CRC mismatch {} vs {} at {}",
+                        crc, xl_crc, last_valid_rec_pos
+                    );
+                    break;
                }
            }
        }
    }
+    trace!("last_valid_rec_pos=0x{:x}", last_valid_rec_pos);
+    Ok(last_valid_rec_pos as u32)
 }

-// Open .partial or full WAL segment file, if present.
-fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
-    let mut partial_path = seg_file_path.to_owned();
-    partial_path.set_extension("partial");
-    match File::open(partial_path) {
-        Ok(file) => Ok(Some(file)),
-        Err(e) => match e.kind() {
-            ErrorKind::NotFound => {
-                // .partial not found, try full
-                match File::open(seg_file_path) {
-                    Ok(file) => Ok(Some(file)),
-                    Err(e) => match e.kind() {
-                        ErrorKind::NotFound => Ok(None),
-                        _ => Err(e.into()),
-                    },
-                }
-            }
-            _ => Err(e.into()),
-        },
+///
+/// Scan a directory that contains PostgreSQL WAL files, for the end of WAL.
+/// If precise, returns end LSN (next insertion point, basically);
+/// otherwise, start of the last segment.
+/// Returns (0, 0) if there is no WAL.
+///
+pub fn find_end_of_wal(
+    data_dir: &Path,
+    wal_seg_size: usize,
+    precise: bool,
+    start_lsn: Lsn, // start reading WAL at this point or later
+) -> anyhow::Result<(XLogRecPtr, TimeLineID)> {
+    let mut high_segno: XLogSegNo = 0;
+    let mut high_tli: TimeLineID = 0;
+    let mut high_ispartial = false;
+
+    for entry in fs::read_dir(data_dir).unwrap().flatten() {
+        let ispartial: bool;
+        let entry_name = entry.file_name();
+        let fname = entry_name.to_str().unwrap();
+        /*
+         * Check if the filename looks like an xlog file, or a .partial file.
+         */
+        if IsXLogFileName(fname) {
+            ispartial = false;
+        } else if IsPartialXLogFileName(fname) {
+            ispartial = true;
+        } else {
+            continue;
+        }
+        let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
+        if !ispartial && entry.metadata().unwrap().len() != wal_seg_size as u64 {
+            continue;
+        }
+        if segno > high_segno
+            || (segno == high_segno && tli > high_tli)
+            || (segno == high_segno && tli == high_tli && high_ispartial && !ispartial)
+        {
+            high_segno = segno;
+            high_tli = tli;
+            high_ispartial = ispartial;
+        }
    }
+    if high_segno > 0 {
+        let mut high_offs = 0;
+        /*
+         * Move the starting pointer to the start of the next segment, if the
+         * highest one we saw was completed.
+         */
+        if !high_ispartial {
+            high_segno += 1;
+        } else if precise {
+            /* otherwise locate last record in last partial segment */
+            if start_lsn.segment_number(wal_seg_size) > high_segno {
+                bail!(
+                    "provided start_lsn {:?} is beyond highest segno {:?} available",
+                    start_lsn,
+                    high_segno,
+                );
+            }
+            let start_offset = if start_lsn.segment_number(wal_seg_size) == high_segno {
+                start_lsn.segment_offset(wal_seg_size)
+            } else {
+                0
+            };
+            high_offs = find_end_of_wal_segment(
+                data_dir,
+                high_segno,
+                high_tli,
+                wal_seg_size,
+                start_offset,
+            )?;
+        }
+        let high_ptr = XLogSegNoOffsetToRecPtr(high_segno, high_offs, wal_seg_size);
+        return Ok((high_ptr, high_tli));
+    }
+    Ok((0, 0))
 }

 pub fn main() {
    let mut data_dir = PathBuf::new();
    data_dir.push(".");
-    let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
-    println!("wal_end={:?}", wal_end);
+    let wal_seg_size = 16 * 1024 * 1024;
+    let (wal_end, tli) = find_end_of_wal(&data_dir, wal_seg_size, true, Lsn(0)).unwrap();
+    println!(
+        "wal_end={:>08X}{:>08X}, tli={}",
+        (wal_end >> 32) as u32,
+        wal_end as u32,
+        tli
+    );
 }

 impl XLogRecord {
@@ -311,9 +561,9 @@ impl CheckPoint {
 // We need this segment to start compute node.
 //
 pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, SerializeError> {
-    let mut seg_buf = BytesMut::with_capacity(WAL_SEGMENT_SIZE as usize);
+    let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize);

-    let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
+    let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, pg_constants::WAL_SEGMENT_SIZE);
    let hdr = XLogLongPageHeaderData {
        std: {
            XLogPageHeaderData {
@@ -326,7 +576,7 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
            }
        },
        xlp_sysid: system_id,
-        xlp_seg_size: WAL_SEGMENT_SIZE as u32,
+        xlp_seg_size: pg_constants::WAL_SEGMENT_SIZE as u32,
        xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
    };

@@ -334,182 +584,62 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
    seg_buf.extend_from_slice(&hdr_bytes);

    //zero out the rest of the file
-    seg_buf.resize(WAL_SEGMENT_SIZE, 0);
+    seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0);
    Ok(seg_buf.freeze())
 }

-#[repr(C)]
-#[derive(Serialize)]
-struct XlLogicalMessage {
-    db_id: Oid,
-    transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
-    prefix_size: uint64,
-    message_size: uint64,
-}
-
-impl XlLogicalMessage {
-    pub fn encode(&self) -> Bytes {
-        use utils::bin_ser::LeSer;
-        self.ser().unwrap().into()
-    }
-}
-
-/// Create new WAL record for non-transactional logical message.
-/// Used for creating artificial WAL for tests, as LogicalMessage
-/// record is basically no-op.
-///
-/// NOTE: This leaves the xl_prev field zero. The safekeeper and
-/// pageserver tolerate that, but PostgreSQL does not.
-pub fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
-    let mut prefix_bytes: Vec<u8> = Vec::with_capacity(prefix.len() + 1);
-    prefix_bytes.write_all(prefix.as_bytes()).unwrap();
-    prefix_bytes.push(0);
-
-    let message_bytes = message.as_bytes();
-
-    let logical_message = XlLogicalMessage {
-        db_id: 0,
-        transactional: 0,
-        prefix_size: prefix_bytes.len() as u64,
-        message_size: message_bytes.len() as u64,
-    };
-
-    let mainrdata = logical_message.encode();
-    let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
-    // only short mainrdata is supported for now
-    assert!(mainrdata_len <= 255);
-    let mainrdata_len = mainrdata_len as u8;
-
-    let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
-    data.extend_from_slice(&mainrdata);
-    data.extend_from_slice(&prefix_bytes);
-    data.extend_from_slice(message_bytes);
-
-    let total_len = XLOG_SIZE_OF_XLOG_RECORD + data.len();
-
-    let mut header = XLogRecord {
-        xl_tot_len: total_len as u32,
-        xl_xid: 0,
-        xl_prev: 0,
-        xl_info: 0,
-        xl_rmid: 21,
-        __bindgen_padding_0: [0u8; 2usize],
-        xl_crc: 0, // crc will be calculated later
-    };
-
-    let header_bytes = header.encode().expect("failed to encode header");
-    let crc = crc32c_append(0, &data);
-    let crc = crc32c_append(crc, &header_bytes[0..XLOG_RECORD_CRC_OFFS]);
-    header.xl_crc = crc;
-
-    let mut wal: Vec<u8> = Vec::new();
-    wal.extend_from_slice(&header.encode().expect("failed to encode header"));
-    wal.extend_from_slice(&data);
-
-    // WAL start position must be aligned at 8 bytes,
-    // this will add padding for the next WAL record.
-    const PADDING: usize = 8;
-    let padding_rem = wal.len() % PADDING;
-    if padding_rem != 0 {
-        wal.resize(wal.len() + PADDING - padding_rem, 0);
-    }
-
-    wal
-}
-
 #[cfg(test)]
 mod tests {
-    use super::super::PG_MAJORVERSION;
    use super::*;
    use regex::Regex;
-    use std::cmp::min;
-    use std::fs;
    use std::{env, str::FromStr};
-    use utils::const_assert;

    fn init_logging() {
-        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(
-            format!("wal_craft=info,postgres_ffi::{PG_MAJORVERSION}::xlog_utils=trace"),
-        ))
+        let _ = env_logger::Builder::from_env( // result deliberately discarded; presumably because every test calls this and only the first init can succeed
+            env_logger::Env::default()
+                .default_filter_or("wal_craft=info,postgres_ffi::xlog_utils=trace"), // fallback filter: info for wal_craft, trace for xlog_utils
+        )
        .is_test(true)
        .try_init();
    }

-    fn test_end_of_wal<C: wal_craft::Crafter>(test_name: &str) {
+    fn test_end_of_wal<C: wal_craft::Crafter>(
+        test_name: &str,
+        expected_end_of_wal_non_partial: Lsn,
+        last_segment: &str,
+    ) {
        use wal_craft::*;
-
-        // Craft some WAL
+        // 1. Generate some WAL
        let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("..")
            .join("..");
        let cfg = Conf {
-            pg_distrib_dir: top_path.join(format!("pg_install/{PG_MAJORVERSION}")),
-            datadir: top_path.join(format!("test_output/{}-{PG_MAJORVERSION}", test_name)),
+            pg_distrib_dir: top_path.join("tmp_install"),
+            datadir: top_path.join(format!("test_output/{}", test_name)),
        };
        if cfg.datadir.exists() {
            fs::remove_dir_all(&cfg.datadir).unwrap();
        }
        cfg.initdb().unwrap();
        let srv = cfg.start_server().unwrap();
-        let (intermediate_lsns, expected_end_of_wal_partial) =
-            C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap();
-        let intermediate_lsns: Vec<Lsn> = intermediate_lsns
-            .iter()
-            .map(|&lsn| u64::from(lsn).into())
-            .collect();
-        let expected_end_of_wal: Lsn = u64::from(expected_end_of_wal_partial).into();
+        let expected_wal_end: Lsn =
+            u64::from(C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
        srv.kill();

-        // Check find_end_of_wal on the initial WAL
-        let last_segment = cfg
-            .wal_dir()
-            .read_dir()
-            .unwrap()
-            .map(|f| f.unwrap().file_name().into_string().unwrap())
-            .filter(|fname| IsXLogFileName(fname))
-            .max()
-            .unwrap();
-        check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal);
-        for start_lsn in intermediate_lsns
-            .iter()
-            .chain(std::iter::once(&expected_end_of_wal))
-        {
-            // Erase all WAL before `start_lsn` to ensure it's not used by `find_end_of_wal`.
-            // We assume that `start_lsn` is non-decreasing.
-            info!(
-                "Checking with start_lsn={}, erasing WAL before it",
-                start_lsn
-            );
-            for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() {
-                let fname = file.file_name().into_string().unwrap();
-                if !IsXLogFileName(&fname) {
-                    continue;
-                }
-                let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
-                let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
-                if seg_start_lsn > u64::from(*start_lsn) {
-                    continue;
-                }
-                let mut f = File::options().write(true).open(file.path()).unwrap();
-                const ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];
-                f.write_all(
-                    &ZEROS[0..min(
-                        WAL_SEGMENT_SIZE,
-                        (u64::from(*start_lsn) - seg_start_lsn) as usize,
-                    )],
-                )
-                .unwrap();
-            }
-            check_end_of_wal(&cfg, &last_segment, *start_lsn, expected_end_of_wal);
-        }
-    }
+        // 2. Pick WAL generated by initdb
+        let wal_dir = cfg.datadir.join("pg_wal");
+        let wal_seg_size = 16 * 1024 * 1024;

-    fn check_pg_waldump_end_of_wal(
-        cfg: &wal_craft::Conf,
-        last_segment: &str,
-        expected_end_of_wal: Lsn,
-    ) {
-        // Get the actual end of WAL by pg_waldump
+        // 3. Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
+        let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
+        let wal_end = Lsn(wal_end);
+        info!(
+            "find_end_of_wal returned (wal_end={}, tli={})",
+            wal_end, tli
+        );
+        assert_eq!(wal_end, expected_end_of_wal_non_partial);
+
+        // 4. Get the actual end of WAL by pg_waldump
        let waldump_output = cfg
            .pg_waldump("000000010000000000000001", last_segment)
            .unwrap()
@@ -528,50 +658,33 @@ mod tests {
        let waldump_wal_end = Lsn::from_str(caps.get(1).unwrap().as_str()).unwrap();
        info!(
            "waldump erred on {}, expected wal end at {}",
-            waldump_wal_end, expected_end_of_wal
+            waldump_wal_end, expected_wal_end
        );
-        assert_eq!(waldump_wal_end, expected_end_of_wal);
-    }
+        assert_eq!(waldump_wal_end, expected_wal_end);

-    fn check_end_of_wal(
-        cfg: &wal_craft::Conf,
-        last_segment: &str,
-        start_lsn: Lsn,
-        expected_end_of_wal: Lsn,
-    ) {
-        // Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
-        // let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
-        // info!(
-        //     "find_end_of_wal returned wal_end={} with non-partial WAL segment",
-        //     wal_end
-        // );
-        // assert_eq!(wal_end, expected_end_of_wal_non_partial);
-
-        // Rename file to partial to actually find last valid lsn, then rename it back.
+        // 5. Rename file to partial to actually find last valid lsn
        fs::rename(
-            cfg.wal_dir().join(&last_segment),
-            cfg.wal_dir().join(format!("{}.partial", last_segment)),
+            wal_dir.join(last_segment),
+            wal_dir.join(format!("{}.partial", last_segment)),
        )
        .unwrap();
-        let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
+        let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
+        let wal_end = Lsn(wal_end);
        info!(
-            "find_end_of_wal returned wal_end={} with partial WAL segment",
-            wal_end
+            "find_end_of_wal returned (wal_end={}, tli={})",
+            wal_end, tli
        );
-        assert_eq!(wal_end, expected_end_of_wal);
-        fs::rename(
-            cfg.wal_dir().join(format!("{}.partial", last_segment)),
-            cfg.wal_dir().join(last_segment),
-        )
-        .unwrap();
+        assert_eq!(wal_end, waldump_wal_end);
    }

-    const_assert!(WAL_SEGMENT_SIZE == 16 * 1024 * 1024);
-
    #[test]
    pub fn test_find_end_of_wal_simple() {
        init_logging();
-        test_end_of_wal::<wal_craft::Simple>("test_find_end_of_wal_simple");
+        test_end_of_wal::<wal_craft::Simple>(
+            "test_find_end_of_wal_simple", // test_name: also names the datadir under test_output/
+            "0/2000000".parse::<Lsn>().unwrap(), // expected_end_of_wal_non_partial: result expected before the segment is renamed to .partial
+            "000000010000000000000001", // last_segment: the file handed to pg_waldump and then renamed to .partial
+        );
    }

    #[test]
@@ -579,14 +692,19 @@ mod tests {
        init_logging();
        test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
            "test_find_end_of_wal_crossing_segment_followed_by_small_one",
+            "0/3000000".parse::<Lsn>().unwrap(),
+            "000000010000000000000002",
        );
    }

    #[test]
+    #[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
    pub fn test_find_end_of_wal_last_crossing_segment() {
        init_logging();
        test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
            "test_find_end_of_wal_last_crossing_segment",
+            "0/3000000".parse::<Lsn>().unwrap(), // expected_end_of_wal_non_partial: result expected before the segment is renamed to .partial
+            "000000010000000000000002", // last_segment: the file handed to pg_waldump and then renamed to .partial
        );
    }

@@ -619,15 +737,4 @@ mod tests {
        checkpoint.update_next_xid(1024);
        assert_eq!(checkpoint.nextXid.value, 2048);
    }
-
-    #[test]
-    pub fn test_encode_logical_message() {
-        let expected = [
-            64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255,
-            38, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114,
-            101, 102, 105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
-        ];
-        let actual = encode_logical_message("prefix", "message");
-        assert_eq!(expected, actual[..]);
-    }
 }
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -10,7 +10,7 @@ anyhow = "1.0"
 clap = "3.0"
 env_logger = "0.9"
 log = "0.4"
-once_cell = "1.13.0"
+once_cell = "1.8.0"
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 postgres_ffi = { path = "../" }
 tempfile = "3.2"
--- a/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs
+++ b/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs
@@ -37,7 +37,7 @@ fn main() -> Result<()> {
                    Arg::new("pg-distrib-dir")
                        .long("pg-distrib-dir")
                        .takes_value(true)
-                        .help("Directory with Postgres distribution (bin and lib directories, e.g. pg_install/v14)")
+                        .help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
                        .default_value("/usr/local")
                )
        )
@@ -55,7 +55,7 @@ fn main() -> Result<()> {
        .get_matches();

    let wal_craft = |arg_matches: &ArgMatches, client| {
-        let (intermediate_lsns, end_of_wal_lsn) = match arg_matches.value_of("type").unwrap() {
+        let lsn = match arg_matches.value_of("type").unwrap() {
            Simple::NAME => Simple::craft(client)?,
            LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
            LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
@@ -67,10 +67,7 @@ fn main() -> Result<()> {
            LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
            a => panic!("Unknown --type argument: {}", a),
        };
-        for lsn in intermediate_lsns {
-            println!("intermediate_lsn = {}", lsn);
-        }
-        println!("end_of_wal = {}", end_of_wal_lsn);
+        println!("end_of_wal = {}", lsn);
        Ok(())
    };

--- a/libs/postgres_ffi/wal_craft/src/lib.rs
+++ b/libs/postgres_ffi/wal_craft/src/lib.rs
@@ -4,8 +4,9 @@ use log::*;
 use once_cell::sync::Lazy;
 use postgres::types::PgLsn;
 use postgres::Client;
-use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
-use postgres_ffi::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};
+use postgres_ffi::xlog_utils::{
+    XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
+};
 use std::cmp::Ordering;
 use std::fs;
 use std::path::{Path, PathBuf};
@@ -44,10 +45,6 @@ impl Conf {
        self.pg_distrib_dir.join("lib")
    }

-    pub fn wal_dir(&self) -> PathBuf {
-        self.datadir.join("pg_wal")
-    }
-
    fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
        let path = self.pg_bin_dir().join(command);
        ensure!(path.exists(), "Command {:?} does not exist", path);
@@ -214,7 +211,7 @@ pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result
        "Unexpected wal_segment_size unit"
    );
    ensure!(
-        wal_segment_size.get::<_, i64>("setting") == WAL_SEGMENT_SIZE as i64,
+        wal_segment_size.get::<_, i64>("setting") == 16 * 1024 * 1024,
        "Unexpected wal_segment_size in bytes"
    );

@@ -224,24 +221,20 @@ pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result
 pub trait Crafter {
    const NAME: &'static str;

-    /// Generates WAL using the client `client`. Returns a pair of:
-    /// * A vector of some valid "interesting" intermediate LSNs which one may start reading from.
-    ///   May include or exclude Lsn(0) and the end-of-wal.
-    /// * The expected end-of-wal LSN.
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)>;
+    /// Generates WAL using the client `client`. Returns the expected end-of-wal LSN.
+    fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn>;
 }

 fn craft_internal<C: postgres::GenericClient>(
    client: &mut C,
-    f: impl Fn(&mut C, PgLsn) -> Result<(Vec<PgLsn>, Option<PgLsn>)>,
-) -> Result<(Vec<PgLsn>, PgLsn)> {
+    f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
+) -> Result<PgLsn> {
    ensure_server_config(client)?;

    let initial_lsn = client.pg_current_wal_insert_lsn()?;
    info!("LSN initial = {}", initial_lsn);

-    let (mut intermediate_lsns, last_lsn) = f(client, initial_lsn)?;
-    let last_lsn = match last_lsn {
+    let last_lsn = match f(client, initial_lsn)? {
        None => client.pg_current_wal_insert_lsn()?,
        Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
            Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
@@ -249,9 +242,6 @@ fn craft_internal<C: postgres::GenericClient>(
            Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
        },
    };
-    if !intermediate_lsns.starts_with(&[initial_lsn]) {
-        intermediate_lsns.insert(0, initial_lsn);
-    }

    // Some records may be not flushed, e.g. non-transactional logical messages.
    client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
@@ -260,16 +250,16 @@ fn craft_internal<C: postgres::GenericClient>(
        Ordering::Equal => {}
        Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
    }
-    Ok((intermediate_lsns, last_lsn))
+    Ok(last_lsn)
 }

 pub struct Simple;
 impl Crafter for Simple {
    const NAME: &'static str = "simple";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> { // returns the expected end-of-WAL LSN
        craft_internal(client, |client, _| {
            client.execute("CREATE table t(x int)", &[])?;
-            Ok((Vec::new(), None))
+            Ok(None) // no explicit end LSN: craft_internal then uses the current WAL insert LSN
        })
    }
 }
@@ -277,13 +267,12 @@ impl Crafter for Simple {
 pub struct LastWalRecordXlogSwitch;
 impl Crafter for LastWalRecordXlogSwitch {
    const NAME: &'static str = "last_wal_record_xlog_switch";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
        // Do not use generate_internal because here we end up with flush_lsn exactly on
        // the segment boundary and insert_lsn after the initial page header, which is unusual.
        ensure_server_config(client)?;

        client.execute("CREATE table t(x int)", &[])?;
-        let before_xlog_switch = client.pg_current_wal_insert_lsn()?;
        let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
        let next_segment = PgLsn::from(0x0200_0000);
        ensure!(
@@ -292,14 +281,14 @@ impl Crafter for LastWalRecordXlogSwitch {
            after_xlog_switch,
            next_segment
        );
-        Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
+        Ok(next_segment)
    }
 }

 pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
 impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
    const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
        // Do not use generate_internal because here we end up with flush_lsn exactly on
        // the segment boundary and insert_lsn after the initial page header, which is unusual.
        ensure_server_config(client)?;
@@ -345,7 +334,6 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
        );

        // Emit the XLOG_SWITCH
-        let before_xlog_switch = client.pg_current_wal_insert_lsn()?;
        let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
        let next_segment = PgLsn::from(0x0200_0000);
        ensure!(
@@ -359,14 +347,14 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
            "XLOG_SWITCH message ended not on page boundary: {}",
            after_xlog_switch
        );
-        Ok((vec![before_xlog_switch, after_xlog_switch], next_segment))
+        Ok(next_segment)
    }
 }

 fn craft_single_logical_message(
    client: &mut impl postgres::GenericClient,
    transactional: bool,
-) -> Result<(Vec<PgLsn>, PgLsn)> {
+) -> Result<PgLsn> {
    craft_internal(client, |client, initial_lsn| {
        ensure!(
            initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
@@ -398,9 +386,9 @@ fn craft_single_logical_message(
                message_lsn < after_message_lsn,
                "No record found after the emitted message"
            );
-            Ok((vec![message_lsn], Some(after_message_lsn)))
+            Ok(Some(after_message_lsn))
        } else {
-            Ok((Vec::new(), Some(message_lsn)))
+            Ok(Some(message_lsn))
        }
    })
 }
@@ -408,7 +396,7 @@ fn craft_single_logical_message(
 pub struct WalRecordCrossingSegmentFollowedBySmallOne;
 impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
    const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> { // delegates with transactional = true
        craft_single_logical_message(client, true)
    }
 }
@@ -416,7 +404,7 @@ impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
 pub struct LastWalRecordCrossingSegment;
 impl Crafter for LastWalRecordCrossingSegment {
    const NAME: &'static str = "last_wal_record_crossing_segment";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> { // delegates with transactional = false
        craft_single_logical_message(client, false)
    }
 }
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2021"
 anyhow = { version = "1.0", features = ["backtrace"] }
 async-trait = "0.1"
 metrics = { version = "0.1", path = "../metrics" }
-once_cell = "1.13.0"
+once_cell = "1.8.0"
 rusoto_core = "0.48"
 rusoto_s3 = "0.48"
 serde = { version = "1.0", features = ["derive"] }
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -12,12 +12,10 @@ use std::{
    borrow::Cow,
    collections::HashMap,
    ffi::OsStr,
-    fmt::{Debug, Display},
+    fmt::Debug,
    num::{NonZeroU32, NonZeroUsize},
-    ops::Deref,
    path::{Path, PathBuf},
    pin::Pin,
-    sync::Arc,
 };

 use anyhow::{bail, Context};
@@ -26,7 +24,10 @@ use tokio::io;
 use toml_edit::Item;
 use tracing::info;

-pub use self::{local_fs::LocalFs, s3_bucket::S3Bucket};
+pub use self::{
+    local_fs::LocalFs,
+    s3_bucket::{S3Bucket, S3ObjectKey},
+};

 /// How many different timelines can be processed simultaneously when synchronizing layers with the remote storage.
 /// During regular work, pageserver produces one layer file per timeline checkpoint, with bursts of concurrency
@@ -41,102 +42,60 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
 /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
 pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;

-const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
-
-#[derive(Clone, PartialEq, Eq)]
-pub struct RemoteObjectId(String);
-
-///
-/// A key that refers to an object in remote storage. It works much like a Path,
-/// but it's a separate datatype so that you don't accidentally mix local paths
-/// and remote keys.
-///
-impl RemoteObjectId {
+pub trait RemoteObjectName {
    // Needed to retrieve last component for RemoteObjectId.
    // In other words a file name
-    /// Turn a/b/c or a/b/c/ into c
-    pub fn object_name(&self) -> Option<&str> {
-        // corner case, char::to_string is not const, thats why this is more verbose than it needs to be
-        // see https://github.com/rust-lang/rust/issues/88674
-        if self.0.len() == 1 && self.0.chars().next().unwrap() == REMOTE_STORAGE_PREFIX_SEPARATOR {
-            return None;
-        }
-
-        if self.0.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
-            self.0.rsplit(REMOTE_STORAGE_PREFIX_SEPARATOR).nth(1)
-        } else {
-            self.0
-                .rsplit_once(REMOTE_STORAGE_PREFIX_SEPARATOR)
-                .map(|(_, last)| last)
-        }
-    }
-}
-
-impl Debug for RemoteObjectId {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        Debug::fmt(&self.0, fmt)
-    }
-}
-
-impl Display for RemoteObjectId {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        Display::fmt(&self.0, fmt)
-    }
+    fn object_name(&self) -> Option<&str>;
 }

 /// Storage (potentially remote) API to manage its state.
 /// This storage tries to be unaware of any layered repository context,
 /// providing basic CRUD operations for storage files.
 #[async_trait::async_trait]
-pub trait RemoteStorage: Send + Sync + 'static {
+pub trait RemoteStorage: Send + Sync {
+    /// A way to uniquely reference a file in the remote storage.
+    type RemoteObjectId: RemoteObjectName;
+
    /// Attempts to derive the storage path out of the local path, if the latter is correct.
-    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId>;
+    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;

    /// Gets the download path of the given storage file.
-    fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf>;
+    fn local_path(&self, remote_object_id: &Self::RemoteObjectId) -> anyhow::Result<PathBuf>;

    /// Lists all items the storage has right now.
-    async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>>;
+    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;

    /// Lists all top level subdirectories for a given prefix
-    /// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
-    /// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
-    /// so this method doesnt need to.
    async fn list_prefixes(
        &self,
-        prefix: Option<&RemoteObjectId>,
-    ) -> anyhow::Result<Vec<RemoteObjectId>>;
+        prefix: Option<Self::RemoteObjectId>,
+    ) -> anyhow::Result<Vec<Self::RemoteObjectId>>;

    /// Streams the local file contents into remote into the remote storage entry.
    async fn upload(
        &self,
-        from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        // S3 PUT request requires the content length to be specified,
        // otherwise it starts to fail with the concurrent connection count increasing.
        from_size_bytes: usize,
-        to: &RemoteObjectId,
+        to: &Self::RemoteObjectId,
        metadata: Option<StorageMetadata>,
    ) -> anyhow::Result<()>;

    /// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
    /// Returns the metadata, if any was stored with the file previously.
-    async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError>;
+    async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError>;

    /// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
    /// Returns the metadata, if any was stored with the file previously.
    async fn download_byte_range(
        &self,
-        from: &RemoteObjectId,
+        from: &Self::RemoteObjectId,
        start_inclusive: u64,
        end_exclusive: Option<u64>,
    ) -> Result<Download, DownloadError>;

-    async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()>;
-
-    /// Downcast to LocalFs implementation. For tests.
-    fn as_local(&self) -> Option<&LocalFs> {
-        None
-    }
+    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
 }

 pub struct Download {
@@ -179,91 +138,26 @@ impl std::error::Error for DownloadError {}

 /// Every storage, currently supported.
 /// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
-#[derive(Clone)]
-pub struct GenericRemoteStorage(Arc<dyn RemoteStorage>);
-
-impl Deref for GenericRemoteStorage {
-    type Target = dyn RemoteStorage;
-
-    fn deref(&self) -> &Self::Target {
-        self.0.as_ref()
-    }
+pub enum GenericRemoteStorage { // one variant per RemoteStorageKind handled by GenericRemoteStorage::new
+    Local(LocalFs), // built when the config selects RemoteStorageKind::LocalFs
+    S3(S3Bucket), // built when the config selects RemoteStorageKind::AwsS3
 }

 impl GenericRemoteStorage {
-    pub fn new(storage: impl RemoteStorage) -> Self {
-        Self(Arc::new(storage))
-    }
-
-    pub fn from_config(
+    pub fn new(
        working_directory: PathBuf,
        storage_config: &RemoteStorageConfig,
-    ) -> anyhow::Result<GenericRemoteStorage> {
-        Ok(match &storage_config.storage {
+    ) -> anyhow::Result<Self> {
+        match &storage_config.storage {
            RemoteStorageKind::LocalFs(root) => {
                info!("Using fs root '{}' as a remote storage", root.display());
-                GenericRemoteStorage::new(LocalFs::new(root.clone(), working_directory)?)
+                LocalFs::new(root.clone(), working_directory).map(GenericRemoteStorage::Local)
            }
            RemoteStorageKind::AwsS3(s3_config) => {
                info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
-                      s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
-                GenericRemoteStorage::new(S3Bucket::new(s3_config, working_directory)?)
+                    s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
+                S3Bucket::new(s3_config, working_directory).map(GenericRemoteStorage::S3)
            }
-        })
-    }
-
-    /// Takes storage object contents and its size and uploads to remote storage,
-    /// mapping `from_path` to the corresponding remote object id in the storage.
-    ///
-    /// The storage object does not have to be present on the `from_path`,
-    /// this path is used for the remote object id conversion only.
-    pub async fn upload_storage_object(
-        &self,
-        from: Box<dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static>,
-        from_size_bytes: usize,
-        from_path: &Path,
-    ) -> anyhow::Result<()> {
-        let target_storage_path = self.remote_object_id(from_path).with_context(|| {
-            format!(
-                "Failed to get the storage path for source local path '{}'",
-                from_path.display()
-            )
-        })?;
-
-        self.upload(from, from_size_bytes, &target_storage_path, None)
-            .await
-            .with_context(|| {
-                format!(
-                    "Failed to upload from '{}' to storage path '{:?}'",
-                    from_path.display(),
-                    target_storage_path
-                )
-            })
-    }
-
-    /// Downloads the storage object into the `to_path` provided.
-    /// `byte_range` could be specified to dowload only a part of the file, if needed.
-    pub async fn download_storage_object(
-        &self,
-        byte_range: Option<(u64, Option<u64>)>,
-        to_path: &Path,
-    ) -> Result<Download, DownloadError> {
-        let remote_object_path = self
-            .remote_object_id(to_path)
-            .with_context(|| {
-                format!(
-                    "Failed to get the storage path for target local path '{}'",
-                    to_path.display()
-                )
-            })
-            .map_err(DownloadError::BadInput)?;
-
-        match byte_range {
-            Some((start, end)) => {
-                self.download_byte_range(&remote_object_path, start, end)
-                    .await
-            }
-            None => self.download(&remote_object_path).await,
        }
    }
 }
@@ -344,8 +238,6 @@ impl Debug for S3Config {
    }
 }

-/// Adds a suffix to the file(directory) name, either appending the suffux to the end of its extension,
-/// or if there's no extension, creates one and puts a suffix there.
 pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str) -> PathBuf {
    let new_extension = match original_path
        .as_ref()
@@ -470,29 +362,5 @@ mod tests {
            &path_with_suffix_extension(&p, ".temp").to_string_lossy(),
            "/foo/bar.baz..temp"
        );
-        let p = PathBuf::from("/foo/bar/dir/");
-        assert_eq!(
-            &path_with_suffix_extension(&p, ".temp").to_string_lossy(),
-            "/foo/bar/dir..temp"
-        );
-    }
-
-    #[test]
-    fn object_name() {
-        let k = RemoteObjectId("a/b/c".to_owned());
-        assert_eq!(k.object_name(), Some("c"));
-
-        let k = RemoteObjectId("a/b/c/".to_owned());
-        assert_eq!(k.object_name(), Some("c"));
-
-        let k = RemoteObjectId("a/".to_owned());
-        assert_eq!(k.object_name(), Some("a"));
-
-        // XXX is it impossible to have an empty key?
-        let k = RemoteObjectId("".to_owned());
-        assert_eq!(k.object_name(), None);
-
-        let k = RemoteObjectId("/".to_owned());
-        assert_eq!(k.object_name(), None);
    }
 }
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -5,6 +5,7 @@
 //! volume is mounted to the local FS.

 use std::{
+    borrow::Cow,
    future::Future,
    path::{Path, PathBuf},
    pin::Pin,
@@ -17,19 +18,14 @@ use tokio::{
 };
 use tracing::*;

-use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectId};
+use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectName};

 use super::{strip_path_prefix, RemoteStorage, StorageMetadata};

-const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
-
-/// Convert a Path in the remote storage into a RemoteObjectId
-fn remote_object_id_from_path(path: &Path) -> anyhow::Result<RemoteObjectId> {
-    Ok(RemoteObjectId(
-        path.to_str()
-            .ok_or_else(|| anyhow::anyhow!("unexpected characters found in path"))?
-            .to_string(),
-    ))
+impl RemoteObjectName for PathBuf {
+    fn object_name(&self) -> Option<&str> {
+        self.file_stem().and_then(|n| n.to_str()) // file_stem() omits the final extension; None when there is no file name or it is not valid UTF-8
+    }
 }

 pub struct LocalFs {
@@ -54,17 +50,11 @@ impl LocalFs {
        })
    }

-    ///
-    /// Get the absolute path in the local filesystem to given remote object.
-    ///
-    /// This is public so that it can be used in tests. Should not be used elsewhere.
-    ///
-    pub fn resolve_in_storage(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
-        let path = PathBuf::from(&remote_object_id.0);
+    fn resolve_in_storage(&self, path: &Path) -> anyhow::Result<PathBuf> {
        if path.is_relative() {
            Ok(self.storage_root.join(path))
        } else if path.starts_with(&self.storage_root) {
-            Ok(path)
+            Ok(path.to_path_buf())
        } else {
            bail!(
                "Path '{}' does not belong to the current storage",
@@ -102,42 +92,41 @@ impl LocalFs {

 #[async_trait::async_trait]
 impl RemoteStorage for LocalFs {
-    /// Convert a "local" path into a "remote path"
-    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
-        let path = self.storage_root.join(
+    type RemoteObjectId = PathBuf;
+
+    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId> {
+        Ok(self.storage_root.join(
            strip_path_prefix(&self.working_directory, local_path)
                .context("local path does not belong to this storage")?,
-        );
-        remote_object_id_from_path(&path)
+        ))
    }

-    fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
-        let storage_path = PathBuf::from(&remote_object_id.0);
-        let relative_path = strip_path_prefix(&self.storage_root, &storage_path)
+    fn local_path(&self, storage_path: &Self::RemoteObjectId) -> anyhow::Result<PathBuf> {
+        let relative_path = strip_path_prefix(&self.storage_root, storage_path)
            .context("local path does not belong to this storage")?;
        Ok(self.working_directory.join(relative_path))
    }

-    async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
+    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
        get_all_files(&self.storage_root, true).await
    }

    async fn list_prefixes(
        &self,
-        prefix: Option<&RemoteObjectId>,
-    ) -> anyhow::Result<Vec<RemoteObjectId>> {
+        prefix: Option<Self::RemoteObjectId>,
+    ) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
        let path = match prefix {
-            Some(prefix) => Path::new(&prefix.0),
-            None => &self.storage_root,
+            Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
+            None => Cow::Borrowed(&self.storage_root),
        };
-        get_all_files(path, false).await
+        get_all_files(path.as_ref(), false).await
    }

    async fn upload(
        &self,
-        from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        from_size_bytes: usize,
-        to: &RemoteObjectId,
+        to: &Self::RemoteObjectId,
        metadata: Option<StorageMetadata>,
    ) -> anyhow::Result<()> {
        let target_file_path = self.resolve_in_storage(to)?;
@@ -145,8 +134,7 @@ impl RemoteStorage for LocalFs {
        // We need this dance with sort of durable rename (without fsyncs)
        // to prevent partial uploads. This was really hit when pageserver shutdown
        // cancelled the upload and partial file was left on the fs
-        let temp_file_path =
-            path_with_suffix_extension(&target_file_path, LOCAL_FS_TEMP_FILE_SUFFIX);
+        let temp_file_path = path_with_suffix_extension(&target_file_path, "temp");
        let mut destination = io::BufWriter::new(
            fs::OpenOptions::new()
                .write(true)
@@ -162,7 +150,8 @@ impl RemoteStorage for LocalFs {
        );

        let from_size_bytes = from_size_bytes as u64;
-        let mut buffer_to_read = from.take(from_size_bytes);
+        // Allow reading 1 byte more than expected, so that a stream longer than its declared size can be detected later.
+        let mut buffer_to_read = from.take(from_size_bytes + 1);

        let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
            .await
@@ -173,15 +162,17 @@ impl RemoteStorage for LocalFs {
                )
            })?;

-        if bytes_read < from_size_bytes {
-            bail!("Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes");
-        }
-        // Check if there is any extra data after the given size.
-        let mut from = buffer_to_read.into_inner();
-        let extra_read = from.read(&mut [1]).await?;
        ensure!(
-            extra_read == 0,
-            "Provided stream was larger than expected: expected {from_size_bytes} bytes",
+            bytes_read >= from_size_bytes,
+            "Provided stream has actual size {} that is smaller than the given stream size {}",
+            bytes_read,
+            from_size_bytes
+        );
+        // The reader is capped at `from_size_bytes + 1` bytes, so copying more than expected means extra data.
+        ensure!(
+            bytes_read == from_size_bytes,
+            "Provided stream has bigger size than the given stream size {}",
+            from_size_bytes
        );

        destination.flush().await.with_context(|| {
@@ -219,7 +210,7 @@ impl RemoteStorage for LocalFs {
        Ok(())
    }

-    async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
+    async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
        let file_path = self
            .resolve_in_storage(from)
            .map_err(DownloadError::BadInput)?;
@@ -253,7 +244,7 @@ impl RemoteStorage for LocalFs {

    async fn download_byte_range(
        &self,
-        from: &RemoteObjectId,
+        from: &Self::RemoteObjectId,
        start_inclusive: u64,
        end_exclusive: Option<u64>,
    ) -> Result<Download, DownloadError> {
@@ -307,7 +298,7 @@ impl RemoteStorage for LocalFs {
        }
    }

-    async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()> {
+    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
        let file_path = self.resolve_in_storage(path)?;
        if file_path.exists() && file_path.is_file() {
            Ok(fs::remove_file(file_path).await?)
@@ -318,10 +309,6 @@ impl RemoteStorage for LocalFs {
            )
        }
    }
-
-    fn as_local(&self) -> Option<&LocalFs> {
-        Some(self)
-    }
 }

 fn storage_metadata_path(original_path: &Path) -> PathBuf {
@@ -331,7 +318,7 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
 fn get_all_files<'a, P>(
    directory_path: P,
    recursive: bool,
-) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<RemoteObjectId>>> + Send + Sync + 'a>>
+) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
 where
    P: AsRef<Path> + Send + Sync + 'a,
 {
@@ -348,12 +335,12 @@ where
                        debug!("{:?} us a symlink, skipping", entry_path)
                    } else if file_type.is_dir() {
                        if recursive {
-                            paths.extend(get_all_files(&entry_path, true).await?.into_iter())
+                            paths.extend(get_all_files(entry_path, true).await?.into_iter())
                        } else {
-                            paths.push(remote_object_id_from_path(&dir_entry.path())?)
+                            paths.push(dir_entry.path())
                        }
                    } else {
-                        paths.push(remote_object_id_from_path(&dir_entry.path())?);
+                        paths.push(dir_entry.path());
                    }
                }
                Ok(paths)
@@ -415,15 +402,9 @@ mod pure_tests {
            .join("file_name");
        let expected_path = storage_root.join(local_path.strip_prefix(&workdir)?);

-        let actual_path = PathBuf::from(
-            storage
-                .remote_object_id(&local_path)
-                .expect("Matching path should map to storage path normally")
-                .0,
-        );
        assert_eq!(
            expected_path,
-            actual_path,
+            storage.remote_object_id(&local_path).expect("Matching path should map to storage path normally"),
            "File paths from workdir should be stored in local fs storage with the same path they have relative to the workdir"
        );

@@ -484,9 +465,7 @@ mod pure_tests {
        assert_eq!(
            local_path,
            storage
-                .local_path(&remote_object_id_from_path(
-                    &storage_root.join(local_path.strip_prefix(&workdir)?)
-                )?)
+                .local_path(&storage_root.join(local_path.strip_prefix(&workdir)?))
                .expect("For a valid input, valid local path should be parsed"),
            "Should be able to parse metadata out of the correctly named remote delta file"
        );
@@ -510,7 +489,8 @@ mod pure_tests {
    #[test]
    fn local_path_negatives() -> anyhow::Result<()> {
        #[track_caller]
-        fn local_path_error(storage: &LocalFs, storage_path: &RemoteObjectId) -> String {
+        #[allow(clippy::ptr_arg)] // have to use &PathBuf due to `storage.local_path` parameter requirements
+        fn local_path_error(storage: &LocalFs, storage_path: &PathBuf) -> String {
            match storage.local_path(storage_path) {
                Ok(wrong_path) => panic!(
                    "Expected local path input {:?} to cause an error, but got file path: {:?}",
@@ -527,8 +507,7 @@ mod pure_tests {
        };

        let totally_wrong_path = "wrong_wrong_wrong";
-        let error_message =
-            local_path_error(&storage, &RemoteObjectId(totally_wrong_path.to_string()));
+        let error_message = local_path_error(&storage, &PathBuf::from(totally_wrong_path));
        assert!(error_message.contains(totally_wrong_path));

        Ok(())
@@ -571,7 +550,7 @@ mod fs_tests {
        storage: &LocalFs,
        #[allow(clippy::ptr_arg)]
        // have to use &PathBuf due to `storage.local_path` parameter requirements
-        remote_storage_path: &RemoteObjectId,
+        remote_storage_path: &PathBuf,
        expected_metadata: Option<&StorageMetadata>,
    ) -> anyhow::Result<String> {
        let mut download = storage
@@ -602,20 +581,12 @@ mod fs_tests {
            "whatever_contents",
        )
        .await?;
-        let target_path = "/somewhere/else";
-        match storage
-            .upload(
-                Box::new(file),
-                size,
-                &RemoteObjectId(target_path.to_string()),
-                None,
-            )
-            .await
-        {
+        let target_path = PathBuf::from("/").join("somewhere").join("else");
+        match storage.upload(file, size, &target_path, None).await {
            Ok(()) => panic!("Should not allow storing files with wrong target path"),
            Err(e) => {
                let message = format!("{:?}", e);
-                assert!(message.contains(target_path));
+                assert!(message.contains(&target_path.display().to_string()));
                assert!(message.contains("does not belong to the current storage"));
            }
        }
@@ -638,34 +609,6 @@ mod fs_tests {
        Ok(())
    }

-    #[tokio::test]
-    async fn upload_file_negatives() -> anyhow::Result<()> {
-        let storage = create_storage()?;
-
-        let id = storage.remote_object_id(&storage.working_directory.join("dummy"))?;
-        let content = std::io::Cursor::new(b"12345");
-
-        // Check that you get an error if the size parameter doesn't match the actual
-        // size of the stream.
-        storage
-            .upload(Box::new(content.clone()), 0, &id, None)
-            .await
-            .expect_err("upload with zero size succeeded");
-        storage
-            .upload(Box::new(content.clone()), 4, &id, None)
-            .await
-            .expect_err("upload with too short size succeeded");
-        storage
-            .upload(Box::new(content.clone()), 6, &id, None)
-            .await
-            .expect_err("upload with too large size succeeded");
-
-        // Correct size is 5, this should succeed.
-        storage.upload(Box::new(content), 5, &id, None).await?;
-
-        Ok(())
-    }
-
    fn create_storage() -> anyhow::Result<LocalFs> {
        LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned())
    }
@@ -685,8 +628,8 @@ mod fs_tests {
            "We should upload and download the same contents"
        );

-        let non_existing_path = "somewhere/else";
-        match storage.download(&RemoteObjectId(non_existing_path.to_string())).await {
+        let non_existing_path = PathBuf::from("somewhere").join("else");
+        match storage.download(&non_existing_path).await {
            Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
            other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
        }
@@ -825,7 +768,7 @@ mod fs_tests {
            Err(e) => {
                let error_string = e.to_string();
                assert!(error_string.contains("does not exist"));
-                assert!(error_string.contains(&upload_target.0));
+                assert!(error_string.contains(&upload_target.display().to_string()));
            }
        }
        Ok(())
@@ -886,19 +829,15 @@ mod fs_tests {
        storage: &LocalFs,
        name: &str,
        metadata: Option<StorageMetadata>,
-    ) -> anyhow::Result<RemoteObjectId> {
+    ) -> anyhow::Result<PathBuf> {
        let timeline_path = workdir.join("timelines").join("some_timeline");
        let relative_timeline_path = timeline_path.strip_prefix(&workdir)?;
        let storage_path = storage.storage_root.join(relative_timeline_path).join(name);
-        let remote_object_id = RemoteObjectId(storage_path.to_str().unwrap().to_string());

        let from_path = storage.working_directory.join(name);
        let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
-
-        storage
-            .upload(Box::new(file), size, &remote_object_id, metadata)
-            .await?;
-        remote_object_id_from_path(&storage_path)
+        storage.upload(file, size, &storage_path, metadata).await?;
+        Ok(storage_path)
    }

    async fn create_file_for_upload(
@@ -923,9 +862,9 @@ mod fs_tests {
        format!("contents for {name}")
    }

-    async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemoteObjectId>> {
+    async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<PathBuf>> {
        let mut files = storage.list().await?;
-        files.sort_by(|a, b| a.0.cmp(&b.0));
+        files.sort();
        Ok(files)
    }
 }
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -20,8 +20,7 @@ use tokio_util::io::ReaderStream;
 use tracing::debug;

 use crate::{
-    strip_path_prefix, Download, DownloadError, RemoteObjectId, RemoteStorage, S3Config,
-    REMOTE_STORAGE_PREFIX_SEPARATOR,
+    strip_path_prefix, Download, DownloadError, RemoteObjectName, RemoteStorage, S3Config,
 };

 use super::StorageMetadata;
@@ -91,26 +90,52 @@ pub(super) mod metrics {
    }
 }

-fn download_destination(
-    id: &RemoteObjectId,
-    workdir: &Path,
-    prefix_to_strip: Option<&str>,
-) -> PathBuf {
-    let path_without_prefix = match prefix_to_strip {
-        Some(prefix) => id.0.strip_prefix(prefix).unwrap_or_else(|| {
-            panic!(
-                "Could not strip prefix '{}' from S3 object key '{}'",
-                prefix, id.0
-            )
-        }),
-        None => &id.0,
-    };
+const S3_PREFIX_SEPARATOR: char = '/';

-    workdir.join(
-        path_without_prefix
-            .split(REMOTE_STORAGE_PREFIX_SEPARATOR)
-            .collect::<PathBuf>(),
-    )
+/// An S3 object key (or key prefix) wrapped in a dedicated type.
+#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
+pub struct S3ObjectKey(String);
+
+impl S3ObjectKey {
+    /// The raw key string.
+    fn key(&self) -> &str {
+        self.0.as_str()
+    }
+
+    /// Maps the key to a path under `workdir`: `prefix_to_strip`, when given, is removed and the
+    /// remaining separator-delimited segments become path components. Panics on a prefix mismatch.
+    fn download_destination(&self, workdir: &Path, prefix_to_strip: Option<&str>) -> PathBuf {
+        let key = self.key();
+        let relative_key = prefix_to_strip.map_or(key, |prefix| {
+            key.strip_prefix(prefix).unwrap_or_else(|| {
+                panic!(
+                    "Could not strip prefix '{}' from S3 object key '{}'",
+                    prefix, key
+                )
+            })
+        });
+        let relative_path: PathBuf = relative_key.split(S3_PREFIX_SEPARATOR).collect();
+        workdir.join(relative_path)
+    }
+}
+
+impl RemoteObjectName for S3ObjectKey {
+    /// Returns the last segment of the key, ignoring a single trailing separator,
+    /// so both `a/b/c` and `a/b/c/` yield `c`.
+    ///
+    /// A key that is just the separator, or that contains no separator at all
+    /// (the empty key included), has no object name and yields `None`.
+    fn object_name(&self) -> Option<&str> {
+        let key = self.key();
+        // A lone separator would otherwise split into an empty "name".
+        if key.len() == 1 && key.starts_with(S3_PREFIX_SEPARATOR) {
+            return None;
+        }
+        match key.strip_suffix(S3_PREFIX_SEPARATOR) {
+            Some(trimmed) => trimmed.rsplit(S3_PREFIX_SEPARATOR).next(),
+            None => key.rsplit_once(S3_PREFIX_SEPARATOR).map(|(_, name)| name),
+        }
+    }
 }

 /// AWS S3 storage.
@@ -146,25 +171,17 @@ impl S3Bucket {

        let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
        let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
-        // session token is used when authorizing through sso
-        // which is typically the case when testing locally on developer machine
-        let session_token = std::env::var("AWS_SESSION_TOKEN").ok();

        let client = if access_key_id.is_none() && secret_access_key.is_none() {
            debug!("Using IAM-based AWS access");
            S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
        } else {
-            debug!(
-                "Using credentials-based AWS access. Session token is set: {}",
-                session_token.is_some()
-            );
+            debug!("Using credentials-based AWS access");
            S3Client::new_with(
                request_dispatcher,
-                StaticProvider::new(
+                StaticProvider::new_minimal(
                    access_key_id.unwrap_or_default(),
                    secret_access_key.unwrap_or_default(),
-                    session_token,
-                    None,
                ),
                region,
            )
@@ -172,12 +189,12 @@ impl S3Bucket {

        let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
            let mut prefix = prefix;
-            while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
+            while prefix.starts_with(S3_PREFIX_SEPARATOR) {
                prefix = &prefix[1..]
            }

            let mut prefix = prefix.to_string();
-            while prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
+            while prefix.ends_with(S3_PREFIX_SEPARATOR) {
                prefix.pop();
            }
            prefix
@@ -228,25 +245,23 @@ impl S3Bucket {

 #[async_trait::async_trait]
 impl RemoteStorage for S3Bucket {
-    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
+    type RemoteObjectId = S3ObjectKey;
+
+    fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId> {
        let relative_path = strip_path_prefix(&self.workdir, local_path)?;
        let mut key = self.prefix_in_bucket.clone().unwrap_or_default();
        for segment in relative_path {
-            key.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
+            key.push(S3_PREFIX_SEPARATOR);
            key.push_str(&segment.to_string_lossy());
        }
-        Ok(RemoteObjectId(key))
+        Ok(S3ObjectKey(key))
    }

-    fn local_path(&self, storage_path: &RemoteObjectId) -> anyhow::Result<PathBuf> {
-        Ok(download_destination(
-            storage_path,
-            &self.workdir,
-            self.prefix_in_bucket.as_deref(),
-        ))
+    fn local_path(&self, storage_path: &Self::RemoteObjectId) -> anyhow::Result<PathBuf> {
+        Ok(storage_path.download_destination(&self.workdir, self.prefix_in_bucket.as_deref()))
    }

-    async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
+    async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
        let mut document_keys = Vec::new();

        let mut continuation_token = None;
@@ -277,7 +292,7 @@ impl RemoteStorage for S3Bucket {
                    .contents
                    .unwrap_or_default()
                    .into_iter()
-                    .filter_map(|o| Some(RemoteObjectId(o.key?))),
+                    .filter_map(|o| Some(S3ObjectKey(o.key?))),
            );

            match fetch_response.continuation_token {
@@ -289,24 +304,32 @@ impl RemoteStorage for S3Bucket {
        Ok(document_keys)
    }

-    /// See the doc for `RemoteStorage::list_prefixes`
    /// Note: it wont include empty "directories"
    async fn list_prefixes(
        &self,
-        prefix: Option<&RemoteObjectId>,
-    ) -> anyhow::Result<Vec<RemoteObjectId>> {
-        // get the passed prefix or if it is not set use prefix_in_bucket value
-        let list_prefix = prefix
-            .map(|p| p.0.clone())
-            .or_else(|| self.prefix_in_bucket.clone())
-            .map(|mut p| {
+        prefix: Option<Self::RemoteObjectId>,
+    ) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
+        let list_prefix = match prefix {
+            Some(prefix) => {
+                let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
+                // if there is no trailing / in default prefix and
+                // supplied prefix does not start with "/" insert it
+                if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
+                    || prefix.0.starts_with(S3_PREFIX_SEPARATOR))
+                {
+                    prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
+                }
+
+                prefix_in_bucket.push_str(&prefix.0);
                // required to end with a separator
                // otherwise request will return only the entry of a prefix
-                if !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
-                    p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
+                if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
+                    prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
                }
-                p
-            });
+                Some(prefix_in_bucket)
+            }
+            None => self.prefix_in_bucket.clone(),
+        };

        let mut document_keys = Vec::new();

@@ -326,7 +349,7 @@ impl RemoteStorage for S3Bucket {
                    bucket: self.bucket_name.clone(),
                    prefix: list_prefix.clone(),
                    continuation_token,
-                    delimiter: Some(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
+                    delimiter: Some(S3_PREFIX_SEPARATOR.to_string()),
                    ..ListObjectsV2Request::default()
                })
                .await
@@ -340,7 +363,7 @@ impl RemoteStorage for S3Bucket {
                    .common_prefixes
                    .unwrap_or_default()
                    .into_iter()
-                    .filter_map(|o| Some(RemoteObjectId(o.prefix?))),
+                    .filter_map(|o| Some(S3ObjectKey(o.prefix?))),
            );

            match fetch_response.continuation_token {
@@ -354,9 +377,9 @@ impl RemoteStorage for S3Bucket {

    async fn upload(
        &self,
-        from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        from_size_bytes: usize,
-        to: &RemoteObjectId,
+        to: &Self::RemoteObjectId,
        metadata: Option<StorageMetadata>,
    ) -> anyhow::Result<()> {
        let _guard = self
@@ -373,7 +396,7 @@ impl RemoteStorage for S3Bucket {
                    from_size_bytes,
                )),
                bucket: self.bucket_name.clone(),
-                key: to.0.to_owned(),
+                key: to.key().to_owned(),
                metadata: metadata.map(|m| m.0),
                ..PutObjectRequest::default()
            })
@@ -385,10 +408,10 @@ impl RemoteStorage for S3Bucket {
        Ok(())
    }

-    async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
+    async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
        self.download_object(GetObjectRequest {
            bucket: self.bucket_name.clone(),
-            key: from.0.to_owned(),
+            key: from.key().to_owned(),
            ..GetObjectRequest::default()
        })
        .await
@@ -396,7 +419,7 @@ impl RemoteStorage for S3Bucket {

    async fn download_byte_range(
        &self,
-        from: &RemoteObjectId,
+        from: &Self::RemoteObjectId,
        start_inclusive: u64,
        end_exclusive: Option<u64>,
    ) -> Result<Download, DownloadError> {
@@ -410,14 +433,14 @@ impl RemoteStorage for S3Bucket {

        self.download_object(GetObjectRequest {
            bucket: self.bucket_name.clone(),
-            key: from.0.to_owned(),
+            key: from.key().to_owned(),
            range,
            ..GetObjectRequest::default()
        })
        .await
    }

-    async fn delete(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<()> {
+    async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
        let _guard = self
            .concurrency_limiter
            .acquire()
@@ -429,7 +452,7 @@ impl RemoteStorage for S3Bucket {
        self.client
            .delete_object(DeleteObjectRequest {
                bucket: self.bucket_name.clone(),
-                key: remote_object_id.0.to_owned(),
+                key: path.key().to_owned(),
                ..DeleteObjectRequest::default()
            })
            .await
@@ -448,24 +471,43 @@ mod tests {
    use super::*;

    #[test]
-    fn test_download_destination() -> anyhow::Result<()> {
+    fn object_name() {
+        // Each case pairs a raw key with the object name expected for it.
+        let cases = [
+            ("a/b/c", Some("c")),
+            // a trailing separator is ignored
+            ("a/b/c/", Some("c")),
+            ("a/", Some("a")),
+            // XXX is it impossible to have an empty key?
+            ("", None),
+            // a lone separator has no name
+            ("/", None),
+        ];
+        for (raw_key, expected_name) in cases {
+            let key = S3ObjectKey(raw_key.to_owned());
+            assert_eq!(key.object_name(), expected_name);
+        }
+    }
+
+    #[test]
+    fn download_destination() -> anyhow::Result<()> {
        let workdir = tempdir()?.path().to_owned();
        let local_path = workdir.join("one").join("two").join("test_name");
        let relative_path = local_path.strip_prefix(&workdir)?;

-        let key = RemoteObjectId(format!(
+        let key = S3ObjectKey(format!(
            "{}{}",
-            REMOTE_STORAGE_PREFIX_SEPARATOR,
+            S3_PREFIX_SEPARATOR,
            relative_path
                .iter()
                .map(|segment| segment.to_str().unwrap())
                .collect::<Vec<_>>()
-                .join(&REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
+                .join(&S3_PREFIX_SEPARATOR.to_string()),
        ));

        assert_eq!(
            local_path,
-            download_destination(&key, &workdir, None),
+            key.download_destination(&workdir, None),
            "Download destination should consist of s3 path joined with the workdir prefix"
        );

@@ -482,8 +524,8 @@ mod tests {

        let storage = dummy_storage(workdir);

-        let expected_key = RemoteObjectId(format!(
-            "{}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_1}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_2}",
+        let expected_key = S3ObjectKey(format!(
+            "{}{S3_PREFIX_SEPARATOR}{segment_1}{S3_PREFIX_SEPARATOR}{segment_2}",
            storage.prefix_in_bucket.as_deref().unwrap_or_default(),
        ));

@@ -554,7 +596,7 @@ mod tests {
            storage.prefix_in_bucket.as_deref(),
        );
        assert_eq!(
-            download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
+            s3_key.download_destination(&workdir, storage.prefix_in_bucket.as_deref()),
            storage
                .local_path(&s3_key)
                .expect("For a valid input, valid S3 info should be parsed"),
@@ -566,7 +608,7 @@ mod tests {
            storage.prefix_in_bucket.as_deref(),
        );
        assert_eq!(
-            download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
+            s3_key.download_destination(&workdir, storage.prefix_in_bucket.as_deref()),
            storage
                .local_path(&s3_key)
                .expect("For a valid input, valid S3 info should be parsed"),
@@ -607,11 +649,11 @@ mod tests {
        }
    }

-    fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> RemoteObjectId {
-        RemoteObjectId(relative_file_path.iter().fold(
+    fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> S3ObjectKey {
+        S3ObjectKey(relative_file_path.iter().fold(
            prefix.unwrap_or_default().to_string(),
            |mut path_string, segment| {
-                path_string.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
+                path_string.push(S3_PREFIX_SEPARATOR);
                path_string.push_str(segment.to_str().unwrap());
                path_string
            },
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -4,11 +4,11 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
-async-trait = "0.1"
 anyhow = "1.0"
 bincode = "1.3"
 bytes = "1.0.1"
 hyper = { version = "0.14.7", features = ["full"] }
+lazy_static = "1.4.0"
 pin-project-lite = "0.2.7"
 postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
@@ -17,7 +17,6 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 thiserror = "1.0"
 tokio = { version = "1.17", features = ["macros"]}
-tokio-rustls = "0.23"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 nix = "0.23.0"
@@ -29,8 +28,6 @@ rustls = "0.20.2"
 rustls-split = "0.3.0"
 git-version = "0.3.5"
 serde_with = "1.12.0"
-once_cell = "1.13.0"
-

 metrics = { path = "../metrics" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
@@ -41,7 +38,7 @@ bytes = "1.0.1"
 hex-literal = "0.3"
 tempfile = "3.2"
 criterion = "0.3"
-rustls-pemfile = "1"
+rustls-pemfile = "0.2.1"

 [[bench]]
 name = "benchmarks"
--- a/libs/utils/src/bin_ser.rs
+++ b/libs/utils/src/bin_ser.rs
@@ -265,7 +265,7 @@ mod tests {
    use serde::{Deserialize, Serialize};
    use std::io::Cursor;

-    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    pub struct ShortStruct {
        a: u8,
        b: u32,
@@ -286,7 +286,7 @@ mod tests {
    const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
    const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];

-    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    pub struct LongMsg {
        pub tag: u8,
        pub blockpos: u32,
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -4,8 +4,8 @@ use crate::zid::ZTenantId;
 use anyhow::anyhow;
 use hyper::header::AUTHORIZATION;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
+use lazy_static::lazy_static;
 use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
-use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
@@ -16,13 +16,13 @@ use std::net::TcpListener;

 use super::error::ApiError;

-static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
+lazy_static! {
+    static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
        "libmetrics_metric_handler_requests_total",
        "Number of metric requests made"
    )
-    .expect("failed to define a metric")
-});
+    .expect("failed to define a metric");
+}

 async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
    info!("{} {} {}", info.method(), info.uri().path(), res.status(),);
--- a/libs/utils/src/http/request.rs
+++ b/libs/utils/src/http/request.rs
@@ -10,10 +10,12 @@ pub fn get_request_param<'a>(
 ) -> Result<&'a str, ApiError> {
    match request.param(param_name) {
        Some(arg) => Ok(arg),
-        None => Err(ApiError::BadRequest(format!(
-            "no {} specified in path param",
-            param_name
-        ))),
+        None => {
+            return Err(ApiError::BadRequest(format!(
+                "no {} specified in path param",
+                param_name
+            )))
+        }
    }
 }

--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -8,15 +8,14 @@ pub mod lsn;
 /// SeqWait allows waiting for a future sequence number to arrive
 pub mod seqwait;

-/// A simple Read-Copy-Update implementation.
-pub mod simple_rcu;
-
 /// append only ordered map implemented with a Vec
 pub mod vec_map;

+// Async version of SeqWait. Currently unused.
+// pub mod seqwait_async;
+
 pub mod bin_ser;
 pub mod postgres_backend;
-pub mod postgres_backend_async;
 pub mod pq_proto;

 // dealing with connstring parsing and handy access to it's parts
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -18,7 +18,7 @@ pub const XLOG_BLCKSZ: u32 = 8192;
 pub struct Lsn(pub u64);

 /// We tried to parse an LSN from a string, but failed
-#[derive(Debug, PartialEq, Eq, thiserror::Error)]
+#[derive(Debug, PartialEq, thiserror::Error)]
 #[error("LsnParseError")]
 pub struct LsnParseError;

--- a/libs/utils/src/postgres_backend.rs
+++ b/libs/utils/src/postgres_backend.rs
@@ -50,7 +50,7 @@ pub trait Handler {

 /// PostgresBackend protocol state.
 /// XXX: The order of the constructors matters.
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
+#[derive(Clone, Copy, PartialEq, PartialOrd)]
 pub enum ProtoState {
    Initialization,
    Encrypted,
@@ -163,9 +163,14 @@ pub fn is_socket_read_timed_out(error: &anyhow::Error) -> bool {
    false
 }

-// Cast a byte slice to a string slice, dropping null terminator if there's one.
-fn cstr_to_str(bytes: &[u8]) -> Result<&str> {
-    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
+// Truncate 0 from C string in Bytes and stringify it (returns slice, no allocations)
+// PG protocol strings are always C strings.
+fn cstr_to_str(b: &Bytes) -> Result<&str> {
+    let without_null = if b.last() == Some(&0) {
+        &b[..b.len() - 1]
+    } else {
+        &b[..]
+    };
    std::str::from_utf8(without_null).map_err(|e| e.into())
 }

@@ -418,9 +423,9 @@ impl PostgresBackend {
                self.state = ProtoState::Established;
            }

-            FeMessage::Query(body) => {
+            FeMessage::Query(m) => {
                // remove null terminator
-                let query_string = cstr_to_str(&body)?;
+                let query_string = cstr_to_str(&m.body)?;

                trace!("got query {:?}", query_string);
                // xxx distinguish fatal and recoverable errors?
--- a/libs/utils/src/postgres_backend_async.rs
+++ b/libs/utils/src/postgres_backend_async.rs
@@ -1,485 +0,0 @@
-//! Server-side asynchronous Postgres connection, as limited as we need.
-//! To use, create PostgresBackend and run() it, passing the Handler
-//! implementation determining how to process the queries. Currently its API
-//! is rather narrow, but we can extend it once required.
-
-use crate::postgres_backend::AuthType;
-use crate::pq_proto::{BeMessage, BeParameterStatusMessage, FeMessage, FeStartupPacket};
-use anyhow::{bail, Context, Result};
-use bytes::{Bytes, BytesMut};
-use rand::Rng;
-use std::future::Future;
-use std::net::SocketAddr;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::Poll;
-use tracing::{debug, error, trace};
-
-use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
-use tokio_rustls::TlsAcceptor;
-
-#[async_trait::async_trait]
-pub trait Handler {
-    /// Handle single query.
-    /// postgres_backend will issue ReadyForQuery after calling this (this
-    /// might be not what we want after CopyData streaming, but currently we don't
-    /// care).
-    async fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()>;
-
-    /// Called on startup packet receival, allows to process params.
-    ///
-    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new users
-    /// creation is the proxy code. That is quite hacky and ad-hoc solution, may be we could allow
-    /// to override whole init logic in implementations.
-    fn startup(&mut self, _pgb: &mut PostgresBackend, _sm: &FeStartupPacket) -> Result<()> {
-        Ok(())
-    }
-
-    /// Check auth md5
-    fn check_auth_md5(&mut self, _pgb: &mut PostgresBackend, _md5_response: &[u8]) -> Result<()> {
-        bail!("MD5 auth failed")
-    }
-
-    /// Check auth jwt
-    fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
-        bail!("JWT auth failed")
-    }
-}
-
-/// PostgresBackend protocol state.
-/// XXX: The order of the constructors matters.
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
-pub enum ProtoState {
-    Initialization,
-    Encrypted,
-    Authentication,
-    Established,
-    Closed,
-}
-
-#[derive(Clone, Copy)]
-pub enum ProcessMsgResult {
-    Continue,
-    Break,
-}
-
-/// Always-writeable sock_split stream.
-/// May not be readable. See [`PostgresBackend::take_stream_in`]
-pub enum Stream {
-    Unencrypted(tokio::net::TcpStream),
-    Tls(Box<tokio_rustls::server::TlsStream<tokio::net::TcpStream>>),
-    Broken,
-}
-
-impl AsyncWrite for Stream {
-    fn poll_write(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &[u8],
-    ) -> Poll<Result<usize, std::io::Error>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_write(cx, buf),
-            Self::Tls(stream) => Pin::new(stream).poll_write(cx, buf),
-            Self::Broken => unreachable!(),
-        }
-    }
-    fn poll_flush(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_flush(cx),
-            Self::Tls(stream) => Pin::new(stream).poll_flush(cx),
-            Self::Broken => unreachable!(),
-        }
-    }
-    fn poll_shutdown(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_shutdown(cx),
-            Self::Tls(stream) => Pin::new(stream).poll_shutdown(cx),
-            Self::Broken => unreachable!(),
-        }
-    }
-}
-impl AsyncRead for Stream {
-    fn poll_read(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &mut tokio::io::ReadBuf<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_read(cx, buf),
-            Self::Tls(stream) => Pin::new(stream).poll_read(cx, buf),
-            Self::Broken => unreachable!(),
-        }
-    }
-}
-
-pub struct PostgresBackend {
-    stream: Stream,
-    // Output buffer. c.f. BeMessage::write why we are using BytesMut here.
-    buf_out: BytesMut,
-
-    pub state: ProtoState,
-
-    md5_salt: [u8; 4],
-    auth_type: AuthType,
-
-    peer_addr: SocketAddr,
-    pub tls_config: Option<Arc<rustls::ServerConfig>>,
-}
-
-pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
-    let mut query_string = query_string.to_vec();
-    if let Some(ch) = query_string.last() {
-        if *ch == 0 {
-            query_string.pop();
-        }
-    }
-    query_string
-}
-
-// Cast a byte slice to a string slice, dropping null terminator if there's one.
-fn cstr_to_str(bytes: &[u8]) -> Result<&str> {
-    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
-    std::str::from_utf8(without_null).map_err(|e| e.into())
-}
-
-impl PostgresBackend {
-    pub fn new(
-        socket: tokio::net::TcpStream,
-        auth_type: AuthType,
-        tls_config: Option<Arc<rustls::ServerConfig>>,
-    ) -> std::io::Result<Self> {
-        let peer_addr = socket.peer_addr()?;
-
-        Ok(Self {
-            stream: Stream::Unencrypted(socket),
-            buf_out: BytesMut::with_capacity(10 * 1024),
-            state: ProtoState::Initialization,
-            md5_salt: [0u8; 4],
-            auth_type,
-            tls_config,
-            peer_addr,
-        })
-    }
-
-    pub fn get_peer_addr(&self) -> &SocketAddr {
-        &self.peer_addr
-    }
-
-    /// Read full message or return None if connection is closed.
-    pub async fn read_message(&mut self) -> Result<Option<FeMessage>> {
-        use ProtoState::*;
-        match self.state {
-            Initialization | Encrypted => FeStartupPacket::read_fut(&mut self.stream).await,
-            Authentication | Established => FeMessage::read_fut(&mut self.stream).await,
-            Closed => Ok(None),
-        }
-    }
-
-    /// Flush output buffer into the socket.
-    pub async fn flush(&mut self) -> std::io::Result<&mut Self> {
-        self.stream.write_all(&self.buf_out).await?;
-        self.buf_out.clear();
-        Ok(self)
-    }
-
-    /// Write message into internal output buffer.
-    pub fn write_message(&mut self, message: &BeMessage<'_>) -> Result<&mut Self, std::io::Error> {
-        BeMessage::write(&mut self.buf_out, message)?;
-        Ok(self)
-    }
-
-    // Wrapper for run_message_loop() that shuts down socket when we are done
-    pub async fn run<F, S>(mut self, handler: &mut impl Handler, shutdown_watcher: F) -> Result<()>
-    where
-        F: Fn() -> S,
-        S: Future,
-    {
-        let ret = self.run_message_loop(handler, shutdown_watcher).await;
-        let _ = self.stream.shutdown();
-        ret
-    }
-
-    async fn run_message_loop<F, S>(
-        &mut self,
-        handler: &mut impl Handler,
-        shutdown_watcher: F,
-    ) -> Result<()>
-    where
-        F: Fn() -> S,
-        S: Future,
-    {
-        trace!("postgres backend to {:?} started", self.peer_addr);
-
-        tokio::select!(
-            biased;
-
-            _ = shutdown_watcher() => {
-                // We were requested to shut down.
-                tracing::info!("shutdown request received during handshake");
-                return Ok(())
-            },
-
-            result = async {
-                while self.state < ProtoState::Established {
-                    if let Some(msg) = self.read_message().await? {
-                        trace!("got message {msg:?} during handshake");
-
-                        match self.process_handshake_message(handler, msg).await? {
-                            ProcessMsgResult::Continue => {
-                                self.flush().await?;
-                                continue;
-                            }
-                            ProcessMsgResult::Break => {
-                                trace!("postgres backend to {:?} exited during handshake", self.peer_addr);
-                                return Ok(());
-                            }
-                        }
-                    } else {
-                        trace!("postgres backend to {:?} exited during handshake", self.peer_addr);
-                        return Ok(());
-                    }
-                }
-                Ok::<(), anyhow::Error>(())
-            } => {
-                // Handshake complete.
-                result?;
-            }
-        );
-
-        // Authentication completed
-        let mut query_string = Bytes::new();
-        while let Some(msg) = tokio::select!(
-            biased;
-            _ = shutdown_watcher() => {
-                // We were requested to shut down.
-                tracing::info!("shutdown request received in run_message_loop");
-                Ok(None)
-            },
-            msg = self.read_message() => { msg },
-        )? {
-            trace!("got message {:?}", msg);
-
-            let result = self.process_message(handler, msg, &mut query_string).await;
-            self.flush().await?;
-            match result? {
-                ProcessMsgResult::Continue => {
-                    self.flush().await?;
-                    continue;
-                }
-                ProcessMsgResult::Break => break,
-            }
-        }
-
-        trace!("postgres backend to {:?} exited", self.peer_addr);
-        Ok(())
-    }
-
-    async fn start_tls(&mut self) -> anyhow::Result<()> {
-        if let Stream::Unencrypted(plain_stream) =
-            std::mem::replace(&mut self.stream, Stream::Broken)
-        {
-            let acceptor = TlsAcceptor::from(self.tls_config.clone().unwrap());
-            let tls_stream = acceptor.accept(plain_stream).await?;
-
-            self.stream = Stream::Tls(Box::new(tls_stream));
-            return Ok(());
-        };
-        bail!("TLS already started");
-    }
-
-    async fn process_handshake_message(
-        &mut self,
-        handler: &mut impl Handler,
-        msg: FeMessage,
-    ) -> Result<ProcessMsgResult> {
-        assert!(self.state < ProtoState::Established);
-        let have_tls = self.tls_config.is_some();
-        match msg {
-            FeMessage::StartupPacket(m) => {
-                trace!("got startup message {m:?}");
-
-                match m {
-                    FeStartupPacket::SslRequest => {
-                        debug!("SSL requested");
-
-                        self.write_message(&BeMessage::EncryptionResponse(have_tls))?;
-                        if have_tls {
-                            self.start_tls().await?;
-                            self.state = ProtoState::Encrypted;
-                        }
-                    }
-                    FeStartupPacket::GssEncRequest => {
-                        debug!("GSS requested");
-                        self.write_message(&BeMessage::EncryptionResponse(false))?;
-                    }
-                    FeStartupPacket::StartupMessage { .. } => {
-                        if have_tls && !matches!(self.state, ProtoState::Encrypted) {
-                            self.write_message(&BeMessage::ErrorResponse("must connect with TLS"))?;
-                            bail!("client did not connect with TLS");
-                        }
-
-                        // NB: startup() may change self.auth_type -- we are using that in proxy code
-                        // to bypass auth for new users.
-                        handler.startup(self, &m)?;
-
-                        match self.auth_type {
-                            AuthType::Trust => {
-                                self.write_message(&BeMessage::AuthenticationOk)?
-                                    .write_message(&BeParameterStatusMessage::encoding())?
-                                    // The async python driver requires a valid server_version
-                                    .write_message(&BeMessage::ParameterStatus(
-                                        BeParameterStatusMessage::ServerVersion("14.1"),
-                                    ))?
-                                    .write_message(&BeMessage::ReadyForQuery)?;
-                                self.state = ProtoState::Established;
-                            }
-                            AuthType::MD5 => {
-                                rand::thread_rng().fill(&mut self.md5_salt);
-                                self.write_message(&BeMessage::AuthenticationMD5Password(
-                                    self.md5_salt,
-                                ))?;
-                                self.state = ProtoState::Authentication;
-                            }
-                            AuthType::ZenithJWT => {
-                                self.write_message(&BeMessage::AuthenticationCleartextPassword)?;
-                                self.state = ProtoState::Authentication;
-                            }
-                        }
-                    }
-                    FeStartupPacket::CancelRequest { .. } => {
-                        self.state = ProtoState::Closed;
-                        return Ok(ProcessMsgResult::Break);
-                    }
-                }
-            }
-
-            FeMessage::PasswordMessage(m) => {
-                trace!("got password message '{:?}'", m);
-
-                assert!(self.state == ProtoState::Authentication);
-
-                match self.auth_type {
-                    AuthType::Trust => unreachable!(),
-                    AuthType::MD5 => {
-                        let (_, md5_response) = m.split_last().context("protocol violation")?;
-
-                        if let Err(e) = handler.check_auth_md5(self, md5_response) {
-                            self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                            bail!("auth failed: {}", e);
-                        }
-                    }
-                    AuthType::ZenithJWT => {
-                        let (_, jwt_response) = m.split_last().context("protocol violation")?;
-
-                        if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
-                            self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                            bail!("auth failed: {}", e);
-                        }
-                    }
-                }
-                self.write_message(&BeMessage::AuthenticationOk)?
-                    .write_message(&BeParameterStatusMessage::encoding())?
-                    .write_message(&BeMessage::ReadyForQuery)?;
-                self.state = ProtoState::Established;
-            }
-
-            _ => {
-                self.state = ProtoState::Closed;
-                return Ok(ProcessMsgResult::Break);
-            }
-        }
-        Ok(ProcessMsgResult::Continue)
-    }
-
-    async fn process_message(
-        &mut self,
-        handler: &mut impl Handler,
-        msg: FeMessage,
-        unnamed_query_string: &mut Bytes,
-    ) -> Result<ProcessMsgResult> {
-        // Allow only startup and password messages during auth. Otherwise client would be able to bypass auth
-        // TODO: change that to proper top-level match of protocol state with separate message handling for each state
-        assert!(self.state == ProtoState::Established);
-
-        match msg {
-            FeMessage::StartupPacket(_) | FeMessage::PasswordMessage(_) => {
-                bail!("protocol violation");
-            }
-
-            FeMessage::Query(body) => {
-                // remove null terminator
-                let query_string = cstr_to_str(&body)?;
-
-                trace!("got query {:?}", query_string);
-                // xxx distinguish fatal and recoverable errors?
-                if let Err(e) = handler.process_query(self, query_string).await {
-                    // ":?" uses the alternate formatting style, which makes anyhow display the
-                    // full cause of the error, not just the top-level context + its trace.
-                    // We don't want to send that in the ErrorResponse though,
-                    // because it's not relevant to the compute node logs.
-                    error!("query handler for '{}' failed: {:?}", query_string, e);
-                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                    // TODO: untangle convoluted control flow
-                    if e.to_string().contains("failed to run") {
-                        return Ok(ProcessMsgResult::Break);
-                    }
-                }
-                self.write_message(&BeMessage::ReadyForQuery)?;
-            }
-
-            FeMessage::Parse(m) => {
-                *unnamed_query_string = m.query_string;
-                self.write_message(&BeMessage::ParseComplete)?;
-            }
-
-            FeMessage::Describe(_) => {
-                self.write_message(&BeMessage::ParameterDescription)?
-                    .write_message(&BeMessage::NoData)?;
-            }
-
-            FeMessage::Bind(_) => {
-                self.write_message(&BeMessage::BindComplete)?;
-            }
-
-            FeMessage::Close(_) => {
-                self.write_message(&BeMessage::CloseComplete)?;
-            }
-
-            FeMessage::Execute(_) => {
-                let query_string = cstr_to_str(unnamed_query_string)?;
-                trace!("got execute {:?}", query_string);
-                // xxx distinguish fatal and recoverable errors?
-                if let Err(e) = handler.process_query(self, query_string).await {
-                    error!("query handler for '{}' failed: {:?}", query_string, e);
-                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                }
-                // NOTE there is no ReadyForQuery message. This handler is used
-                // for basebackup and it uses CopyOut which doesn't require
-                // ReadyForQuery message and backend just switches back to
-                // processing mode after sending CopyDone or ErrorResponse.
-            }
-
-            FeMessage::Sync => {
-                self.write_message(&BeMessage::ReadyForQuery)?;
-            }
-
-            FeMessage::Terminate => {
-                return Ok(ProcessMsgResult::Break);
-            }
-
-            // We prefer explicit pattern matching to wildcards, because
-            // this helps us spot the places where new variants are missing
-            FeMessage::CopyData(_) | FeMessage::CopyDone | FeMessage::CopyFail => {
-                bail!("unexpected message type: {:?}", msg);
-            }
-        }
-
-        Ok(ProcessMsgResult::Continue)
-    }
-}
--- a/libs/utils/src/pq_proto.rs
+++ b/libs/utils/src/pq_proto.rs
@@ -7,14 +7,11 @@ use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use postgres_protocol::PG_EPOCH;
 use serde::{Deserialize, Serialize};
-use std::{
-    borrow::Cow,
-    collections::HashMap,
-    future::Future,
-    io::{self, Cursor},
-    str,
-    time::{Duration, SystemTime},
-};
+use std::collections::HashMap;
+use std::future::Future;
+use std::io::{self, Cursor};
+use std::str;
+use std::time::{Duration, SystemTime};
 use tokio::io::AsyncReadExt;
 use tracing::{trace, warn};

@@ -28,10 +25,8 @@ pub const TEXT_OID: Oid = 25;
 #[derive(Debug)]
 pub enum FeMessage {
    StartupPacket(FeStartupPacket),
-    // Simple query.
-    Query(Bytes),
-    // Extended query protocol.
-    Parse(FeParseMessage),
+    Query(FeQueryMessage), // Simple query
+    Parse(FeParseMessage), // Extended query protocol
    Describe(FeDescribeMessage),
    Bind(FeBindMessage),
    Execute(FeExecuteMessage),
@@ -52,72 +47,10 @@ pub enum FeStartupPacket {
    StartupMessage {
        major_version: u32,
        minor_version: u32,
-        params: StartupMessageParams,
+        params: HashMap<String, String>,
    },
 }

-#[derive(Debug)]
-pub struct StartupMessageParams {
-    params: HashMap<String, String>,
-}
-
-impl StartupMessageParams {
-    /// Get parameter's value by its name.
-    pub fn get(&self, name: &str) -> Option<&str> {
-        self.params.get(name).map(|s| s.as_str())
-    }
-
-    /// Split command-line options according to PostgreSQL's logic,
-    /// taking into account all escape sequences but leaving them as-is.
-    /// [`None`] means that there's no `options` in [`Self`].
-    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {
-        // See `postgres: pg_split_opts`.
-        let mut last_was_escape = false;
-        let iter = self
-            .get("options")?
-            .split(move |c: char| {
-                // We split by non-escaped whitespace symbols.
-                let should_split = c.is_ascii_whitespace() && !last_was_escape;
-                last_was_escape = c == '\\' && !last_was_escape;
-                should_split
-            })
-            .filter(|s| !s.is_empty());
-
-        Some(iter)
-    }
-
-    /// Split command-line options according to PostgreSQL's logic,
-    /// applying all escape sequences (using owned strings as needed).
-    /// [`None`] means that there's no `options` in [`Self`].
-    pub fn options_escaped(&self) -> Option<impl Iterator<Item = Cow<'_, str>>> {
-        // See `postgres: pg_split_opts`.
-        let iter = self.options_raw()?.map(|s| {
-            let mut preserve_next_escape = false;
-            let escape = |c| {
-                // We should remove '\\' unless it's preceded by '\\'.
-                let should_remove = c == '\\' && !preserve_next_escape;
-                preserve_next_escape = should_remove;
-                should_remove
-            };
-
-            match s.contains('\\') {
-                true => Cow::Owned(s.replace(escape, "")),
-                false => Cow::Borrowed(s),
-            }
-        });
-
-        Some(iter)
-    }
-
-    // This function is mostly useful in tests.
-    #[doc(hidden)]
-    pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self {
-        Self {
-            params: pairs.map(|(k, v)| (k.to_owned(), v.to_owned())).into(),
-        }
-    }
-}
-
 #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
 pub struct CancelKeyData {
    pub backend_pid: i32,
@@ -134,6 +67,11 @@ impl Distribution<CancelKeyData> for Standard {
    }
 }

+#[derive(Debug)]
+pub struct FeQueryMessage {
+    pub body: Bytes,
+}
+
 // We only support the simple case of Parse on unnamed prepared statement and
 // no params
 #[derive(Debug)]
@@ -149,7 +87,7 @@ pub struct FeDescribeMessage {

 // we only support unnamed prepared stmt and portal
 #[derive(Debug)]
-pub struct FeBindMessage;
+pub struct FeBindMessage {}

 // we only support unnamed prepared stmt or portal
 #[derive(Debug)]
@@ -160,7 +98,7 @@ pub struct FeExecuteMessage {

 // we only support unnamed prepared stmt and portal
 #[derive(Debug)]
-pub struct FeCloseMessage;
+pub struct FeCloseMessage {}

 /// Retry a read on EINTR
 ///
@@ -223,20 +161,22 @@ impl FeMessage {
                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
                Err(e) => return Err(e.into()),
            };
+            let len = retry_read!(stream.read_u32().await)?;

-            // The message length includes itself, so it better be at least 4.
-            let len = retry_read!(stream.read_u32().await)?
+            // The message length includes itself, so it better be at least 4
+            let bodylen = len
                .checked_sub(4)
-                .context("invalid message length")?;
+                .context("invalid message length: parsing u32")?;

-            let body = {
-                let mut buffer = vec![0u8; len as usize];
-                stream.read_exact(&mut buffer).await?;
-                Bytes::from(buffer)
-            };
+            // Read message body
+            let mut body_buf: Vec<u8> = vec![0; bodylen as usize];
+            stream.read_exact(&mut body_buf).await?;

+            let body = Bytes::from(body_buf);
+
+            // Parse it
            match tag {
-                b'Q' => Ok(Some(FeMessage::Query(body))),
+                b'Q' => Ok(Some(FeMessage::Query(FeQueryMessage { body }))),
                b'P' => Ok(Some(FeParseMessage::parse(body)?)),
                b'D' => Ok(Some(FeDescribeMessage::parse(body)?)),
                b'E' => Ok(Some(FeExecuteMessage::parse(body)?)),
@@ -300,9 +240,9 @@ impl FeStartupPacket {
            stream.read_exact(params_bytes.as_mut()).await?;

            // Parse params depending on request code
-            let req_hi = request_code >> 16;
-            let req_lo = request_code & ((1 << 16) - 1);
-            let message = match (req_hi, req_lo) {
+            let most_sig_16_bits = request_code >> 16;
+            let least_sig_16_bits = request_code & ((1 << 16) - 1);
+            let message = match (most_sig_16_bits, least_sig_16_bits) {
                (RESERVED_INVALID_MAJOR_VERSION, CANCEL_REQUEST_CODE) => {
                    ensure!(params_len == 8, "expected 8 bytes for CancelRequest params");
                    let mut cursor = Cursor::new(params_bytes);
@@ -311,115 +251,173 @@ impl FeStartupPacket {
                        cancel_key: cursor.read_i32().await?,
                    })
                }
-                (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => {
-                    // Requested upgrade to SSL (aka TLS)
-                    FeStartupPacket::SslRequest
-                }
+                (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => FeStartupPacket::SslRequest,
                (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_GSS_CODE) => {
-                    // Requested upgrade to GSSAPI
                    FeStartupPacket::GssEncRequest
                }
                (RESERVED_INVALID_MAJOR_VERSION, unrecognized_code) => {
                    bail!("Unrecognized request code {}", unrecognized_code)
                }
-                // TODO bail if protocol major_version is not 3?
                (major_version, minor_version) => {
-                    // Parse pairs of null-terminated strings (key, value).
-                    // See `postgres: ProcessStartupPacket, build_startup_packet`.
-                    let mut tokens = str::from_utf8(&params_bytes)
-                        .context("StartupMessage params: invalid utf-8")?
-                        .strip_suffix('\0') // drop packet's own null terminator
-                        .context("StartupMessage params: missing null terminator")?
-                        .split_terminator('\0');
-
-                    let mut params = HashMap::new();
-                    while let Some(name) = tokens.next() {
-                        let value = tokens
+                    // TODO bail if protocol major_version is not 3?
+                    // Parse null-terminated (String) pairs of param name / param value
+                    let params_str = str::from_utf8(&params_bytes).unwrap();
+                    let mut params_tokens = params_str.split('\0');
+                    let mut params: HashMap<String, String> = HashMap::new();
+                    while let Some(name) = params_tokens.next() {
+                        let value = params_tokens
                            .next()
-                            .context("StartupMessage params: key without value")?;
+                            .context("expected even number of params in StartupMessage")?;
+                        if name == "options" {
+                            // parsing options arguments "...&options=<var0>%3D<val0>+<var1>=<var1>..."
+                            // '%3D' is '=' and '+' is ' '

-                        params.insert(name.to_owned(), value.to_owned());
+                            // Note: we allow users that don't have SNI capabilities,
+                            // to pass a special keyword argument 'project'
+                            // to be used to determine the cluster name by the proxy.
+
+                            //TODO: write unit test for this and refactor in its own function.
+                            for cmdopt in value.split(' ') {
+                                let nameval: Vec<&str> = cmdopt.split('=').collect();
+                                if nameval.len() == 2 {
+                                    params.insert(nameval[0].to_string(), nameval[1].to_string());
+                                }
+                            }
+                        } else {
+                            params.insert(name.to_string(), value.to_string());
+                        }
                    }
-
                    FeStartupPacket::StartupMessage {
                        major_version,
                        minor_version,
-                        params: StartupMessageParams { params },
+                        params,
                    }
                }
            };
-
            Ok(Some(FeMessage::StartupPacket(message)))
        })
    }
 }

 impl FeParseMessage {
-    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
+    pub fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
+        let _pstmt_name = read_null_terminated(&mut buf)?;
+        let query_string = read_null_terminated(&mut buf)?;
+        let nparams = buf.get_i16();
+
        // FIXME: the rust-postgres driver uses a named prepared statement
        // for copy_out(). We're not prepared to handle that correctly. For
        // now, just ignore the statement name, assuming that the client never
        // uses more than one prepared statement at a time.
+        /*
+        if !pstmt_name.is_empty() {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "named prepared statements not implemented in Parse",
+            ));
+        }
+         */

-        let _pstmt_name = read_cstr(&mut buf)?;
-        let query_string = read_cstr(&mut buf)?;
-        let nparams = buf.get_i16();
-
-        ensure!(nparams == 0, "query params not implemented");
+        if nparams != 0 {
+            bail!("query params not implemented");
+        }

        Ok(FeMessage::Parse(FeParseMessage { query_string }))
    }
 }

 impl FeDescribeMessage {
-    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
+    pub fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        let kind = buf.get_u8();
-        let _pstmt_name = read_cstr(&mut buf)?;
+        let _pstmt_name = read_null_terminated(&mut buf)?;

        // FIXME: see FeParseMessage::parse
-        ensure!(
-            kind == b'S',
-            "only prepared statemement Describe is implemented"
-        );
+        /*
+        if !pstmt_name.is_empty() {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "named prepared statements not implemented in Describe",
+            ));
+        }
+        */
+
+        if kind != b'S' {
+            bail!("only prepared statmement Describe is implemented");
+        }

        Ok(FeMessage::Describe(FeDescribeMessage { kind }))
    }
 }

 impl FeExecuteMessage {
-    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
-        let portal_name = read_cstr(&mut buf)?;
+    pub fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
+        let portal_name = read_null_terminated(&mut buf)?;
        let maxrows = buf.get_i32();

-        ensure!(portal_name.is_empty(), "named portals not implemented");
-        ensure!(maxrows == 0, "row limit in Execute message not implemented");
+        if !portal_name.is_empty() {
+            bail!("named portals not implemented");
+        }
+
+        if maxrows != 0 {
+            bail!("row limit in Execute message not supported");
+        }

        Ok(FeMessage::Execute(FeExecuteMessage { maxrows }))
    }
 }

 impl FeBindMessage {
-    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
-        let portal_name = read_cstr(&mut buf)?;
-        let _pstmt_name = read_cstr(&mut buf)?;
+    pub fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
+        let portal_name = read_null_terminated(&mut buf)?;
+        let _pstmt_name = read_null_terminated(&mut buf)?;
+
+        if !portal_name.is_empty() {
+            bail!("named portals not implemented");
+        }

        // FIXME: see FeParseMessage::parse
-        ensure!(portal_name.is_empty(), "named portals not implemented");
+        /*
+        if !pstmt_name.is_empty() {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "named prepared statements not implemented",
+            ));
+        }
+        */

-        Ok(FeMessage::Bind(FeBindMessage))
+        Ok(FeMessage::Bind(FeBindMessage {}))
    }
 }

 impl FeCloseMessage {
-    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
+    pub fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        let _kind = buf.get_u8();
-        let _pstmt_or_portal_name = read_cstr(&mut buf)?;
+        let _pstmt_or_portal_name = read_null_terminated(&mut buf)?;

        // FIXME: we do nothing with Close
-        Ok(FeMessage::Close(FeCloseMessage))
+
+        Ok(FeMessage::Close(FeCloseMessage {}))
    }
 }

+/// Splits the bytes preceding the first NUL off the front of `buf` and returns
+/// them, also consuming the NUL itself. The result shares storage with `buf`
+/// instead of being copied byte by byte into a fresh buffer.
+///
+/// Fails with "no null-terminator in string" when `buf` holds no NUL byte; `buf`
+/// is left untouched in that case.
+fn read_null_terminated(buf: &mut Bytes) -> anyhow::Result<Bytes> {
+    // Locate the terminator up front so a malformed message is rejected before
+    // anything is consumed.
+    let end = match buf.iter().position(|byte| *byte == 0) {
+        Some(end) => end,
+        None => bail!("no null-terminator in string"),
+    };
+    let result = buf.split_to(end);
+    buf.advance(1); // drop the null terminator
+    Ok(result)
+}
+
 // Backend

 #[derive(Debug)]
@@ -441,7 +439,7 @@ pub enum BeMessage<'a> {
    // None means column is NULL
    DataRow(&'a [Option<&'a [u8]>]),
    ErrorResponse(&'a str),
-    /// Single byte - used in response to SSLRequest/GSSENCRequest.
+    // single byte - used in response to SSLRequest/GSSENCRequest
    EncryptionResponse(bool),
    NoData,
    ParameterDescription,
@@ -554,22 +552,49 @@ pub static SINGLE_COL_ROWDESC: BeMessage = BeMessage::RowDescription(&[RowDescri
    formatcode: 0,
 }]);

+// Safe usize -> i32|i16 conversion, from rust-postgres
+trait FromUsize: Sized {
+    fn from_usize(x: usize) -> Result<Self, io::Error>;
+}
+
+macro_rules! from_usize {
+    ($t:ty) => {
+        impl FromUsize for $t {
+            #[inline]
+            fn from_usize(x: usize) -> io::Result<$t> {
+                if x > <$t>::max_value() as usize {
+                    Err(io::Error::new(
+                        io::ErrorKind::InvalidInput,
+                        "value too large to transmit",
+                    ))
+                } else {
+                    Ok(x as $t)
+                }
+            }
+        }
+    };
+}
+
+from_usize!(i32);
+
 /// Call f() to write body of the message and prepend it with 4-byte len as
 /// prescribed by the protocol.
-fn write_body<R>(buf: &mut BytesMut, f: impl FnOnce(&mut BytesMut) -> R) -> R {
+fn write_body<F>(buf: &mut BytesMut, f: F) -> io::Result<()>
+where
+    F: FnOnce(&mut BytesMut) -> io::Result<()>,
+{
    let base = buf.len();
    buf.extend_from_slice(&[0; 4]);

-    let res = f(buf);
+    f(buf)?;

-    let size = i32::try_from(buf.len() - base).expect("message too big to transmit");
+    let size = i32::from_usize(buf.len() - base)?;
    (&mut buf[base..]).put_slice(&size.to_be_bytes());
-
-    res
+    Ok(())
 }

 /// Safe write of s into buf as cstring (String in the protocol).
-fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> {
+pub fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> {
    if s.contains(&0) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
@@ -581,11 +606,15 @@ fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> {
    Ok(())
 }

-fn read_cstr(buf: &mut Bytes) -> anyhow::Result<Bytes> {
-    let pos = buf.iter().position(|x| *x == 0);
-    let result = buf.split_to(pos.context("missing terminator")?);
-    buf.advance(1); // drop the null terminator
-    Ok(result)
+// Truncate 0 from C string in Bytes and stringify it (returns slice, no allocations)
+// PG protocol strings are always C strings.
+fn cstr_to_str(b: &Bytes) -> Result<&str> {
+    let without_null = if b.last() == Some(&0) {
+        &b[..b.len() - 1]
+    } else {
+        &b[..]
+    };
+    std::str::from_utf8(without_null).map_err(|e| e.into())
 }

 impl<'a> BeMessage<'a> {
@@ -600,14 +629,18 @@ impl<'a> BeMessage<'a> {
                buf.put_u8(b'R');
                write_body(buf, |buf| {
                    buf.put_i32(0); // Specifies that the authentication was successful.
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap(); // write into BytesMut can't fail
            }

            BeMessage::AuthenticationCleartextPassword => {
                buf.put_u8(b'R');
                write_body(buf, |buf| {
                    buf.put_i32(3); // Specifies that clear text password is required.
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap(); // write into BytesMut can't fail
            }

            BeMessage::AuthenticationMD5Password(salt) => {
@@ -615,7 +648,9 @@ impl<'a> BeMessage<'a> {
                write_body(buf, |buf| {
                    buf.put_i32(5); // Specifies that an MD5-encrypted password is required.
                    buf.put_slice(&salt[..]);
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap(); // write into BytesMut can't fail
            }

            BeMessage::AuthenticationSasl(msg) => {
@@ -640,7 +675,8 @@ impl<'a> BeMessage<'a> {
                        }
                    }
                    Ok::<_, io::Error>(())
-                })?;
+                })
+                .unwrap()
            }

            BeMessage::BackendKeyData(key_data) => {
@@ -648,64 +684,77 @@ impl<'a> BeMessage<'a> {
                write_body(buf, |buf| {
                    buf.put_i32(key_data.backend_pid);
                    buf.put_i32(key_data.cancel_key);
-                });
+                    Ok(())
+                })
+                .unwrap();
            }

            BeMessage::BindComplete => {
                buf.put_u8(b'2');
-                write_body(buf, |_| {});
+                write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
            }

            BeMessage::CloseComplete => {
                buf.put_u8(b'3');
-                write_body(buf, |_| {});
+                write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
            }

            BeMessage::CommandComplete(cmd) => {
                buf.put_u8(b'C');
-                write_body(buf, |buf| write_cstr(cmd, buf))?;
+                write_body(buf, |buf| {
+                    write_cstr(cmd, buf)?;
+                    Ok::<_, io::Error>(())
+                })?;
            }

            BeMessage::CopyData(data) => {
                buf.put_u8(b'd');
                write_body(buf, |buf| {
                    buf.put_slice(data);
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::CopyDone => {
                buf.put_u8(b'c');
-                write_body(buf, |_| {});
+                write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
            }

            BeMessage::CopyFail => {
                buf.put_u8(b'f');
-                write_body(buf, |_| {});
+                write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
            }

            BeMessage::CopyInResponse => {
                buf.put_u8(b'G');
                write_body(buf, |buf| {
-                    buf.put_u8(1); // copy_is_binary
-                    buf.put_i16(0); // numAttributes
-                });
+                    buf.put_u8(1); /* copy_is_binary */
+                    buf.put_i16(0); /* numAttributes */
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::CopyOutResponse => {
                buf.put_u8(b'H');
                write_body(buf, |buf| {
-                    buf.put_u8(0); // copy_is_binary
-                    buf.put_i16(0); // numAttributes
-                });
+                    buf.put_u8(0); /* copy_is_binary */
+                    buf.put_i16(0); /* numAttributes */
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::CopyBothResponse => {
                buf.put_u8(b'W');
                write_body(buf, |buf| {
                    // doesn't matter, used only for replication
-                    buf.put_u8(0); // copy_is_binary
-                    buf.put_i16(0); // numAttributes
-                });
+                    buf.put_u8(0); /* copy_is_binary */
+                    buf.put_i16(0); /* numAttributes */
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::DataRow(vals) => {
@@ -720,7 +769,9 @@ impl<'a> BeMessage<'a> {
                            buf.put_i32(-1);
                        }
                    }
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            // ErrorResponse is a zero-terminated array of zero-terminated fields.
@@ -735,17 +786,18 @@ impl<'a> BeMessage<'a> {
                buf.put_u8(b'E');
                write_body(buf, |buf| {
                    buf.put_u8(b'S'); // severity
-                    buf.put_slice(b"ERROR\0");
+                    write_cstr(&Bytes::from("ERROR"), buf)?;

                    buf.put_u8(b'C'); // SQLSTATE error code
-                    buf.put_slice(b"CXX000\0");
+                    write_cstr(&Bytes::from("CXX000"), buf)?;

                    buf.put_u8(b'M'); // the message
                    write_cstr(error_msg.as_bytes(), buf)?;

                    buf.put_u8(0); // terminator
                    Ok::<_, io::Error>(())
-                })?;
+                })
+                .unwrap();
            }

            // NoticeResponse has the same format as ErrorResponse. From doc: "The frontend should display the
@@ -758,22 +810,23 @@ impl<'a> BeMessage<'a> {
                buf.put_u8(b'N');
                write_body(buf, |buf| {
                    buf.put_u8(b'S'); // severity
-                    buf.put_slice(b"NOTICE\0");
+                    write_cstr(&Bytes::from("NOTICE"), buf)?;

                    buf.put_u8(b'C'); // SQLSTATE error code
-                    buf.put_slice(b"CXX000\0");
+                    write_cstr(&Bytes::from("CXX000"), buf)?;

                    buf.put_u8(b'M'); // the message
                    write_cstr(error_msg.as_bytes(), buf)?;

                    buf.put_u8(0); // terminator
                    Ok::<_, io::Error>(())
-                })?;
+                })
+                .unwrap();
            }

            BeMessage::NoData => {
                buf.put_u8(b'n');
-                write_body(buf, |_| {});
+                write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
            }

            BeMessage::EncryptionResponse(should_negotiate) => {
@@ -798,7 +851,9 @@ impl<'a> BeMessage<'a> {
                buf.put_u8(b'S');
                write_body(buf, |buf| {
                    buf.put_slice(&buffer[..cnt]);
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::ParameterDescription => {
@@ -806,19 +861,23 @@ impl<'a> BeMessage<'a> {
                write_body(buf, |buf| {
                    // we don't support params, so always 0
                    buf.put_i16(0);
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::ParseComplete => {
                buf.put_u8(b'1');
-                write_body(buf, |_| {});
+                write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
            }

            BeMessage::ReadyForQuery => {
                buf.put_u8(b'Z');
                write_body(buf, |buf| {
                    buf.put_u8(b'I');
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::RowDescription(rows) => {
@@ -846,7 +905,9 @@ impl<'a> BeMessage<'a> {
                    buf.put_u64(body.wal_end);
                    buf.put_i64(body.timestamp);
                    buf.put_slice(body.data);
-                });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }

            BeMessage::KeepAlive(req) => {
@@ -855,8 +916,10 @@ impl<'a> BeMessage<'a> {
                    buf.put_u8(b'k');
                    buf.put_u64(req.sent_ptr);
                    buf.put_i64(req.timestamp);
-                    buf.put_u8(if req.request_reply { 1 } else { 0 });
-                });
+                    buf.put_u8(if req.request_reply { 1u8 } else { 0u8 });
+                    Ok::<_, io::Error>(())
+                })
+                .unwrap();
            }
        }
        Ok(())
@@ -865,7 +928,7 @@ impl<'a> BeMessage<'a> {

 // Neon extension of postgres replication protocol
 // See NEON_STATUS_UPDATE_TAG_BYTE
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
 pub struct ReplicationFeedback {
    // Last known size of the timeline. Used to enforce timeline size limit.
    pub current_timeline_size: u64,
@@ -903,17 +966,17 @@ impl ReplicationFeedback {
    // value itself
    pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
        buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
-        buf.put_slice(b"current_timeline_size\0");
+        write_cstr(&Bytes::from("current_timeline_size"), buf)?;
        buf.put_i32(8);
        buf.put_u64(self.current_timeline_size);

-        buf.put_slice(b"ps_writelsn\0");
+        write_cstr(&Bytes::from("ps_writelsn"), buf)?;
        buf.put_i32(8);
        buf.put_u64(self.ps_writelsn);
-        buf.put_slice(b"ps_flushlsn\0");
+        write_cstr(&Bytes::from("ps_flushlsn"), buf)?;
        buf.put_i32(8);
        buf.put_u64(self.ps_flushlsn);
-        buf.put_slice(b"ps_applylsn\0");
+        write_cstr(&Bytes::from("ps_applylsn"), buf)?;
        buf.put_i32(8);
        buf.put_u64(self.ps_applylsn);

@@ -923,7 +986,7 @@ impl ReplicationFeedback {
            .expect("failed to serialize pg_replytime earlier than PG_EPOCH")
            .as_micros() as i64;

-        buf.put_slice(b"ps_replytime\0");
+        write_cstr(&Bytes::from("ps_replytime"), buf)?;
        buf.put_i32(8);
        buf.put_i64(timestamp);
        Ok(())
@@ -933,30 +996,33 @@ impl ReplicationFeedback {
    pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
        let mut zf = ReplicationFeedback::empty();
        let nfields = buf.get_u8();
-        for _ in 0..nfields {
-            let key = read_cstr(&mut buf).unwrap();
-            match key.as_ref() {
-                b"current_timeline_size" => {
+        let mut i = 0;
+        while i < nfields {
+            i += 1;
+            let key_cstr = read_null_terminated(&mut buf).unwrap();
+            let key = cstr_to_str(&key_cstr).unwrap();
+            match key {
+                "current_timeline_size" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
                    zf.current_timeline_size = buf.get_u64();
                }
-                b"ps_writelsn" => {
+                "ps_writelsn" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
                    zf.ps_writelsn = buf.get_u64();
                }
-                b"ps_flushlsn" => {
+                "ps_flushlsn" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
                    zf.ps_flushlsn = buf.get_u64();
                }
-                b"ps_applylsn" => {
+                "ps_applylsn" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
                    zf.ps_applylsn = buf.get_u64();
                }
-                b"ps_replytime" => {
+                "ps_replytime" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
                    let raw_time = buf.get_i64();
@@ -969,8 +1035,8 @@ impl ReplicationFeedback {
                _ => {
                    let len = buf.get_i32();
                    warn!(
-                        "ReplicationFeedback parse. unknown key {} of len {len}. Skip it.",
-                        String::from_utf8_lossy(key.as_ref())
+                        "ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
+                        key, len
                    );
                    buf.advance(len as usize);
                }
@@ -1016,7 +1082,7 @@ mod tests {
            *first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
        }

-        data.put_slice(b"new_field_one\0");
+        write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
        data.put_i32(8);
        data.put_u64(42);

@@ -1025,33 +1091,6 @@ mod tests {
        assert_eq!(zf, zf_parsed);
    }

-    #[test]
-    fn test_startup_message_params_options_escaped() {
-        fn split_options(params: &StartupMessageParams) -> Vec<Cow<'_, str>> {
-            params
-                .options_escaped()
-                .expect("options are None")
-                .collect()
-        }
-
-        let make_params = |options| StartupMessageParams::new([("options", options)]);
-
-        let params = StartupMessageParams::new([]);
-        assert!(matches!(params.options_escaped(), None));
-
-        let params = make_params("");
-        assert!(split_options(&params).is_empty());
-
-        let params = make_params("foo");
-        assert_eq!(split_options(&params), ["foo"]);
-
-        let params = make_params(" foo  bar ");
-        assert_eq!(split_options(&params), ["foo", "bar"]);
-
-        let params = make_params("foo\\ bar \\ \\\\ baz\\  lol");
-        assert_eq!(split_options(&params), ["foo bar", " \\", "baz ", "lol"]);
-    }
-
    // Make sure that `read` is sync/async callable
    async fn _assert(stream: &mut (impl tokio::io::AsyncRead + Unpin)) {
        let _ = FeMessage::read(&mut [].as_ref());
--- a/libs/utils/src/seqwait.rs
+++ b/libs/utils/src/seqwait.rs
@@ -4,13 +4,12 @@ use std::cmp::{Eq, Ordering, PartialOrd};
 use std::collections::BinaryHeap;
 use std::fmt::Debug;
 use std::mem;
+use std::sync::mpsc::{channel, Receiver, Sender};
 use std::sync::Mutex;
 use std::time::Duration;
-use tokio::sync::watch::{channel, Receiver, Sender};
-use tokio::time::timeout;

 /// An error happened while waiting for a number
-#[derive(Debug, PartialEq, Eq, thiserror::Error)]
+#[derive(Debug, PartialEq, thiserror::Error)]
 #[error("SeqWaitError")]
 pub enum SeqWaitError {
    /// The wait timeout was reached
@@ -142,10 +141,10 @@ where
    ///
    /// This call won't complete until someone has called `advance`
    /// with a number greater than or equal to the one we're waiting for.
-    pub async fn wait_for(&self, num: V) -> Result<(), SeqWaitError> {
+    pub fn wait_for(&self, num: V) -> Result<(), SeqWaitError> {
        match self.queue_for_wait(num) {
            Ok(None) => Ok(()),
-            Ok(Some(mut rx)) => rx.changed().await.map_err(|_| SeqWaitError::Shutdown),
+            Ok(Some(rx)) => rx.recv().map_err(|_| SeqWaitError::Shutdown),
            Err(e) => Err(e),
        }
    }
@@ -157,18 +156,13 @@ where
    ///
    /// If that hasn't happened after the specified timeout duration,
    /// [`SeqWaitError::Timeout`] will be returned.
-    pub async fn wait_for_timeout(
-        &self,
-        num: V,
-        timeout_duration: Duration,
-    ) -> Result<(), SeqWaitError> {
+    pub fn wait_for_timeout(&self, num: V, timeout_duration: Duration) -> Result<(), SeqWaitError> {
        match self.queue_for_wait(num) {
            Ok(None) => Ok(()),
-            Ok(Some(mut rx)) => match timeout(timeout_duration, rx.changed()).await {
-                Ok(Ok(())) => Ok(()),
-                Ok(Err(_)) => Err(SeqWaitError::Shutdown),
-                Err(_) => Err(SeqWaitError::Timeout),
-            },
+            Ok(Some(rx)) => rx.recv_timeout(timeout_duration).map_err(|e| match e {
+                std::sync::mpsc::RecvTimeoutError::Timeout => SeqWaitError::Timeout,
+                std::sync::mpsc::RecvTimeoutError::Disconnected => SeqWaitError::Shutdown,
+            }),
            Err(e) => Err(e),
        }
    }
@@ -185,7 +179,7 @@ where
        }

        // Create a new channel.
-        let (tx, rx) = channel(());
+        let (tx, rx) = channel();
        internal.waiters.push(Waiter {
            wake_num: num,
            wake_channel: tx,
@@ -241,6 +235,7 @@ mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread::sleep;
+    use std::thread::spawn;
    use std::time::Duration;

    impl MonotonicCounter<i32> for i32 {
@@ -253,25 +248,25 @@ mod tests {
        }
    }

-    #[tokio::test]
-    async fn seqwait() {
+    #[test]
+    fn seqwait() {
        let seq = Arc::new(SeqWait::new(0));
        let seq2 = Arc::clone(&seq);
        let seq3 = Arc::clone(&seq);
-        tokio::task::spawn(async move {
-            seq2.wait_for(42).await.expect("wait_for 42");
+        spawn(move || {
+            seq2.wait_for(42).expect("wait_for 42");
            let old = seq2.advance(100);
            assert_eq!(old, 99);
-            seq2.wait_for(999).await.expect_err("no 999");
+            seq2.wait_for(999).expect_err("no 999");
        });
-        tokio::task::spawn(async move {
-            seq3.wait_for(42).await.expect("wait_for 42");
-            seq3.wait_for(0).await.expect("wait_for 0");
+        spawn(move || {
+            seq3.wait_for(42).expect("wait_for 42");
+            seq3.wait_for(0).expect("wait_for 0");
        });
        sleep(Duration::from_secs(1));
        let old = seq.advance(99);
        assert_eq!(old, 0);
-        seq.wait_for(100).await.expect("wait_for 100");
+        seq.wait_for(100).expect("wait_for 100");

        // Calling advance with a smaller value is a no-op
        assert_eq!(seq.advance(98), 100);
@@ -280,16 +275,16 @@ mod tests {
        seq.shutdown();
    }

-    #[tokio::test]
-    async fn seqwait_timeout() {
+    #[test]
+    fn seqwait_timeout() {
        let seq = Arc::new(SeqWait::new(0));
        let seq2 = Arc::clone(&seq);
-        tokio::task::spawn(async move {
+        spawn(move || {
            let timeout = Duration::from_millis(1);
-            let res = seq2.wait_for_timeout(42, timeout).await;
+            let res = seq2.wait_for_timeout(42, timeout);
            assert_eq!(res, Err(SeqWaitError::Timeout));
        });
-        tokio::time::sleep(Duration::from_secs(1)).await;
+        sleep(Duration::from_secs(1));
        // This will attempt to wake, but nothing will happen
        // because the waiter already dropped its Receiver.
        let old = seq.advance(99);
--- a/libs/utils/src/seqwait_async.rs
+++ b/libs/utils/src/seqwait_async.rs
@@ -0,0 +1,224 @@
+//!
+//! Async version of 'seqwait.rs'
+//!
+//! NOTE: This is currently unused. If you need it, you'll need to uncomment
+//! its module declaration in lib.rs.
+//!
+
+#![warn(missing_docs)]
+
+use std::collections::BTreeMap;
+use std::fmt::Debug;
+use std::mem;
+use std::sync::Mutex;
+use std::time::Duration;
+use tokio::sync::watch::{channel, Receiver, Sender};
+use tokio::time::timeout;
+
+/// An error happened while waiting for a number
+///
+/// The `#[error("SeqWaitError")]` attribute sits on the enum itself, so the
+/// display text does not distinguish the variants; match on the variant
+/// (or compare with `==`, since `PartialEq` is derived) to tell them apart.
+#[derive(Debug, PartialEq, thiserror::Error)]
+#[error("SeqWaitError")]
+pub enum SeqWaitError {
+    /// The wait timeout was reached (see `SeqWait::wait_for_timeout`)
+    Timeout,
+    /// [`SeqWait::shutdown`] was called
+    Shutdown,
+}
+
+/// Internal components of a `SeqWait`
+///
+/// The whole struct lives behind the `Mutex` in `SeqWait`, so every field
+/// is only read or written while that lock is held.
+struct SeqWaitInt<T>
+where
+    T: Ord,
+{
+    // One watch channel per awaited number, keyed by that number.
+    // The Receiver is stored next to the Sender so that later waiters
+    // for the same number can clone it instead of creating a new channel.
+    waiters: BTreeMap<T, (Sender<()>, Receiver<()>)>,
+    // The most recent number announced through `advance` (or the
+    // starting number passed to `new`).
+    current: T,
+    // When true, `wait_for` returns `SeqWaitError::Shutdown` for any
+    // number that has not arrived yet.
+    shutdown: bool,
+}
+
+/// A tool for waiting on a sequence number
+///
+/// This provides a way to await the arrival of a number.
+/// As soon as the number arrives by another caller calling
+/// [`advance`], then the waiter will be woken up. Waiting for a
+/// number that is less than or equal to the current one completes
+/// immediately.
+///
+/// This implementation takes a blocking Mutex on both [`wait_for`]
+/// and [`advance`], meaning there may be unexpected executor blocking
+/// due to thread scheduling unfairness. There are probably better
+/// implementations, but we can probably live with this for now.
+/// The lock is released before [`wait_for`] starts awaiting.
+///
+/// [`wait_for`]: SeqWait::wait_for
+/// [`advance`]: SeqWait::advance
+///
+pub struct SeqWait<T>
+where
+    T: Ord,
+{
+    // All mutable state (current number, waiters, shutdown flag).
+    internal: Mutex<SeqWaitInt<T>>,
+}
+
+impl<T> SeqWait<T>
+where
+    T: Ord + Debug + Copy,
+{
+    /// Create a new `SeqWait`, initialized to a particular number
+    ///
+    /// Waiting for any number at or below `starting_num` completes
+    /// immediately.
+    pub fn new(starting_num: T) -> Self {
+        let internal = SeqWaitInt {
+            waiters: BTreeMap::new(),
+            current: starting_num,
+            shutdown: false,
+        };
+        SeqWait {
+            internal: Mutex::new(internal),
+        }
+    }
+
+    /// Shut down a `SeqWait`, causing all waiters (present and
+    /// future) to return an error.
+    ///
+    /// Waiting for a number that has already arrived still succeeds
+    /// after shutdown, because `wait_for` checks the current number first.
+    pub fn shutdown(&self) {
+        let waiters = {
+            let mut internal = self.internal.lock().unwrap();
+
+            // Prevent new waiters: from now on `wait_for` returns
+            // `SeqWaitError::Shutdown` instead of registering a channel
+            // that nobody would ever wake or drop.
+            internal.shutdown = true;
+
+            // This will steal the entire waiters map.
+            // When we drop it all waiters will be woken.
+            mem::take(&mut internal.waiters)
+
+            // Drop the lock as we exit this scope.
+        };
+
+        // Dropping the map drops every Sender, so each pending
+        // `changed()` call fails and `wait_for` reports
+        // `SeqWaitError::Shutdown`.
+        // This drop doesn't need to be explicit; it's done
+        // here to make it easier to read the code and understand
+        // the order of events.
+        drop(waiters);
+    }
+
+    /// Wait for a number to arrive
+    ///
+    /// This call won't complete until someone has called `advance`
+    /// with a number greater than or equal to the one we're waiting for.
+    ///
+    /// Returns `Ok(())` immediately if the number has already arrived.
+    /// Returns [`SeqWaitError::Shutdown`] if [`SeqWait::shutdown`] was
+    /// called before the number arrived, whether before or during the wait.
+    pub async fn wait_for(&self, num: T) -> Result<(), SeqWaitError> {
+        let mut rx = {
+            let mut internal = self.internal.lock().unwrap();
+            if internal.current >= num {
+                return Ok(());
+            }
+            if internal.shutdown {
+                return Err(SeqWaitError::Shutdown);
+            }
+
+            // Reuse the channel already registered for this number, or
+            // register a new one. Either way we wait on a clone of the
+            // stored Receiver.
+            let (_tx, stored_rx) = internal
+                .waiters
+                .entry(num)
+                .or_insert_with(|| channel(()));
+            stored_rx.clone()
+            // Drop the lock as we exit this scope.
+        };
+        // An Err from changed() means the sender was dropped, which is
+        // what `shutdown` does to every registered waiter.
+        rx.changed().await.map_err(|_| SeqWaitError::Shutdown)
+    }
+
+    /// Wait for a number to arrive
+    ///
+    /// This call won't complete until someone has called `advance`
+    /// with a number greater than or equal to the one we're waiting for.
+    ///
+    /// If that hasn't happened after the specified timeout duration,
+    /// [`SeqWaitError::Timeout`] will be returned.
+    pub async fn wait_for_timeout(
+        &self,
+        num: T,
+        timeout_duration: Duration,
+    ) -> Result<(), SeqWaitError> {
+        // The outer Err is the timer elapsing; the inner Result is
+        // whatever `wait_for` produced within the deadline.
+        timeout(timeout_duration, self.wait_for(num))
+            .await
+            .unwrap_or(Err(SeqWaitError::Timeout))
+    }
+
+    /// Announce a new number has arrived
+    ///
+    /// All waiters at this value or below will be woken.
+    ///
+    /// `advance` will panic if you send it a lower number than
+    /// a previous call.
+    pub fn advance(&self, num: T) {
+        let wake_these = {
+            let mut internal = self.internal.lock().unwrap();
+
+            if internal.current > num {
+                panic!(
+                    "tried to advance backwards, from {:?} to {:?}",
+                    internal.current, num
+                );
+            }
+            internal.current = num;
+
+            // `split_off(&num)` hands back every waiter keyed at or
+            // above `num`, so split and then swap: the high-numbered
+            // waiters stay registered and `split` holds everything
+            // strictly below `num`.
+            let mut split = internal.waiters.split_off(&num);
+            mem::swap(&mut split, &mut internal.waiters);
+
+            // The waiter keyed exactly at `num` ended up on the side
+            // that stays registered, but it is satisfied too; if it's
+            // there take that too.
+            if let Some(sleeper) = internal.waiters.remove(&num) {
+                split.insert(num, sleeper);
+            }
+
+            split
+            // Drop the lock as we exit this scope, before waking anyone.
+        };
+
+        for (_wake_num, (tx, _rx)) in wake_these {
+            // `send` reports an error when there are no receivers.
+            // We don't care; discard the error.
+            let _ = tx.send(());
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::Arc;
+    use tokio::time::{sleep, Duration};
+
+    /// Two tasks wait for the same number, get woken by `advance`, and a
+    /// waiter for a number that never arrives is released by `shutdown`.
+    #[tokio::test]
+    async fn seqwait() {
+        let seq = Arc::new(SeqWait::new(0));
+        let seq2 = Arc::clone(&seq);
+        let seq3 = Arc::clone(&seq);
+        let advancing_waiter = tokio::spawn(async move {
+            seq2.wait_for(42).await.expect("wait_for 42");
+            seq2.advance(100);
+            seq2.wait_for(999).await.expect_err("no 999");
+        });
+        let plain_waiter = tokio::spawn(async move {
+            seq3.wait_for(42).await.expect("wait_for 42");
+            seq3.wait_for(0).await.expect("wait_for 0");
+        });
+        // Give both tasks time to register their waiters.
+        sleep(Duration::from_secs(1)).await;
+        seq.advance(99);
+        seq.wait_for(100).await.expect("wait_for 100");
+        seq.shutdown();
+
+        // A panic inside a spawned task only surfaces through its
+        // JoinHandle; without joining, the assertions above could
+        // never fail this test.
+        advancing_waiter.await.expect("advancing waiter panicked");
+        plain_waiter.await.expect("plain waiter panicked");
+    }
+
+    /// A waiter with a short timeout gives up before the number arrives,
+    /// and a later `advance` is harmless.
+    #[tokio::test]
+    async fn seqwait_timeout() {
+        let seq = Arc::new(SeqWait::new(0));
+        let seq2 = Arc::clone(&seq);
+        let timed_waiter = tokio::spawn(async move {
+            let timeout = Duration::from_millis(1);
+            let res = seq2.wait_for_timeout(42, timeout).await;
+            assert_eq!(res, Err(SeqWaitError::Timeout));
+        });
+        sleep(Duration::from_secs(1)).await;
+        // This will attempt to wake, but nothing will happen
+        // because the waiter already dropped its Receiver.
+        seq.advance(99);
+
+        // Join so that a failed assertion in the task fails the test.
+        timed_waiter.await.expect("timed waiter panicked");
+    }
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Stas Kelvich	b3a6c4cf8a	Update docs/rfcs/017-user-management.md Co-authored-by: Anna Stepanyan <stepa6ka@gmail.com>	2022-07-15 13:30:23 +03:00
Stas Kelvich	fb8935305a	User management RFC	2022-07-15 13:19:02 +03:00