persistent_range_query: add layer_map_test

persistent_range_query: add stress test
persistent_range_query: more refs
2026-01-25 14:20:38 +00:00 · 2022-11-24 04:47:19 +02:00 · 2022-11-24 03:50:18 +02:00 · 2022-11-24 03:45:02 +02:00 · 2022-11-24 02:31:48 +02:00 · 2022-11-24 02:11:06 +02:00
85 changed files with 2893 additions and 802 deletions
--- a/.github/ansible/.gitignore
+++ b/.github/ansible/.gitignore
@@ -1,5 +1,3 @@
-zenith_install.tar.gz
-.zenith_current_version
 neon_install.tar.gz
 .neon_current_version

--- a/.github/ansible/production.hosts.yaml
+++ b/.github/ansible/production.hosts.yaml
@@ -22,6 +22,10 @@ storage:
          console_region_id: aws-us-west-2
        zenith-1-ps-3:
          console_region_id: aws-us-west-2
+        zenith-1-ps-4:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-5:
+          console_region_id: aws-us-west-2

    safekeepers:
      hosts:
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -0,0 +1,33 @@
+storage:
+  vars:
+    bucket_name: neon-dev-storage-eu-west-1
+    bucket_region: eu-west-1
+    console_mgmt_base_url: http://console-staging.local
+    etcd_endpoints: etcd-0.eu-west-1.aws.neon.build:2379
+    pageserver_config_stub:
+      pg_distrib_dir: /usr/local
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "pageserver/v1"
+    safekeeper_s3_prefix: safekeeper/v1/wal
+    hostname_suffix: ""
+    remote_user: ssm-user
+    ansible_aws_ssm_region: eu-west-1
+    ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
+    console_region_id: aws-eu-west-1
+
+  children:
+    pageservers:
+      hosts:
+        pageserver-0.eu-west-1.aws.neon.build:
+          ansible_host: i-01d496c5041c7f34c
+
+    safekeepers:
+      hosts:
+        safekeeper-0.eu-west-1.aws.neon.build:
+          ansible_host: i-05226ef85722831bf
+        safekeeper-1.eu-west-1.aws.neon.build:
+          ansible_host: i-06969ee1bf2958bfc
+        safekeeper-2.eu-west-1.aws.neon.build:
+          ansible_host: i-087892e9625984a0b
--- a/.github/ansible/staging.hosts.yaml
+++ b/.github/ansible/staging.hosts.yaml
@@ -3,7 +3,7 @@ storage:
    bucket_name: zenith-staging-storage-us-east-1
    bucket_region: us-east-1
    console_mgmt_base_url: http://console-staging.local
-    etcd_endpoints: zenith-us-stage-etcd.local:2379
+    etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
      remote_storage:
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -22,6 +22,8 @@ storage:
      hosts:
        pageserver-0.us-east-2.aws.neon.build:
          ansible_host: i-0c3e70929edb5d691
+        pageserver-1.us-east-2.aws.neon.build:
+          ansible_host: i-0565a8b4008aa3f40

    safekeepers:
      hosts:
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -0,0 +1,31 @@
+# Helm chart values for neon-proxy-scram.
+# This is a YAML-formatted file.
+
+image:
+  repository: neondatabase/neon
+
+settings:
+  authBackend: "console"
+  authEndpoint: "http://console-staging.local/management/api/v2"
+  domain: "*.eu-west-1.aws.neon.build"
+
+# -- Additional labels for neon-proxy pods
+podLabels:
+  zenith_service: proxy-scram
+  zenith_env: dev
+  zenith_region: eu-west-1
+  zenith_region_slug: eu-west-1
+
+exposedService:
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
+    external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
+
+#metrics:
+#  enabled: true
+#  serviceMonitor:
+#    enabled: true
+#    selector:
+#      release: kube-prometheus-stack
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -144,7 +144,9 @@ jobs:
        # neon-captest-new: Run pgbench in a freshly created project
        # neon-captest-reuse: Same, but reusing existing project
        # neon-captest-prefetch: Same, with prefetching enabled (new project)
-        platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
+        # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
+        # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
+        platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
        db_size: [ 10gb ]
        include:
          - platform: neon-captest-new
@@ -164,7 +166,7 @@ jobs:
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
      PLATFORM: ${{ matrix.platform }}

-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
      options: --init
@@ -207,8 +209,11 @@ jobs:
          rds-aurora)
            CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
            ;;
+          rds-postgres)
+            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
+            ;;
          *)
-            echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
            exit 1
            ;;
        esac
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -18,8 +18,8 @@ env:

 jobs:
  tag:
-    runs-on: dev
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
+    runs-on: [ self-hosted, dev, x64 ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
    outputs:
      build-tag: ${{steps.build-tag.outputs.tag}}

@@ -46,7 +46,7 @@ jobs:
        id: build-tag

  build-neon:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -236,7 +236,7 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  regress-tests:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -269,7 +269,7 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  benchmarks:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -300,7 +300,7 @@ jobs:
      # while coverage is currently collected for the debug ones

  merge-allure-report:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -338,7 +338,7 @@ jobs:
          DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json

  coverage-report:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -415,7 +415,7 @@ jobs:
        shell: bash -euxo pipefail {0}

  trigger-e2e-tests:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
      options: --init
@@ -460,7 +460,7 @@ jobs:
            }"

  neon-image:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    needs: [ tag ]
    container: gcr.io/kaniko-project/executor:v1.9.0-debug

@@ -478,7 +478,7 @@ jobs:
        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}

  compute-tools-image:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    needs: [ tag ]
    container: gcr.io/kaniko-project/executor:v1.9.0-debug

@@ -493,7 +493,7 @@ jobs:
        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}

  compute-node-image-v14:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container: gcr.io/kaniko-project/executor:v1.9.0-debug
    needs: [ tag ]
    steps:
@@ -510,7 +510,7 @@ jobs:
        run: /kaniko/executor --skip-unused-stages  --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache  --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}

  compute-node-image-v15:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container: gcr.io/kaniko-project/executor:v1.9.0-debug
    needs: [ tag ]
    steps:
@@ -528,7 +528,7 @@ jobs:

  test-images:
    needs: [ tag, neon-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]

    steps:
      - name: Checkout
@@ -570,7 +570,7 @@ jobs:
          docker compose -f ./docker-compose/docker-compose.yml down

  promote-images:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    needs: [ tag, test-images ]
    if: github.event_name != 'workflow_dispatch'
    container: amazon/aws-cli
@@ -586,7 +586,7 @@ jobs:
          aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"

  push-docker-hub:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    needs: [ promote-images, tag ]
    container: golang:1.19-bullseye

@@ -736,7 +736,7 @@ jobs:
          rm -f neon_install.tar.gz .neon_current_version

  deploy-new:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
@@ -761,7 +761,6 @@ jobs:
        run: |
          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
          cd "$(pwd)/.github/ansible"
-
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
            ./get_binaries.sh
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
@@ -770,6 +769,38 @@ jobs:
            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
            exit 1
          fi
+          ansible-galaxy collection install sivel.toiletwater
+          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
+          rm -f neon_install.tar.gz .neon_current_version
+
+  deploy-pr-test-new:
+    runs-on: [ self-hosted, dev, x64 ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
+    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
+    needs: [ push-docker-hub, tag, regress-tests ]
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
+      github.event_name != 'workflow_dispatch'
+    defaults:
+      run:
+        shell: bash
+    strategy:
+      matrix:
+        target_region: [ eu-west-1 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+
+      - name: Redeploy
+        run: |
+          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
+          cd "$(pwd)/.github/ansible"
+
+          ./get_binaries.sh

          ansible-galaxy collection install sivel.toiletwater
          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
@@ -780,7 +811,7 @@ jobs:
    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
-    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    needs: [ push-docker-hub, tag, regress-tests ]
    if: |
      (github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'
@@ -816,7 +847,7 @@ jobs:
          rm -f neon_install.tar.gz .neon_current_version

  deploy-proxy:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
@@ -858,10 +889,10 @@ jobs:
          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

  deploy-proxy-new:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
-    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    needs: [ push-docker-hub, tag, regress-tests ]
    if: |
      (github.ref_name == 'main') &&
      github.event_name != 'workflow_dispatch'
@@ -873,6 +904,8 @@ jobs:
        include:
          - target_region:  us-east-2
            target_cluster: dev-us-east-2-beta
+          - target_region:  eu-west-1
+            target_cluster: dev-eu-west-1-zeta
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -894,7 +927,7 @@ jobs:
    runs-on: prod
    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
-    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    needs: [ push-docker-hub, tag, regress-tests ]
    if: |
      (github.ref_name == 'release') &&
      github.event_name != 'workflow_dispatch'
@@ -928,7 +961,7 @@ jobs:
          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

  promote-compatibility-data:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -115,7 +115,7 @@ jobs:
        run: cargo build --locked --all --all-targets

  check-rust-dependencies:
-    runs-on: dev
+    runs-on: [ self-hosted, dev, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
--- a/1
+++ b/1
@@ -8,3 +8,4 @@
 /pgxn/ @neondatabase/compute
 /proxy/ @neondatabase/control-plane 
 /safekeeper/ @neondatabase/safekeepers
+/vendor/ @neondatabase/compute
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2255,6 +2255,14 @@ version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"

+[[package]]
+name = "persistent_range_query"
+version = "0.1.0"
+dependencies = [
+ "rand",
+ "workspace_hack",
+]
+
 [[package]]
 name = "petgraph"
 version = "0.6.2"
--- a/28
+++ b/28
@@ -20,18 +20,18 @@ else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif

-# Seccomp BPF is only available for Linux
 UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Linux)
+	# Seccomp BPF is only available for Linux
 	PG_CONFIGURE_OPTS += --with-libseccomp
-endif
-
-# macOS with brew-installed openssl requires explicit paths
-# It can be configured with OPENSSL_PREFIX variable
-UNAME_S := $(shell uname -s)
-ifeq ($(UNAME_S),Darwin)
-    OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
-    PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
+else ifeq ($(UNAME_S),Darwin)
+	# macOS with brew-installed openssl requires explicit paths
+	# It can be configured with OPENSSL_PREFIX variable
+	OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
+	PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
+	# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
+	# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
+	EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
 endif

 # Use -C option so that when PostgreSQL "make install" installs the
@@ -73,7 +73,8 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
 	+@echo "Configuring Postgres v14 build"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
 	(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
-	$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
+	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
+		CFLAGS='$(PG_CFLAGS)' \
 		$(PG_CONFIGURE_OPTS) \
 		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)

@@ -81,7 +82,8 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
 	+@echo "Configuring Postgres v15 build"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
 	(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
-	$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
+	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
+		CFLAGS='$(PG_CFLAGS)' \
 		$(PG_CONFIGURE_OPTS) \
 		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)

@@ -111,6 +113,8 @@ postgres-v14: postgres-v14-configure \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
 	+@echo "Compiling libpq v14"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
+	+@echo "Compiling pg_prewarm v14"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache v14"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect v14"
@@ -123,6 +127,8 @@ postgres-v15: postgres-v15-configure \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
 	+@echo "Compiling libpq v15"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
+	+@echo "Compiling pg_prewarm v15"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
 	+@echo "Compiling pg_buffercache v15"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
 	+@echo "Compiling pageinspect v15"
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 1. Install XCode and dependencies
 ```
 xcode-select --install
-brew install protobuf etcd openssl
+brew install protobuf etcd openssl flex bison
 ```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -125,24 +125,23 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
 # Create repository in .neon with proper paths to binaries and data
 # Later that would be responsibility of a package install script
 > ./target/debug/neon_local init
-Starting pageserver at '127.0.0.1:64000' in '.neon'
-
-Pageserver started
-Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
-Stopping pageserver gracefully...done!
+Starting pageserver at '127.0.0.1:64000' in '.neon'.
+pageserver started, pid: 2545906
+Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
+Stopped pageserver 1 process with pid 2545906

 # start pageserver and safekeeper
 > ./target/debug/neon_local start
-Starting etcd broker using /usr/bin/etcd
-Starting pageserver at '127.0.0.1:64000' in '.neon'
-
-Pageserver started
-Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
-Safekeeper started
+Starting etcd broker using "/usr/bin/etcd"
+etcd started, pid: 2545996
+Starting pageserver at '127.0.0.1:64000' in '.neon'.
+pageserver started, pid: 2546005
+Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
+safekeeper 1 started, pid: 2546041

 # start postgres compute node
 > ./target/debug/neon_local pg start main
-Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
+Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
 Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

--- a/cli-v2-story.md
+++ b/cli-v2-story.md
@@ -1,188 +0,0 @@
-Create a new Zenith repository in the current directory:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
-    The files belonging to this database system will be owned by user "heikki".
-    This user must also own the server process.
-    
-    The database cluster will be initialized with locale "en_GB.UTF-8".
-    The default database encoding has accordingly been set to "UTF8".
-    The default text search configuration will be set to "english".
-    
-    Data page checksums are disabled.
-    
-    creating directory tmp ... ok
-    creating subdirectories ... ok
-    selecting dynamic shared memory implementation ... posix
-    selecting default max_connections ... 100
-    selecting default shared_buffers ... 128MB
-    selecting default time zone ... Europe/Helsinki
-    creating configuration files ... ok
-    running bootstrap script ... ok
-    performing post-bootstrap initialization ... ok
-    syncing data to disk ... ok
-    
-    initdb: warning: enabling "trust" authentication for local connections
-    You can change this by editing pg_hba.conf or using the option -A, or
-    --auth-local and --auth-host, the next time you run initdb.
-    new zenith repository was created in .zenith
-
-Initially, there is only one branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
-      main
-
-Start a local Postgres instance on the branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
-    Creating data directory from snapshot at 0/15FFB08...
-    waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG:  starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
-    2021-04-13 09:27:43.920 EEST [984664] LOG:  listening on IPv6 address "::1", port 5432
-    2021-04-13 09:27:43.920 EEST [984664] LOG:  listening on IPv4 address "127.0.0.1", port 5432
-    2021-04-13 09:27:43.927 EEST [984664] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5432"
-    2021-04-13 09:27:43.939 EEST [984665] LOG:  database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
-    2021-04-13 09:27:43.939 EEST [984665] LOG:  creating missing WAL directory "pg_wal/archive_status"
-    2021-04-13 09:27:44.189 EEST [984665] LOG:  database system was not properly shut down; automatic recovery in progress
-    2021-04-13 09:27:44.195 EEST [984665] LOG:  invalid record length at 0/15FFB80: wanted 24, got 0
-    2021-04-13 09:27:44.195 EEST [984665] LOG:  redo is not required
-    2021-04-13 09:27:44.225 EEST [984664] LOG:  database system is ready to accept connections
-     done
-    server started
-
-Run some commands against it:
-
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);" 
-    CREATE TABLE
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');" 
-    INSERT 0 1
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-    (1 row)
-
-Create a new branch called 'experimental'. We create it from the
-current end of the 'main' branch, but you could specify a different
-LSN as the start point instead.
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
-    branching at end of WAL: 0/161F478
-    
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch 
-      experimental
-      main
-
-Start another Postgres instance off the 'experimental' branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
-    Creating data directory from snapshot at 0/15FFB08...
-    waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG:  starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
-    2021-04-13 09:28:41.875 EEST [984766] LOG:  listening on IPv6 address "::1", port 5433
-    2021-04-13 09:28:41.875 EEST [984766] LOG:  listening on IPv4 address "127.0.0.1", port 5433
-    2021-04-13 09:28:41.883 EEST [984766] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5433"
-    2021-04-13 09:28:41.896 EEST [984767] LOG:  database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
-    2021-04-13 09:28:42.265 EEST [984767] LOG:  database system was not properly shut down; automatic recovery in progress
-    2021-04-13 09:28:42.269 EEST [984767] LOG:  redo starts at 0/15FFB80
-    2021-04-13 09:28:42.272 EEST [984767] LOG:  invalid record length at 0/161F4B0: wanted 24, got 0
-    2021-04-13 09:28:42.272 EEST [984767] LOG:  redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
-    2021-04-13 09:28:42.321 EEST [984766] LOG:  database system is ready to accept connections
-     done
-    server started
-
-Insert some a row on the 'experimental' branch:
-
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-    (1 row)
-    
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')" 
-    INSERT 0 1
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-     inserted on experimental
-    (2 rows)
-    
-See that the other Postgres instance is still running on 'main' branch on port 5432:
-
-
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-    (1 row)
-
-
-
-
-Everything is stored in the .zenith directory:
-
-    ~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
-    total 12
-    drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
-    drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
-    drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
-
-The 'datadirs' directory contains the datadirs of the running instances:
-
-    ~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
-    total 8
-    drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
-    drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
-    ~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
-    total 124
-    drwxr-xr-x 5 heikki heikki  4096 Apr 13 09:27 base
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 global
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_commit_ts
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_dynshmem
-    -rw------- 1 heikki heikki  4760 Apr 13 09:27 pg_hba.conf
-    -rw------- 1 heikki heikki  1636 Apr 13 09:27 pg_ident.conf
-    drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:32 pg_logical
-    drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:27 pg_multixact
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_notify
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_replslot
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_serial
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_snapshots
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_stat
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:34 pg_stat_tmp
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_subtrans
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_tblspc
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_twophase
-    -rw------- 1 heikki heikki     3 Apr 13 09:27 PG_VERSION
-    lrwxrwxrwx 1 heikki heikki    52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
-    drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_xact
-    -rw------- 1 heikki heikki    88 Apr 13 09:27 postgresql.auto.conf
-    -rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-    -rw------- 1 heikki heikki    96 Apr 13 09:27 postmaster.opts
-    -rw------- 1 heikki heikki   149 Apr 13 09:27 postmaster.pid
-
-Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
-datadir is ephemeral, you can delete it at any time, and it can be reconstructed
-from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
-the repository, the 'datadirs' are not included. (They are like git working trees)
-
-    ~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
-    ~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
-    ~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
-    Creating data directory from snapshot at 0/15FFB08...
-    waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG:  starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
-    2021-04-13 09:37:05.477 EEST [985340] LOG:  listening on IPv6 address "::1", port 5433
-    2021-04-13 09:37:05.477 EEST [985340] LOG:  listening on IPv4 address "127.0.0.1", port 5433
-    2021-04-13 09:37:05.487 EEST [985340] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5433"
-    2021-04-13 09:37:05.498 EEST [985341] LOG:  database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
-    2021-04-13 09:37:05.808 EEST [985341] LOG:  database system was not properly shut down; automatic recovery in progress
-    2021-04-13 09:37:05.813 EEST [985341] LOG:  redo starts at 0/15FFB80
-    2021-04-13 09:37:05.815 EEST [985341] LOG:  invalid record length at 0/161F770: wanted 24, got 0
-    2021-04-13 09:37:05.815 EEST [985341] LOG:  redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
-    2021-04-13 09:37:05.866 EEST [985340] LOG:  database system is ready to accept connections
-     done
-    server started
-    ~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo" 
-                  t              
-    -----------------------------
-     inserted on the main branch
-     inserted on experimental
-    (2 rows)
-
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -26,8 +26,18 @@ use nix::unistd::Pid;

 use utils::lock_file;

-const RETRIES: u32 = 15;
-const RETRY_TIMEOUT_MILLIS: u64 = 500;
+// These constants control the loop used to poll for process start / stop.
+//
+// The loop waits for at most 10 seconds, polling every 100 ms.
+// Once a second, it prints a dot ("."), to give the user an indication that
+// it's waiting. If the process hasn't started/stopped after 5 seconds,
+// it prints a notice that it's taking long, but keeps waiting.
+//
+const RETRY_UNTIL_SECS: u64 = 10;
+const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
+const RETRY_INTERVAL_MILLIS: u64 = 100;
+const DOT_EVERY_RETRIES: u64 = 10;
+const NOTICE_AFTER_RETRIES: u64 = 50;

 /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
 /// it itself.
@@ -107,16 +117,16 @@ where
                return Ok(spawned_process);
            }
            Ok(false) => {
-                if retries < 5 {
+                if retries == NOTICE_AFTER_RETRIES {
+                    // The process is taking a long time to start up. Keep waiting, but
+                    // print a message
+                    print!("\n{process_name} has not started yet, continuing to wait");
+                }
+                if retries % DOT_EVERY_RETRIES == 0 {
                    print!(".");
                    io::stdout().flush().unwrap();
-                } else {
-                    if retries == 5 {
-                        println!() // put a line break after dots for second message
-                    }
-                    println!("{process_name} has not started yet, retrying ({retries})...");
                }
-                thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
+                thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
            }
            Err(e) => {
                println!("{process_name} failed to start: {e:#}");
@@ -127,7 +137,8 @@ where
            }
        }
    }
-    anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
+    println!();
+    anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
 }

 /// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
@@ -158,7 +169,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
    }

    // Wait until process is gone
-    for _ in 0..RETRIES {
+    for retries in 0..RETRIES {
        match process_has_stopped(pid) {
            Ok(true) => {
                println!("\n{process_name} stopped");
@@ -170,9 +181,16 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
                return Ok(());
            }
            Ok(false) => {
-                print!(".");
-                io::stdout().flush().unwrap();
-                thread::sleep(Duration::from_secs(1))
+                if retries == NOTICE_AFTER_RETRIES {
+                    // The process is taking a long time to shut down. Keep waiting, but
+                    // print a message
+                    print!("\n{process_name} has not stopped yet, continuing to wait");
+                }
+                if retries % DOT_EVERY_RETRIES == 0 {
+                    print!(".");
+                    io::stdout().flush().unwrap();
+                }
+                thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
            }
            Err(e) => {
                println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -180,24 +198,21 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
            }
        }
    }
-
-    anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
+    println!();
+    anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
 }

 fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
    let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");

-    let var = "LLVM_PROFILE_FILE";
-    if let Some(val) = std::env::var_os(var) {
-        filled_cmd = filled_cmd.env(var, val);
+    // Pass through these environment variables to the command
+    for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
+        if let Some(val) = std::env::var_os(var) {
+            filled_cmd = filled_cmd.env(var, val);
+        }
    }

-    const RUST_LOG_KEY: &str = "RUST_LOG";
-    if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
-        filled_cmd.env(RUST_LOG_KEY, rust_log_value)
-    } else {
-        filled_cmd
-    }
+    filled_cmd
 }

 fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -343,7 +343,7 @@ impl PostgresNode {
        //   To be able to restore database in case of pageserver node crash, safekeeper should not
        //   remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
        //   (if they are not able to upload WAL to S3).
-        conf.append("max_replication_write_lag", "500MB");
+        conf.append("max_replication_write_lag", "15MB");
        conf.append("max_replication_flush_lag", "10GB");

        if !self.env.safekeepers.is_empty() {
--- a/control_plane/src/etcd.rs
+++ b/control_plane/src/etcd.rs
@@ -6,7 +6,7 @@ use crate::{background_process, local_env};

 pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let etcd_broker = &env.etcd_broker;
-    println!(
+    print!(
        "Starting etcd broker using {:?}",
        etcd_broker.etcd_binary_path
    );
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -237,7 +237,7 @@ impl PageServerNode {
        datadir: &Path,
        update_config: bool,
    ) -> anyhow::Result<Child> {
-        println!(
+        print!(
            "Starting pageserver at '{}' in '{}'",
            self.pg_connection_config.raw_address(),
            datadir.display()
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -37,7 +37,7 @@

 - [Source view](./sourcetree.md)
  - [docker.md](./docker.md) — Docker images and building pipeline.
-  - [Error handling and logging]()
+  - [Error handling and logging](./error-handling.md)
  - [Testing]()
    - [Unit testing]()
    - [Integration testing]()
--- a/docs/error-handling.md
+++ b/docs/error-handling.md
@@ -0,0 +1,198 @@
+# Error handling and logging
+
+## Logging errors
+
+The principle is that errors are logged when they are handled. If you
+just propagate an error to the caller in a function, you don't need to
+log it; the caller will. But if you consume an error in a function,
+you *must* log it (if it needs to be logged at all).
+
+For example:
+
+```rust
+fn read_motd_file() -> std::io::Result<String> {
+    let mut f = File::open("/etc/motd")?;
+    let mut result = String::new();
+    f.read_to_string(&mut result)?;
+    Ok(result)
+}
+```
+
+Opening or reading the file could fail, but there is no need to log
+the error here. The function merely propagates the error to the
+caller, and it is up to the caller to log the error or propagate it
+further, if the failure is not expected. But if, for example, it is
+normal that the "/etc/motd" file doesn't exist, the caller can choose
+to silently ignore the error, or log it as an INFO or DEBUG level
+message:
+
+```rust
+fn get_message_of_the_day() -> String {
+    // Get the motd from /etc/motd, or return the default proverb
+    match read_motd_file() {
+        Ok(motd) => motd,
+        Err(err)  => {
+            // It's normal that /etc/motd doesn't exist, but if we fail to
+            // read it for some other reason, that's unexpected. The message
+            // of the day isn't very important though, so we just WARN and
+            // continue with the default in any case.
+            if err.kind() != std::io::ErrorKind::NotFound {
+                 tracing::warn!("could not read \"/etc/motd\": {err:?}");
+            }
+            "An old error is always more popular than a new truth. - German proverb".to_string()
+        }
+    }
+}
+```
+
+## Error types
+
+We use the `anyhow` crate widely. It contains many convenient macros
+like `bail!` and `ensure!` to construct and return errors, and to
+propagate many kinds of low-level errors, wrapped in `anyhow::Error`.
+
+A downside of `anyhow::Error` is that the caller cannot distinguish
+between different error cases. Most errors are propagated all the way
+to the mgmt API handler function, or the main loop that handles a
+connection with the compute node, and they are all handled the same
+way: the error is logged and returned to the client as an HTTP or
+libpq error.
+
+But in some cases, we need to distinguish between errors and handle
+them differently. For example, attaching a tenant to the pageserver
+could fail either because the tenant has already been attached, or
+because we could not load its metadata from cloud storage. The first
+case is more or less expected. The console sends the Attach request to
+the pageserver, and the pageserver completes the operation, but the
+network connection might be lost before the console receives the
+response. The console will retry the operation in that case, but the
+tenant has already been attached. It is important that the pageserver
+responds with the HTTP 409 Conflict error in that case, rather
+than a generic HTTP 500 Internal Server Error.
+
+If you need to distinguish between different kinds of errors, create a
+new `Error` type. The `thiserror` crate is useful for that. But in
+most cases `anyhow::Error` is good enough.
+
+## Panics
+
+Depending on where a panic happens, it can cause the whole pageserver
+or safekeeper to restart, or just a single tenant. In either case,
+that is pretty bad and causes an outage. Avoid panics. Never use
+`unwrap()` or other calls that might panic, to verify inputs from the
+network or from disk.
+
+It is acceptable to use functions that might panic, like `unwrap()`, if
+it is obvious that it cannot panic. For example, if you have just
+checked that a variable is not None, it is OK to call `unwrap()` on it,
+but it is still preferable to use `expect("reason")` instead to explain
+why the function cannot fail.
+
+`assert!` and `panic!` are reserved for checking clear invariants and
+very obvious "can't happen" cases. When in doubt, use anyhow `ensure!`
+or `bail!` instead.
+
+## Error levels
+
+`tracing::Level` doesn't provide very clear guidelines on what the
+different levels mean, or when to use which level. Here is how we use
+them:
+
+### Error
+
+Examples:
+- could not open file "foobar"
+- invalid tenant id
+
+Errors are not expected to happen during normal operation. Incorrect
+inputs from client can cause ERRORs. For example, if a client tries to
+call a mgmt API that doesn't exist, or if a compute node passes
+an LSN that has already been garbage collected away.
+
+These should *not* happen during normal operations. "Normal
+operations" is not a very precise concept. But for example, disk
+errors are not expected to happen when the system is working, so those
+count as Errors. However, if a TCP connection to a compute node is
+lost, that is not considered an Error, because it doesn't affect the
+pageserver's or safekeeper's operation in any way, and happens fairly
+frequently when compute nodes are shut down, or are killed abruptly
+because of errors in the compute.
+
+**Errors are monitored, and always need human investigation to determine
+the cause.**
+
+Whether something should be logged at ERROR, WARNING or INFO level can
+depend on the callers and clients. For example, it might be unexpected
+and a sign of a serious issue if the console calls the
+"timeline_detail" mgmt API for a timeline that doesn't exist. ERROR
+would be appropriate in that case. But if the console routinely calls
+the API after deleting a timeline, to check if the deletion has
+completed, then it would be totally normal and an INFO or DEBUG level
+message would be more appropriate. If a message is logged as an ERROR,
+but it in fact happens frequently in production and never requires any
+action, it should probably be demoted to an INFO level message.
+
+### Warn
+
+Examples:
+- could not remove temporary file "foobar.temp"
+- unrecognized file "foobar" in timeline directory
+
+Warnings are similar to Errors, in that they should not happen
+when the system is operating normally. The difference between Error and
+Warning is that an Error means that the operation failed, whereas Warning
+means that something unexpected happened, but the operation continued anyway.
+For example, if deleting a file fails because the file did not exist in the
+first place, it should be logged as Warning.
+
+> **Note:** The python regression tests, under `test_regress`, check the
+> pageserver log after each test for any ERROR and WARN lines. If there are
+> any ERRORs or WARNs that have not been explicitly listed in the test as
+> allowed, the test is marked as failed. This is to catch unexpected errors
+> e.g. in background operations, that don't cause immediate misbehaviour in
+> the tested functionality.
+
+### Info
+
+Info level is used to log useful information when the system is
+operating normally. Info level is appropriate e.g. for logging state
+changes, background operations, and network connections.
+
+Examples:
+- "system is shutting down"
+- "tenant was created"
+- "retrying S3 upload"
+
+### Debug & Trace
+
+Debug and Trace level messages are not printed to the log in our normal
+production configuration, but could be enabled for a specific server or
+tenant, to aid debugging. (Although we don't actually have that
+capability as of this writing).
+
+## Context
+
+We use logging "spans" to hold context information about the current
+operation. Almost every operation happens on a particular tenant and
+timeline, so we enter a span with the "tenant_id" and "timeline_id"
+very early when processing an incoming API request, for example. All
+background operations should also run in a span containing at least
+those two fields, and any other parameters or information that might
+be useful when debugging an error that might happen when performing
+the operation.
+
+TODO: Spans are not captured in the Error when it is created, but when
+the error is logged. It would be more useful to capture them at Error
+creation. We should consider using `tracing_error::SpanTrace` to do
+that.
+
+## Error message style
+
+PostgreSQL has a style guide for writing error messages:
+
+https://www.postgresql.org/docs/current/error-style-guide.html
+
+Follow that guide when writing error messages in the PostgreSQL
+extension. We don't follow it strictly in the pageserver and
+safekeeper, but the advice in the PostgreSQL style guide is generally
+good, and you can't go wrong by following it.
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -83,6 +83,16 @@ A subject for future modularization.
 `/libs/metrics`:
 Helpers for exposing Prometheus metrics from the server.

+### Adding dependencies
+When you add a Cargo dependency, you should update the hakari manifest by running the commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine.
+
+```bash
+cargo hakari generate
+cargo hakari manage-deps
+```
+
+If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.
+
 ## Using Python
 Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
 so manual installation of dependencies is not recommended.
--- a/libs/persistent_range_query/Cargo.toml
+++ b/libs/persistent_range_query/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "persistent_range_query"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
+
+[dev-dependencies]
+rand = "0.8.3"
--- a/libs/persistent_range_query/src/lib.rs
+++ b/libs/persistent_range_query/src/lib.rs
@@ -0,0 +1,78 @@
+use std::ops::Range;
+
+pub mod naive;
+pub mod ops;
+pub mod segment_tree;
+
+/// Should be a monoid:
+/// * Identity element: for all a: combine(new_for_empty_range(), a) = combine(a, new_for_empty_range()) = a
+/// * Associativity: for all a, b, c: combine(combine(a, b), c) == combine(a, combine(b, c))
+pub trait RangeQueryResult<Key>: Sized + Clone {
+    // Clone is equivalent to combine with an empty range.
+
+    fn new_for_empty_range() -> Self;
+
+    // Contract: left_range.end == right_range.start
+    // left_range.start == left_range.end == right_range.start == right_range.end is still possible
+    fn combine(
+        left: &Self,
+        left_range: &Range<Key>,
+        right: &Self,
+        right_range: &Range<Key>,
+    ) -> Self;
+
+    fn add(left: &mut Self, left_range: &Range<Key>, right: &Self, right_range: &Range<Key>);
+}
+
+pub trait LazyRangeInitializer<Result: RangeQueryResult<Key>, Key> {
+    fn get(&self, range: &Range<Key>) -> Result;
+}
+
+/// A modification that can be applied to the query result of a whole range.
+///
+/// Should be a monoid:
+/// * Identity element: for all op: compose(no_op(), op) == compose(op, no_op()) == op
+/// * Associativity: for all op_1, op_2, op_3: compose(compose(op_1, op_2), op_3) == compose(op_1, compose(op_2, op_3))
+///
+/// Should be a left action on `Result`:
+/// * Identity operation: for all r: no_op().apply(r) == r
+/// * Compatibility: for all op_1, op_2, r: op_1.apply(op_2.apply(r)) == compose(op_1, op_2).apply(r)
+pub trait RangeModification<Key> {
+    /// The query result type that this modification updates.
+    type Result: RangeQueryResult<Key>;
+
+    /// Returns the identity modification, i.e. one that changes nothing.
+    fn no_op() -> Self;
+    /// Returns `true` if this modification is the identity.
+    fn is_no_op(&self) -> bool;
+    /// Returns `true` if this modification is a "reinitialization".
+    // NOTE(review): presumably this means the outcome does not depend on the
+    // previous contents of the range (e.g. an assignment) -- confirm.
+    fn is_reinitialization(&self) -> bool;
+    /// Updates `result`, the query result for `range`, in place so that it
+    /// reflects this modification applied to every key in `range`.
+    fn apply(&self, result: &mut Self::Result, range: &Range<Key>);
+    /// Composes two modifications in place: afterwards `earlier` stands for
+    /// "the old `earlier`, followed by `later`". Note the argument order.
+    fn compose(later: &Self, earlier: &mut Self);
+}
+
+pub trait VecReadableVersion<Modification: RangeModification<Key>, Key> {
+    fn get(&self, keys: &Range<Key>) -> Modification::Result;
+}
+
+// TODO: use trait alias when stabilized
+pub trait VecFrozenVersion<Modification: RangeModification<Key>, Key>:
+    Clone + VecReadableVersion<Modification, Key>
+{
+}
+
+impl<
+        T: Clone + VecReadableVersion<Modification, Key>,
+        Modification: RangeModification<Key>,
+        Key,
+    > VecFrozenVersion<Modification, Key> for T
+{
+}
+
+pub trait PersistentVecStorage<
+    Modification: RangeModification<Key>,
+    Initializer: LazyRangeInitializer<Modification::Result, Key>,
+    Key,
+>: VecReadableVersion<Modification, Key>
+{
+    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self;
+
+    type FrozenVersion: VecFrozenVersion<Modification, Key>;
+
+    fn modify(&mut self, keys: &Range<Key>, modification: &Modification);
+    fn freeze(&mut self) -> Self::FrozenVersion;
+}
--- a/libs/persistent_range_query/src/naive.rs
+++ b/libs/persistent_range_query/src/naive.rs
@@ -0,0 +1,115 @@
+use crate::{
+    LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
+    VecReadableVersion,
+};
+use std::marker::PhantomData;
+use std::ops::Range;
+use std::rc::Rc;
+
+pub struct NaiveFrozenVersion<Modification: RangeModification<Key>, Key> {
+    all_keys: Range<Key>,
+    values: Rc<Box<Vec<Modification::Result>>>,
+}
+
+pub trait IndexableKey: Clone {
+    fn index(all_keys: &Range<Self>, key: &Self) -> usize;
+    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self>;
+}
+
+fn get<Modification: RangeModification<Key>, Key: IndexableKey>(
+    all_keys: &Range<Key>,
+    values: &Vec<Modification::Result>,
+    keys: &Range<Key>,
+) -> Modification::Result {
+    let mut result = Modification::Result::new_for_empty_range();
+    let mut result_range = keys.start.clone()..keys.start.clone();
+    for index in
+        IndexableKey::index(&all_keys, &keys.start)..IndexableKey::index(&all_keys, &keys.end)
+    {
+        let element_range = IndexableKey::element_range(&all_keys, index);
+        Modification::Result::add(&mut result, &result_range, &values[index], &element_range);
+        result_range.end = element_range.end;
+    }
+    result
+}
+
+impl<Modification: RangeModification<Key>, Key: IndexableKey> VecReadableVersion<Modification, Key>
+    for NaiveFrozenVersion<Modification, Key>
+{
+    fn get(&self, keys: &Range<Key>) -> Modification::Result {
+        get::<Modification, Key>(&self.all_keys, &self.values, keys)
+    }
+}
+
+// Manual implementation of `Clone` because `derive` requires `Modification: Clone`
+impl<Modification: RangeModification<Key>, Key: Clone> Clone
+    for NaiveFrozenVersion<Modification, Key>
+{
+    fn clone(&self) -> Self {
+        Self {
+            all_keys: self.all_keys.clone(),
+            values: self.values.clone(),
+        }
+    }
+}
+
+// TODO: is it at all possible to store previous versions in this struct,
+// without any Rc<>?
+pub struct NaiveVecStorage<
+    Modification: RangeModification<Key>,
+    Initializer: LazyRangeInitializer<Modification::Result, Key>,
+    Key: IndexableKey,
+> {
+    all_keys: Range<Key>,
+    last_version: Vec<Modification::Result>,
+    _initializer: PhantomData<Initializer>,
+}
+
+impl<
+        Modification: RangeModification<Key>,
+        Initializer: LazyRangeInitializer<Modification::Result, Key>,
+        Key: IndexableKey,
+    > VecReadableVersion<Modification, Key> for NaiveVecStorage<Modification, Initializer, Key>
+{
+    fn get(&self, keys: &Range<Key>) -> Modification::Result {
+        get::<Modification, Key>(&self.all_keys, &self.last_version, keys)
+    }
+}
+
+// Straightforward `PersistentVecStorage` that keeps one result per key in a
+// plain `Vec`. Every operation walks the affected elements one by one, and
+// `freeze` copies the whole vector.
+impl<
+        Modification: RangeModification<Key>,
+        Initializer: LazyRangeInitializer<Modification::Result, Key>,
+        Key: IndexableKey,
+    > PersistentVecStorage<Modification, Initializer, Key>
+    for NaiveVecStorage<Modification, Initializer, Key>
+{
+    /// Builds the storage by asking `initializer` for the result of every
+    /// single-element range inside `all_keys`.
+    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
+        // Iterate over the number of keys, not over `Vec::capacity()`:
+        // `Vec::with_capacity(n)` only guarantees a capacity of *at least* `n`
+        // (and reports `usize::MAX` for zero-sized element types), so looping
+        // up to the capacity could produce elements outside of `all_keys`.
+        let len = IndexableKey::index(&all_keys, &all_keys.end);
+        let values: Vec<Modification::Result> = (0..len)
+            .map(|index| initializer.get(&IndexableKey::element_range(&all_keys, index)))
+            .collect();
+        NaiveVecStorage {
+            all_keys,
+            last_version: values,
+            _initializer: PhantomData,
+        }
+    }
+
+    type FrozenVersion = NaiveFrozenVersion<Modification, Key>;
+
+    /// Applies `modification` separately to every element whose index lies in `keys`.
+    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
+        for index in IndexableKey::index(&self.all_keys, &keys.start)
+            ..IndexableKey::index(&self.all_keys, &keys.end)
+        {
+            let element_range = IndexableKey::element_range(&self.all_keys, index);
+            modification.apply(&mut self.last_version[index], &element_range);
+        }
+    }
+
+    /// Returns a snapshot holding a full copy of the current values; later
+    /// calls to `modify` do not affect it.
+    fn freeze(&mut self) -> Self::FrozenVersion {
+        NaiveFrozenVersion::<Modification, Key> {
+            all_keys: self.all_keys.clone(),
+            values: Rc::new(Box::new(self.last_version.clone())),
+        }
+    }
+}
--- a/libs/persistent_range_query/src/ops/mod.rs
+++ b/libs/persistent_range_query/src/ops/mod.rs
@@ -0,0 +1,14 @@
+pub mod rsq;
+
+#[derive(Copy, Clone, Debug)]
+pub struct SameElementsInitializer<T> {
+    initial_element_value: T,
+}
+
+impl<T> SameElementsInitializer<T> {
+    pub fn new(initial_element_value: T) -> Self {
+        SameElementsInitializer {
+            initial_element_value,
+        }
+    }
+}
--- a/libs/persistent_range_query/src/ops/rsq.rs
+++ b/libs/persistent_range_query/src/ops/rsq.rs
@@ -0,0 +1,118 @@
+//! # Range Sum Query
+
+use crate::ops::SameElementsInitializer;
+use crate::{LazyRangeInitializer, RangeModification, RangeQueryResult};
+use std::borrow::Borrow;
+use std::ops::{Add, AddAssign, Range};
+
+// TODO: commutative Add
+
+#[derive(Clone, Copy, Debug)]
+pub struct SumResult<T> {
+    sum: T,
+}
+
+impl<T> SumResult<T> {
+    pub fn sum(&self) -> &T {
+        &self.sum
+    }
+}
+
+impl<T: Clone + for<'a> AddAssign<&'a T> + From<u8>, Key> RangeQueryResult<Key> for SumResult<T>
+where
+    for<'a> &'a T: Add<&'a T, Output = T>,
+{
+    fn new_for_empty_range() -> Self {
+        SumResult { sum: 0.into() }
+    }
+
+    fn combine(
+        left: &Self,
+        _left_range: &Range<Key>,
+        right: &Self,
+        _right_range: &Range<Key>,
+    ) -> Self {
+        SumResult {
+            sum: &left.sum + &right.sum,
+        }
+    }
+
+    fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
+        left.sum += &right.sum
+    }
+}
+
+pub trait SumOfSameElements<Key> {
+    fn sum(initial_element_value: &Self, keys: &Range<Key>) -> Self;
+}
+
+impl<T: SumOfSameElements<Key>, TB: Borrow<T>, Key> LazyRangeInitializer<SumResult<T>, Key>
+    for SameElementsInitializer<TB>
+where
+    SumResult<T>: RangeQueryResult<Key>,
+{
+    fn get(&self, range: &Range<Key>) -> SumResult<T> {
+        SumResult {
+            sum: SumOfSameElements::sum(self.initial_element_value.borrow(), range),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub enum AddAssignModification<T> {
+    None,
+    Add(T),
+    Assign(T),
+}
+
+// `AddAssignModification` acting on range sums: `Add(x)` adds `x` to every
+// element of the range, `Assign(x)` overwrites every element with `x`.
+impl<T: Clone + for<'a> AddAssign<&'a T>, Key> RangeModification<Key> for AddAssignModification<T>
+where
+    SumResult<T>: RangeQueryResult<Key>,
+    for<'a> SameElementsInitializer<&'a T>: LazyRangeInitializer<SumResult<T>, Key>,
+{
+    type Result = SumResult<T>;
+
+    fn no_op() -> Self {
+        Self::None
+    }
+
+    fn is_no_op(&self) -> bool {
+        matches!(self, Self::None)
+    }
+
+    fn is_reinitialization(&self) -> bool {
+        matches!(self, Self::Assign(_))
+    }
+
+    /// Updates the sum stored for `range`. The contribution of the operand is
+    /// the sum of `range` as if every element in it were equal to the operand.
+    fn apply(&self, result: &mut SumResult<T>, range: &Range<Key>) {
+        match self {
+            Self::None => {}
+            Self::Add(operand) => {
+                result.sum += &SameElementsInitializer::new(operand).get(range).sum;
+            }
+            Self::Assign(operand) => {
+                result.sum = SameElementsInitializer::new(operand).get(range).sum;
+            }
+        }
+    }
+
+    /// Folds `later` into `earlier`, so that `earlier` describes both steps.
+    fn compose(later: &Self, earlier: &mut Self) {
+        match later {
+            // Nothing happens afterwards: `earlier` already says it all.
+            Self::None => {}
+            // A later assignment wipes out whatever was done before.
+            Self::Assign(_) => *earlier = later.clone(),
+            Self::Add(delta) => match earlier {
+                Self::None => *earlier = later.clone(),
+                // Adding after an addition or an assignment only shifts its operand.
+                Self::Add(operand) | Self::Assign(operand) => *operand += delta,
+            },
+        }
+    }
+}
--- a/libs/persistent_range_query/src/segment_tree.rs
+++ b/libs/persistent_range_query/src/segment_tree.rs
@@ -0,0 +1,255 @@
+//! # Segment Tree
+//! It is a competitive programming folklore data structure. Do not confuse with the interval tree.
+
+use crate::{LazyRangeInitializer, PersistentVecStorage, RangeQueryResult, VecReadableVersion};
+use std::ops::Range;
+use std::rc::Rc;
+
+pub trait MidpointableKey: Clone + Ord + Sized {
+    fn midpoint(range: &Range<Self>) -> Self;
+}
+
+pub trait RangeModification<Key>: Clone + crate::RangeModification<Key> {}
+
+// TODO: use trait alias when stabilized
+impl<T: Clone + crate::RangeModification<Key>, Key> RangeModification<Key> for T {}
+
+#[derive(Debug)]
+struct Node<Modification: RangeModification<Key>, Key> {
+    result: Modification::Result,
+    modify_children: Modification,
+    left: Option<Rc<Self>>,
+    right: Option<Rc<Self>>,
+}
+
+// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
+impl<Modification: RangeModification<Key>, Key> Clone for Node<Modification, Key> {
+    fn clone(&self) -> Self {
+        Node {
+            result: self.result.clone(),
+            modify_children: self.modify_children.clone(),
+            left: self.left.clone(),
+            right: self.right.clone(),
+        }
+    }
+}
+
+// Operations on a single tree node. A node caches the query result for its
+// range in `result`; `modify_children` holds a modification that has already
+// been folded into `result` but not yet pushed down to the children.
+impl<Modification: RangeModification<Key>, Key> Node<Modification, Key> {
+    /// Creates a childless node whose result for `range` comes from
+    /// `initializer` and which has no pending modification.
+    fn new<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
+        range: &Range<Key>,
+        initializer: &Initializer,
+    ) -> Self {
+        Node {
+            result: initializer.get(range),
+            modify_children: Modification::no_op(),
+            left: None,
+            right: None,
+        }
+    }
+
+    /// Applies `modification` to the whole `range` covered by this node: the
+    /// cached result is updated immediately, while for the children the
+    /// modification is only recorded in `modify_children`.
+    pub fn apply(&mut self, modification: &Modification, range: &Range<Key>) {
+        modification.apply(&mut self.result, range);
+        Modification::compose(modification, &mut self.modify_children);
+        // If the pending modification is a reinitialization, the existing
+        // children are dropped; `force_children` creates fresh ones on demand.
+        if self.modify_children.is_reinitialization() {
+            self.left = None;
+            self.right = None;
+        }
+    }
+
+    /// Makes sure both children exist and are up to date: missing children are
+    /// created from `initializer`, the pending modification is applied to both
+    /// of them, and then it is reset to a no-op.
+    pub fn force_children<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
+        &mut self,
+        initializer: &Initializer,
+        range_left: &Range<Key>,
+        range_right: &Range<Key>,
+    ) {
+        // `Rc::make_mut` clones a child first if it is shared with another
+        // `Rc`, so nodes reachable through other handles are left untouched.
+        let left = Rc::make_mut(
+            self.left
+                .get_or_insert_with(|| Rc::new(Node::new(&range_left, initializer))),
+        );
+        let right = Rc::make_mut(
+            self.right
+                .get_or_insert_with(|| Rc::new(Node::new(&range_right, initializer))),
+        );
+        left.apply(&self.modify_children, &range_left);
+        right.apply(&self.modify_children, &range_right);
+        self.modify_children = Modification::no_op();
+    }
+
+    /// Recomputes the cached result by combining the results of both children.
+    ///
+    /// Panics if there is a pending modification or if a child is missing;
+    /// calling `force_children` beforehand establishes both preconditions.
+    pub fn recalculate_from_children(&mut self, range_left: &Range<Key>, range_right: &Range<Key>) {
+        assert!(self.modify_children.is_no_op());
+        assert!(self.left.is_some());
+        assert!(self.right.is_some());
+        self.result = Modification::Result::combine(
+            &self.left.as_ref().unwrap().result,
+            &range_left,
+            &self.right.as_ref().unwrap().result,
+            &range_right,
+        );
+    }
+}
+
+fn split_range<Key: MidpointableKey>(range: &Range<Key>) -> (Range<Key>, Range<Key>) {
+    let range_left = range.start.clone()..MidpointableKey::midpoint(range);
+    let range_right = range_left.end.clone()..range.end.clone();
+    (range_left, range_right)
+}
+
+pub struct PersistentSegmentTreeVersion<
+    Modification: RangeModification<Key>,
+    Initializer: LazyRangeInitializer<Modification::Result, Key>,
+    Key: Clone,
+> {
+    root: Rc<Node<Modification, Key>>,
+    all_keys: Range<Key>,
+    initializer: Rc<Initializer>,
+}
+
+// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
+impl<
+        Modification: RangeModification<Key>,
+        Initializer: LazyRangeInitializer<Modification::Result, Key>,
+        Key: Clone,
+    > Clone for PersistentSegmentTreeVersion<Modification, Initializer, Key>
+{
+    fn clone(&self) -> Self {
+        Self {
+            root: self.root.clone(),
+            all_keys: self.all_keys.clone(),
+            initializer: self.initializer.clone(),
+        }
+    }
+}
+
+fn get<
+    Modification: RangeModification<Key>,
+    Initializer: LazyRangeInitializer<Modification::Result, Key>,
+    Key: MidpointableKey,
+>(
+    node: &mut Rc<Node<Modification, Key>>,
+    node_keys: &Range<Key>,
+    initializer: &Initializer,
+    keys: &Range<Key>,
+) -> Modification::Result {
+    if node_keys.end <= keys.start || keys.end <= node_keys.start {
+        return Modification::Result::new_for_empty_range();
+    }
+    if keys.start <= node_keys.start && node_keys.end <= keys.end {
+        return node.result.clone();
+    }
+    let node = Rc::make_mut(node);
+    let (left_keys, right_keys) = split_range(node_keys);
+    node.force_children(initializer, &left_keys, &right_keys);
+    let mut result = get(node.left.as_mut().unwrap(), &left_keys, initializer, keys);
+    Modification::Result::add(
+        &mut result,
+        &left_keys,
+        &get(node.right.as_mut().unwrap(), &right_keys, initializer, keys),
+        &right_keys,
+    );
+    result
+}
+
+fn modify<
+    Modification: RangeModification<Key>,
+    Initializer: LazyRangeInitializer<Modification::Result, Key>,
+    Key: MidpointableKey,
+>(
+    node: &mut Rc<Node<Modification, Key>>,
+    node_keys: &Range<Key>,
+    initializer: &Initializer,
+    keys: &Range<Key>,
+    modification: &Modification,
+) {
+    if modification.is_no_op() || node_keys.end <= keys.start || keys.end <= node_keys.start {
+        return;
+    }
+    let node = Rc::make_mut(node);
+    if keys.start <= node_keys.start && node_keys.end <= keys.end {
+        node.apply(modification, node_keys);
+        return;
+    }
+    let (left_keys, right_keys) = split_range(node_keys);
+    node.force_children(initializer, &left_keys, &right_keys);
+    modify(
+        node.left.as_mut().unwrap(),
+        &left_keys,
+        initializer,
+        keys,
+        &modification,
+    );
+    modify(
+        node.right.as_mut().unwrap(),
+        &right_keys,
+        initializer,
+        keys,
+        &modification,
+    );
+    node.recalculate_from_children(&left_keys, &right_keys);
+}
+
+impl<
+        Modification: RangeModification<Key>,
+        Initializer: LazyRangeInitializer<Modification::Result, Key>,
+        Key: MidpointableKey,
+    > VecReadableVersion<Modification, Key>
+    for PersistentSegmentTreeVersion<Modification, Initializer, Key>
+{
+    fn get(&self, keys: &Range<Key>) -> Modification::Result {
+        get(
+            &mut self.root.clone(), // TODO: do not always force a branch
+            &self.all_keys,
+            self.initializer.as_ref(),
+            keys,
+        )
+    }
+}
+
+pub struct PersistentSegmentTree<
+    Modification: RangeModification<Key>,
+    Initializer: LazyRangeInitializer<Modification::Result, Key>,
+    Key: MidpointableKey,
+>(PersistentSegmentTreeVersion<Modification, Initializer, Key>);
+
+impl<
+        Modification: RangeModification<Key>,
+        Initializer: LazyRangeInitializer<Modification::Result, Key>,
+        Key: MidpointableKey,
+    > VecReadableVersion<Modification, Key>
+    for PersistentSegmentTree<Modification, Initializer, Key>
+{
+    fn get(&self, keys: &Range<Key>) -> Modification::Result {
+        self.0.get(keys)
+    }
+}
+
+impl<
+        Modification: RangeModification<Key>,
+        Initializer: LazyRangeInitializer<Modification::Result, Key>,
+        Key: MidpointableKey,
+    > PersistentVecStorage<Modification, Initializer, Key>
+    for PersistentSegmentTree<Modification, Initializer, Key>
+{
+    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
+        PersistentSegmentTree(PersistentSegmentTreeVersion {
+            root: Rc::new(Node::new(&all_keys, &initializer)),
+            all_keys: all_keys,
+            initializer: Rc::new(initializer),
+        })
+    }
+
+    type FrozenVersion = PersistentSegmentTreeVersion<Modification, Initializer, Key>;
+
+    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
+        modify(
+            &mut self.0.root, // TODO: do not always force a branch
+            &self.0.all_keys,
+            self.0.initializer.as_ref(),
+            keys,
+            modification,
+        )
+    }
+
+    fn freeze(&mut self) -> Self::FrozenVersion {
+        self.0.clone()
+    }
+}
--- a/libs/persistent_range_query/tests/layer_map_test.rs
+++ b/libs/persistent_range_query/tests/layer_map_test.rs
@@ -0,0 +1,295 @@
+use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
+use persistent_range_query::ops::SameElementsInitializer;
+use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
+use persistent_range_query::{
+    LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
+    VecReadableVersion,
+};
+use std::cmp::Ordering;
+use std::ops::Range;
+
+/// Key type used by this test: a page number wrapped in a newtype.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+struct PageIndex(u32);
+/// Layers are identified by a plain string name in this test.
+type LayerId = String;
+
+impl IndexableKey for PageIndex {
+    /// Zero-based offset of `key` from the start of `all_keys`.
+    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
+        let base = all_keys.start.0 as usize;
+        let position = key.0 as usize;
+        position - base
+    }
+
+    /// Single-key range for the element at offset `index` within `all_keys`.
+    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
+        let first = all_keys.start.0 + index as u32;
+        PageIndex(first)..PageIndex(first + 1)
+    }
+}
+
+impl MidpointableKey for PageIndex {
+    /// Key halfway between `range.start` and `range.end`, rounded down.
+    fn midpoint(range: &Range<Self>) -> Self {
+        let (low, high) = (range.start.0, range.end.0);
+        let half_width = (high - low) / 2;
+        PageIndex(low + half_width)
+    }
+}
+
+/// Query result kept for a range of pages in the layer-map test.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct LayerMapInformation {
+    // Only meaningful for a range of length 1.
+    last_layer: Option<LayerId>,
+    last_image_layer: Option<LayerId>,
+    // Meaningful for ranges of any length: a delta-layer count paired with a page range.
+    // When results are combined, the larger count wins.
+    max_delta_layers: (usize, Range<PageIndex>),
+}
+
+impl LayerMapInformation {
+    /// Borrows `(last_layer, last_image_layer)` as a pair.
+    fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
+        let Self {
+            last_layer,
+            last_image_layer,
+            ..
+        } = self;
+        (last_layer, last_image_layer)
+    }
+
+    /// Borrows the `(count, range)` pair stored in `max_delta_layers`.
+    fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
+        let Self {
+            max_delta_layers, ..
+        } = self;
+        max_delta_layers
+    }
+}
+
+/// Chooses the range to keep when two candidate ranges tie.
+///
+/// An empty side is ignored. Two non-empty ranges are joined only when `right` begins
+/// exactly where `left` ends; otherwise `left` is returned unchanged.
+fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
+    // The left side is checked first, so two empty inputs yield `right`.
+    if left.is_empty() {
+        return right.clone();
+    }
+    if right.is_empty() {
+        return left.clone();
+    }
+
+    let adjacent = left.end == right.start;
+    let end = if adjacent { right.end } else { left.end };
+    left.start..end
+}
+
+impl RangeQueryResult<PageIndex> for LayerMapInformation {
+    /// Result for an empty range: no layers, zero delta count, empty page range.
+    fn new_for_empty_range() -> Self {
+        let nothing = PageIndex(0)..PageIndex(0);
+        Self {
+            last_layer: None,
+            last_image_layer: None,
+            max_delta_layers: (0, nothing),
+        }
+    }
+
+    /// Combines the results of two ranges; the range arguments themselves are not consulted.
+    ///
+    /// Layer names prefer the left value and fall back to the right one. For the delta
+    /// count the larger side wins; on a tie the two page ranges go through `merge_ranges`.
+    fn combine(
+        left: &Self,
+        _left_range: &Range<PageIndex>,
+        right: &Self,
+        _right_range: &Range<PageIndex>,
+    ) -> Self {
+        // Note that either range may be empty.
+        let last_layer = match &left.last_layer {
+            Some(layer) => Some(layer.clone()),
+            None => right.last_layer.clone(),
+        };
+        let last_image_layer = match &left.last_image_layer {
+            Some(layer) => Some(layer.clone()),
+            None => right.last_image_layer.clone(),
+        };
+
+        let (left_count, left_pages) = &left.max_delta_layers;
+        let (right_count, right_pages) = &right.max_delta_layers;
+        let max_delta_layers = match left_count.cmp(right_count) {
+            Ordering::Equal => (*left_count, merge_ranges(left_pages, right_pages)),
+            Ordering::Greater => left.max_delta_layers.clone(),
+            Ordering::Less => right.max_delta_layers.clone(),
+        };
+
+        Self {
+            last_layer,
+            last_image_layer,
+            max_delta_layers,
+        }
+    }
+
+    /// In-place variant of `combine`: overwrites `left` with the combined result.
+    fn add(
+        left: &mut Self,
+        left_range: &Range<PageIndex>,
+        right: &Self,
+        right_range: &Range<PageIndex>,
+    ) {
+        let merged = Self::combine(left, left_range, right, right_range);
+        *left = merged;
+    }
+}
+
+/// Delta-layer part of a modification: how many delta layers are added and the name of
+/// the most recent one.
+#[derive(Clone, Debug)]
+struct AddDeltaLayers {
+    last_layer: LayerId,
+    count: usize,
+}
+
+/// Modification applied to a range of pages in the layer-map test.
+///
+/// Both fields being `None` is the no-op modification.
+#[derive(Clone, Debug)]
+struct LayerMapModification {
+    add_image_layer: Option<LayerId>,
+    add_delta_layers: Option<AddDeltaLayers>,
+}
+
+impl LayerMapModification {
+    /// Modification that adds the image layer `layer` and has no delta part.
+    fn add_image_layer(layer: impl Into<LayerId>) -> Self {
+        let image: LayerId = layer.into();
+        Self {
+            add_image_layer: Some(image),
+            add_delta_layers: None,
+        }
+    }
+
+    /// Modification that adds exactly one delta layer named `layer` and no image layer.
+    fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
+        let single_delta = AddDeltaLayers {
+            last_layer: layer.into(),
+            count: 1,
+        };
+        Self {
+            add_image_layer: None,
+            add_delta_layers: Some(single_delta),
+        }
+    }
+}
+
+impl RangeModification<PageIndex> for LayerMapModification {
+    type Result = LayerMapInformation;
+
+    /// The modification that changes nothing: neither an image nor a delta part.
+    fn no_op() -> Self {
+        Self {
+            add_image_layer: None,
+            add_delta_layers: None,
+        }
+    }
+
+    /// True when both parts are absent.
+    fn is_no_op(&self) -> bool {
+        matches!(
+            (&self.add_image_layer, &self.add_delta_layers),
+            (None, None)
+        )
+    }
+
+    /// True when an image layer is added.
+    fn is_reinitialization(&self) -> bool {
+        self.add_image_layer.is_some()
+    }
+
+    /// Applies this modification to `result`, the stored information for `range`.
+    ///
+    /// The image part runs first and resets the delta count to zero over `range`; the
+    /// delta part then updates `last_layer` and adds its count on top.
+    fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
+        if let Some(image) = self.add_image_layer.as_ref() {
+            result.last_layer = Some(image.clone());
+            result.last_image_layer = Some(image.clone());
+            result.max_delta_layers = (0, range.clone());
+        }
+        if let Some(delta) = self.add_delta_layers.as_ref() {
+            result.last_layer = Some(delta.last_layer.clone());
+            result.max_delta_layers.0 += delta.count;
+        }
+    }
+
+    /// Folds `later` into `earlier` so that `earlier` alone stands for both.
+    fn compose(later: &Self, earlier: &mut Self) {
+        // A later image layer replaces whatever was pending before it.
+        if later.add_image_layer.is_some() {
+            *earlier = later.clone();
+            return;
+        }
+        if let Some(newer) = later.add_delta_layers.as_ref() {
+            match earlier.add_delta_layers.as_mut() {
+                // Accumulate onto the pending deltas, keeping the newest layer name.
+                Some(pending) => {
+                    pending.last_layer = newer.last_layer.clone();
+                    pending.count += newer.count;
+                }
+                // Nothing pending yet: the later deltas become the pending ones.
+                None => earlier.add_delta_layers = Some(newer.clone()),
+            }
+        }
+    }
+}
+
+impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
+    /// Initial information for `range`: no layers, and a zero delta count over all of `range`.
+    fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
+        let no_deltas = (0, range.clone());
+        LayerMapInformation {
+            last_layer: None,
+            last_image_layer: None,
+            max_delta_layers: no_deltas,
+        }
+    }
+}
+
+/// Shared layer-map scenario, generic over the storage implementation `S`.
+///
+/// Adds two overlapping image layers and two overlapping delta layers over pages 0..100,
+/// freezing a snapshot before the last delta layer, then checks per-page last layers and
+/// per-range maximum delta counts on both the live storage and the snapshot.
+fn test_layer_map<
+    S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
+>() {
+    let mut s = S::new(
+        PageIndex(0)..PageIndex(100),
+        SameElementsInitializer::new(()),
+    );
+    s.modify(
+        &(PageIndex(0)..PageIndex(70)),
+        &LayerMapModification::add_image_layer("Img0..70"),
+    );
+    s.modify(
+        &(PageIndex(50)..PageIndex(100)),
+        &LayerMapModification::add_image_layer("Img50..100"),
+    );
+    s.modify(
+        &(PageIndex(10)..PageIndex(60)),
+        &LayerMapModification::add_delta_layer("Delta10..60"),
+    );
+    // Snapshot taken with only the first delta layer applied.
+    let s_before_last_delta = s.freeze();
+    s.modify(
+        &(PageIndex(20)..PageIndex(80)),
+        &LayerMapModification::add_delta_layer("Delta20..80"),
+    );
+
+    // Single-page queries: (last layer, last image layer) for one page of each region.
+    assert_eq!(
+        s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
+        (&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
+    );
+    assert_eq!(
+        s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
+        (
+            &Some("Delta10..60".to_owned()),
+            &Some("Img0..70".to_owned())
+        )
+    );
+    assert_eq!(
+        s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
+        (
+            &Some("Delta20..80".to_owned()),
+            &Some("Img0..70".to_owned())
+        )
+    );
+    assert_eq!(
+        s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
+        (
+            &Some("Delta20..80".to_owned()),
+            &Some("Img50..100".to_owned())
+        )
+    );
+    assert_eq!(
+        s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
+        (
+            &Some("Img50..100".to_owned()),
+            &Some("Img50..100".to_owned())
+        )
+    );
+
+    // Whole key space: both delta layers overlap on 20..60; the snapshot only has the first.
+    assert_eq!(
+        s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
+        &(2, PageIndex(20)..PageIndex(60)),
+    );
+    assert_eq!(
+        *s_before_last_delta
+            .get(&(PageIndex(0)..PageIndex(100)))
+            .max_delta_layers(),
+        (1, PageIndex(10)..PageIndex(60)),
+    );
+
+    // Sub-ranges that straddle or stop at the start of the second delta layer.
+    assert_eq!(
+        *s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
+        (2, PageIndex(20)..PageIndex(30))
+    );
+    assert_eq!(
+        *s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
+        (1, PageIndex(10)..PageIndex(20))
+    );
+
+    // Pages 70..80 are touched only by the last delta layer, so the snapshot reports zero.
+    assert_eq!(
+        *s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
+        (1, PageIndex(70)..PageIndex(80))
+    );
+    assert_eq!(
+        *s_before_last_delta
+            .get(&(PageIndex(70)..PageIndex(80)))
+            .max_delta_layers(),
+        (0, PageIndex(70)..PageIndex(80))
+    );
+}
+
+/// Runs the layer-map scenario against `NaiveVecStorage`.
+#[test]
+fn test_naive() {
+    test_layer_map::<NaiveVecStorage<_, _, _>>();
+}
+
+/// Runs the layer-map scenario against `PersistentSegmentTree`.
+#[test]
+fn test_segment_tree() {
+    test_layer_map::<PersistentSegmentTree<_, _, _>>();
+}
--- a/libs/persistent_range_query/tests/rsq_test.rs
+++ b/libs/persistent_range_query/tests/rsq_test.rs
@@ -0,0 +1,116 @@
+use persistent_range_query::naive::*;
+use persistent_range_query::ops::rsq::AddAssignModification::Add;
+use persistent_range_query::ops::rsq::*;
+use persistent_range_query::ops::SameElementsInitializer;
+use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
+use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
+use rand::{Rng, SeedableRng};
+use std::ops::Range;
+
+/// Key type used by the range-sum tests: a `u16` position wrapped in a newtype.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+struct K(u16);
+
+impl IndexableKey for K {
+    /// Zero-based offset of `key` from the start of `all_keys`.
+    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
+        let base = all_keys.start.0 as usize;
+        let position = key.0 as usize;
+        position - base
+    }
+
+    /// Single-key range for the element at offset `index` within `all_keys`.
+    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
+        let first = all_keys.start.0 + index as u16;
+        K(first)..K(first + 1)
+    }
+}
+
+impl SumOfSameElements<K> for i32 {
+    /// Sum of a run of identical elements: the element value times the number of keys.
+    fn sum(initial_element_value: &Self, keys: &Range<K>) -> Self {
+        let key_count = keys.end.0 - keys.start.0;
+        initial_element_value * i32::from(key_count)
+    }
+}
+
+impl MidpointableKey for K {
+    /// Key halfway between `range.start` and `range.end`, rounded down.
+    fn midpoint(range: &Range<Self>) -> Self {
+        let (low, high) = (range.start.0, range.end.0);
+        let half_width = (high - low) / 2;
+        K(low + half_width)
+    }
+}
+
+/// Shared range-sum scenario, generic over the storage implementation `S`.
+///
+/// Starts from twelve zeros, applies overlapping `Add` and `Assign` modifications, and checks
+/// the sums on the live storage as well as on a snapshot frozen after the first modification.
+fn test_storage<
+    S: PersistentVecStorage<AddAssignModification<i32>, SameElementsInitializer<i32>, K>,
+>() {
+    let mut s = S::new(K(0)..K(12), SameElementsInitializer::new(0i32));
+    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 0);
+
+    s.modify(&(K(2)..K(5)), &AddAssignModification::Add(3));
+    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 3 + 3);
+    // Snapshot: keys 2..5 hold 3, everything else is 0.
+    let s_old = s.freeze();
+
+    s.modify(&(K(3)..K(6)), &AddAssignModification::Assign(10));
+    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 10 + 10);
+
+    s.modify(&(K(4)..K(7)), &AddAssignModification::Add(2));
+    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 12 + 12 + 2);
+
+    assert_eq!(*s.get(&(K(4)..K(6))).sum(), 12 + 12);
+    // The snapshot must not see the modifications made after `freeze`.
+    assert_eq!(*s_old.get(&(K(4)..K(6))).sum(), 3);
+}
+
+/// Runs the range-sum scenario against `NaiveVecStorage`.
+#[test]
+fn test_naive() {
+    test_storage::<NaiveVecStorage<_, _, _>>();
+}
+
+/// Runs the range-sum scenario against `PersistentSegmentTree`.
+#[test]
+fn test_segment_tree() {
+    test_storage::<PersistentSegmentTree<_, _, _>>();
+}
+
+/// Randomized differential test: the segment tree must agree with the naive storage.
+///
+/// Runs two rounds. Each round freezes both storages, performs `OPERATIONS` pairs of
+/// (random range query, random range `Add`) on the live storages, and finally checks that
+/// both frozen snapshots still report the sum recorded before the round started.
+/// The RNG is seeded with a constant, so the sequence of operations is the same on every run.
+#[test]
+fn test_stress() {
+    const LEN: u16 = 17_238;
+    const OPERATIONS: i32 = 20_000;
+
+    let mut rng = rand::rngs::StdRng::seed_from_u64(0);
+    let mut naive: NaiveVecStorage<AddAssignModification<i32>, _, _> =
+        NaiveVecStorage::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
+    let mut segm_tree: PersistentSegmentTree<AddAssignModification<i32>, _, _> =
+        PersistentSegmentTree::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
+
+    /// Draws two positions in `0..LEN` and returns them as an ordered, possibly empty range.
+    fn gen_range(rng: &mut impl Rng) -> Range<K> {
+        let l: u16 = rng.gen_range(0..LEN);
+        let r: u16 = rng.gen_range(0..LEN);
+        K(l.min(r))..K(l.max(r))
+    }
+
+    for _ in 0..2 {
+        let checksum_range = gen_range(&mut rng);
+        let checksum_before: i32 = *naive.get(&checksum_range).sum();
+        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
+
+        let naive_before = naive.freeze();
+        let segm_tree_before = segm_tree.freeze();
+        // Freshly frozen snapshots must report the same sum as the live storages did.
+        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
+        assert_eq!(
+            checksum_before,
+            *segm_tree_before.get(&checksum_range).sum()
+        );
+
+        for _ in 0..OPERATIONS {
+            {
+                let range = gen_range(&mut rng);
+                assert_eq!(naive.get(&range).sum(), segm_tree.get(&range).sum());
+            }
+            {
+                let range = gen_range(&mut rng);
+                let val = rng.gen_range(-10i32..=10i32);
+                let op = Add(val);
+                naive.modify(&range, &op);
+                segm_tree.modify(&range, &op);
+            }
+        }
+
+        // Snapshots must be unaffected by every modification made after `freeze`.
+        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
+        assert_eq!(
+            checksum_before,
+            *segm_tree_before.get(&checksum_range).sum()
+        );
+    }
+}
--- a/libs/tenant_size_model/src/lib.rs
+++ b/libs/tenant_size_model/src/lib.rs
@@ -33,8 +33,8 @@ pub struct Segment {
    /// Logical size before this state
    start_size: u64,

-    /// Logical size at this state
-    pub end_size: u64,
+    /// Logical size at this state. Can be None in the last Segment of a branch.
+    pub end_size: Option<u64>,

    /// Indices to [`Storage::segments`]
    ///
@@ -115,7 +115,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
            start_lsn: 0,
            end_lsn: 0,
            start_size: 0,
-            end_size: 0,
+            end_size: Some(0),
            children_after: Vec::new(),
        };

@@ -125,6 +125,39 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
        }
    }

+    /// Advances the branch with a new point, at given LSN.
+    ///
+    /// Appends a segment that starts where the branch's latest segment ends and finishes at
+    /// the absolute `lsn`, then makes it the branch's latest segment. `size` becomes the new
+    /// segment's `end_size` and may be `None`.
+    ///
+    /// # Panics
+    ///
+    /// - if `branch` is not a known branch,
+    /// - if `lsn` is not strictly greater than the end LSN of the branch's latest segment,
+    /// - if the branch's latest segment has no `end_size`, since that value is needed as
+    ///   the new segment's `start_size`.
+    pub fn insert_point<Q: ?Sized>(
+        &mut self,
+        branch: &Q,
+        op: Cow<'static, str>,
+        lsn: u64,
+        size: Option<u64>,
+    ) where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq,
+    {
+        // Look the branch up once; the same slot is updated at the end.
+        let branch_tip = self
+            .branches
+            .get_mut(branch)
+            .expect("insert_point: unknown branch");
+        let lastseg_id = *branch_tip;
+        let newseg_id = self.segments.len();
+        let lastseg = &mut self.segments[lastseg_id];
+
+        assert!(
+            lsn > lastseg.end_lsn,
+            "insert_point: lsn {} must be greater than the branch's latest end_lsn {}",
+            lsn,
+            lastseg.end_lsn
+        );
+
+        let newseg = Segment {
+            op,
+            parent: Some(lastseg_id),
+            start_lsn: lastseg.end_lsn,
+            end_lsn: lsn,
+            start_size: lastseg
+                .end_size
+                .expect("insert_point: latest segment of the branch has no end_size"),
+            end_size: size,
+            children_after: Vec::new(),
+            needed: false,
+        };
+        lastseg.children_after.push(newseg_id);
+
+        self.segments.push(newseg);
+        *branch_tip = newseg_id;
+    }
+
    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
    pub fn modify_branch<Q: ?Sized>(
        &mut self,
@@ -145,8 +178,8 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
            parent: Some(lastseg_id),
            start_lsn: lastseg.end_lsn,
            end_lsn: lastseg.end_lsn + lsn_bytes,
-            start_size: lastseg.end_size,
-            end_size: (lastseg.end_size as i64 + size_bytes) as u64,
+            start_size: lastseg.end_size.unwrap(),
+            end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
            children_after: Vec::new(),
            needed: false,
        };
@@ -321,7 +354,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
                Some(SegmentSize {
                    seg_id,
                    method: SnapshotAfter,
-                    this_size: seg.end_size,
+                    this_size: seg.end_size.unwrap(),
                    children,
                })
            } else {
--- a/libs/tenant_size_model/src/main.rs
+++ b/libs/tenant_size_model/src/main.rs
@@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
    let seg_id = node.seg_id;
    let seg = segments.get(seg_id).unwrap();
    let lsn = seg.end_lsn;
-    let size = seg.end_size;
+    let size = seg.end_size.unwrap_or(0);
    let method = node.method;

    println!("  {{");
@@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
            print!(
                " label=\"{} / {}\"",
                next.end_lsn - seg.end_lsn,
-                (next.end_size as i128 - seg.end_size as i128)
+                (next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
            );
        } else {
            print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -48,6 +48,25 @@ pub mod nonblock;
 // Default signal handling
 pub mod signals;

+/// Failpoint that, when configured, sleeps asynchronously for a number of milliseconds.
+///
+/// Use with `fail::cfg("$name", "return(2000)")`: the `return` argument is parsed as a `u64`
+/// millisecond count. The expansion contains an `.await`, so the macro can only be used in
+/// async code. It refers to `fail`, `tokio` and `tracing` by crate name, so these names must
+/// resolve at the call site.
+///
+/// Panics if the failpoint is configured without an argument, or with one that is not a
+/// valid `u64`.
+#[macro_export]
+macro_rules! failpoint_sleep_millis_async {
+    ($name:literal) => {{
+        // `fail_point!` with a closure returns from the enclosing function when triggered,
+        // so it is wrapped in an immediately-invoked closure to get the value back here.
+        let should_sleep: Option<std::time::Duration> = (|| {
+            fail::fail_point!($name, |v: Option<_>| {
+                let millis = v
+                    .expect("failpoint needs a millisecond argument, e.g. return(2000)")
+                    .parse::<u64>()
+                    .expect("failpoint argument must be a u64 number of milliseconds");
+                // Fully qualified so that callers do not need `Duration` in scope.
+                Some(std::time::Duration::from_millis(millis))
+            });
+            None
+        })();
+        if let Some(d) = should_sleep {
+            tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
+            tokio::time::sleep(d).await;
+            tracing::info!("failpoint {:?}: sleep done", $name);
+        }
+    }};
+}
+
 /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
 ///
 /// we have several cases:
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -138,7 +138,7 @@ impl FromStr for Lsn {
    ///
    /// If the input string is missing the '/' character, then use `Lsn::from_hex`
    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let mut splitter = s.split('/');
+        let mut splitter = s.trim().split('/');
        if let (Some(left), Some(right), None) = (splitter.next(), splitter.next(), splitter.next())
        {
            let left_num = u32::from_str_radix(left, 16).map_err(|_| LsnParseError)?;
@@ -270,6 +270,11 @@ mod tests {
        );
        assert_eq!(Lsn::from_hex("0"), Ok(Lsn(0)));
        assert_eq!(Lsn::from_hex("F12345678AAAA5555"), Err(LsnParseError));
+
+        let expected_lsn = Lsn(0x3C490F8);
+        assert_eq!(" 0/3C490F8".parse(), Ok(expected_lsn));
+        assert_eq!("0/3C490F8 ".parse(), Ok(expected_lsn));
+        assert_eq!(" 0/3C490F8 ".parse(), Ok(expected_lsn));
    }

    #[test]
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -76,3 +76,7 @@ tempfile = "3.2"
 [[bench]]
 name = "bench_layer_map"
 harness = false
+
+[[bench]]
+name = "bench_walredo"
+harness = false
--- a/pageserver/benches/bench_walredo.rs
+++ b/pageserver/benches/bench_walredo.rs
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -199,6 +199,20 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
    logging::init(conf.log_format)?;
    info!("version: {}", version());

+    // If any failpoints were set from FAILPOINTS environment variable,
+    // print them to the log for debugging purposes
+    let failpoints = fail::list();
+    if !failpoints.is_empty() {
+        info!(
+            "started with failpoints: {}",
+            failpoints
+                .iter()
+                .map(|(name, actions)| format!("{name}={actions}"))
+                .collect::<Vec<String>>()
+                .join(";")
+        )
+    }
+
    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
    let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
        lock_file::LockCreationResult::Created {
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -614,8 +614,9 @@ impl PageServerConf {
        PathBuf::from(format!("../tmp_check/test_{test_name}"))
    }

-    #[cfg(test)]
    pub fn dummy_conf(repo_dir: PathBuf) -> Self {
+        let pg_distrib_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../pg_install");
+
        PageServerConf {
            id: NodeId(0),
            wait_lsn_timeout: Duration::from_secs(60),
@@ -626,7 +627,7 @@ impl PageServerConf {
            listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
            superuser: "cloud_admin".to_string(),
            workdir: repo_dir,
-            pg_distrib_dir: PathBuf::new(),
+            pg_distrib_dir,
            auth_type: AuthType::Trust,
            auth_validation_public_key_path: None,
            remote_storage_config: None,
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -667,6 +667,7 @@ components:
        - disk_consistent_lsn
        - awaits_download
        - state
+        - latest_gc_cutoff_lsn
      properties:
        timeline_id:
          type: string
@@ -711,6 +712,9 @@ components:
          type: boolean
        state:
          type: string
+        latest_gc_cutoff_lsn:
+          type: string
+          format: hex

        # These 'local' and 'remote' fields just duplicate some of the fields
        # above. They are kept for backwards-compatibility. They can be removed,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -76,6 +76,12 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                        FeMessage::CopyData(bytes) => bytes,
                        FeMessage::CopyDone => { break },
                        FeMessage::Sync => continue,
+                        FeMessage::Terminate => {
+                            let msg = format!("client terminated connection with Terminate message during COPY");
+                            pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
+                            Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
+                            break;
+                        }
                        m => {
                            let msg = format!("unexpected message {:?}", m);
                            pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
@@ -87,10 +93,10 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                    yield copy_data_bytes;
                }
                Ok(None) => {
-                    let msg = "client closed connection";
+                    let msg = "client closed connection during COPY";
                    pgb.write_message(&BeMessage::ErrorResponse(msg))?;
                    pgb.flush().await?;
-                    Err(io::Error::new(io::ErrorKind::Other, msg))?;
+                    Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
                }
                Err(e) => {
                    Err(io::Error::new(io::ErrorKind::Other, e))?;
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -461,14 +461,7 @@ impl Tenant {
                    .context("Cannot branch off the timeline that's not present in pageserver")?;

                if let Some(lsn) = ancestor_start_lsn.as_mut() {
-                    // Wait for the WAL to arrive and be processed on the parent branch up
-                    // to the requested branch point. The repository code itself doesn't
-                    // require it, but if we start to receive WAL on the new timeline,
-                    // decoding the new WAL might need to look up previous pages, relation
-                    // sizes etc. and that would get confused if the previous page versions
-                    // are not in the repository yet.
                    *lsn = lsn.align();
-                    ancestor_timeline.wait_lsn(*lsn).await?;

                    let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
                    if ancestor_ancestor_lsn > *lsn {
@@ -480,6 +473,14 @@ impl Tenant {
                            ancestor_ancestor_lsn,
                        );
                    }
+
+                    // Wait for the WAL to arrive and be processed on the parent branch up
+                    // to the requested branch point. The repository code itself doesn't
+                    // require it, but if we start to receive WAL on the new timeline,
+                    // decoding the new WAL might need to look up previous pages, relation
+                    // sizes etc. and that would get confused if the previous page versions
+                    // are not in the repository yet.
+                    ancestor_timeline.wait_lsn(*lsn).await?;
                }

                self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
@@ -1010,6 +1011,10 @@ impl Tenant {

        let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;

+        utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines");
+
+        info!("starting on {} timelines", gc_timelines.len());
+
        // Perform GC for each timeline.
        //
        // Note that we don't hold the GC lock here because we don't want
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -183,6 +183,19 @@ pub(super) async fn gather_inputs(
            }
        }

+        // all timelines also have an end point if they have made any progress
+        if last_record_lsn > timeline.get_ancestor_lsn()
+            && !interesting_lsns
+                .iter()
+                .any(|(lsn, _)| lsn == &last_record_lsn)
+        {
+            updates.push(Update {
+                lsn: last_record_lsn,
+                command: Command::EndOfBranch,
+                timeline_id: timeline.timeline_id,
+            });
+        }
+
        timeline_inputs.insert(
            timeline.timeline_id,
            TimelineInputs {
@@ -270,48 +283,22 @@ impl ModelInputs {
        // impossible to always determine the a one main branch.
        let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);

-        // tracking these not to require modifying the current implementation of the size model,
-        // which works in relative LSNs and sizes.
-        let mut last_state: HashMap<TimelineId, (Lsn, u64)> = HashMap::new();
-
        for update in &self.updates {
            let Update {
                lsn,
                command: op,
                timeline_id,
            } = update;
+            let Lsn(now) = *lsn;
            match op {
                Command::Update(sz) => {
-                    let latest = last_state.get_mut(timeline_id).ok_or_else(|| {
-                        anyhow::anyhow!(
-                        "ordering-mismatch: there must had been a previous state for {timeline_id}"
-                    )
-                    })?;
-
-                    let lsn_bytes = {
-                        let Lsn(now) = lsn;
-                        let Lsn(prev) = latest.0;
-                        debug_assert!(prev <= *now, "self.updates should had been sorted");
-                        now - prev
-                    };
-
-                    let size_diff =
-                        i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| {
-                            format!("size difference i64 overflow for {timeline_id}")
-                        })?;
-
-                    storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff);
-                    *latest = (*lsn, *sz);
+                    storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz));
+                }
+                Command::EndOfBranch => {
+                    storage.insert_point(&Some(*timeline_id), "".into(), now, None);
                }
                Command::BranchFrom(parent) => {
                    storage.branch(parent, Some(*timeline_id));
-
-                    let size = parent
-                        .as_ref()
-                        .and_then(|id| last_state.get(id))
-                        .map(|x| x.1)
-                        .unwrap_or(0);
-                    last_state.insert(*timeline_id, (*lsn, size));
                }
            }
        }
@@ -320,10 +307,7 @@ impl ModelInputs {
    }
 }

-/// Single size model update.
-///
-/// Sizing model works with relative increments over latest branch state.
-/// Updates are absolute, so additional state needs to be tracked when applying.
+/// A point of interest in the tree of branches
 #[serde_with::serde_as]
 #[derive(
    Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
@@ -342,6 +326,7 @@ struct Update {
 enum Command {
    Update(u64),
    BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
+    EndOfBranch,
 }

 impl std::fmt::Debug for Command {
@@ -351,6 +336,7 @@ impl std::fmt::Debug for Command {
        match self {
            Self::Update(arg0) => write!(f, "Update({arg0})"),
            Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
+            Self::EndOfBranch => write!(f, "EndOfBranch"),
        }
    }
 }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -61,6 +61,13 @@ use crate::{
    storage_sync::{self, index::LayerFileMetadata},
 };

+/// State of a timeline's layer flush task, replacing the earlier started/not-started flag.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+enum FlushLoopState {
+    /// Initial value: the flush loop has not been spawned yet.
+    NotStarted,
+    /// Set once the flush loop task has been spawned.
+    Running,
+    /// Set by the task after the flush loop returns; a later spawn attempt is ignored.
+    Exited,
+}
+
 pub struct Timeline {
    conf: &'static PageServerConf,
    tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -122,7 +129,7 @@ pub struct Timeline {
    write_lock: Mutex<()>,

    /// Used to avoid multiple `flush_loop` tasks running
-    flush_loop_started: Mutex<bool>,
+    flush_loop_state: Mutex<FlushLoopState>,

    /// layer_flush_start_tx can be used to wake up the layer-flushing task.
    /// The value is a counter, incremented every time a new flush cycle is requested.
@@ -755,7 +762,7 @@ impl Timeline {

            upload_layers: AtomicBool::new(upload_layers),

-            flush_loop_started: Mutex::new(false),
+            flush_loop_state: Mutex::new(FlushLoopState::NotStarted),

            layer_flush_start_tx,
            layer_flush_done_tx,
@@ -794,13 +801,23 @@ impl Timeline {
    }

    pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
-        let mut flush_loop_started = self.flush_loop_started.lock().unwrap();
-        if *flush_loop_started {
-            info!(
-                "skipping attempt to start flush_loop twice {}/{}",
-                self.tenant_id, self.timeline_id
-            );
-            return;
+        let mut flush_loop_state = self.flush_loop_state.lock().unwrap();
+        match *flush_loop_state {
+            FlushLoopState::NotStarted => (),
+            FlushLoopState::Running => {
+                info!(
+                    "skipping attempt to start flush_loop twice {}/{}",
+                    self.tenant_id, self.timeline_id
+                );
+                return;
+            }
+            FlushLoopState::Exited => {
+                warn!(
+                    "ignoring attempt to restart exited flush_loop {}/{}",
+                    self.tenant_id, self.timeline_id
+                );
+                return;
+            }
        }

        let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
@@ -813,11 +830,16 @@ impl Timeline {
                    Some(self.timeline_id),
                    "layer flush task",
                    false,
-                    async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) }
+                    async move {
+                         self_clone.flush_loop(layer_flush_start_rx).await;
+                         let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();
+                         assert_eq!(*flush_loop_state, FlushLoopState::Running);
+                         *flush_loop_state  = FlushLoopState::Exited;
+                         Ok(()) }
                    .instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
                );

-        *flush_loop_started = true;
+        *flush_loop_state = FlushLoopState::Running;
    }

    pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
@@ -1365,8 +1387,9 @@ impl Timeline {
        // finished, instead of some other flush that was started earlier.
        let mut my_flush_request = 0;

-        if !&*self.flush_loop_started.lock().unwrap() {
-            anyhow::bail!("cannot flush frozen layers when flush_loop is not running")
+        let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };
+        if flush_loop_state != FlushLoopState::Running {
+            anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}")
        }

        self.layer_flush_start_tx.send_modify(|counter| {
--- a/pageserver/src/tenant_config.rs
+++ b/pageserver/src/tenant_config.rs
@@ -216,7 +216,6 @@ impl TenantConf {
        }
    }

-    #[cfg(test)]
    pub fn dummy_conf() -> Self {
        TenantConf {
            checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
--- a/pageserver/src/tenant_tasks.rs
+++ b/pageserver/src/tenant_tasks.rs
@@ -71,9 +71,7 @@ async fn compaction_loop(tenant_id: TenantId) {
            let mut sleep_duration = tenant.get_compaction_period();
            if let Err(e) = tenant.compaction_iteration() {
                sleep_duration = wait_duration;
-                error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
-                #[cfg(feature = "testing")]
-                std::process::abort();
+                error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
            }

            // Sleep
@@ -122,9 +120,7 @@ async fn gc_loop(tenant_id: TenantId) {
                if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
                {
                    sleep_duration = wait_duration;
-                    error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
-                    #[cfg(feature = "testing")]
-                    std::process::abort();
+                    error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
                }
            }

--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -210,6 +210,16 @@ impl PostgresRedoManager {
        }
    }

+    /// Launch process pre-emptively. Should not be needed except for benchmarking.
+    pub fn launch_process(&mut self, pg_version: u32) -> anyhow::Result<()> {
+        let inner = self.process.get_mut().unwrap();
+        if inner.is_none() {
+            let p = PostgresRedoProcess::launch(self.conf, self.tenant_id, pg_version)?;
+            *inner = Some(p);
+        }
+        Ok(())
+    }
+
    ///
    /// Process one request for WAL redo using wal-redo postgres
    ///
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -32,11 +32,6 @@

 #define PageStoreTrace DEBUG5

-#define NEON_TAG "[NEON_SMGR] "
-#define neon_log(tag, fmt, ...) ereport(tag,                                  \
-										(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
-										 errhidestmt(true), errhidecontext(true)))
-
 bool		connected = false;
 PGconn	   *pageserver_conn = NULL;

@@ -97,11 +92,10 @@ pageserver_connect()

 	while (PQisBusy(pageserver_conn))
 	{
-		int			wc;
 		WaitEvent	event;

 		/* Sleep until there's something to do */
-		wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
+		(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
 		ResetLatch(MyLatch);

 		CHECK_FOR_INTERRUPTS();
@@ -141,11 +135,10 @@ retry:

 	if (ret == 0)
 	{
-		int			wc;
 		WaitEvent	event;

 		/* Sleep until there's something to do */
-		wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
+		(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
 		ResetLatch(MyLatch);

 		CHECK_FOR_INTERRUPTS();
@@ -239,6 +232,9 @@ pageserver_receive(void)
 	StringInfoData resp_buff;
 	NeonResponse *resp;

+	if (!connected)
+		return NULL;
+
 	PG_TRY();
 	{
 		/* read response */
@@ -248,7 +244,10 @@ pageserver_receive(void)
 		if (resp_buff.len < 0)
 		{
 			if (resp_buff.len == -1)
-				neon_log(ERROR, "end of COPY");
+			{
+				pageserver_disconnect();
+				return NULL;
+			}
 			else if (resp_buff.len == -2)
 				neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
 		}
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -49,6 +49,11 @@ typedef struct

 #define messageTag(m) (((const NeonMessage *)(m))->tag)

+#define NEON_TAG "[NEON_SMGR] "
+#define neon_log(tag, fmt, ...) ereport(tag,                                  \
+										(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
+										 errhidestmt(true), errhidecontext(true)))
+
 /*
 * supertype of all the Neon*Request structs below
 *
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -251,9 +251,9 @@ XLogRecPtr	prefetch_lsn = 0;

 static void consume_prefetch_responses(void);
 static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
-static void prefetch_read(PrefetchRequest *slot);
+static bool prefetch_read(PrefetchRequest *slot);
 static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
-static void prefetch_wait_for(uint64 ring_index);
+static bool prefetch_wait_for(uint64 ring_index);
 static void prefetch_cleanup(void);
 static inline void prefetch_set_unused(uint64 ring_index);

@@ -393,7 +393,7 @@ prefetch_cleanup(void)
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
-static void
+static bool
 prefetch_wait_for(uint64 ring_index)
 {
 	PrefetchRequest *entry;
@@ -412,8 +412,10 @@ prefetch_wait_for(uint64 ring_index)
 		entry = GetPrfSlot(MyPState->ring_receive);

 		Assert(entry->status == PRFS_REQUESTED);
-		prefetch_read(entry);
+		if (!prefetch_read(entry))
+			return false;
 	}
+	return true;
 }

 /*
@@ -425,7 +427,7 @@ prefetch_wait_for(uint64 ring_index)
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
-static void
+static bool
 prefetch_read(PrefetchRequest *slot)
 {
 	NeonResponse *response;
@@ -438,15 +440,22 @@ prefetch_read(PrefetchRequest *slot)
 	old = MemoryContextSwitchTo(MyPState->errctx);
 	response = (NeonResponse *) page_server->receive();
 	MemoryContextSwitchTo(old);
-	
-	/* update prefetch state */
-	MyPState->n_responses_buffered += 1;
-	MyPState->n_requests_inflight -= 1;
-	MyPState->ring_receive += 1;
+	if (response)
+	{
+		/* update prefetch state */
+		MyPState->n_responses_buffered += 1;
+		MyPState->n_requests_inflight -= 1;
+		MyPState->ring_receive += 1;

-	/* update slot state */
-	slot->status = PRFS_RECEIVED;
-	slot->response = response;
+		/* update slot state */
+		slot->status = PRFS_RECEIVED;
+		slot->response = response;
+		return true;
+	}
+	else
+	{
+		return false;
+	}
 }

 /*
@@ -746,11 +755,16 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
 static NeonResponse *
 page_server_request(void const *req)
 {
-	page_server->send((NeonRequest *) req);
-	page_server->flush();
-	MyPState->ring_flush = MyPState->ring_unused;
-	consume_prefetch_responses();
-	return page_server->receive();
+	NeonResponse* resp;
+	do {
+		page_server->send((NeonRequest *) req);
+		page_server->flush();
+		MyPState->ring_flush = MyPState->ring_unused;
+		consume_prefetch_responses();
+		resp = page_server->receive();
+	} while (resp == NULL);
+	return resp;
+
 }


@@ -1635,7 +1649,8 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
 bool
 neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
-	uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
+	BufferTag	tag;
+	uint64		ring_index PG_USED_FOR_ASSERTS_ONLY;

 	switch (reln->smgr_relpersistence)
 	{
@@ -1651,7 +1666,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 			elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	BufferTag tag = (BufferTag) {
+	tag = (BufferTag) {
 		.rnode = reln->smgr_rnode.node,
 		.forkNum = forknum,
 		.blockNum = blocknum
@@ -1755,22 +1770,24 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
 		}
 	}

-	if (entry == NULL)
+	do
 	{
-		n_prefetch_misses += 1;
+		if (entry == NULL)
+		{
+			n_prefetch_misses += 1;

-		ring_index = prefetch_register_buffer(buftag, &request_latest,
-											  &request_lsn);
-		slot = GetPrfSlot(ring_index);
-	}
+			ring_index = prefetch_register_buffer(buftag, &request_latest,
+												  &request_lsn);
+			slot = GetPrfSlot(ring_index);
+		}

-	Assert(slot->my_ring_index == ring_index);
-	Assert(MyPState->ring_last <= ring_index &&
-		   MyPState->ring_unused > ring_index);
-	Assert(slot->status != PRFS_UNUSED);
-	Assert(GetPrfSlot(ring_index) == slot);
+		Assert(slot->my_ring_index == ring_index);
+		Assert(MyPState->ring_last <= ring_index &&
+			   MyPState->ring_unused > ring_index);
+		Assert(slot->status != PRFS_UNUSED);
+		Assert(GetPrfSlot(ring_index) == slot);

-	prefetch_wait_for(ring_index);
+	} while (!prefetch_wait_for(ring_index));

 	Assert(slot->status == PRFS_RECEIVED);

--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -119,6 +119,7 @@ static TimestampTz last_reconnect_attempt;
 static WalproposerShmemState * walprop_shared;

 /* Prototypes for private functions */
+static void WalProposerRegister(void);
 static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
 static void WalProposerStart(void);
 static void WalProposerLoop(void);
@@ -455,7 +456,7 @@ WalProposerPoll(void)
 /*
 * Register a background worker proposing WAL to wal acceptors.
 */
-void
+static void
 WalProposerRegister(void)
 {
 	BackgroundWorker bgw;
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -377,18 +377,18 @@ typedef struct Safekeeper
 	AppendResponse appendResponse;	/* feedback for master */
 } Safekeeper;

-extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
-void		WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
-void		WalProposerPoll(void);
-void		WalProposerRegister(void);
-void		ParseReplicationFeedbackMessage(StringInfo reply_message,
-											ReplicationFeedback * rf);
+extern void WalProposerSync(int argc, char *argv[]);
+extern void WalProposerMain(Datum main_arg);
+extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
+extern void WalProposerPoll(void);
+extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
+											ReplicationFeedback *rf);
 extern void StartProposerReplication(StartReplicationCmd *cmd);

-Size		WalproposerShmemSize(void);
-bool		WalproposerShmemInit(void);
-void		replication_feedback_set(ReplicationFeedback * rf);
-void		replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
+extern Size WalproposerShmemSize(void);
+extern bool WalproposerShmemInit(void);
+extern void replication_feedback_set(ReplicationFeedback *rf);
+extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);

 /* libpqwalproposer hooks & helper type */

--- a/proxy/src/auth.rs
+++ b/proxy/src/auth.rs
@@ -1,7 +1,7 @@
 //! Client authentication mechanisms.

 pub mod backend;
-pub use backend::{BackendType, ConsoleReqExtra, DatabaseInfo};
+pub use backend::{BackendType, ConsoleReqExtra};

 mod credentials;
 pub use credentials::ClientCredentials;
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -12,7 +12,6 @@ use crate::{
    waiters::{self, Waiter, Waiters},
 };
 use once_cell::sync::Lazy;
-use serde::{Deserialize, Serialize};
 use std::borrow::Cow;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{info, warn};
@@ -36,45 +35,6 @@ pub fn notify(psql_session_id: &str, msg: mgmt::ComputeReady) -> Result<(), wait
    CPLANE_WAITERS.notify(psql_session_id, msg)
 }

-/// Compute node connection params provided by the cloud.
-/// Note how it implements serde traits, since we receive it over the wire.
-#[derive(Serialize, Deserialize, Default)]
-pub struct DatabaseInfo {
-    pub host: String,
-    pub port: u16,
-    pub dbname: String,
-    pub user: String,
-    pub password: Option<String>,
-}
-
-// Manually implement debug to omit personal and sensitive info.
-impl std::fmt::Debug for DatabaseInfo {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
-        fmt.debug_struct("DatabaseInfo")
-            .field("host", &self.host)
-            .field("port", &self.port)
-            .finish_non_exhaustive()
-    }
-}
-
-impl From<DatabaseInfo> for tokio_postgres::Config {
-    fn from(db_info: DatabaseInfo) -> Self {
-        let mut config = tokio_postgres::Config::new();
-
-        config
-            .host(&db_info.host)
-            .port(db_info.port)
-            .dbname(&db_info.dbname)
-            .user(&db_info.user);
-
-        if let Some(password) = db_info.password {
-            config.password(password);
-        }
-
-        config
-    }
-}
-
 /// Extra query params we'd like to pass to the console.
 pub struct ConsoleReqExtra<'a> {
    /// A unique identifier for a connection.
@@ -158,54 +118,107 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
    }
 }

+/// A product of successful authentication.
+pub struct AuthSuccess<T> {
+    /// Did we send [`pq_proto::BeMessage::AuthenticationOk`] to client?
+    pub reported_auth_ok: bool,
+    /// Something to be considered a positive result.
+    pub value: T,
+}
+
+impl<T> AuthSuccess<T> {
+    /// Very similar to [`std::option::Option::map`].
+    /// Maps [`AuthSuccess<T>`] to [`AuthSuccess<R>`] by applying
+    /// a function to a contained value.
+    pub fn map<R>(self, f: impl FnOnce(T) -> R) -> AuthSuccess<R> {
+        AuthSuccess {
+            reported_auth_ok: self.reported_auth_ok,
+            value: f(self.value),
+        }
+    }
+}
+
+/// Info for establishing a connection to a compute node.
+/// This is what we get after auth succeeded, but not before!
+pub struct NodeInfo {
+    /// Project from [`auth::ClientCredentials`].
+    pub project: String,
+    /// Compute node connection params.
+    pub config: compute::ConnCfg,
+}
+
 impl BackendType<'_, ClientCredentials<'_>> {
+    /// Do something special if user didn't provide the `project` parameter.
+    async fn try_password_hack(
+        &mut self,
+        extra: &ConsoleReqExtra<'_>,
+        client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
+    ) -> auth::Result<Option<AuthSuccess<NodeInfo>>> {
+        use BackendType::*;
+
+        // If there's no project so far, that entails that client doesn't
+        // support SNI or other means of passing the project name.
+        // We now expect to see a very specific payload in the place of password.
+        let fetch_magic_payload = async {
+            warn!("project name not specified, resorting to the password hack auth flow");
+            let payload = AuthFlow::new(client)
+                .begin(auth::PasswordHack)
+                .await?
+                .authenticate()
+                .await?;
+
+            info!(project = &payload.project, "received missing parameter");
+            auth::Result::Ok(payload)
+        };
+
+        // TODO: find a proper way to merge those very similar blocks.
+        let (mut config, payload) = match self {
+            Console(endpoint, creds) if creds.project.is_none() => {
+                let payload = fetch_magic_payload.await?;
+
+                let mut creds = creds.as_ref();
+                creds.project = Some(payload.project.as_str().into());
+                let config = console::Api::new(endpoint, extra, &creds)
+                    .wake_compute()
+                    .await?;
+
+                (config, payload)
+            }
+            Postgres(endpoint, creds) if creds.project.is_none() => {
+                let payload = fetch_magic_payload.await?;
+
+                let mut creds = creds.as_ref();
+                creds.project = Some(payload.project.as_str().into());
+                let config = postgres::Api::new(endpoint, &creds).wake_compute().await?;
+
+                (config, payload)
+            }
+            _ => return Ok(None),
+        };
+
+        config.password(payload.password);
+        Ok(Some(AuthSuccess {
+            reported_auth_ok: false,
+            value: NodeInfo {
+                project: payload.project,
+                config,
+            },
+        }))
+    }
+
    /// Authenticate the client via the requested backend, possibly using credentials.
    pub async fn authenticate(
        mut self,
        extra: &ConsoleReqExtra<'_>,
        client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    ) -> super::Result<compute::NodeInfo> {
+    ) -> auth::Result<AuthSuccess<NodeInfo>> {
        use BackendType::*;

-        if let Console(_, creds) | Postgres(_, creds) = &mut self {
-            // If there's no project so far, that entails that client doesn't
-            // support SNI or other means of passing the project name.
-            // We now expect to see a very specific payload in the place of password.
-            if creds.project().is_none() {
-                warn!("project name not specified, resorting to the password hack auth flow");
-
-                let payload = AuthFlow::new(client)
-                    .begin(auth::PasswordHack)
-                    .await?
-                    .authenticate()
-                    .await?;
-
-                // Finally we may finish the initialization of `creds`.
-                // TODO: add missing type safety to ClientCredentials.
-                info!(project = &payload.project, "received missing parameter");
-                creds.project = Some(payload.project.into());
-
-                let mut config = match &self {
-                    Console(endpoint, creds) => {
-                        console::Api::new(endpoint, extra, creds)
-                            .wake_compute()
-                            .await?
-                    }
-                    Postgres(endpoint, creds) => {
-                        postgres::Api::new(endpoint, creds).wake_compute().await?
-                    }
-                    _ => unreachable!("see the patterns above"),
-                };
-
-                // We should use a password from payload as well.
-                config.password(payload.password);
-
-                info!("user successfully authenticated (using the password hack)");
-                return Ok(compute::NodeInfo {
-                    reported_auth_ok: false,
-                    config,
-                });
-            }
+        // Handle cases when `project` is missing in `creds`.
+        // TODO: type safety: return `creds` with irrefutable `project`.
+        if let Some(res) = self.try_password_hack(extra, client).await? {
+            info!("user successfully authenticated (using the password hack)");
+            return Ok(res);
        }

        let res = match self {
@@ -215,22 +228,34 @@ impl BackendType<'_, ClientCredentials<'_>> {
                    project = creds.project(),
                    "performing authentication using the console"
                );
+
+                assert!(creds.project.is_some());
                console::Api::new(&endpoint, extra, &creds)
                    .handle_user(client)
-                    .await
+                    .await?
+                    .map(|config| NodeInfo {
+                        project: creds.project.unwrap().into_owned(),
+                        config,
+                    })
            }
            Postgres(endpoint, creds) => {
                info!("performing mock authentication using a local postgres instance");
+
+                assert!(creds.project.is_some());
                postgres::Api::new(&endpoint, &creds)
                    .handle_user(client)
-                    .await
+                    .await?
+                    .map(|config| NodeInfo {
+                        project: creds.project.unwrap().into_owned(),
+                        config,
+                    })
            }
            // NOTE: this auth backend doesn't use client credentials.
            Link(url) => {
                info!("performing link authentication");
-                link::handle_user(&url, client).await
+                link::handle_user(&url, client).await?
            }
-        }?;
+        };

        info!("user successfully authenticated");
        Ok(res)
--- a/proxy/src/auth/backend/console.rs
+++ b/proxy/src/auth/backend/console.rs
@@ -1,9 +1,9 @@
 //! Cloud API V2.

-use super::ConsoleReqExtra;
+use super::{AuthSuccess, ConsoleReqExtra};
 use crate::{
    auth::{self, AuthFlow, ClientCredentials},
-    compute::{self, ComputeConnCfg},
+    compute,
    error::{io_error, UserFacingError},
    http, scram,
    stream::PqStream,
@@ -128,7 +128,7 @@ impl<'a> Api<'a> {
    pub(super) async fn handle_user(
        self,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    ) -> auth::Result<compute::NodeInfo> {
+    ) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
        handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
    }

@@ -164,7 +164,7 @@ impl<'a> Api<'a> {
    }

    /// Wake up the compute node and return the corresponding connection info.
-    pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
+    pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
        let request_id = uuid::Uuid::new_v4().to_string();
        let req = self
            .endpoint
@@ -195,7 +195,7 @@ impl<'a> Api<'a> {
            Some(x) => x,
        };

-        let mut config = ComputeConnCfg::new();
+        let mut config = compute::ConnCfg::new();
        config
            .host(host)
            .port(port)
@@ -213,10 +213,10 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
    endpoint: &'a Endpoint,
    get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
    wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
-) -> auth::Result<compute::NodeInfo>
+) -> auth::Result<AuthSuccess<compute::ConnCfg>>
 where
    GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
-    WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
+    WakeCompute: Future<Output = Result<compute::ConnCfg, WakeComputeError>>,
 {
    info!("fetching user's authentication info");
    let auth_info = get_auth_info(endpoint).await?;
@@ -243,9 +243,9 @@ where
        config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
    }

-    Ok(compute::NodeInfo {
+    Ok(AuthSuccess {
        reported_auth_ok: false,
-        config,
+        value: config,
    })
 }

--- a/proxy/src/auth/backend/link.rs
+++ b/proxy/src/auth/backend/link.rs
@@ -1,3 +1,4 @@
+use super::{AuthSuccess, NodeInfo};
 use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
 use pq_proto::{BeMessage as Be, BeParameterStatusMessage};
 use thiserror::Error;
@@ -49,7 +50,7 @@ pub fn new_psql_session_id() -> String {
 pub async fn handle_user(
    link_uri: &reqwest::Url,
    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-) -> auth::Result<compute::NodeInfo> {
+) -> auth::Result<AuthSuccess<NodeInfo>> {
    let psql_session_id = new_psql_session_id();
    let span = info_span!("link", psql_session_id = &psql_session_id);
    let greeting = hello_message(link_uri, &psql_session_id);
@@ -71,8 +72,22 @@ pub async fn handle_user(

    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;

-    Ok(compute::NodeInfo {
+    let mut config = compute::ConnCfg::new();
+    config
+        .host(&db_info.host)
+        .port(db_info.port)
+        .dbname(&db_info.dbname)
+        .user(&db_info.user);
+
+    if let Some(password) = db_info.password {
+        config.password(password);
+    }
+
+    Ok(AuthSuccess {
        reported_auth_ok: true,
-        config: db_info.into(),
+        value: NodeInfo {
+            project: db_info.project,
+            config,
+        },
    })
 }
--- a/proxy/src/auth/backend/postgres.rs
+++ b/proxy/src/auth/backend/postgres.rs
@@ -1,12 +1,12 @@
 //! Local mock of Cloud API V2.

+use super::{
+    console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
+    AuthSuccess,
+};
 use crate::{
-    auth::{
-        self,
-        backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
-        ClientCredentials,
-    },
-    compute::{self, ComputeConnCfg},
+    auth::{self, ClientCredentials},
+    compute,
    error::io_error,
    scram,
    stream::PqStream,
@@ -37,7 +37,7 @@ impl<'a> Api<'a> {
    pub(super) async fn handle_user(
        self,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    ) -> auth::Result<compute::NodeInfo> {
+    ) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
        // We reuse user handling logic from a production module.
        console::handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
    }
@@ -82,8 +82,8 @@ impl<'a> Api<'a> {
    }

    /// We don't need to wake anything locally, so we just return the connection info.
-    pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
-        let mut config = ComputeConnCfg::new();
+    pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
+        let mut config = compute::ConnCfg::new();
        config
            .host(self.endpoint.host_str().unwrap_or("localhost"))
            .port(self.endpoint.port().unwrap_or(5432))
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -36,11 +36,23 @@ pub struct ClientCredentials<'a> {
 }

 impl ClientCredentials<'_> {
+    #[inline]
    pub fn project(&self) -> Option<&str> {
        self.project.as_deref()
    }
 }

+impl<'a> ClientCredentials<'a> {
+    #[inline]
+    pub fn as_ref(&'a self) -> ClientCredentials<'a> {
+        Self {
+            user: self.user,
+            dbname: self.dbname,
+            project: self.project().map(Cow::Borrowed),
+        }
+    }
+}
+
 impl<'a> ClientCredentials<'a> {
    pub fn parse(
        params: &'a StartupMessageParams,
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -40,17 +40,36 @@ impl UserFacingError for ConnectionError {
 /// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
 pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;

-pub type ComputeConnCfg = tokio_postgres::Config;
+/// A config for establishing a connection to compute node.
+/// Eventually, `tokio_postgres` will be replaced with something better.
+/// Newtype allows us to implement methods on top of it.
+#[repr(transparent)]
+pub struct ConnCfg(pub tokio_postgres::Config);

-/// Various compute node info for establishing connection etc.
-pub struct NodeInfo {
-    /// Did we send [`pq_proto::BeMessage::AuthenticationOk`]?
-    pub reported_auth_ok: bool,
-    /// Compute node connection params.
-    pub config: tokio_postgres::Config,
+impl ConnCfg {
+    /// Construct a new connection config.
+    pub fn new() -> Self {
+        Self(tokio_postgres::Config::new())
+    }
 }

-impl NodeInfo {
+impl std::ops::Deref for ConnCfg {
+    type Target = tokio_postgres::Config;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+/// For now, let's make it easier to setup the config.
+impl std::ops::DerefMut for ConnCfg {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl ConnCfg {
+    /// Establish a raw TCP connection to the compute node.
    async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
        use tokio_postgres::config::Host;

@@ -68,8 +87,8 @@ impl NodeInfo {
        // because it has no means for extracting the underlying socket which we
        // require for our business.
        let mut connection_error = None;
-        let ports = self.config.get_ports();
-        let hosts = self.config.get_hosts();
+        let ports = self.0.get_ports();
+        let hosts = self.0.get_hosts();
        // the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
        if ports.len() > 1 && ports.len() != hosts.len() {
            return Err(io::Error::new(
@@ -77,7 +96,7 @@ impl NodeInfo {
                format!(
                    "couldn't connect: bad compute config, \
                        ports and hosts entries' count does not match: {:?}",
-                    self.config
+                    self.0
                ),
            ));
        }
@@ -103,7 +122,7 @@ impl NodeInfo {
        Err(connection_error.unwrap_or_else(|| {
            io::Error::new(
                io::ErrorKind::Other,
-                format!("couldn't connect: bad compute config: {:?}", self.config),
+                format!("couldn't connect: bad compute config: {:?}", self.0),
            )
        }))
    }
@@ -116,7 +135,7 @@ pub struct PostgresConnection {
    pub version: String,
 }

-impl NodeInfo {
+impl ConnCfg {
    /// Connect to a corresponding compute node.
    pub async fn connect(
        mut self,
@@ -130,21 +149,21 @@ impl NodeInfo {
                .intersperse(" ") // TODO: use impl from std once it's stabilized
                .collect();

-            self.config.options(&options);
+            self.0.options(&options);
        }

        if let Some(app_name) = params.get("application_name") {
-            self.config.application_name(app_name);
+            self.0.application_name(app_name);
        }

        if let Some(replication) = params.get("replication") {
            use tokio_postgres::config::ReplicationMode;
            match replication {
                "true" | "on" | "yes" | "1" => {
-                    self.config.replication_mode(ReplicationMode::Physical);
+                    self.0.replication_mode(ReplicationMode::Physical);
                }
                "database" => {
-                    self.config.replication_mode(ReplicationMode::Logical);
+                    self.0.replication_mode(ReplicationMode::Logical);
                }
                _other => {}
            }
@@ -160,7 +179,7 @@ impl NodeInfo {
            .map_err(|_| ConnectionError::FailedToConnectToCompute)?;

        // TODO: establish a secure connection to the DB
-        let (client, conn) = self.config.connect_raw(&mut stream, NoTls).await?;
+        let (client, conn) = self.0.connect_raw(&mut stream, NoTls).await?;
        let version = conn
            .parameter("server_version")
            .ok_or(ConnectionError::FailedToFetchPgVersion)?
--- a/proxy/src/mgmt.rs
+++ b/proxy/src/mgmt.rs
@@ -6,16 +6,11 @@ use std::{
    net::{TcpListener, TcpStream},
    thread,
 };
-use tracing::{error, info};
+use tracing::{error, info, info_span};
 use utils::postgres_backend::{self, AuthType, PostgresBackend};

-/// TODO: move all of that to auth-backend/link.rs when we ditch legacy-console backend
-
-///
-/// Main proxy listener loop.
-///
-/// Listens for connections, and launches a new handler thread for each.
-///
+/// Console management API listener thread.
+/// It spawns console response handlers needed for the link auth.
 pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
    scopeguard::defer! {
        info!("mgmt has shut down");
@@ -24,6 +19,7 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
    listener
        .set_nonblocking(false)
        .context("failed to set listener to blocking")?;
+
    loop {
        let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
        info!("accepted connection from {peer_addr}");
@@ -31,9 +27,19 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
            .set_nodelay(true)
            .context("failed to set client socket option")?;

+        // TODO: replace with async tasks.
        thread::spawn(move || {
-            if let Err(err) = handle_connection(socket) {
-                error!("{err}");
+            let tid = std::thread::current().id();
+            let span = info_span!("mgmt", thread = format_args!("{tid:?}"));
+            let _enter = span.enter();
+
+            info!("started a new console management API thread");
+            scopeguard::defer! {
+                info!("console management API thread is about to finish");
+            }
+
+            if let Err(e) = handle_connection(socket) {
+                error!("thread failed with an error: {e}");
            }
        });
    }
@@ -44,44 +50,21 @@ fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
    pgbackend.run(&mut MgmtHandler)
 }

-struct MgmtHandler;
-
-/// Serialized examples:
-// {
-//     "session_id": "71d6d03e6d93d99a",
-//     "result": {
-//         "Success": {
-//             "host": "127.0.0.1",
-//             "port": 5432,
-//             "dbname": "stas",
-//             "user": "stas",
-//             "password": "mypass"
-//         }
-//     }
-// }
-// {
-//     "session_id": "71d6d03e6d93d99a",
-//     "result": {
-//         "Failure": "oops"
-//     }
-// }
-//
-// // to test manually by sending a query to mgmt interface:
-// psql -h 127.0.0.1 -p 9999 -c '{"session_id":"4f10dde522e14739","result":{"Success":{"host":"127.0.0.1","port":5432,"dbname":"stas","user":"stas","password":"stas"}}}'
-#[derive(Deserialize)]
+/// Known as `kickResponse` in the console.
+#[derive(Debug, Deserialize)]
 struct PsqlSessionResponse {
    session_id: String,
    result: PsqlSessionResult,
 }

-#[derive(Deserialize)]
+#[derive(Debug, Deserialize)]
 enum PsqlSessionResult {
-    Success(auth::DatabaseInfo),
+    Success(DatabaseInfo),
    Failure(String),
 }

 /// A message received by `mgmt` when a compute node is ready.
-pub type ComputeReady = Result<auth::DatabaseInfo, String>;
+pub type ComputeReady = Result<DatabaseInfo, String>;

 impl PsqlSessionResult {
    fn into_compute_ready(self) -> ComputeReady {
@@ -92,25 +75,51 @@ impl PsqlSessionResult {
    }
 }

-impl postgres_backend::Handler for MgmtHandler {
-    fn process_query(
-        &mut self,
-        pgb: &mut PostgresBackend,
-        query_string: &str,
-    ) -> anyhow::Result<()> {
-        let res = try_process_query(pgb, query_string);
-        // intercept and log error message
-        if res.is_err() {
-            error!("mgmt query failed: {res:?}");
-        }
-        res
+/// Compute node connection params provided by the console.
+/// This struct and its parents are mgmt API implementation
+/// detail and thus should remain in this module.
+// TODO: restore deserialization tests from git history.
+#[derive(Deserialize)]
+pub struct DatabaseInfo {
+    pub host: String,
+    pub port: u16,
+    pub dbname: String,
+    pub user: String,
+    /// Console always provides a password, but it might
+    /// be inconvenient for debug with local PG instance.
+    pub password: Option<String>,
+    pub project: String,
+}
+
+// Manually implement `Debug` so that the password never appears in debug output.
+impl std::fmt::Debug for DatabaseInfo {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+        fmt.debug_struct("DatabaseInfo")
+            .field("host", &self.host)
+            .field("port", &self.port)
+            .field("dbname", &self.dbname)
+            .field("user", &self.user)
+            .finish_non_exhaustive()
    }
 }

-fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
-    info!("got mgmt query [redacted]"); // Content contains password, don't print it
+// TODO: replace with an http-based protocol.
+struct MgmtHandler;
+impl postgres_backend::Handler for MgmtHandler {
+    fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
+        try_process_query(pgb, query).map_err(|e| {
+            error!("failed to process response: {e:?}");
+            e
+        })
+    }
+}

-    let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
+fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
+    let resp: PsqlSessionResponse = serde_json::from_str(query)?;
+
+    let span = info_span!("event", session_id = resp.session_id);
+    let _enter = span.enter();
+    info!("got response: {:?}", resp.result);

    match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
        Ok(()) => {
@@ -119,9 +128,50 @@ fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::R
                .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        }
        Err(e) => {
+            error!("failed to deliver response to per-client task");
            pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
        }
    }

    Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    #[test]
+    fn parse_db_info() -> anyhow::Result<()> {
+        // with password
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "password": "password",
+            "project": "hello_world",
+        }))?;
+
+        // without password
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "project": "hello_world",
+        }))?;
+
+        // new field (forward compatibility)
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "project": "hello_world",
+            "N.E.W": "forward compatibility check",
+        }))?;
+
+        Ok(())
+    }
+}
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -4,7 +4,7 @@ use crate::config::{ProxyConfig, TlsConfig};
 use crate::stream::{MeasuredStream, PqStream, Stream};
 use anyhow::{bail, Context};
 use futures::TryFutureExt;
-use metrics::{register_int_counter, IntCounter};
+use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
 use pq_proto::{BeMessage as Be, *};
 use std::sync::Arc;
@@ -30,10 +30,16 @@ static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    .unwrap()
 });

-static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
-        "proxy_io_bytes_total",
-        "Number of bytes sent/received between any client and backend."
+static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_io_bytes_per_client",
+        "Number of bytes sent/received between client and backend.",
+        &[
+            // Received (rx) / sent (tx).
+            "direction",
+            // Proxy can keep calling it `project` internally.
+            "endpoint_id"
+        ]
    )
    .unwrap()
 });
@@ -230,16 +236,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
            application_name: params.get("application_name"),
        };

-        // Authenticate and connect to a compute node.
-        let auth = creds
-            .authenticate(&extra, &mut stream)
-            .instrument(info_span!("auth"))
-            .await;
-
-        let node = async { auth }.or_else(|e| stream.throw_error(e)).await?;
-        let reported_auth_ok = node.reported_auth_ok;
+        let auth_result = async {
+            // `&mut stream` doesn't let us merge those 2 lines.
+            let res = creds.authenticate(&extra, &mut stream).await;
+            async { res }.or_else(|e| stream.throw_error(e)).await
+        }
+        .instrument(info_span!("auth"))
+        .await?;

+        let node = auth_result.value;
        let (db, cancel_closure) = node
+            .config
            .connect(params)
            .or_else(|e| stream.throw_error(e))
            .await?;
@@ -247,7 +254,9 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
        let cancel_key_data = session.enable_query_cancellation(cancel_closure);

        // Report authentication success if we haven't done this already.
-        if !reported_auth_ok {
+        // Note that we do this only (for the most part) after we've connected
+        // to a compute (see above) which performs its own authentication.
+        if !auth_result.reported_auth_ok {
            stream
                .write_message_noflush(&Be::AuthenticationOk)?
                .write_message_noflush(&BeParameterStatusMessage::encoding())?;
@@ -261,17 +270,23 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
            .write_message(&BeMessage::ReadyForQuery)
            .await?;

-        /// This function will be called for writes to either direction.
-        fn inc_proxied(cnt: usize) {
-            // Consider inventing something more sophisticated
-            // if this ever becomes a bottleneck (cacheline bouncing).
-            NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
-        }
+        // TODO: add more identifiers.
+        let metric_id = node.project;
+
+        let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx", &metric_id]);
+        let mut client = MeasuredStream::new(stream.into_inner(), |cnt| {
+            // Number of bytes we sent to the client (outbound).
+            m_sent.inc_by(cnt as u64);
+        });
+
+        let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx", &metric_id]);
+        let mut db = MeasuredStream::new(db.stream, |cnt| {
+            // Number of bytes the client sent to the compute node (inbound).
+            m_recv.inc_by(cnt as u64);
+        });

        // Starting from here we only proxy the client's traffic.
        info!("performing the proxy pass...");
-        let mut db = MeasuredStream::new(db.stream, inc_proxied);
-        let mut client = MeasuredStream::new(stream.into_inner(), inc_proxied);
        let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;

        Ok(())
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -440,68 +440,35 @@ def pytest_terminal_summary(

    terminalreporter.section("Benchmark results", "-")

-    # TODO group by test report
-    reports = {
-        report.head_line: report
-        for report in terminalreporter.stats.get("passed", [])
-    }
+    result = []
+    for test_report in terminalreporter.stats.get("passed", []):
+        result_entry = []

-    results = []
-    for name, report in reports.items():
-        # terminalreporter.write(f"{name}", green=True)
-        # terminalreporter.line("")
-        if "[neon" in name:
-            vanilla_report = reports.get(name.replace("[neon", "[vanilla"))
-            if vanilla_report:
-                for key, prop in report.user_properties:
-                    if prop["unit"] == "s":
-                        neon_value = prop["value"]
-                        vanilla_value = dict(vanilla_report.user_properties)[key]["value"]
-                        try:
-                            ratio = float(neon_value) / vanilla_value
-                        except ZeroDivisionError:
-                            ratio = 99999
+        for _, recorded_property in test_report.user_properties:
+            terminalreporter.write(
+                "{}.{}: ".format(test_report.head_line, recorded_property["name"])
+            )
+            unit = recorded_property["unit"]
+            value = recorded_property["value"]
+            if unit == "MB":
+                terminalreporter.write("{0:,.0f}".format(value), green=True)
+            elif unit in ("s", "ms") and isinstance(value, float):
+                terminalreporter.write("{0:,.3f}".format(value), green=True)
+            elif isinstance(value, float):
+                terminalreporter.write("{0:,.4f}".format(value), green=True)
+            else:
+                terminalreporter.write(str(value), green=True)
+            terminalreporter.line(" {}".format(unit))

-                        results.append((ratio, name.replace("[neon", "[neon/vanilla"), prop["name"]))
+            result_entry.append(recorded_property)

-    results.sort(reverse=True)
-    for ratio, test, prop in results:
-        terminalreporter.write("{}.{}: ".format(test, prop))
-        terminalreporter.write("{0:,.3f}".format(ratio), green=True)
-        terminalreporter.line("")
-
-    # result = []
-    # for test_report in terminalreporter.stats.get("passed", []):
-    #     result_entry = []
-
-    #     durations = [
-    #         prop
-    #         for _, prop in test_report.user_properties
-    #         if prop["unit"] == "s"
-    #     ]
-
-    #     for _, recorded_property in test_report.user_properties:
-    #         terminalreporter.write("{}.{}: ".format(test_report.head_line,
-    #                                                 recorded_property["name"]))
-    #         unit = recorded_property["unit"]
-    #         value = recorded_property["value"]
-    #         if unit == "MB":
-    #             terminalreporter.write("{0:,.0f}".format(value), green=True)
-    #         elif unit in ("s", "ms") and isinstance(value, float):
-    #             terminalreporter.write("{0:,.3f}".format(value), green=True)
-    #         elif isinstance(value, float):
-    #             terminalreporter.write("{0:,.4f}".format(value), green=True)
-    #         else:
-    #             terminalreporter.write(str(value), green=True)
-    #         terminalreporter.line(" {}".format(unit))
-
-    #         result_entry.append(recorded_property)
-
-    #     result.append({
-    #         "suit": test_report.nodeid,
-    #         "total_duration": test_report.duration,
-    #         "data": result_entry,
-    #     })
+        result.append(
+            {
+                "suit": test_report.nodeid,
+                "total_duration": test_report.duration,
+                "data": result_entry,
+            }
+        )

    out_dir = config.getoption("out_dir")
    if out_dir is None:
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -784,6 +784,8 @@ class NeonEnvBuilder:

            self.cleanup_remote_storage()

+            self.env.pageserver.assert_no_errors()
+

 class NeonEnv:
    """
@@ -1566,6 +1568,7 @@ class NeonCli(AbstractNeonCli):
    def pageserver_start(
        self,
        overrides: Tuple[str, ...] = (),
+        extra_env_vars: Optional[Dict[str, str]] = None,
    ) -> "subprocess.CompletedProcess[str]":
        start_args = ["pageserver", "start", *overrides]
        append_pageserver_param_overrides(
@@ -1575,11 +1578,11 @@ class NeonCli(AbstractNeonCli):
            pageserver_config_override=self.env.pageserver.config_override,
        )

-        s3_env_vars = None
        if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
            s3_env_vars = self.env.remote_storage.access_env_vars()
+            extra_env_vars = (extra_env_vars or {}) | s3_env_vars

-        return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
+        return self.raw_cli(start_args, extra_env_vars=extra_env_vars)

    def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
        cmd = ["pageserver", "stop"]
@@ -1723,7 +1726,50 @@ class NeonPageserver(PgProtocol):
        self.config_override = config_override
        self.version = env.get_pageserver_version()

-    def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver":
+        # After a test finishes, we will scrape the log to see if there are any
+        # unexpected error messages. If your test expects an error, add it to
+        # 'allowed_errors' in the test with something like:
+        #
+        # env.pageserver.allowed_errors.append(".*could not open garage door.*")
+        #
+        # The entries in the list are regular expressions.
+        self.allowed_errors = [
+            # All tests print these, when starting up or shutting down
+            ".*wal receiver task finished with an error: walreceiver connection handling failure.*",
+            ".*Shutdown task error: walreceiver connection handling failure.*",
+            ".*Etcd client error: grpc request error: status: Unavailable.*",
+            ".*query handler for .* failed: Connection reset by peer.*",
+            ".*serving compute connection task.*exited with error: Broken pipe.*",
+            ".*Connection aborted: error communicating with the server: Broken pipe.*",
+            ".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
+            ".*Connection aborted: error communicating with the server: Connection reset by peer.*",
+            ".*kill_and_wait_impl.*: wait successful.*",
+            ".*end streaming to Some.*",
+            # safekeeper connection can fail with this, in the window between timeline creation
+            # and streaming start
+            ".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
+            # Tests related to authentication and authorization print these
+            ".*Error processing HTTP request: Forbidden",
+            # intentional failpoints
+            ".*failpoint ",
+            # FIXME: there is a race condition between GC and detach, see
+            # https://github.com/neondatabase/neon/issues/2442
+            ".*could not remove ephemeral file.*No such file or directory.*",
+            # FIXME: These need investigation
+            ".*gc_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
+            ".*compaction_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
+            ".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
+            ".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
+            ".*Removing intermediate uninit mark file.*",
+            # FIXME: known race condition in TaskHandle: https://github.com/neondatabase/neon/issues/2885
+            ".*sender is dropped while join handle is still alive.*",
+        ]
+
+    def start(
+        self,
+        overrides: Tuple[str, ...] = (),
+        extra_env_vars: Optional[Dict[str, str]] = None,
+    ) -> "NeonPageserver":
        """
        Start the page server.
        `overrides` allows to add some config to this pageserver start.
@@ -1731,7 +1777,7 @@ class NeonPageserver(PgProtocol):
        """
        assert self.running is False

-        self.env.neon_cli.pageserver_start(overrides=overrides)
+        self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
        self.running = True
        return self

@@ -1771,6 +1817,26 @@ class NeonPageserver(PgProtocol):
            is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
        )

+    def assert_no_errors(self):
+        """
+        Scan `pageserver.log` in the repo directory and fail if any line containing
+        ERROR or WARN does not match one of the regexes in `self.allowed_errors`.
+        """
+        error_or_warn = re.compile("ERROR|WARN")
+        log_path = os.path.join(self.env.repo_dir, "pageserver.log")
+
+        errors = []
+        # The context manager closes the log file even if reading raises.
+        with open(log_path, "r") as logfile:
+            for line in logfile:
+                if not error_or_warn.search(line):
+                    continue
+                # It's an ERROR or WARN. Is it in the allow-list?
+                if not any(re.match(a, line) for a in self.allowed_errors):
+                    errors.append(line)
+
+        assert not errors
+

 def append_pageserver_param_overrides(
    params_to_update: List[str],
@@ -2014,9 +2080,9 @@ class NeonProxy(PgProtocol):
        self,
        proxy_port: int,
        http_port: int,
+        mgmt_port: int,
        neon_binpath: Path,
        auth_endpoint=None,
-        mgmt_port=None,
    ):
        super().__init__(dsn=auth_endpoint, port=proxy_port)
        self.host = "127.0.0.1"
@@ -2030,7 +2096,8 @@ class NeonProxy(PgProtocol):

    def start(self):
        """
-        Starts a proxy with option '--auth-backend postgres' and a postgres instance already provided though '--auth-endpoint <postgress-instance>'."
+        Starts a proxy with option '--auth-backend postgres' and a postgres instance
+        already provided through '--auth-endpoint <postgres-instance>'.
        """
        assert self._popen is None
        assert self.auth_endpoint is not None
@@ -2040,6 +2107,7 @@ class NeonProxy(PgProtocol):
            str(self.neon_binpath / "proxy"),
            *["--http", f"{self.host}:{self.http_port}"],
            *["--proxy", f"{self.host}:{self.proxy_port}"],
+            *["--mgmt", f"{self.host}:{self.mgmt_port}"],
            *["--auth-backend", "postgres"],
            *["--auth-endpoint", self.auth_endpoint],
        ]
@@ -2116,11 +2184,13 @@ def static_proxy(
    auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"

    proxy_port = port_distributor.get_port()
+    mgmt_port = port_distributor.get_port()
    http_port = port_distributor.get_port()

    with NeonProxy(
        proxy_port=proxy_port,
        http_port=http_port,
+        mgmt_port=mgmt_port,
        neon_binpath=neon_binpath,
        auth_endpoint=auth_endpoint,
    ) as proxy:
@@ -2661,8 +2731,6 @@ def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[P

    yield test_dir

-    shutil.rmtree(test_dir)
-
    allure_attach_from_dir(test_dir)


--- a/test_runner/performance/test_seqscans.py
+++ b/test_runner/performance/test_seqscans.py
@@ -6,6 +6,7 @@ import pytest
 from fixtures.benchmark_fixture import MetricReport
 from fixtures.compare_fixtures import PgCompare
 from fixtures.log_helper import log
+from pytest_lazyfixture import lazy_fixture  # type: ignore


@pytest.mark.parametrize(
@@ -20,11 +21,24 @@ from fixtures.log_helper import log
        pytest.param(10000000, 1, 4),
    ],
 )
-def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
-    env = neon_with_baseline
+@pytest.mark.parametrize(
+    "env, scale",
+    [
+        # Run on all envs. Use 50x larger table on remote cluster to make sure
+        # it doesn't fit in shared buffers, which are larger on remote than local.
+        pytest.param(lazy_fixture("neon_compare"), 1, id="neon"),
+        pytest.param(lazy_fixture("vanilla_compare"), 1, id="vanilla"),
+        pytest.param(
+            lazy_fixture("remote_compare"), 50, id="remote", marks=pytest.mark.remote_cluster
+        ),
+    ],
+)
+def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: int):
+    rows = scale * rows

    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
+            cur.execute("drop table if exists t;")
            cur.execute("create table t (i integer);")
            cur.execute(f"insert into t values (generate_series(1,{rows}));")

--- a/test_runner/regress/test_branch_and_gc.py
+++ b/test_runner/regress/test_branch_and_gc.py
@@ -116,6 +116,13 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
    env = neon_simple_env
    pageserver_http_client = env.pageserver.http_client()

+    env.pageserver.allowed_errors.extend(
+        [
+            ".*invalid branch start lsn: less than latest GC cutoff.*",
+            ".*invalid branch start lsn: less than planned GC cutoff.*",
+        ]
+    )
+
    # Disable background GC but set the `pitr_interval` to be small, so GC can delete something
    tenant, _ = env.neon_cli.create_tenant(
        conf={
--- a/test_runner/regress/test_branch_behind.py
+++ b/test_runner/regress/test_branch_behind.py
@@ -13,6 +13,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
    env = neon_env_builder.init_start()

+    env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
+    env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
+
    # Branch at the point where only 100 rows were inserted
    env.neon_cli.create_branch("test_branch_behind")
    pgmain = env.postgres.create_start("test_branch_behind")
--- a/test_runner/regress/test_broken_timeline.py
+++ b/test_runner/regress/test_broken_timeline.py
@@ -11,10 +11,17 @@ from fixtures.types import TenantId, TimelineId
 # Test restarting page server, while safekeeper and compute node keep
 # running.
 def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
-    # One safekeeper is enough for this test.
-    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

+    env.pageserver.allowed_errors.extend(
+        [
+            ".*No timelines to attach received.*",
+            ".*Failed to process timeline dir contents.*",
+            ".*Failed to load delta layer.*",
+            ".*Timeline .* was not found.*",
+        ]
+    )
+
    tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []

    for n in range(4):
@@ -72,23 +79,24 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
    # First timeline would not get loaded into pageserver due to corrupt metadata file
    with pytest.raises(Exception, match=f"Timeline {tenant1}/{timeline1} was not found") as err:
        pg1.start()
-    log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
+    log.info(
+        f"As expected, compute startup failed eagerly for timeline with corrupt metadata: {err}"
+    )

    # Second timeline has no ancestors, only the metadata file and no layer files
    # We don't have the remote storage enabled, which means timeline is in an incorrect state,
    # it's not loaded at all
    with pytest.raises(Exception, match=f"Timeline {tenant2}/{timeline2} was not found") as err:
        pg2.start()
-    log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
+    log.info(f"As expected, compute startup failed for timeline with missing layers: {err}")

-    # Yet other timelines will fail when their layers will be queried during basebackup: we don't check layer file contents on startup, when loading the timeline
-    for n in range(3, 4):
-        (bad_tenant, bad_timeline, pg) = tenant_timelines[n]
-        with pytest.raises(Exception, match="extracting base backup failed") as err:
-            pg.start()
-        log.info(
-            f"compute startup failed lazily for timeline {bad_tenant}/{bad_timeline} with corrupt layers, during basebackup preparation: {err}"
-        )
+    # Third timeline will also fail during basebackup, because the layer file is corrupt.
+    # (We don't check layer file contents on startup, when loading the timeline)
+    with pytest.raises(Exception, match="Failed to load delta layer") as err:
+        pg3.start()
+    log.info(
+        f"As expected, compute startup failed for timeline {tenant3}/{timeline3} with corrupt layers: {err}"
+    )


 def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
@@ -111,6 +119,13 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
    env = neon_simple_env
    pageserver_http = env.pageserver.http_client()

+    env.pageserver.allowed_errors.extend(
+        [
+            ".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
+            ".*Timeline got dropped without initializing, cleaning its files.*",
+        ]
+    )
+
    tenant_id, _ = env.neon_cli.create_tenant()

    timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -2,7 +2,7 @@ import os
 import shutil
 import subprocess
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional

 import pytest
 import toml  # TODO: replace with tomllib for Python >= 3.11
@@ -50,6 +50,12 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o

    env = neon_env_builder.init_start()
    pg = env.postgres.create_start("main")
+
+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()])
    pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()])
    pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"])
@@ -154,6 +160,7 @@ def test_forward_compatibility(
        from_dir=compatibility_snapshot_dir,
        to_dir=test_output_dir / "compatibility_snapshot",
        port_distributor=port_distributor,
+        pg_distrib_dir=compatibility_postgres_distrib_dir,
    )

    breaking_changes_allowed = (
@@ -183,7 +190,12 @@ def test_forward_compatibility(
    ), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"


-def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistributor):
+def prepare_snapshot(
+    from_dir: Path,
+    to_dir: Path,
+    port_distributor: PortDistributor,
+    pg_distrib_dir: Optional[Path] = None,
+):
    assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
    assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
    assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
@@ -208,7 +220,7 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib
    # Update paths and ports in config files
    pageserver_toml = repo_dir / "pageserver.toml"
    pageserver_config = toml.load(pageserver_toml)
-    pageserver_config["remote_storage"]["local_path"] = repo_dir / "local_fs_remote_storage"
+    pageserver_config["remote_storage"]["local_path"] = str(repo_dir / "local_fs_remote_storage")
    pageserver_config["listen_http_addr"] = port_distributor.replace_with_new_port(
        pageserver_config["listen_http_addr"]
    )
@@ -219,6 +231,9 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib
        port_distributor.replace_with_new_port(ep) for ep in pageserver_config["broker_endpoints"]
    ]

+    if pg_distrib_dir:
+        pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
+
    with pageserver_toml.open("w") as f:
        toml.dump(pageserver_config, f)

@@ -238,7 +253,10 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib
        sk["http_port"] = port_distributor.replace_with_new_port(sk["http_port"])
        sk["pg_port"] = port_distributor.replace_with_new_port(sk["pg_port"])

-    with (snapshot_config_toml).open("w") as f:
+    if pg_distrib_dir:
+        snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
+
+    with snapshot_config_toml.open("w") as f:
        toml.dump(snapshot_config, f)

    # Ensure that snapshot doesn't contain references to the original path
--- a/test_runner/regress/test_compute_ctl.py
+++ b/test_runner/regress/test_compute_ctl.py
@@ -179,7 +179,16 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
    # run compute_ctl and wait for 10s
    try:
        ctl.raw_cli(
-            ["--connstr", ps_connstr, "--pgdata", pgdata, "--spec", spec, "--pgbin", pg_bin_path],
+            [
+                "--connstr",
+                "postgres://invalid/",
+                "--pgdata",
+                pgdata,
+                "--spec",
+                spec,
+                "--pgbin",
+                pg_bin_path,
+            ],
            timeout=10,
        )
    except TimeoutExpired as exc:
--- a/test_runner/regress/test_gc_cutoff.py
+++ b/test_runner/regress/test_gc_cutoff.py
@@ -1,3 +1,4 @@
+import pytest
 from fixtures.neon_fixtures import NeonEnvBuilder, PgBin


@@ -7,8 +8,14 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
 # normally restarts after it. Also, there should be GC ERRORs in the log,
 # but the fixture checks the log for any unexpected ERRORs after every
 # test anyway, so it doesn't need any special attention here.
+@pytest.mark.timeout(600)
 def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
    env = neon_env_builder.init_start()
+
+    # These warnings are expected when the pageserver is restarted abruptly
+    env.pageserver.allowed_errors.append(".*found future image layer.*")
+    env.pageserver.allowed_errors.append(".*found future delta layer.*")
+
    pageserver_http = env.pageserver.http_client()

    # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
@@ -30,10 +37,9 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):

    pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))

-    for i in range(5):
-        try:
-            pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr])
-        except Exception:
-            env.pageserver.stop()
-            env.pageserver.start()
-            pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
+    for _ in range(5):
+        with pytest.raises(Exception):
+            pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
+        env.pageserver.stop()
+        env.pageserver.start()
+        pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
--- a/test_runner/regress/test_import.py
+++ b/test_runner/regress/test_import.py
@@ -76,6 +76,26 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
    env = neon_env_builder.init_start()
    env.pageserver.http_client().tenant_create(tenant)

+    env.pageserver.allowed_errors.extend(
+        [
+            ".*error importing base backup .*",
+            ".*Timeline got dropped without initializing, cleaning its files.*",
+            ".*Removing intermediate uninit mark file.*",
+            ".*InternalServerError.*timeline not found.*",
+            ".*InternalServerError.*Tenant .* not found.*",
+            ".*InternalServerError.*Timeline .* not found.*",
+            ".*InternalServerError.*Cannot delete timeline which has child timelines.*",
+        ]
+    )
+
+    # FIXME: we should clean up pageserver to not print this
+    env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
+
+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    def import_tar(base, wal):
        env.neon_cli.raw_cli(
            [
@@ -122,6 +142,11 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
    neon_env_builder.enable_local_fs_remote_storage()
    env = neon_env_builder.init_start()

+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
    pg = env.postgres.create_start("test_import_from_pageserver_small")

--- a/test_runner/regress/test_pageserver_restart.py
+++ b/test_runner/regress/test_pageserver_restart.py
@@ -67,6 +67,10 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()

+    # These warnings are expected when the pageserver is restarted abruptly
+    env.pageserver.allowed_errors.append(".*found future image layer.*")
+    env.pageserver.allowed_errors.append(".*found future delta layer.*")
+
    # Use a tiny checkpoint distance, to create a lot of layers quickly.
    # That allows us to stress the compaction and layer flushing logic more.
    tenant, _ = env.neon_cli.create_tenant(
--- a/test_runner/regress/test_proxy.py
+++ b/test_runner/regress/test_proxy.py
@@ -1,5 +1,4 @@
 import json
-import subprocess
 from urllib.parse import urlparse

 import psycopg2
@@ -8,11 +7,11 @@ from fixtures.log_helper import log
 from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres


-def test_proxy_select_1(static_proxy):
+def test_proxy_select_1(static_proxy: NeonProxy):
    static_proxy.safe_psql("select 1", options="project=generic-project-name")


-def test_password_hack(static_proxy):
+def test_password_hack(static_proxy: NeonProxy):
    user = "borat"
    password = "password"
    static_proxy.safe_psql(
@@ -24,118 +23,75 @@ def test_password_hack(static_proxy):
    static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)

    # Must also check that invalid magic won't be accepted.
-    with pytest.raises(psycopg2.errors.OperationalError):
+    with pytest.raises(psycopg2.OperationalError):
        magic = "broken"
        static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)


-def get_session_id_from_uri_line(uri_prefix, uri_line):
+def get_session_id(uri_prefix, uri_line):
    assert uri_prefix in uri_line

    url_parts = urlparse(uri_line)
    psql_session_id = url_parts.path[1:]
-    assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars."
-    link_auth_uri_prefix = uri_line[: -len(url_parts.path)]
-    # invariant: the prefix must match the uri_prefix.
-    assert (
-        link_auth_uri_prefix == uri_prefix
-    ), f"Line='{uri_line}' should contain a http auth link of form '{uri_prefix}/<psql_session_id>'."
-    # invariant: the entire link_auth_uri should be on its own line, module spaces.
-    assert " ".join(uri_line.split(" ")) == f"{uri_prefix}/{psql_session_id}"
+    assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"

    return psql_session_id


-def create_and_send_db_info(local_vanilla_pg, psql_session_id, mgmt_port):
-    pg_user = "proxy"
-    pg_password = "password"
-
-    local_vanilla_pg.start()
-    query = f"create user {pg_user} with login superuser password '{pg_password}'"
-    local_vanilla_pg.safe_psql(query)
-
-    port = local_vanilla_pg.default_options["port"]
-    host = local_vanilla_pg.default_options["host"]
-    dbname = local_vanilla_pg.default_options["dbname"]
-
-    db_info_dict = {
-        "session_id": psql_session_id,
-        "result": {
-            "Success": {
-                "host": host,
-                "port": port,
-                "dbname": dbname,
-                "user": pg_user,
-                "password": pg_password,
-            }
-        },
-    }
-    db_info_str = json.dumps(db_info_dict)
-    cmd_args = [
-        "psql",
-        "-h",
-        "127.0.0.1",  # localhost
-        "-p",
-        f"{mgmt_port}",
-        "-c",
-        db_info_str,
-    ]
-
-    log.info(f"Sending to proxy the user and db info: {' '.join(cmd_args)}")
-    p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE)
-    out, err = p.communicate()
-    assert "ok" in str(out)
-
-
-async def get_uri_line_from_process_welcome_notice(link_auth_uri_prefix, proc):
-    """
-    Returns the line from the welcome notice from proc containing link_auth_uri_prefix.
-    :param link_auth_uri_prefix: the uri prefix used to indicate the line of interest
-    :param proc: the process to read the welcome message from.
-    :return: a line containing the full link authentication uri.
-    """
-    max_num_lines_of_welcome_message = 15
-    for attempt in range(max_num_lines_of_welcome_message):
-        raw_line = await proc.stderr.readline()
-        line = raw_line.decode("utf-8").strip()
+async def find_auth_link(link_auth_uri_prefix, proc):
+    for _ in range(100):
+        line = (await proc.stderr.readline()).decode("utf-8").strip()
+        log.info(f"psql line: {line}")
        if link_auth_uri_prefix in line:
+            log.info(f"SUCCESS, found auth url: {line}")
            return line
-    assert False, f"did not find line containing '{link_auth_uri_prefix}'"
+
+
+async def activate_link_auth(local_vanilla_pg, link_proxy, psql_session_id):
+    pg_user = "proxy"
+
+    log.info("creating a new user for link auth test")
+    local_vanilla_pg.start()
+    local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")
+
+    db_info = json.dumps(
+        {
+            "session_id": psql_session_id,
+            "result": {
+                "Success": {
+                    "host": local_vanilla_pg.default_options["host"],
+                    "port": local_vanilla_pg.default_options["port"],
+                    "dbname": local_vanilla_pg.default_options["dbname"],
+                    "user": pg_user,
+                    "project": "irrelevant",
+                }
+            },
+        }
+    )
+
+    log.info("sending session activation message")
+    psql = await PSQL(host=link_proxy.host, port=link_proxy.mgmt_port).run(db_info)
+    out = (await psql.stdout.read()).decode("utf-8").strip()
+    assert out == "ok"


@pytest.mark.asyncio
 async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
-    """
-    Test copied and modified from: test_project_psql_link_auth test from cloud/tests_e2e/tests/test_project.py
-     Step 1. establish connection to the proxy
-     Step 2. retrieve session_id:
-        Step 2.1: read welcome message
-        Step 2.2: parse session_id
-     Step 3. create a vanilla_pg and send user and db info via command line (using Popen) a psql query via mgmt port to proxy.
-     Step 4. assert that select 1 has been executed correctly.
-    """
-
-    psql = PSQL(
-        host=link_proxy.host,
-        port=link_proxy.proxy_port,
-    )
-    proc = await psql.run("select 42")
+    psql = await PSQL(host=link_proxy.host, port=link_proxy.proxy_port).run("select 42")

    uri_prefix = link_proxy.link_auth_uri_prefix
-    line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc)
+    link = await find_auth_link(uri_prefix, psql)

-    psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str)
-    log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.")
+    psql_session_id = get_session_id(uri_prefix, link)
+    await activate_link_auth(vanilla_pg, link_proxy, psql_session_id)

-    create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port)
-
-    assert proc.stdout is not None
-    out = (await proc.stdout.read()).decode("utf-8").strip()
+    assert psql.stdout is not None
+    out = (await psql.stdout.read()).decode("utf-8").strip()
    assert out == "42"


 # Pass extra options to the server.
-def test_proxy_options(static_proxy):
+def test_proxy_options(static_proxy: NeonProxy):
    with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
        with conn.cursor() as cur:
            cur.execute("SHOW proxytest.option")
--- a/test_runner/regress/test_read_validation.py
+++ b/test_runner/regress/test_read_validation.py
@@ -143,6 +143,8 @@ def test_read_validation_neg(neon_simple_env: NeonEnv):
    env = neon_simple_env
    env.neon_cli.create_branch("test_read_validation_neg", "empty")

+    env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
+
    pg = env.postgres.create_start("test_read_validation_neg")
    log.info("postgres is running on 'test_read_validation_neg' branch")

--- a/test_runner/regress/test_readonly_node.py
+++ b/test_runner/regress/test_readonly_node.py
@@ -17,6 +17,8 @@ def test_readonly_node(neon_simple_env: NeonEnv):
    pgmain = env.postgres.create_start("test_readonly_node")
    log.info("postgres is running on 'test_readonly_node' branch")

+    env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*")
+
    main_pg_conn = pgmain.connect()
    main_cur = main_pg_conn.cursor()

--- a/test_runner/regress/test_recovery.py
+++ b/test_runner/regress/test_recovery.py
@@ -17,6 +17,10 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):

    neon_env_builder.start()

+    # These warnings are expected when the pageserver is restarted abruptly
+    env.pageserver.allowed_errors.append(".*found future delta layer.*")
+    env.pageserver.allowed_errors.append(".*found future image layer.*")
+
    # Create a branch for us
    env.neon_cli.create_branch("test_pageserver_recovery", "main")

--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -56,6 +56,17 @@ def test_remote_storage_backup_and_restore(

    ##### First start, insert secret data and upload it to the remote storage
    env = neon_env_builder.init_start()
+
+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
+
+    env.pageserver.allowed_errors.append(".*Tenant download is already in progress.*")
+    env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
+    env.pageserver.allowed_errors.append(".*No metadata file found in the timeline directory.*")
+
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -1,3 +1,4 @@
+import time
 from threading import Thread

 import pytest
@@ -11,15 +12,30 @@ def do_gc_target(
 ):
    """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
    try:
+        log.info("sending gc http request")
        pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
    except Exception as e:
        log.error("do_gc failed: %s", e)
+    finally:
+        log.info("gc http thread returning")


+@pytest.mark.skip(
+    reason="""
+Commit 'make test_tenant_detach_smoke fail reproducibly' adds failpoint to make this test fail reproducibly.
+Fix in https://github.com/neondatabase/neon/pull/2851 will come as part of
+https://github.com/neondatabase/neon/pull/2785 .
+"""
+)
 def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()

+    env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found in the local state")
+    # FIXME: we have a race condition between GC and detach. GC might fail with this
+    # error. Similar to https://github.com/neondatabase/neon/issues/2671
+    env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
+
    # first check for non existing tenant
    tenant_id = TenantId.generate()
    with pytest.raises(
@@ -28,6 +44,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
    ):
        pageserver_http.tenant_detach(tenant_id)

+    # the error will be printed to the log too
+    env.pageserver.allowed_errors.append(".*Tenant not found for id.*")
+
    # create new nenant
    tenant_id, timeline_id = env.neon_cli.create_tenant()

@@ -43,32 +62,34 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
        ]
    )

-    # gc should not try to even start
+    # gc should not try to even start on a timeline that doesn't exist
    with pytest.raises(
        expected_exception=PageserverApiException, match="gc target timeline does not exist"
    ):
        bogus_timeline_id = TimelineId.generate()
        pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)

-    # try to concurrently run gc and detach
+    # the error will be printed to the log too
+    env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
+
+    # Detach while running manual GC.
+    # It should wait for manual GC to finish (it currently doesn't, so this test sometimes fails)
+    pageserver_http.configure_failpoints(
+        ("gc_iteration_internal_after_getting_gc_timelines", "return(2000)")
+    )
    gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
    gc_thread.start()
+    time.sleep(1)
+    # By now the gc task is spawned but still sleeping for another second due to the failpoint.

-    last_error = None
-    for i in range(3):
-        try:
-            pageserver_http.tenant_detach(tenant_id)
-        except Exception as e:
-            last_error = e
-            log.error(f"try {i} error detaching tenant: {e}")
-            continue
-        else:
-            break
-    # else is called if the loop finished without reaching "break"
-    else:
-        pytest.fail(f"could not detach tenant: {last_error}")
+    log.info("detaching tenant")
+    pageserver_http.tenant_detach(tenant_id)
+    log.info("tenant detached without error")

+    log.info("wait for gc thread to return")
    gc_thread.join(timeout=10)
+    assert not gc_thread.is_alive()
+    log.info("gc thread returned")

    # check that nothing is left on disk for deleted tenant
    assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()
--- a/test_runner/regress/test_tenant_relocation.py
+++ b/test_runner/regress/test_tenant_relocation.py
@@ -259,6 +259,11 @@ def test_tenant_relocation(

    env = neon_env_builder.init_start()

+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    # create folder for remote storage mock
    remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage"

--- a/test_runner/regress/test_tenant_size.py
+++ b/test_runner/regress/test_tenant_size.py
@@ -166,6 +166,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder

    env = neon_env_builder.init_start()

+    # FIXME: we have a race condition between GC and delete timeline. GC might fail with this
+    # error. Similar to https://github.com/neondatabase/neon/issues/2671
+    env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
+
    tenant_id = env.initial_tenant
    main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]

@@ -188,10 +192,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
        "first-branch", main_branch_name, tenant_id
    )

-    # unsure why this happens, the size difference is more than a page alignment
    size_after_first_branch = http_client.tenant_size(tenant_id)
-    assert size_after_first_branch > size_at_branch
-    assert size_after_first_branch - size_at_branch == gc_horizon
+    assert size_after_first_branch == size_at_branch

    first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)

@@ -217,7 +219,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
        "second-branch", main_branch_name, tenant_id
    )
    size_after_second_branch = http_client.tenant_size(tenant_id)
-    assert size_after_second_branch > size_after_continuing_on_main
+    assert size_after_second_branch == size_after_continuing_on_main

    second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)

@@ -263,6 +265,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
        except PageserverApiException as e:
            # compaction is ok but just retry if this fails; related to #2442
            if "cannot lock compaction critical section" in str(e):
+                # also ignore it in the log
+                env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*")
                time.sleep(1)
                continue
            raise
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -25,6 +25,13 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv):
    )
    initial_tenant_dirs = [d for d in tenants_dir.iterdir()]

+    neon_simple_env.pageserver.allowed_errors.extend(
+        [
+            ".*Failed to create directory structure for tenant .*, cleaning tmp data.*",
+            ".*Failed to fsync removed temporary tenant directory .*",
+        ]
+    )
+
    pageserver_http = neon_simple_env.pageserver.http_client()
    pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return"))
    with pytest.raises(Exception, match="tenant-creation-before-tmp-rename"):
@@ -206,6 +213,13 @@ def test_pageserver_with_empty_tenants(
    )

    env = neon_env_builder.init_start()
+
+    env.pageserver.allowed_errors.append(
+        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+    env.pageserver.allowed_errors.append(".*Tenant .* has no timelines directory.*")
+    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
+
    client = env.pageserver.http_client()

    tenant_without_timelines_dir = env.initial_tenant
--- a/test_runner/regress/test_tenants_with_remote_storage.py
+++ b/test_runner/regress/test_tenants_with_remote_storage.py
@@ -66,6 +66,11 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem

    env = neon_env_builder.init_start()

+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    tenants_pgs: List[Tuple[TenantId, Postgres]] = []

    for _ in range(1, 5):
@@ -117,6 +122,13 @@ def test_tenants_attached_after_download(

    ##### First start, insert secret data and upload it to the remote storage
    env = neon_env_builder.init_start()
+
+    # FIXME: Are these expected?
+    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
+    env.pageserver.allowed_errors.append(
+        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

@@ -209,6 +221,16 @@ def test_tenant_upgrades_index_json_from_v0(
    # launch pageserver, populate the default tenants timeline, wait for it to be uploaded,
    # then go ahead and modify the "remote" version as if it was downgraded, needing upgrade
    env = neon_env_builder.init_start()
+
+    # FIXME: Are these expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
+    env.pageserver.allowed_errors.append(
+        ".*Failed to get local tenant state: Tenant .* not found in the local state.*"
+    )
+
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

@@ -315,6 +337,20 @@ def test_tenant_redownloads_truncated_file_on_startup(
    )

    env = neon_env_builder.init_start()
+
+    env.pageserver.allowed_errors.append(
+        ".*Redownloading locally existing .* due to size mismatch.*"
+    )
+    env.pageserver.allowed_errors.append(
+        ".*Downloaded layer exists already but layer file metadata mismatches.*"
+    )
+
+    # FIXME: Are these expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
+
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -7,6 +7,11 @@ from fixtures.utils import wait_until
 def test_timeline_delete(neon_simple_env: NeonEnv):
    env = neon_simple_env

+    env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
+    env.pageserver.allowed_errors.append(".*timeline not found.*")
+    env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
+    env.pageserver.allowed_errors.append(".*Tenant .* not found in the local state.*")
+
    ps_http = env.pageserver.http_client()

    # first try to delete non existing timeline
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -263,6 +263,12 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()

    env.neon_cli.create_branch("test_broker", "main")
+
+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    pg = env.postgres.create_start("test_broker")
    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

@@ -306,6 +312,11 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
    neon_env_builder.auth_enabled = auth_enabled
    env = neon_env_builder.init_start()

+    # FIXME: Is this expected?
+    env.pageserver.allowed_errors.append(
+        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    )
+
    env.neon_cli.create_branch("test_safekeepers_wal_removal")
    pg = env.postgres.create_start("test_safekeepers_wal_removal")

@@ -538,6 +549,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
    )

    pg.stop_and_destroy()
+    ps_cli.timeline_delete(tenant_id, timeline_id)

    # Also delete and manually create timeline on safekeepers -- this tests
    # scenario of manual recovery on different set of safekeepers.
@@ -562,7 +574,6 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
        shutil.copy(f_partial_saved, f_partial_path)

    # recreate timeline on pageserver from scratch
-    ps_cli.timeline_delete(tenant_id, timeline_id)
    ps_cli.timeline_create(tenant_id, timeline_id)

    wait_lsn_timeout = 60 * 3
@@ -1081,6 +1092,14 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
    neon_env_builder.auth_enabled = auth_enabled
    env = neon_env_builder.init_start()

+    # FIXME: are these expected?
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*Failed to process query for timeline .*: Timeline .* was not found in global map.*",
+            ".*end streaming to Some.*",
+        ]
+    )
+
    # Create two tenants: one will be deleted, other should be preserved.
    tenant_id = env.initial_tenant
    timeline_id_1 = env.neon_cli.create_branch("br1")  # Active, delete explicitly
--- a/test_runner/regress/test_walredo_not_left_behind_on_detach.py
+++ b/test_runner/regress/test_walredo_not_left_behind_on_detach.py
@@ -22,6 +22,8 @@ def assert_child_processes(pageserver_pid, wal_redo_present=False, defunct_prese
 # as a zombie process.
 def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()
+    # We intentionally test for a non-existent tenant.
+    env.pageserver.allowed_errors.append(".*Tenant not found.*")
    pageserver_http = env.pageserver.http_client()

    pagserver_pid = int((env.repo_dir / "pageserver.pid").read_text())
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15