Compare commits

..

3 Commits

Author SHA1 Message Date
Bojan Serafimov
612f1f2dba wip 2022-11-15 23:08:22 -05:00
Bojan Serafimov
accbf4a313 Merge branch 'main' into perf-summary 2022-11-15 13:10:20 -05:00
Bojan Serafimov
ea0207c4b7 WIP add perf test summary 2022-02-24 12:46:53 -05:00
85 changed files with 798 additions and 2889 deletions

View File

@@ -1,3 +1,5 @@
zenith_install.tar.gz
.zenith_current_version
neon_install.tar.gz
.neon_current_version

View File

@@ -22,10 +22,6 @@ storage:
console_region_id: aws-us-west-2
zenith-1-ps-3:
console_region_id: aws-us-west-2
zenith-1-ps-4:
console_region_id: aws-us-west-2
zenith-1-ps-5:
console_region_id: aws-us-west-2
safekeepers:
hosts:

View File

@@ -1,33 +0,0 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.eu-west-1.aws.neon.build:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b

View File

@@ -3,7 +3,7 @@ storage:
bucket_name: zenith-staging-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://console-staging.local
etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
etcd_endpoints: zenith-us-stage-etcd.local:2379
pageserver_config_stub:
pg_distrib_dir: /usr/local
remote_storage:

View File

@@ -22,8 +22,6 @@ storage:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
safekeepers:
hosts:

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-staging.local/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: dev
zenith_region: eu-west-1
zenith_region_slug: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -144,9 +144,7 @@ jobs:
# neon-captest-new: Run pgbench in a freshly created project
# neon-captest-reuse: Same, but reusing existing project
# neon-captest-prefetch: Same, with prefetching enabled (new project)
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
db_size: [ 10gb ]
include:
- platform: neon-captest-new
@@ -166,7 +164,7 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
@@ -209,11 +207,8 @@ jobs:
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch' or 'rds-aurora'"
exit 1
;;
esac

View File

@@ -18,8 +18,8 @@ env:
jobs:
tag:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
@@ -46,7 +46,7 @@ jobs:
id: build-tag
build-neon:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -236,7 +236,7 @@ jobs:
uses: ./.github/actions/save-coverage-data
regress-tests:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -269,7 +269,7 @@ jobs:
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -300,7 +300,7 @@ jobs:
# while coverage is currently collected for the debug ones
merge-allure-report:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -338,7 +338,7 @@ jobs:
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
coverage-report:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
@@ -415,7 +415,7 @@ jobs:
shell: bash -euxo pipefail {0}
trigger-e2e-tests:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
@@ -460,7 +460,7 @@ jobs:
}"
neon-image:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
@@ -478,7 +478,7 @@ jobs:
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
compute-tools-image:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.9.0-debug
@@ -493,7 +493,7 @@ jobs:
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
compute-node-image-v14:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
@@ -510,7 +510,7 @@ jobs:
run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}
compute-node-image-v15:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: gcr.io/kaniko-project/executor:v1.9.0-debug
needs: [ tag ]
steps:
@@ -528,7 +528,7 @@ jobs:
test-images:
needs: [ tag, neon-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
steps:
- name: Checkout
@@ -570,7 +570,7 @@ jobs:
docker compose -f ./docker-compose/docker-compose.yml down
promote-images:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ tag, test-images ]
if: github.event_name != 'workflow_dispatch'
container: amazon/aws-cli
@@ -586,7 +586,7 @@ jobs:
aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
push-docker-hub:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
@@ -736,7 +736,7 @@ jobs:
rm -f neon_install.tar.gz .neon_current_version
deploy-new:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
@@ -761,6 +761,7 @@ jobs:
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
@@ -769,38 +770,6 @@ jobs:
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-pr-test-new:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
if: |
contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
@@ -811,7 +780,7 @@ jobs:
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -847,7 +816,7 @@ jobs:
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
@@ -889,10 +858,10 @@ jobs:
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-new:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
@@ -904,8 +873,6 @@ jobs:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -927,7 +894,7 @@ jobs:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -961,7 +928,7 @@ jobs:
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
promote-compatibility-data:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init

View File

@@ -115,7 +115,7 @@ jobs:
run: cargo build --locked --all --all-targets
check-rust-dependencies:
runs-on: [ self-hosted, dev, x64 ]
runs-on: dev
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init

View File

@@ -8,4 +8,3 @@
/pgxn/ @neondatabase/compute
/proxy/ @neondatabase/control-plane
/safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute

8
Cargo.lock generated
View File

@@ -2255,14 +2255,6 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "persistent_range_query"
version = "0.1.0"
dependencies = [
"rand",
"workspace_hack",
]
[[package]]
name = "petgraph"
version = "0.6.2"

View File

@@ -20,18 +20,18 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
endif
# Use -C option so that when PostgreSQL "make install" installs the
@@ -73,8 +73,7 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status:
+@echo "Configuring Postgres v14 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14
(cd $(POSTGRES_INSTALL_DIR)/build/v14 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \
CFLAGS='$(PG_CFLAGS)' \
$(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log)
@@ -82,8 +81,7 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status:
+@echo "Configuring Postgres v15 build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15
(cd $(POSTGRES_INSTALL_DIR)/build/v15 && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \
CFLAGS='$(PG_CFLAGS)' \
$(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log)
@@ -113,8 +111,6 @@ postgres-v14: postgres-v14-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install
+@echo "Compiling libpq v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install
+@echo "Compiling pg_prewarm v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache v14"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install
+@echo "Compiling pageinspect v14"
@@ -127,8 +123,6 @@ postgres-v15: postgres-v15-configure \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install
+@echo "Compiling libpq v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install
+@echo "Compiling pg_prewarm v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache v15"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install
+@echo "Compiling pageinspect v15"

View File

@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl flex bison
brew install protobuf etcd openssl
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -125,23 +125,24 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2545906
Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
Stopped pageserver 1 process with pid 2545906
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7
Stopping pageserver gracefully...done!
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting etcd broker using "/usr/bin/etcd"
etcd started, pid: 2545996
Starting pageserver at '127.0.0.1:64000' in '.neon'.
pageserver started, pid: 2546005
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2546041
Starting etcd broker using /usr/bin/etcd
Starting pageserver at '127.0.0.1:64000' in '.neon'
Pageserver started
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

188
cli-v2-story.md Normal file
View File

@@ -0,0 +1,188 @@
Create a new Zenith repository in the current directory:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.
The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are disabled.
creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith
Initially, there is only one branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main
Start a local Postgres instance on the branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started
Run some commands against it:
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main
Start another Postgres instance off the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started
Insert some a row on the 'experimental' branch:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)
See that the other Postgres instance is still running on 'main' branch on port 5432:
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
t
-----------------------------
inserted on the main branch
(1 row)
Everything is stored in the .zenith directory:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
The 'datadirs' directory contains the datadirs of the running instances:
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees)
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
t
-----------------------------
inserted on the main branch
inserted on experimental
(2 rows)

View File

@@ -26,18 +26,8 @@ use nix::unistd::Pid;
use utils::lock_file;
// These constants control the loop used to poll for process start / stop.
//
// The loop waits for at most 10 seconds, polling every 100 ms.
// Once a second, it prints a dot ("."), to give the user an indication that
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const RETRY_UNTIL_SECS: u64 = 10;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u64 = 50;
const RETRIES: u32 = 15;
const RETRY_TIMEOUT_MILLIS: u64 = 500;
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself.
@@ -117,16 +107,16 @@ where
return Ok(spawned_process);
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not started yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
if retries < 5 {
print!(".");
io::stdout().flush().unwrap();
} else {
if retries == 5 {
println!() // put a line break after dots for second message
}
println!("{process_name} has not started yet, retrying ({retries})...");
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS));
}
Err(e) => {
println!("{process_name} failed to start: {e:#}");
@@ -137,8 +127,7 @@ where
}
}
}
println!();
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
anyhow::bail!("{process_name} could not start in {RETRIES} attempts");
}
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
@@ -169,7 +158,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
}
// Wait until process is gone
for retries in 0..RETRIES {
for _ in 0..RETRIES {
match process_has_stopped(pid) {
Ok(true) => {
println!("\n{process_name} stopped");
@@ -181,16 +170,9 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
return Ok(());
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
// The process is taking a long time to start up. Keep waiting, but
// print a message
print!("\n{process_name} has not stopped yet, continuing to wait");
}
if retries % DOT_EVERY_RETRIES == 0 {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
print!(".");
io::stdout().flush().unwrap();
thread::sleep(Duration::from_secs(1))
}
Err(e) => {
println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -198,21 +180,24 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
}
}
}
println!();
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts");
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", "1");
// Pass through these environment variables to the command
for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] {
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
let var = "LLVM_PROFILE_FILE";
if let Some(val) = std::env::var_os(var) {
filled_cmd = filled_cmd.env(var, val);
}
filled_cmd
const RUST_LOG_KEY: &str = "RUST_LOG";
if let Ok(rust_log_value) = std::env::var(RUST_LOG_KEY) {
filled_cmd.env(RUST_LOG_KEY, rust_log_value)
} else {
filled_cmd
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {

View File

@@ -343,7 +343,7 @@ impl PostgresNode {
// To be able to restore database in case of pageserver node crash, safekeeper should not
// remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
// (if they are not able to upload WAL to S3).
conf.append("max_replication_write_lag", "15MB");
conf.append("max_replication_write_lag", "500MB");
conf.append("max_replication_flush_lag", "10GB");
if !self.env.safekeepers.is_empty() {

View File

@@ -6,7 +6,7 @@ use crate::{background_process, local_env};
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_broker = &env.etcd_broker;
print!(
println!(
"Starting etcd broker using {:?}",
etcd_broker.etcd_binary_path
);

View File

@@ -237,7 +237,7 @@ impl PageServerNode {
datadir: &Path,
update_config: bool,
) -> anyhow::Result<Child> {
print!(
println!(
"Starting pageserver at '{}' in '{}'",
self.pg_connection_config.raw_address(),
datadir.display()

View File

@@ -37,7 +37,7 @@
- [Source view](./sourcetree.md)
- [docker.md](./docker.md) — Docker images and building pipeline.
- [Error handling and logging](./error-handling.md)
- [Error handling and logging]()
- [Testing]()
- [Unit testing]()
- [Integration testing]()

View File

@@ -1,198 +0,0 @@
# Error handling and logging
## Logging errors
The principle is that errors are logged when they are handled. If you
just propagate an error to the caller in a function, you don't need to
log it; the caller will. But if you consume an error in a function,
you *must* log it (if it needs to be logged at all).
For example:
```rust
fn read_motd_file() -> std::io::Result<String> {
let mut f = File::open("/etc/motd")?;
let mut result = String::new();
f.read_to_string(&mut result)?;
result
}
```
Opening or reading the file could fail, but there is no need to log
the error here. The function merely propagates the error to the
caller, and it is up to the caller to log the error or propagate it
further, if the failure is not expected. But if, for example, it is
normal that the "/etc/motd" file doesn't exist, the caller can choose
to silently ignore the error, or log it as an INFO or DEBUG level
message:
```rust
fn get_message_of_the_day() -> String {
// Get the motd from /etc/motd, or return the default proverb
match read_motd_file() {
Ok(motd) => motd,
Err(err) => {
// It's normal that /etc/motd doesn't exist, but if we fail to
// read it for some other reason, that's unexpected. The message
// of the day isn't very important though, so we just WARN and
// continue with the default in any case.
if err.kind() != std::io::ErrorKind::NotFound {
tracing::warn!("could not read \"/etc/motd\": {err:?}");
}
"An old error is always more popular than a new truth. - German proverb"
}
}
}
```
## Error types
We use the `anyhow` crate widely. It contains many convenient macros
like `bail!` and `ensure!` to construct and return errors, and to
propagate many kinds of low-level errors, wrapped in `anyhow::Error`.
A downside of `anyhow::Error` is that the caller cannot distinguish
between different error cases. Most errors are propagated all the way
to the mgmt API handler function, or the main loop that handles a
connection with the compute node, and they are all handled the same
way: the error is logged and returned to the client as an HTTP or
libpq error.
But in some cases, we need to distinguish between errors and handle
them differently. For example, attaching a tenant to the pageserver
could fail either because the tenant has already been attached, or
because we could not load its metadata from cloud storage. The first
case is more or less expected. The console sends the Attach request to
the pageserver, and the pageserver completes the operation, but the
network connection might be lost before the console receives the
response. The console will retry the operation in that case, but the
tenant has already been attached. It is important that the pageserver
responds with the HTTP 409 Conflict (already exists) error in that case, rather
than a generic HTTP 500 Internal Server Error.
If you need to distinguish between different kinds of errors, create a
new `Error` type. The `thiserror` crate is useful for that. But in
most cases `anyhow::Error` is good enough.
## Panics
Depending on where a panic happens, it can cause the whole pageserver
or safekeeper to restart, or just a single tenant. In either case,
that is pretty bad and causes an outage. Avoid panics. Never use
`unwrap()` or other calls that might panic, to verify inputs from the
network or from disk.
It is acceptable to use functions that might panic, like `unwrap()`, if
it is obvious that it cannot panic. For example, if you have just
checked that a variable is not None, it is OK to call `unwrap()` on it,
but it is still preferable to use `expect("reason")` instead to explain
why the function cannot fail.
`assert!` and `panic!` are reserved for checking clear invariants and
very obvious "can't happen" cases. When in doubt, use anyhow `ensure!`
or `bail!` instead.
## Error levels
`tracing::Level` doesn't provide very clear guidelines on what the
different levels mean, or when to use which level. Here is how we use
them:
### Error
Examples:
- could not open file "foobar"
- invalid tenant id
Errors are not expected to happen during normal operation. Incorrect
inputs from client can cause ERRORs. For example, if a client tries to
call a mgmt API that doesn't exist, or if a compute node passes
an LSN that has already been garbage collected away.
These should *not* happen during normal operations. "Normal
operations" is not a very precise concept. But for example, disk
errors are not expected to happen when the system is working, so those
count as Errors. However, if a TCP connection to a compute node is
lost, that is not considered an Error, because it doesn't affect the
pageserver's or safekeeper's operation in any way, and happens fairly
frequently when compute nodes are shut down, or are killed abruptly
because of errors in the compute.
**Errors are monitored, and always need human investigation to determine
the cause.**
Whether something should be logged at ERROR, WARNING or INFO level can
depend on the callers and clients. For example, it might be unexpected
and a sign of a serious issue if the console calls the
"timeline_detail" mgmt API for a timeline that doesn't exist. ERROR
would be appropriate in that case. But if the console routinely calls
the API after deleting a timeline, to check if the deletion has
completed, then it would be totally normal and an INFO or DEBUG level
message would be more appropriate. If a message is logged as an ERROR,
but it in fact happens frequently in production and never requires any
action, it should probably be demoted to an INFO level message.
### Warn
Examples:
- could not remove temporary file "foobar.temp"
- unrecognized file "foobar" in timeline directory
Warnings are similar to Errors, in that they should not happen
when the system is operating normally. The difference between Error and
Warning is that an Error means that the operation failed, whereas Warning
means that something unexpected happened, but the operation continued anyway.
For example, if deleting a file fails because the file already didn't exist,
it should be logged as Warning.
> **Note:** The python regression tests, under `test_regress`, check the
> pageserver log after each test for any ERROR and WARN lines. If there are
> any ERRORs or WARNs that have not been explicitly listed in the test as
allowed, the test is marked as failed. This is to catch unexpected errors
> e.g. in background operations, that don't cause immediate misbehaviour in
> the tested functionality.
### Info
Info level is used to log useful information when the system is
operating normally. Info level is appropriate e.g. for logging state
changes, background operations, and network connections.
Examples:
- "system is shutting down"
- "tenant was created"
- "retrying S3 upload"
### Debug & Trace
Debug and Trace level messages are not printed to the log in our normal
production configuration, but could be enabled for a specific server or
tenant, to aid debugging. (Although we don't actually have that
capability as of this writing).
## Context
We use logging "spans" to hold context information about the current
operation. Almost every operation happens on a particular tenant and
timeline, so we enter a span with the "tenant_id" and "timeline_id"
very early when processing an incoming API request, for example. All
background operations should also run in a span containing at least
those two fields, and any other parameters or information that might
be useful when debugging an error that might happen when performing
the operation.
TODO: Spans are not captured in the Error when it is created, but when
the error is logged. It would be more useful to capture them at Error
creation. We should consider using `tracing_error::SpanTrace` to do
that.
## Error message style
PostgreSQL has a style guide for writing error messages:
https://www.postgresql.org/docs/current/error-style-guide.html
Follow that guide when writing error messages in the PostgreSQL
extension. We don't follow it strictly in the pageserver and
safekeeper, but the advice in the PostgreSQL style guide is generally
good, and you can't go wrong by following it.

View File

@@ -83,16 +83,6 @@ A subject for future modularization.
`/libs/metrics`:
Helpers for exposing Prometheus metrics from the server.
### Adding dependencies
When you add a Cargo dependency, you should update hakari manifest by running commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine.
```bash
cargo hakari generate
cargo hakari manage-deps
```
If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`.
## Using Python
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
so manual installation of dependencies is not recommended.

View File

@@ -1,12 +0,0 @@
[package]
name = "persistent_range_query"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
rand = "0.8.3"

View File

@@ -1,78 +0,0 @@
use std::ops::Range;
pub mod naive;
pub mod ops;
pub mod segment_tree;
/// Should be a monoid:
/// * Identity element: for all a: combine(new_for_empty_range(), a) = combine(a, new_for_empty_range()) = a
/// * Associativity: for all a, b, c: combine(combine(a, b), c) == combine(a, combine(b, c))
pub trait RangeQueryResult<Key>: Sized + Clone {
    // Clone is equivalent to combine with an empty range.
    /// Returns the identity element (the result for an empty range).
    fn new_for_empty_range() -> Self;
    // Contract: left_range.end == right_range.start
    // left_range.start == left_range.end == right_range.start == right_range.end is still possible
    /// Combines the results of two adjacent ranges into the result for the
    /// concatenated range.
    fn combine(
        left: &Self,
        left_range: &Range<Key>,
        right: &Self,
        right_range: &Range<Key>,
    ) -> Self;
    /// In-place version of `combine`: `left` becomes the result for the
    /// concatenated range. Same contract on the ranges as `combine`.
    fn add(left: &mut Self, left_range: &Range<Key>, right: &Self, right_range: &Range<Key>);
}
/// Produces the initial `Result` for an arbitrary key range on demand, so
/// storages can defer materializing ranges that were never touched.
pub trait LazyRangeInitializer<Result: RangeQueryResult<Key>, Key> {
    /// Returns the initial query result for `range`.
    fn get(&self, range: &Range<Key>) -> Result;
}
/// Should be a monoid:
/// * Identity element: for all op: compose(no_op(), op) == compose(op, no_op()) == op
/// * Associativity: for all op_1, op_2, op_3: compose(compose(op_1, op_2), op_3) == compose(op_1, compose(op_2, op_3))
///
/// Should left act on Result:
/// * Identity operation: for all r: no_op().apply(r) == r
/// * Compatibility: for all op_1, op_2, r: op_1.apply(op_2.apply(r)) == compose(op_1, op_2).apply(r)
pub trait RangeModification<Key> {
    /// The query-result type this modification acts on.
    type Result: RangeQueryResult<Key>;
    /// Returns the identity modification (changes nothing).
    fn no_op() -> Self;
    /// True if applying `self` never changes any result.
    fn is_no_op(&self) -> bool;
    /// True if applying `self` completely overwrites a result, making the
    /// previous contents of the range irrelevant.
    fn is_reinitialization(&self) -> bool;
    /// Applies the modification to `result`, which covers `range`.
    fn apply(&self, result: &mut Self::Result, range: &Range<Key>);
    /// Updates `earlier` to be equivalent to applying the old `earlier`
    /// first and then `later`.
    fn compose(later: &Self, earlier: &mut Self);
}
/// Read-only access to some version of the stored vector.
pub trait VecReadableVersion<Modification: RangeModification<Key>, Key> {
    /// Returns the aggregated result over `keys`.
    fn get(&self, keys: &Range<Key>) -> Modification::Result;
}
// TODO: use trait alias when stabilized
/// A clonable, readable snapshot of the vector.
pub trait VecFrozenVersion<Modification: RangeModification<Key>, Key>:
    Clone + VecReadableVersion<Modification, Key>
{
}
impl<
        T: Clone + VecReadableVersion<Modification, Key>,
        Modification: RangeModification<Key>,
        Key,
    > VecFrozenVersion<Modification, Key> for T
{
}
/// A versioned vector supporting range modifications and range queries,
/// with snapshots taken via `freeze`.
pub trait PersistentVecStorage<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key,
>: VecReadableVersion<Modification, Key>
{
    /// Creates a storage covering `all_keys`, initialized by `initializer`.
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self;
    /// The snapshot type returned by `freeze`.
    type FrozenVersion: VecFrozenVersion<Modification, Key>;
    /// Applies `modification` to every element in `keys`.
    fn modify(&mut self, keys: &Range<Key>, modification: &Modification);
    /// Returns a read-only snapshot of the current contents.
    fn freeze(&mut self) -> Self::FrozenVersion;
}

View File

@@ -1,115 +0,0 @@
use crate::{
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
VecReadableVersion,
};
use std::marker::PhantomData;
use std::ops::Range;
use std::rc::Rc;
/// An immutable snapshot of a `NaiveVecStorage`: the full element vector is
/// copied once at freeze time and then shared via `Rc`.
pub struct NaiveFrozenVersion<Modification: RangeModification<Key>, Key> {
    all_keys: Range<Key>,
    values: Rc<Box<Vec<Modification::Result>>>,
}
/// Keys that map one-to-one onto `0..n` indices of a plain vector.
pub trait IndexableKey: Clone {
    /// Index of `key` within `all_keys`.
    fn index(all_keys: &Range<Self>, key: &Self) -> usize;
    /// The single-element key range stored at `index`.
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self>;
}
}
/// Aggregates the results of the elements covered by `keys`, scanning them
/// left to right and folding each one into an accumulator.
fn get<Modification: RangeModification<Key>, Key: IndexableKey>(
    all_keys: &Range<Key>,
    values: &Vec<Modification::Result>,
    keys: &Range<Key>,
) -> Modification::Result {
    let first = IndexableKey::index(all_keys, &keys.start);
    let last = IndexableKey::index(all_keys, &keys.end);
    // `acc_range` always describes the key span that `acc` currently covers;
    // it starts out empty at the left edge of the query.
    let mut acc = Modification::Result::new_for_empty_range();
    let mut acc_range = keys.start.clone()..keys.start.clone();
    for index in first..last {
        let element_range = IndexableKey::element_range(all_keys, index);
        Modification::Result::add(&mut acc, &acc_range, &values[index], &element_range);
        acc_range.end = element_range.end;
    }
    acc
}
// Range queries against the frozen snapshot.
impl<Modification: RangeModification<Key>, Key: IndexableKey> VecReadableVersion<Modification, Key>
    for NaiveFrozenVersion<Modification, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get::<Modification, Key>(&self.all_keys, &self.values, keys)
    }
}
// Manual implementation of `Clone` because `derive` requires `Modification: Clone`
impl<Modification: RangeModification<Key>, Key: Clone> Clone
    for NaiveFrozenVersion<Modification, Key>
{
    fn clone(&self) -> Self {
        // Cheap: only the `Rc` handle to the shared vector is cloned.
        Self {
            all_keys: self.all_keys.clone(),
            values: self.values.clone(),
        }
    }
}
// TODO: is it at all possible to store previous versions in this struct,
// without any Rc<>?
/// Reference implementation: one `Result` per element in a flat vector;
/// queries and modifications scan the requested range element by element.
pub struct NaiveVecStorage<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: IndexableKey,
> {
    all_keys: Range<Key>,
    last_version: Vec<Modification::Result>,
    _initializer: PhantomData<Initializer>,
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: IndexableKey,
    > VecReadableVersion<Modification, Key> for NaiveVecStorage<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        get::<Modification, Key>(&self.all_keys, &self.last_version, keys)
    }
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: IndexableKey,
    > PersistentVecStorage<Modification, Initializer, Key>
    for NaiveVecStorage<Modification, Initializer, Key>
{
    /// Eagerly initializes every element from `initializer`.
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
        let mut values = Vec::with_capacity(IndexableKey::index(&all_keys, &all_keys.end));
        for index in 0..values.capacity() {
            values.push(initializer.get(&IndexableKey::element_range(&all_keys, index)));
        }
        NaiveVecStorage {
            all_keys,
            last_version: values,
            _initializer: PhantomData,
        }
    }
    type FrozenVersion = NaiveFrozenVersion<Modification, Key>;
    /// Applies `modification` element-by-element over `keys`.
    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
        for index in IndexableKey::index(&self.all_keys, &keys.start)
            ..IndexableKey::index(&self.all_keys, &keys.end)
        {
            let element_range = IndexableKey::element_range(&self.all_keys, index);
            modification.apply(&mut self.last_version[index], &element_range);
        }
    }
    /// O(n) snapshot: copies the whole vector into a shared allocation.
    fn freeze(&mut self) -> Self::FrozenVersion {
        NaiveFrozenVersion::<Modification, Key> {
            all_keys: self.all_keys.clone(),
            values: Rc::new(Box::new(self.last_version.clone())),
        }
    }
}

View File

@@ -1,14 +0,0 @@
pub mod rsq;
/// Initializer that fills every element of a fresh range with the same value.
#[derive(Copy, Clone, Debug)]
pub struct SameElementsInitializer<T> {
    initial_element_value: T,
}
impl<T> SameElementsInitializer<T> {
    /// Wraps `initial_element_value` as the per-element initial value.
    pub fn new(initial_element_value: T) -> Self {
        Self { initial_element_value }
    }
}

View File

@@ -1,118 +0,0 @@
//! # Range Sum Query
use crate::ops::SameElementsInitializer;
use crate::{LazyRangeInitializer, RangeModification, RangeQueryResult};
use std::borrow::Borrow;
use std::ops::{Add, AddAssign, Range};
// TODO: commutative Add
/// Aggregated sum over a key range.
#[derive(Clone, Copy, Debug)]
pub struct SumResult<T> {
    sum: T,
}
impl<T> SumResult<T> {
    /// The sum over the range this result was computed for.
    pub fn sum(&self) -> &T {
        &self.sum
    }
}
impl<T: Clone + for<'a> AddAssign<&'a T> + From<u8>, Key> RangeQueryResult<Key> for SumResult<T>
where
    for<'a> &'a T: Add<&'a T, Output = T>,
{
    /// The empty range sums to zero (`0u8.into()`).
    fn new_for_empty_range() -> Self {
        SumResult { sum: 0.into() }
    }
    fn combine(
        left: &Self,
        _left_range: &Range<Key>,
        right: &Self,
        _right_range: &Range<Key>,
    ) -> Self {
        SumResult {
            sum: &left.sum + &right.sum,
        }
    }
    fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
        left.sum += &right.sum
    }
}
/// Types whose sum over a range of identical elements can be computed
/// directly from one element value and the range.
pub trait SumOfSameElements<Key> {
    fn sum(initial_element_value: &Self, keys: &Range<Key>) -> Self;
}
impl<T: SumOfSameElements<Key>, TB: Borrow<T>, Key> LazyRangeInitializer<SumResult<T>, Key>
    for SameElementsInitializer<TB>
where
    SumResult<T>: RangeQueryResult<Key>,
{
    fn get(&self, range: &Range<Key>) -> SumResult<T> {
        SumResult {
            sum: SumOfSameElements::sum(self.initial_element_value.borrow(), range),
        }
    }
}
/// Modification for sums: do nothing, add to, or overwrite every element.
#[derive(Copy, Clone, Debug)]
pub enum AddAssignModification<T> {
    None,
    Add(T),
    Assign(T),
}
// How a range of sums is modified: add a value to every element, or
// overwrite every element with a value.
impl<T: Clone + for<'a> AddAssign<&'a T>, Key> RangeModification<Key> for AddAssignModification<T>
where
    SumResult<T>: RangeQueryResult<Key>,
    for<'a> SameElementsInitializer<&'a T>: LazyRangeInitializer<SumResult<T>, Key>,
{
    type Result = SumResult<T>;
    fn no_op() -> Self {
        AddAssignModification::None
    }
    fn is_no_op(&self) -> bool {
        // `matches!` instead of the hand-written match-to-bool.
        matches!(self, AddAssignModification::None)
    }
    fn is_reinitialization(&self) -> bool {
        // Assigning makes the previous contents of the range irrelevant.
        matches!(self, AddAssignModification::Assign(_))
    }
    /// Applies the modification to the cached sum for `range`.
    fn apply(&self, result: &mut SumResult<T>, range: &Range<Key>) {
        use AddAssignModification::*;
        match self {
            None => {}
            Add(x) | Assign(x) => {
                // Sum of `x` repeated over every element of `range`.
                let to_add = SameElementsInitializer::new(x).get(range).sum;
                if let Assign(_) = self {
                    result.sum = to_add;
                } else {
                    result.sum += &to_add;
                }
            }
        }
    }
    /// Folds `later` into `earlier` so that `earlier` alone has the effect
    /// of old-`earlier` followed by `later`.
    fn compose(later: &Self, earlier: &mut Self) {
        use AddAssignModification::*;
        match (later, earlier) {
            (_, e @ None) => *e = later.clone(),
            (None, _) => {}
            // A later assignment wipes out whatever happened before it.
            (Assign(_), e) => *e = later.clone(),
            (Add(x), Add(y)) => *y += x,
            (Add(x), Assign(value)) => *value += x,
        }
    }
}

View File

@@ -1,255 +0,0 @@
//! # Segment Tree
//! It is a competitive programming folklore data structure. Do not confuse with the interval tree.
use crate::{LazyRangeInitializer, PersistentVecStorage, RangeQueryResult, VecReadableVersion};
use std::ops::Range;
use std::rc::Rc;
/// Keys whose ranges can be split in two, so the tree can recurse.
pub trait MidpointableKey: Clone + Ord + Sized {
    /// Returns the point at which `range` is split into two sub-ranges.
    fn midpoint(range: &Range<Self>) -> Self;
}
// The tree additionally needs `Clone` modifications, because pending
// modifications are stored inside (possibly shared) nodes.
pub trait RangeModification<Key>: Clone + crate::RangeModification<Key> {}
// TODO: use trait alias when stabilized
impl<T: Clone + crate::RangeModification<Key>, Key> RangeModification<Key> for T {}
/// One tree node: the aggregated `result` for the node's whole range, plus a
/// modification queued for lazy propagation to the children.
#[derive(Debug)]
struct Node<Modification: RangeModification<Key>, Key> {
    result: Modification::Result,
    modify_children: Modification,
    left: Option<Rc<Self>>,
    right: Option<Rc<Self>>,
}
// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
impl<Modification: RangeModification<Key>, Key> Clone for Node<Modification, Key> {
    fn clone(&self) -> Self {
        Node {
            result: self.result.clone(),
            modify_children: self.modify_children.clone(),
            left: self.left.clone(),
            right: self.right.clone(),
        }
    }
}
impl<Modification: RangeModification<Key>, Key> Node<Modification, Key> {
    /// A childless node covering `range`, with its aggregate taken from
    /// `initializer`.
    fn new<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
        range: &Range<Key>,
        initializer: &Initializer,
    ) -> Self {
        Node {
            result: initializer.get(range),
            modify_children: Modification::no_op(),
            left: None,
            right: None,
        }
    }
    /// Applies `modification` to this node's aggregate and queues it for the
    /// children (lazy propagation).
    pub fn apply(&mut self, modification: &Modification, range: &Range<Key>) {
        modification.apply(&mut self.result, range);
        Modification::compose(modification, &mut self.modify_children);
        if self.modify_children.is_reinitialization() {
            // The children's contents are now irrelevant; drop them so they
            // are rebuilt from the initializer on demand.
            self.left = None;
            self.right = None;
        }
    }
    /// Materializes both children (creating them from `initializer` if
    /// missing) and pushes the pending modification down to them.
    pub fn force_children<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
        &mut self,
        initializer: &Initializer,
        range_left: &Range<Key>,
        range_right: &Range<Key>,
    ) {
        // `Rc::make_mut` clones a shared child before mutating it, which is
        // what keeps previously frozen versions intact.
        let left = Rc::make_mut(
            self.left
                .get_or_insert_with(|| Rc::new(Node::new(&range_left, initializer))),
        );
        let right = Rc::make_mut(
            self.right
                .get_or_insert_with(|| Rc::new(Node::new(&range_right, initializer))),
        );
        left.apply(&self.modify_children, &range_left);
        right.apply(&self.modify_children, &range_right);
        self.modify_children = Modification::no_op();
    }
    /// Recomputes this node's aggregate from its (already up-to-date)
    /// children. Requires no pending modification and both children present.
    pub fn recalculate_from_children(&mut self, range_left: &Range<Key>, range_right: &Range<Key>) {
        assert!(self.modify_children.is_no_op());
        assert!(self.left.is_some());
        assert!(self.right.is_some());
        self.result = Modification::Result::combine(
            &self.left.as_ref().unwrap().result,
            &range_left,
            &self.right.as_ref().unwrap().result,
            &range_right,
        );
    }
}
/// Splits `range` at its midpoint into two adjacent sub-ranges.
fn split_range<Key: MidpointableKey>(range: &Range<Key>) -> (Range<Key>, Range<Key>) {
    let mid = MidpointableKey::midpoint(range);
    (range.start.clone()..mid.clone(), mid..range.end.clone())
}
/// A version of the segment tree: a root node plus the metadata needed to
/// interpret it. Cloning is cheap (only `Rc` handles are cloned).
pub struct PersistentSegmentTreeVersion<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: Clone,
> {
    root: Rc<Node<Modification, Key>>,
    all_keys: Range<Key>,
    initializer: Rc<Initializer>,
}
// Manual implementation because `derive` would additionally require
// `Modification: Clone` and `Initializer: Clone`.
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: Clone,
    > Clone for PersistentSegmentTreeVersion<Modification, Initializer, Key>
{
    fn clone(&self) -> Self {
        Self {
            root: self.root.clone(),
            all_keys: self.all_keys.clone(),
            initializer: self.initializer.clone(),
        }
    }
}
/// Aggregates the query result for `keys` within the subtree at `node`,
/// which covers `node_keys`.
fn get<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(
    node: &mut Rc<Node<Modification, Key>>,
    node_keys: &Range<Key>,
    initializer: &Initializer,
    keys: &Range<Key>,
) -> Modification::Result {
    if node_keys.end <= keys.start || keys.end <= node_keys.start {
        // Disjoint: contributes the identity element.
        return Modification::Result::new_for_empty_range();
    }
    if keys.start <= node_keys.start && node_keys.end <= keys.end {
        // Fully covered: the node's cached aggregate is the answer.
        return node.result.clone();
    }
    // Partial overlap: descend. `make_mut` may clone a shared node so lazy
    // pushes don't disturb frozen versions.
    let node = Rc::make_mut(node);
    let (left_keys, right_keys) = split_range(node_keys);
    node.force_children(initializer, &left_keys, &right_keys);
    let mut result = get(node.left.as_mut().unwrap(), &left_keys, initializer, keys);
    Modification::Result::add(
        &mut result,
        &left_keys,
        &get(node.right.as_mut().unwrap(), &right_keys, initializer, keys),
        &right_keys,
    );
    result
}
/// Applies `modification` to the intersection of `keys` with the subtree at
/// `node` (covering `node_keys`), keeping cached aggregates up to date.
fn modify<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(
    node: &mut Rc<Node<Modification, Key>>,
    node_keys: &Range<Key>,
    initializer: &Initializer,
    keys: &Range<Key>,
    modification: &Modification,
) {
    if modification.is_no_op() || node_keys.end <= keys.start || keys.end <= node_keys.start {
        return;
    }
    let node = Rc::make_mut(node);
    if keys.start <= node_keys.start && node_keys.end <= keys.end {
        // Fully covered: apply lazily at this node and stop descending.
        node.apply(modification, node_keys);
        return;
    }
    let (left_keys, right_keys) = split_range(node_keys);
    node.force_children(initializer, &left_keys, &right_keys);
    modify(
        node.left.as_mut().unwrap(),
        &left_keys,
        initializer,
        keys,
        &modification,
    );
    modify(
        node.right.as_mut().unwrap(),
        &right_keys,
        initializer,
        keys,
        &modification,
    );
    node.recalculate_from_children(&left_keys, &right_keys);
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > VecReadableVersion<Modification, Key>
    for PersistentSegmentTreeVersion<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        // Works on a clone of the root handle, so this version stays
        // logically immutable even though `get` forces children internally.
        get(
            &mut self.root.clone(), // TODO: do not always force a branch
            &self.all_keys,
            self.initializer.as_ref(),
            keys,
        )
    }
}
/// The mutable head of the tree; wraps the current version.
pub struct PersistentSegmentTree<
    Modification: RangeModification<Key>,
    Initializer: LazyRangeInitializer<Modification::Result, Key>,
    Key: MidpointableKey,
>(PersistentSegmentTreeVersion<Modification, Initializer, Key>);
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > VecReadableVersion<Modification, Key>
    for PersistentSegmentTree<Modification, Initializer, Key>
{
    fn get(&self, keys: &Range<Key>) -> Modification::Result {
        // Delegates to the current version.
        self.0.get(keys)
    }
}
impl<
        Modification: RangeModification<Key>,
        Initializer: LazyRangeInitializer<Modification::Result, Key>,
        Key: MidpointableKey,
    > PersistentVecStorage<Modification, Initializer, Key>
    for PersistentSegmentTree<Modification, Initializer, Key>
{
    /// Creates a tree over `all_keys` consisting of a single lazily
    /// initialized root node.
    fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
        PersistentSegmentTree(PersistentSegmentTreeVersion {
            root: Rc::new(Node::new(&all_keys, &initializer)),
            // Field-init shorthand instead of the redundant `all_keys: all_keys`.
            all_keys,
            initializer: Rc::new(initializer),
        })
    }
    type FrozenVersion = PersistentSegmentTreeVersion<Modification, Initializer, Key>;
    /// Applies `modification` to every element in `keys`.
    fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
        modify(
            &mut self.0.root, // TODO: do not always force a branch
            &self.0.all_keys,
            self.0.initializer.as_ref(),
            keys,
            modification,
        )
    }
    /// Cheap snapshot: clones the version handle (`Rc` bumps only).
    fn freeze(&mut self) -> Self::FrozenVersion {
        self.0.clone()
    }
}

View File

@@ -1,295 +0,0 @@
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
use persistent_range_query::{
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
VecReadableVersion,
};
use std::cmp::Ordering;
use std::ops::Range;
/// A page number; the key type of the layer map under test.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct PageIndex(u32);
/// Layers are identified by name in this test.
type LayerId = String;
impl IndexableKey for PageIndex {
    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
        (key.0 as usize) - (all_keys.start.0 as usize)
    }
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
        PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
    }
}
impl MidpointableKey for PageIndex {
    fn midpoint(range: &Range<Self>) -> Self {
        PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
    }
}
/// Per-range aggregate maintained by the layer-map test.
#[derive(Clone, Debug, Eq, PartialEq)]
struct LayerMapInformation {
    // Only make sense for a range of length 1.
    last_layer: Option<LayerId>,
    last_image_layer: Option<LayerId>,
    // Work for all ranges
    max_delta_layers: (usize, Range<PageIndex>),
}
impl LayerMapInformation {
    /// The latest layer and the latest image layer (single-page ranges only).
    fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
        (&self.last_layer, &self.last_image_layer)
    }
    /// The maximum delta-layer count and a range where it is attained.
    fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
        &self.max_delta_layers
    }
}
/// Concatenates two page ranges when they are adjacent; otherwise falls back
/// to the non-empty one (or to `left` when neither matches).
fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
    match (left.is_empty(), right.is_empty()) {
        (true, _) => right.clone(),
        (_, true) => left.clone(),
        _ if left.end == right.start => left.start..right.end,
        _ => left.clone(),
    }
}
impl RangeQueryResult<PageIndex> for LayerMapInformation {
    fn new_for_empty_range() -> Self {
        LayerMapInformation {
            last_layer: None,
            last_image_layer: None,
            max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
        }
    }
    fn combine(
        left: &Self,
        _left_range: &Range<PageIndex>,
        right: &Self,
        _right_range: &Range<PageIndex>,
    ) -> Self {
        // Note that either range may be empty.
        LayerMapInformation {
            // `last_*` fields prefer the left side when both are present.
            last_layer: left
                .last_layer
                .as_ref()
                .or_else(|| right.last_layer.as_ref())
                .cloned(),
            last_image_layer: left
                .last_image_layer
                .as_ref()
                .or_else(|| right.last_image_layer.as_ref())
                .cloned(),
            // The larger count wins; on a tie, adjacent witness ranges are
            // merged into one.
            max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
                Ordering::Less => right.max_delta_layers.clone(),
                Ordering::Greater => left.max_delta_layers.clone(),
                Ordering::Equal => (
                    left.max_delta_layers.0,
                    merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
                ),
            },
        }
    }
    fn add(
        left: &mut Self,
        left_range: &Range<PageIndex>,
        right: &Self,
        right_range: &Range<PageIndex>,
    ) {
        *left = Self::combine(&left, left_range, right, right_range);
    }
}
/// Pending "add N delta layers" operation; remembers the newest layer name.
#[derive(Clone, Debug)]
struct AddDeltaLayers {
    last_layer: LayerId,
    count: usize,
}
/// A modification of the layer map: add an image layer, delta layers, or both.
#[derive(Clone, Debug)]
struct LayerMapModification {
    add_image_layer: Option<LayerId>,
    add_delta_layers: Option<AddDeltaLayers>,
}
impl LayerMapModification {
    /// Modification that adds a single image layer named `layer`.
    fn add_image_layer(layer: impl Into<LayerId>) -> Self {
        LayerMapModification {
            add_image_layer: Some(layer.into()),
            add_delta_layers: None,
        }
    }
    /// Modification that adds a single delta layer named `layer`.
    fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
        LayerMapModification {
            add_image_layer: None,
            add_delta_layers: Some(AddDeltaLayers {
                last_layer: layer.into(),
                count: 1,
            }),
        }
    }
}
impl RangeModification<PageIndex> for LayerMapModification {
    type Result = LayerMapInformation;
    fn no_op() -> Self {
        LayerMapModification {
            add_image_layer: None,
            add_delta_layers: None,
        }
    }
    fn is_no_op(&self) -> bool {
        self.add_image_layer.is_none() && self.add_delta_layers.is_none()
    }
    fn is_reinitialization(&self) -> bool {
        // An image layer resets the delta-layer count for the whole range.
        self.add_image_layer.is_some()
    }
    fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
        if let Some(layer) = &self.add_image_layer {
            result.last_layer = Some(layer.clone());
            result.last_image_layer = Some(layer.clone());
            result.max_delta_layers = (0, range.clone());
        }
        // Delta layers are applied after the image layer, if both are set.
        if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
            result.last_layer = Some(last_layer.clone());
            result.max_delta_layers.0 += count;
        }
    }
    fn compose(later: &Self, earlier: &mut Self) {
        if later.add_image_layer.is_some() {
            // The later image layer makes the earlier modification irrelevant.
            *earlier = later.clone();
            return;
        }
        if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
            let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
                last_layer: LayerId::default(),
                count: 0,
            });
            res.last_layer = last_layer.clone();
            res.count += count;
        }
    }
}
// Fresh ranges start with no layers at all.
impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
    fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
        LayerMapInformation {
            last_layer: None,
            last_image_layer: None,
            max_delta_layers: (0, range.clone()),
        }
    }
}
// Exercises a PersistentVecStorage implementation with a layer-map workload:
// two overlapping image layers, two overlapping delta layers, and a frozen
// snapshot taken in between that must be unaffected by later modifications.
fn test_layer_map<
    S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
>() {
    let mut s = S::new(
        PageIndex(0)..PageIndex(100),
        SameElementsInitializer::new(()),
    );
    s.modify(
        &(PageIndex(0)..PageIndex(70)),
        &LayerMapModification::add_image_layer("Img0..70"),
    );
    s.modify(
        &(PageIndex(50)..PageIndex(100)),
        &LayerMapModification::add_image_layer("Img50..100"),
    );
    s.modify(
        &(PageIndex(10)..PageIndex(60)),
        &LayerMapModification::add_delta_layer("Delta10..60"),
    );
    // Snapshot before the second delta layer; queried again below to check
    // that the frozen version did not observe "Delta20..80".
    let s_before_last_delta = s.freeze();
    s.modify(
        &(PageIndex(20)..PageIndex(80)),
        &LayerMapModification::add_delta_layer("Delta20..80"),
    );
    // Probe single pages: (last layer, last image layer) at each point.
    assert_eq!(
        s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
        (&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
    );
    assert_eq!(
        s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
        (
            &Some("Delta10..60".to_owned()),
            &Some("Img0..70".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
        (
            &Some("Delta20..80".to_owned()),
            &Some("Img0..70".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
        (
            &Some("Delta20..80".to_owned()),
            &Some("Img50..100".to_owned())
        )
    );
    assert_eq!(
        s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
        (
            &Some("Img50..100".to_owned()),
            &Some("Img50..100".to_owned())
        )
    );
    // Maximum delta-layer count plus the sub-range where it is attained.
    assert_eq!(
        s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
        &(2, PageIndex(20)..PageIndex(60)),
    );
    assert_eq!(
        *s_before_last_delta
            .get(&(PageIndex(0)..PageIndex(100)))
            .max_delta_layers(),
        (1, PageIndex(10)..PageIndex(60)),
    );
    assert_eq!(
        *s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
        (2, PageIndex(20)..PageIndex(30))
    );
    assert_eq!(
        *s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
        (1, PageIndex(10)..PageIndex(20))
    );
    assert_eq!(
        *s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
        (1, PageIndex(70)..PageIndex(80))
    );
    // The frozen snapshot never saw any delta layer over 70..80.
    assert_eq!(
        *s_before_last_delta
            .get(&(PageIndex(70)..PageIndex(80)))
            .max_delta_layers(),
        (0, PageIndex(70)..PageIndex(80))
    );
}
// Runs the shared layer-map scenario against the naive reference storage.
#[test]
fn test_naive() {
    test_layer_map::<NaiveVecStorage<_, _, _>>();
}
// Runs the shared layer-map scenario against the persistent segment tree.
#[test]
fn test_segment_tree() {
    test_layer_map::<PersistentSegmentTree<_, _, _>>();
}

View File

@@ -1,116 +0,0 @@
use persistent_range_query::naive::*;
use persistent_range_query::ops::rsq::AddAssignModification::Add;
use persistent_range_query::ops::rsq::*;
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
use rand::{Rng, SeedableRng};
use std::ops::Range;
// Test key type: a u16 newtype used as the index domain of the storages.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct K(u16);
impl IndexableKey for K {
    /// Zero-based offset of `key` from the start of `all_keys`.
    fn index(all_keys: &Range<Self>, key: &Self) -> usize {
        usize::from(key.0) - usize::from(all_keys.start.0)
    }
    /// The half-open, single-element range covering position `index`.
    fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
        let start = all_keys.start.0 + index as u16;
        K(start)..K(start + 1)
    }
}
impl SumOfSameElements<K> for i32 {
    /// Sum of a constant element over every key in the half-open range:
    /// the element value times the number of keys.
    fn sum(initial_element_value: &Self, keys: &Range<K>) -> Self {
        let key_count = (keys.end.0 - keys.start.0) as Self;
        initial_element_value * key_count
    }
}
impl MidpointableKey for K {
    /// Key halfway through `range` (rounded down); used to split tree nodes.
    fn midpoint(range: &Range<Self>) -> Self {
        let half_span = (range.end.0 - range.start.0) / 2;
        K(range.start.0 + half_span)
    }
}
// Shared scenario for range-sum storages: mixes Add and Assign range
// updates and checks sums on both the live version and a frozen snapshot.
fn test_storage<
    S: PersistentVecStorage<AddAssignModification<i32>, SameElementsInitializer<i32>, K>,
>() {
    let mut s = S::new(K(0)..K(12), SameElementsInitializer::new(0i32));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 0);
    s.modify(&(K(2)..K(5)), &AddAssignModification::Add(3));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 3 + 3);
    // Snapshot before the Assign; it must keep returning the old sums.
    let s_old = s.freeze();
    s.modify(&(K(3)..K(6)), &AddAssignModification::Assign(10));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 10 + 10);
    s.modify(&(K(4)..K(7)), &AddAssignModification::Add(2));
    assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 12 + 12 + 2);
    assert_eq!(*s.get(&(K(4)..K(6))).sum(), 12 + 12);
    // The frozen version still sees only the original Add(3) at key 4.
    assert_eq!(*s_old.get(&(K(4)..K(6))).sum(), 3);
}
// Runs the shared range-sum scenario against the naive reference storage.
#[test]
fn test_naive() {
    test_storage::<NaiveVecStorage<_, _, _>>();
}
// Runs the shared range-sum scenario against the persistent segment tree.
#[test]
fn test_segment_tree() {
    test_storage::<PersistentSegmentTree<_, _, _>>();
}
// Randomized cross-check: applies identical random Add operations to the
// naive storage and the segment tree and compares range sums throughout.
// The RNG is seeded, so failures are reproducible.
#[test]
fn test_stress() {
    const LEN: u16 = 17_238;
    const OPERATIONS: i32 = 20_000;
    let mut rng = rand::rngs::StdRng::seed_from_u64(0);
    let mut naive: NaiveVecStorage<AddAssignModification<i32>, _, _> =
        NaiveVecStorage::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
    let mut segm_tree: PersistentSegmentTree<AddAssignModification<i32>, _, _> =
        PersistentSegmentTree::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
    // Random half-open range with both ends drawn independently from
    // [0, LEN); swapped if drawn out of order.
    fn gen_range(rng: &mut impl Rng) -> Range<K> {
        let l: u16 = rng.gen_range(0..LEN);
        let r: u16 = rng.gen_range(0..LEN);
        if l <= r {
            K(l)..K(r)
        } else {
            K(r)..K(l)
        }
    }
    // Two rounds: each freezes both storages, hammers the live versions,
    // then verifies the frozen checksum is unchanged.
    for _ in 0..2 {
        let checksum_range = gen_range(&mut rng);
        let checksum_before: i32 = *naive.get(&checksum_range).sum();
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
        let naive_before = naive.freeze();
        let segm_tree_before = segm_tree.freeze();
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
        for _ in 0..OPERATIONS {
            {
                // Read check: both storages agree on a random range sum.
                let range = gen_range(&mut rng);
                assert_eq!(naive.get(&range).sum(), segm_tree.get(&range).sum());
            }
            {
                // Write: apply the same random Add to both storages.
                let range = gen_range(&mut rng);
                let val = rng.gen_range(-10i32..=10i32);
                let op = Add(val);
                naive.modify(&range, &op);
                segm_tree.modify(&range, &op);
            }
        }
        // Frozen versions must still return the pre-modification checksum.
        assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
        assert_eq!(
            checksum_before,
            *segm_tree_before.get(&checksum_range).sum()
        );
    }
}

View File

@@ -33,8 +33,8 @@ pub struct Segment {
/// Logical size before this state
start_size: u64,
/// Logical size at this state. Can be None in the last Segment of a branch.
pub end_size: Option<u64>,
/// Logical size at this state
pub end_size: u64,
/// Indices to [`Storage::segments`]
///
@@ -115,7 +115,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
start_lsn: 0,
end_lsn: 0,
start_size: 0,
end_size: Some(0),
end_size: 0,
children_after: Vec::new(),
};
@@ -125,39 +125,6 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
}
}
/// Advances the branch with a new point, at given LSN.
pub fn insert_point<Q: ?Sized>(
&mut self,
branch: &Q,
op: Cow<'static, str>,
lsn: u64,
size: Option<u64>,
) where
K: std::borrow::Borrow<Q>,
Q: std::hash::Hash + Eq,
{
let lastseg_id = *self.branches.get(branch).unwrap();
let newseg_id = self.segments.len();
let lastseg = &mut self.segments[lastseg_id];
assert!(lsn > lastseg.end_lsn);
let newseg = Segment {
op,
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lsn,
start_size: lastseg.end_size.unwrap(),
end_size: size,
children_after: Vec::new(),
needed: false,
};
lastseg.children_after.push(newseg_id);
self.segments.push(newseg);
*self.branches.get_mut(branch).expect("read already") = newseg_id;
}
/// Advances the branch with the named operation, by the relative LSN and logical size bytes.
pub fn modify_branch<Q: ?Sized>(
&mut self,
@@ -178,8 +145,8 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
parent: Some(lastseg_id),
start_lsn: lastseg.end_lsn,
end_lsn: lastseg.end_lsn + lsn_bytes,
start_size: lastseg.end_size.unwrap(),
end_size: Some((lastseg.end_size.unwrap() as i64 + size_bytes) as u64),
start_size: lastseg.end_size,
end_size: (lastseg.end_size as i64 + size_bytes) as u64,
children_after: Vec::new(),
needed: false,
};
@@ -354,7 +321,7 @@ impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
Some(SegmentSize {
seg_id,
method: SnapshotAfter,
this_size: seg.end_size.unwrap(),
this_size: seg.end_size,
children,
})
} else {

View File

@@ -174,7 +174,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
let seg_id = node.seg_id;
let seg = segments.get(seg_id).unwrap();
let lsn = seg.end_lsn;
let size = seg.end_size.unwrap_or(0);
let size = seg.end_size;
let method = node.method;
println!(" {{");
@@ -226,7 +226,7 @@ fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
print!(
" label=\"{} / {}\"",
next.end_lsn - seg.end_lsn,
(next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
(next.end_size as i128 - seg.end_size as i128)
);
} else {
print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);

View File

@@ -48,25 +48,6 @@ pub mod nonblock;
// Default signal handling
pub mod signals;
/// use with fail::cfg("$name", "return(2000)")
#[macro_export]
macro_rules! failpoint_sleep_millis_async {
($name:literal) => {{
let should_sleep: Option<std::time::Duration> = (|| {
fail::fail_point!($name, |v: Option<_>| {
let millis = v.unwrap().parse::<u64>().unwrap();
Some(Duration::from_millis(millis))
});
None
})();
if let Some(d) = should_sleep {
tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
tokio::time::sleep(d).await;
tracing::info!("failpoint {:?}: sleep done", $name);
}
}};
}
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
///
/// we have several cases:

View File

@@ -138,7 +138,7 @@ impl FromStr for Lsn {
///
/// If the input string is missing the '/' character, then use `Lsn::from_hex`
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut splitter = s.trim().split('/');
let mut splitter = s.split('/');
if let (Some(left), Some(right), None) = (splitter.next(), splitter.next(), splitter.next())
{
let left_num = u32::from_str_radix(left, 16).map_err(|_| LsnParseError)?;
@@ -270,11 +270,6 @@ mod tests {
);
assert_eq!(Lsn::from_hex("0"), Ok(Lsn(0)));
assert_eq!(Lsn::from_hex("F12345678AAAA5555"), Err(LsnParseError));
let expected_lsn = Lsn(0x3C490F8);
assert_eq!(" 0/3C490F8".parse(), Ok(expected_lsn));
assert_eq!("0/3C490F8 ".parse(), Ok(expected_lsn));
assert_eq!(" 0/3C490F8 ".parse(), Ok(expected_lsn));
}
#[test]

View File

@@ -76,7 +76,3 @@ tempfile = "3.2"
[[bench]]
name = "bench_layer_map"
harness = false
[[bench]]
name = "bench_walredo"
harness = false

File diff suppressed because one or more lines are too long

View File

@@ -199,20 +199,6 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
logging::init(conf.log_format)?;
info!("version: {}", version());
// If any failpoints were set from FAILPOINTS environment variable,
// print them to the log for debugging purposes
let failpoints = fail::list();
if !failpoints.is_empty() {
info!(
"started with failpoints: {}",
failpoints
.iter()
.map(|(name, actions)| format!("{name}={actions}"))
.collect::<Vec<String>>()
.join(";")
)
}
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
lock_file::LockCreationResult::Created {

View File

@@ -614,9 +614,8 @@ impl PageServerConf {
PathBuf::from(format!("../tmp_check/test_{test_name}"))
}
#[cfg(test)]
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
let pg_distrib_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../pg_install");
PageServerConf {
id: NodeId(0),
wait_lsn_timeout: Duration::from_secs(60),
@@ -627,7 +626,7 @@ impl PageServerConf {
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
superuser: "cloud_admin".to_string(),
workdir: repo_dir,
pg_distrib_dir,
pg_distrib_dir: PathBuf::new(),
auth_type: AuthType::Trust,
auth_validation_public_key_path: None,
remote_storage_config: None,

View File

@@ -667,7 +667,6 @@ components:
- disk_consistent_lsn
- awaits_download
- state
- latest_gc_cutoff_lsn
properties:
timeline_id:
type: string
@@ -712,9 +711,6 @@ components:
type: boolean
state:
type: string
latest_gc_cutoff_lsn:
type: string
format: hex
# These 'local' and 'remote' fields just duplicate some of the fields
# above. They are kept for backwards-compatibility. They can be removed,

View File

@@ -76,12 +76,6 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
FeMessage::CopyData(bytes) => bytes,
FeMessage::CopyDone => { break },
FeMessage::Sync => continue,
FeMessage::Terminate => {
let msg = format!("client terminated connection with Terminate message during COPY");
pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
break;
}
m => {
let msg = format!("unexpected message {:?}", m);
pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
@@ -93,10 +87,10 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
yield copy_data_bytes;
}
Ok(None) => {
let msg = "client closed connection during COPY";
let msg = "client closed connection";
pgb.write_message(&BeMessage::ErrorResponse(msg))?;
pgb.flush().await?;
Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
Err(io::Error::new(io::ErrorKind::Other, msg))?;
}
Err(e) => {
Err(io::Error::new(io::ErrorKind::Other, e))?;

View File

@@ -461,7 +461,14 @@ impl Tenant {
.context("Cannot branch off the timeline that's not present in pageserver")?;
if let Some(lsn) = ancestor_start_lsn.as_mut() {
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
*lsn = lsn.align();
ancestor_timeline.wait_lsn(*lsn).await?;
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > *lsn {
@@ -473,14 +480,6 @@ impl Tenant {
ancestor_ancestor_lsn,
);
}
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
ancestor_timeline.wait_lsn(*lsn).await?;
}
self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
@@ -1011,10 +1010,6 @@ impl Tenant {
let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?;
utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines");
info!("starting on {} timelines", gc_timelines.len());
// Perform GC for each timeline.
//
// Note that we don't hold the GC lock here because we don't want

View File

@@ -183,19 +183,6 @@ pub(super) async fn gather_inputs(
}
}
// all timelines also have an end point if they have made any progress
if last_record_lsn > timeline.get_ancestor_lsn()
&& !interesting_lsns
.iter()
.any(|(lsn, _)| lsn == &last_record_lsn)
{
updates.push(Update {
lsn: last_record_lsn,
command: Command::EndOfBranch,
timeline_id: timeline.timeline_id,
});
}
timeline_inputs.insert(
timeline.timeline_id,
TimelineInputs {
@@ -283,22 +270,48 @@ impl ModelInputs {
// impossible to always determine the a one main branch.
let mut storage = tenant_size_model::Storage::<Option<TimelineId>>::new(None);
// tracking these not to require modifying the current implementation of the size model,
// which works in relative LSNs and sizes.
let mut last_state: HashMap<TimelineId, (Lsn, u64)> = HashMap::new();
for update in &self.updates {
let Update {
lsn,
command: op,
timeline_id,
} = update;
let Lsn(now) = *lsn;
match op {
Command::Update(sz) => {
storage.insert_point(&Some(*timeline_id), "".into(), now, Some(*sz));
}
Command::EndOfBranch => {
storage.insert_point(&Some(*timeline_id), "".into(), now, None);
let latest = last_state.get_mut(timeline_id).ok_or_else(|| {
anyhow::anyhow!(
"ordering-mismatch: there must had been a previous state for {timeline_id}"
)
})?;
let lsn_bytes = {
let Lsn(now) = lsn;
let Lsn(prev) = latest.0;
debug_assert!(prev <= *now, "self.updates should had been sorted");
now - prev
};
let size_diff =
i64::try_from(*sz as i128 - latest.1 as i128).with_context(|| {
format!("size difference i64 overflow for {timeline_id}")
})?;
storage.modify_branch(&Some(*timeline_id), "".into(), lsn_bytes, size_diff);
*latest = (*lsn, *sz);
}
Command::BranchFrom(parent) => {
storage.branch(parent, Some(*timeline_id));
let size = parent
.as_ref()
.and_then(|id| last_state.get(id))
.map(|x| x.1)
.unwrap_or(0);
last_state.insert(*timeline_id, (*lsn, size));
}
}
}
@@ -307,7 +320,10 @@ impl ModelInputs {
}
}
/// A point of interest in the tree of branches
/// Single size model update.
///
/// Sizing model works with relative increments over latest branch state.
/// Updates are absolute, so additional state needs to be tracked when applying.
#[serde_with::serde_as]
#[derive(
Debug, PartialEq, PartialOrd, Eq, Ord, Clone, Copy, serde::Serialize, serde::Deserialize,
@@ -326,7 +342,6 @@ struct Update {
enum Command {
Update(u64),
BranchFrom(#[serde_as(as = "Option<serde_with::DisplayFromStr>")] Option<TimelineId>),
EndOfBranch,
}
impl std::fmt::Debug for Command {
@@ -336,7 +351,6 @@ impl std::fmt::Debug for Command {
match self {
Self::Update(arg0) => write!(f, "Update({arg0})"),
Self::BranchFrom(arg0) => write!(f, "BranchFrom({arg0:?})"),
Self::EndOfBranch => write!(f, "EndOfBranch"),
}
}
}

View File

@@ -61,13 +61,6 @@ use crate::{
storage_sync::{self, index::LayerFileMetadata},
};
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum FlushLoopState {
NotStarted,
Running,
Exited,
}
pub struct Timeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -129,7 +122,7 @@ pub struct Timeline {
write_lock: Mutex<()>,
/// Used to avoid multiple `flush_loop` tasks running
flush_loop_state: Mutex<FlushLoopState>,
flush_loop_started: Mutex<bool>,
/// layer_flush_start_tx can be used to wake up the layer-flushing task.
/// The value is a counter, incremented every time a new flush cycle is requested.
@@ -762,7 +755,7 @@ impl Timeline {
upload_layers: AtomicBool::new(upload_layers),
flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
flush_loop_started: Mutex::new(false),
layer_flush_start_tx,
layer_flush_done_tx,
@@ -801,23 +794,13 @@ impl Timeline {
}
pub(super) fn maybe_spawn_flush_loop(self: &Arc<Self>) {
let mut flush_loop_state = self.flush_loop_state.lock().unwrap();
match *flush_loop_state {
FlushLoopState::NotStarted => (),
FlushLoopState::Running => {
info!(
"skipping attempt to start flush_loop twice {}/{}",
self.tenant_id, self.timeline_id
);
return;
}
FlushLoopState::Exited => {
warn!(
"ignoring attempt to restart exited flush_loop {}/{}",
self.tenant_id, self.timeline_id
);
return;
}
let mut flush_loop_started = self.flush_loop_started.lock().unwrap();
if *flush_loop_started {
info!(
"skipping attempt to start flush_loop twice {}/{}",
self.tenant_id, self.timeline_id
);
return;
}
let layer_flush_start_rx = self.layer_flush_start_tx.subscribe();
@@ -830,16 +813,11 @@ impl Timeline {
Some(self.timeline_id),
"layer flush task",
false,
async move {
self_clone.flush_loop(layer_flush_start_rx).await;
let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap();
assert_eq!(*flush_loop_state, FlushLoopState::Running);
*flush_loop_state = FlushLoopState::Exited;
Ok(()) }
async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) }
.instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id))
);
*flush_loop_state = FlushLoopState::Running;
*flush_loop_started = true;
}
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
@@ -1387,9 +1365,8 @@ impl Timeline {
// finished, instead of some other flush that was started earlier.
let mut my_flush_request = 0;
let flush_loop_state = { *self.flush_loop_state.lock().unwrap() };
if flush_loop_state != FlushLoopState::Running {
anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}")
if !&*self.flush_loop_started.lock().unwrap() {
anyhow::bail!("cannot flush frozen layers when flush_loop is not running")
}
self.layer_flush_start_tx.send_modify(|counter| {

View File

@@ -216,6 +216,7 @@ impl TenantConf {
}
}
#[cfg(test)]
pub fn dummy_conf() -> Self {
TenantConf {
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,

View File

@@ -71,7 +71,9 @@ async fn compaction_loop(tenant_id: TenantId) {
let mut sleep_duration = tenant.get_compaction_period();
if let Err(e) = tenant.compaction_iteration() {
sleep_duration = wait_duration;
error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration);
#[cfg(feature = "testing")]
std::process::abort();
}
// Sleep
@@ -120,7 +122,9 @@ async fn gc_loop(tenant_id: TenantId) {
if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await
{
sleep_duration = wait_duration;
error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration);
#[cfg(feature = "testing")]
std::process::abort();
}
}

View File

@@ -210,16 +210,6 @@ impl PostgresRedoManager {
}
}
/// Launch process pre-emptively. Should not be needed except for benchmarking.
pub fn launch_process(&mut self, pg_version: u32) -> anyhow::Result<()> {
let inner = self.process.get_mut().unwrap();
if inner.is_none() {
let p = PostgresRedoProcess::launch(self.conf, self.tenant_id, pg_version)?;
*inner = Some(p);
}
Ok(())
}
///
/// Process one request for WAL redo using wal-redo postgres
///

View File

@@ -32,6 +32,11 @@
#define PageStoreTrace DEBUG5
#define NEON_TAG "[NEON_SMGR] "
#define neon_log(tag, fmt, ...) ereport(tag, \
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true)))
bool connected = false;
PGconn *pageserver_conn = NULL;
@@ -92,10 +97,11 @@ pageserver_connect()
while (PQisBusy(pageserver_conn))
{
int wc;
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
@@ -135,10 +141,11 @@ retry:
if (ret == 0)
{
int wc;
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
@@ -232,9 +239,6 @@ pageserver_receive(void)
StringInfoData resp_buff;
NeonResponse *resp;
if (!connected)
return NULL;
PG_TRY();
{
/* read response */
@@ -244,10 +248,7 @@ pageserver_receive(void)
if (resp_buff.len < 0)
{
if (resp_buff.len == -1)
{
pageserver_disconnect();
return NULL;
}
neon_log(ERROR, "end of COPY");
else if (resp_buff.len == -2)
neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
}

View File

@@ -49,11 +49,6 @@ typedef struct
#define messageTag(m) (((const NeonMessage *)(m))->tag)
#define NEON_TAG "[NEON_SMGR] "
#define neon_log(tag, fmt, ...) ereport(tag, \
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true)))
/*
* supertype of all the Neon*Request structs below
*

View File

@@ -251,9 +251,9 @@ XLogRecPtr prefetch_lsn = 0;
static void consume_prefetch_responses(void);
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_read(PrefetchRequest *slot);
static void prefetch_read(PrefetchRequest *slot);
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_wait_for(uint64 ring_index);
static void prefetch_wait_for(uint64 ring_index);
static void prefetch_cleanup(void);
static inline void prefetch_set_unused(uint64 ring_index);
@@ -393,7 +393,7 @@ prefetch_cleanup(void)
* NOTE: this function may indirectly update MyPState->pfs_hash; which
* invalidates any active pointers into the hash table.
*/
static bool
static void
prefetch_wait_for(uint64 ring_index)
{
PrefetchRequest *entry;
@@ -412,10 +412,8 @@ prefetch_wait_for(uint64 ring_index)
entry = GetPrfSlot(MyPState->ring_receive);
Assert(entry->status == PRFS_REQUESTED);
if (!prefetch_read(entry))
return false;
prefetch_read(entry);
}
return true;
}
/*
@@ -427,7 +425,7 @@ prefetch_wait_for(uint64 ring_index)
* NOTE: this function may indirectly update MyPState->pfs_hash; which
* invalidates any active pointers into the hash table.
*/
static bool
static void
prefetch_read(PrefetchRequest *slot)
{
NeonResponse *response;
@@ -440,22 +438,15 @@ prefetch_read(PrefetchRequest *slot)
old = MemoryContextSwitchTo(MyPState->errctx);
response = (NeonResponse *) page_server->receive();
MemoryContextSwitchTo(old);
if (response)
{
/* update prefetch state */
MyPState->n_responses_buffered += 1;
MyPState->n_requests_inflight -= 1;
MyPState->ring_receive += 1;
/* update prefetch state */
MyPState->n_responses_buffered += 1;
MyPState->n_requests_inflight -= 1;
MyPState->ring_receive += 1;
/* update slot state */
slot->status = PRFS_RECEIVED;
slot->response = response;
return true;
}
else
{
return false;
}
/* update slot state */
slot->status = PRFS_RECEIVED;
slot->response = response;
}
/*
@@ -755,16 +746,11 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
static NeonResponse *
page_server_request(void const *req)
{
NeonResponse* resp;
do {
page_server->send((NeonRequest *) req);
page_server->flush();
MyPState->ring_flush = MyPState->ring_unused;
consume_prefetch_responses();
resp = page_server->receive();
} while (resp == NULL);
return resp;
page_server->send((NeonRequest *) req);
page_server->flush();
MyPState->ring_flush = MyPState->ring_unused;
consume_prefetch_responses();
return page_server->receive();
}
@@ -1649,8 +1635,7 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
BufferTag tag;
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
switch (reln->smgr_relpersistence)
{
@@ -1666,7 +1651,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
tag = (BufferTag) {
BufferTag tag = (BufferTag) {
.rnode = reln->smgr_rnode.node,
.forkNum = forknum,
.blockNum = blocknum
@@ -1770,24 +1755,22 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
}
}
do
if (entry == NULL)
{
if (entry == NULL)
{
n_prefetch_misses += 1;
n_prefetch_misses += 1;
ring_index = prefetch_register_buffer(buftag, &request_latest,
&request_lsn);
slot = GetPrfSlot(ring_index);
}
ring_index = prefetch_register_buffer(buftag, &request_latest,
&request_lsn);
slot = GetPrfSlot(ring_index);
}
Assert(slot->my_ring_index == ring_index);
Assert(MyPState->ring_last <= ring_index &&
MyPState->ring_unused > ring_index);
Assert(slot->status != PRFS_UNUSED);
Assert(GetPrfSlot(ring_index) == slot);
Assert(slot->my_ring_index == ring_index);
Assert(MyPState->ring_last <= ring_index &&
MyPState->ring_unused > ring_index);
Assert(slot->status != PRFS_UNUSED);
Assert(GetPrfSlot(ring_index) == slot);
} while (!prefetch_wait_for(ring_index));
prefetch_wait_for(ring_index);
Assert(slot->status == PRFS_RECEIVED);

View File

@@ -119,7 +119,6 @@ static TimestampTz last_reconnect_attempt;
static WalproposerShmemState * walprop_shared;
/* Prototypes for private functions */
static void WalProposerRegister(void);
static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId);
static void WalProposerStart(void);
static void WalProposerLoop(void);
@@ -456,7 +455,7 @@ WalProposerPoll(void)
/*
* Register a background worker proposing WAL to wal acceptors.
*/
static void
void
WalProposerRegister(void)
{
BackgroundWorker bgw;

View File

@@ -377,18 +377,18 @@ typedef struct Safekeeper
AppendResponse appendResponse; /* feedback for master */
} Safekeeper;
extern void WalProposerSync(int argc, char *argv[]);
extern void WalProposerMain(Datum main_arg);
extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
extern void WalProposerPoll(void);
extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
ReplicationFeedback *rf);
extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
void WalProposerPoll(void);
void WalProposerRegister(void);
void ParseReplicationFeedbackMessage(StringInfo reply_message,
ReplicationFeedback * rf);
extern void StartProposerReplication(StartReplicationCmd *cmd);
extern Size WalproposerShmemSize(void);
extern bool WalproposerShmemInit(void);
extern void replication_feedback_set(ReplicationFeedback *rf);
extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
Size WalproposerShmemSize(void);
bool WalproposerShmemInit(void);
void replication_feedback_set(ReplicationFeedback * rf);
void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
/* libpqwalproposer hooks & helper type */

View File

@@ -1,7 +1,7 @@
//! Client authentication mechanisms.
pub mod backend;
pub use backend::{BackendType, ConsoleReqExtra};
pub use backend::{BackendType, ConsoleReqExtra, DatabaseInfo};
mod credentials;
pub use credentials::ClientCredentials;

View File

@@ -12,6 +12,7 @@ use crate::{
waiters::{self, Waiter, Waiters},
};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn};
@@ -35,6 +36,45 @@ pub fn notify(psql_session_id: &str, msg: mgmt::ComputeReady) -> Result<(), wait
CPLANE_WAITERS.notify(psql_session_id, msg)
}
/// Compute node connection params provided by the cloud.
/// Note how it implements serde traits, since we receive it over the wire.
#[derive(Serialize, Deserialize, Default)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
pub password: Option<String>,
}
// Manually implement debug to omit personal and sensitive info.
impl std::fmt::Debug for DatabaseInfo {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("DatabaseInfo")
.field("host", &self.host)
.field("port", &self.port)
.finish_non_exhaustive()
}
}
impl From<DatabaseInfo> for tokio_postgres::Config {
fn from(db_info: DatabaseInfo) -> Self {
let mut config = tokio_postgres::Config::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
config
}
}
/// Extra query params we'd like to pass to the console.
pub struct ConsoleReqExtra<'a> {
/// A unique identifier for a connection.
@@ -118,107 +158,54 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
}
}
/// A product of successful authentication.
pub struct AuthSuccess<T> {
/// Did we send [`pq_proto::BeMessage::AuthenticationOk`] to client?
pub reported_auth_ok: bool,
/// Something to be considered a positive result.
pub value: T,
}
impl<T> AuthSuccess<T> {
/// Very similar to [`std::option::Option::map`].
/// Maps [`AuthSuccess<T>`] to [`AuthSuccess<R>`] by applying
/// a function to a contained value.
pub fn map<R>(self, f: impl FnOnce(T) -> R) -> AuthSuccess<R> {
AuthSuccess {
reported_auth_ok: self.reported_auth_ok,
value: f(self.value),
}
}
}
/// Info for establishing a connection to a compute node.
/// This is what we get after auth succeeded, but not before!
pub struct NodeInfo {
/// Project from [`auth::ClientCredentials`].
pub project: String,
/// Compute node connection params.
pub config: compute::ConnCfg,
}
impl BackendType<'_, ClientCredentials<'_>> {
/// Do something special if user didn't provide the `project` parameter.
async fn try_password_hack(
&mut self,
extra: &ConsoleReqExtra<'_>,
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<Option<AuthSuccess<NodeInfo>>> {
use BackendType::*;
// If there's no project so far, that entails that client doesn't
// support SNI or other means of passing the project name.
// We now expect to see a very specific payload in the place of password.
let fetch_magic_payload = async {
warn!("project name not specified, resorting to the password hack auth flow");
let payload = AuthFlow::new(client)
.begin(auth::PasswordHack)
.await?
.authenticate()
.await?;
info!(project = &payload.project, "received missing parameter");
auth::Result::Ok(payload)
};
// TODO: find a proper way to merge those very similar blocks.
let (mut config, payload) = match self {
Console(endpoint, creds) if creds.project.is_none() => {
let payload = fetch_magic_payload.await?;
let mut creds = creds.as_ref();
creds.project = Some(payload.project.as_str().into());
let config = console::Api::new(endpoint, extra, &creds)
.wake_compute()
.await?;
(config, payload)
}
Postgres(endpoint, creds) if creds.project.is_none() => {
let payload = fetch_magic_payload.await?;
let mut creds = creds.as_ref();
creds.project = Some(payload.project.as_str().into());
let config = postgres::Api::new(endpoint, &creds).wake_compute().await?;
(config, payload)
}
_ => return Ok(None),
};
config.password(payload.password);
Ok(Some(AuthSuccess {
reported_auth_ok: false,
value: NodeInfo {
project: payload.project,
config,
},
}))
}
/// Authenticate the client via the requested backend, possibly using credentials.
pub async fn authenticate(
mut self,
extra: &ConsoleReqExtra<'_>,
client: &mut stream::PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<AuthSuccess<NodeInfo>> {
) -> super::Result<compute::NodeInfo> {
use BackendType::*;
// Handle cases when `project` is missing in `creds`.
// TODO: type safety: return `creds` with irrefutable `project`.
if let Some(res) = self.try_password_hack(extra, client).await? {
info!("user successfully authenticated (using the password hack)");
return Ok(res);
if let Console(_, creds) | Postgres(_, creds) = &mut self {
// If there's no project so far, that entails that client doesn't
// support SNI or other means of passing the project name.
// We now expect to see a very specific payload in the place of password.
if creds.project().is_none() {
warn!("project name not specified, resorting to the password hack auth flow");
let payload = AuthFlow::new(client)
.begin(auth::PasswordHack)
.await?
.authenticate()
.await?;
// Finally we may finish the initialization of `creds`.
// TODO: add missing type safety to ClientCredentials.
info!(project = &payload.project, "received missing parameter");
creds.project = Some(payload.project.into());
let mut config = match &self {
Console(endpoint, creds) => {
console::Api::new(endpoint, extra, creds)
.wake_compute()
.await?
}
Postgres(endpoint, creds) => {
postgres::Api::new(endpoint, creds).wake_compute().await?
}
_ => unreachable!("see the patterns above"),
};
// We should use a password from payload as well.
config.password(payload.password);
info!("user successfully authenticated (using the password hack)");
return Ok(compute::NodeInfo {
reported_auth_ok: false,
config,
});
}
}
let res = match self {
@@ -228,34 +215,22 @@ impl BackendType<'_, ClientCredentials<'_>> {
project = creds.project(),
"performing authentication using the console"
);
assert!(creds.project.is_some());
console::Api::new(&endpoint, extra, &creds)
.handle_user(client)
.await?
.map(|config| NodeInfo {
project: creds.project.unwrap().into_owned(),
config,
})
.await
}
Postgres(endpoint, creds) => {
info!("performing mock authentication using a local postgres instance");
assert!(creds.project.is_some());
postgres::Api::new(&endpoint, &creds)
.handle_user(client)
.await?
.map(|config| NodeInfo {
project: creds.project.unwrap().into_owned(),
config,
})
.await
}
// NOTE: this auth backend doesn't use client credentials.
Link(url) => {
info!("performing link authentication");
link::handle_user(&url, client).await?
link::handle_user(&url, client).await
}
};
}?;
info!("user successfully authenticated");
Ok(res)

View File

@@ -1,9 +1,9 @@
//! Cloud API V2.
use super::{AuthSuccess, ConsoleReqExtra};
use super::ConsoleReqExtra;
use crate::{
auth::{self, AuthFlow, ClientCredentials},
compute,
compute::{self, ComputeConnCfg},
error::{io_error, UserFacingError},
http, scram,
stream::PqStream,
@@ -128,7 +128,7 @@ impl<'a> Api<'a> {
pub(super) async fn handle_user(
self,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
) -> auth::Result<compute::NodeInfo> {
handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
}
@@ -164,7 +164,7 @@ impl<'a> Api<'a> {
}
/// Wake up the compute node and return the corresponding connection info.
pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
let request_id = uuid::Uuid::new_v4().to_string();
let req = self
.endpoint
@@ -195,7 +195,7 @@ impl<'a> Api<'a> {
Some(x) => x,
};
let mut config = compute::ConnCfg::new();
let mut config = ComputeConnCfg::new();
config
.host(host)
.port(port)
@@ -213,10 +213,10 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
endpoint: &'a Endpoint,
get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
) -> auth::Result<AuthSuccess<compute::ConnCfg>>
) -> auth::Result<compute::NodeInfo>
where
GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
WakeCompute: Future<Output = Result<compute::ConnCfg, WakeComputeError>>,
WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
{
info!("fetching user's authentication info");
let auth_info = get_auth_info(endpoint).await?;
@@ -243,9 +243,9 @@ where
config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
}
Ok(AuthSuccess {
Ok(compute::NodeInfo {
reported_auth_ok: false,
value: config,
config,
})
}

View File

@@ -1,4 +1,3 @@
use super::{AuthSuccess, NodeInfo};
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
use pq_proto::{BeMessage as Be, BeParameterStatusMessage};
use thiserror::Error;
@@ -50,7 +49,7 @@ pub fn new_psql_session_id() -> String {
pub async fn handle_user(
link_uri: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<AuthSuccess<NodeInfo>> {
) -> auth::Result<compute::NodeInfo> {
let psql_session_id = new_psql_session_id();
let span = info_span!("link", psql_session_id = &psql_session_id);
let greeting = hello_message(link_uri, &psql_session_id);
@@ -72,22 +71,8 @@ pub async fn handle_user(
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
let mut config = compute::ConnCfg::new();
config
.host(&db_info.host)
.port(db_info.port)
.dbname(&db_info.dbname)
.user(&db_info.user);
if let Some(password) = db_info.password {
config.password(password);
}
Ok(AuthSuccess {
Ok(compute::NodeInfo {
reported_auth_ok: true,
value: NodeInfo {
project: db_info.project,
config,
},
config: db_info.into(),
})
}

View File

@@ -1,12 +1,12 @@
//! Local mock of Cloud API V2.
use super::{
console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
AuthSuccess,
};
use crate::{
auth::{self, ClientCredentials},
compute,
auth::{
self,
backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
ClientCredentials,
},
compute::{self, ComputeConnCfg},
error::io_error,
scram,
stream::PqStream,
@@ -37,7 +37,7 @@ impl<'a> Api<'a> {
pub(super) async fn handle_user(
self,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
) -> auth::Result<compute::NodeInfo> {
// We reuse user handling logic from a production module.
console::handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
}
@@ -82,8 +82,8 @@ impl<'a> Api<'a> {
}
/// We don't need to wake anything locally, so we just return the connection info.
pub(super) async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
let mut config = compute::ConnCfg::new();
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
let mut config = ComputeConnCfg::new();
config
.host(self.endpoint.host_str().unwrap_or("localhost"))
.port(self.endpoint.port().unwrap_or(5432))

View File

@@ -36,23 +36,11 @@ pub struct ClientCredentials<'a> {
}
impl ClientCredentials<'_> {
#[inline]
pub fn project(&self) -> Option<&str> {
self.project.as_deref()
}
}
impl<'a> ClientCredentials<'a> {
#[inline]
pub fn as_ref(&'a self) -> ClientCredentials<'a> {
Self {
user: self.user,
dbname: self.dbname,
project: self.project().map(Cow::Borrowed),
}
}
}
impl<'a> ClientCredentials<'a> {
pub fn parse(
params: &'a StartupMessageParams,

View File

@@ -40,36 +40,17 @@ impl UserFacingError for ConnectionError {
/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;
/// A config for establishing a connection to compute node.
/// Eventually, `tokio_postgres` will be replaced with something better.
/// Newtype allows us to implement methods on top of it.
#[repr(transparent)]
pub struct ConnCfg(pub tokio_postgres::Config);
pub type ComputeConnCfg = tokio_postgres::Config;
impl ConnCfg {
/// Construct a new connection config.
pub fn new() -> Self {
Self(tokio_postgres::Config::new())
}
/// Various compute node info for establishing connection etc.
pub struct NodeInfo {
/// Did we send [`pq_proto::BeMessage::AuthenticationOk`]?
pub reported_auth_ok: bool,
/// Compute node connection params.
pub config: tokio_postgres::Config,
}
impl std::ops::Deref for ConnCfg {
type Target = tokio_postgres::Config;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// For now, let's make it easier to setup the config.
impl std::ops::DerefMut for ConnCfg {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl ConnCfg {
/// Establish a raw TCP connection to the compute node.
impl NodeInfo {
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
use tokio_postgres::config::Host;
@@ -87,8 +68,8 @@ impl ConnCfg {
// because it has no means for extracting the underlying socket which we
// require for our business.
let mut connection_error = None;
let ports = self.0.get_ports();
let hosts = self.0.get_hosts();
let ports = self.config.get_ports();
let hosts = self.config.get_hosts();
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
if ports.len() > 1 && ports.len() != hosts.len() {
return Err(io::Error::new(
@@ -96,7 +77,7 @@ impl ConnCfg {
format!(
"couldn't connect: bad compute config, \
ports and hosts entries' count does not match: {:?}",
self.0
self.config
),
));
}
@@ -122,7 +103,7 @@ impl ConnCfg {
Err(connection_error.unwrap_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
format!("couldn't connect: bad compute config: {:?}", self.0),
format!("couldn't connect: bad compute config: {:?}", self.config),
)
}))
}
@@ -135,7 +116,7 @@ pub struct PostgresConnection {
pub version: String,
}
impl ConnCfg {
impl NodeInfo {
/// Connect to a corresponding compute node.
pub async fn connect(
mut self,
@@ -149,21 +130,21 @@ impl ConnCfg {
.intersperse(" ") // TODO: use impl from std once it's stabilized
.collect();
self.0.options(&options);
self.config.options(&options);
}
if let Some(app_name) = params.get("application_name") {
self.0.application_name(app_name);
self.config.application_name(app_name);
}
if let Some(replication) = params.get("replication") {
use tokio_postgres::config::ReplicationMode;
match replication {
"true" | "on" | "yes" | "1" => {
self.0.replication_mode(ReplicationMode::Physical);
self.config.replication_mode(ReplicationMode::Physical);
}
"database" => {
self.0.replication_mode(ReplicationMode::Logical);
self.config.replication_mode(ReplicationMode::Logical);
}
_other => {}
}
@@ -179,7 +160,7 @@ impl ConnCfg {
.map_err(|_| ConnectionError::FailedToConnectToCompute)?;
// TODO: establish a secure connection to the DB
let (client, conn) = self.0.connect_raw(&mut stream, NoTls).await?;
let (client, conn) = self.config.connect_raw(&mut stream, NoTls).await?;
let version = conn
.parameter("server_version")
.ok_or(ConnectionError::FailedToFetchPgVersion)?

View File

@@ -6,11 +6,16 @@ use std::{
net::{TcpListener, TcpStream},
thread,
};
use tracing::{error, info, info_span};
use tracing::{error, info};
use utils::postgres_backend::{self, AuthType, PostgresBackend};
/// Console management API listener thread.
/// It spawns console response handlers needed for the link auth.
/// TODO: move all of that to auth-backend/link.rs when we ditch legacy-console backend
///
/// Main proxy listener loop.
///
/// Listens for connections, and launches a new handler thread for each.
///
pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
scopeguard::defer! {
info!("mgmt has shut down");
@@ -19,7 +24,6 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
listener
.set_nonblocking(false)
.context("failed to set listener to blocking")?;
loop {
let (socket, peer_addr) = listener.accept().context("failed to accept a new client")?;
info!("accepted connection from {peer_addr}");
@@ -27,19 +31,9 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
.set_nodelay(true)
.context("failed to set client socket option")?;
// TODO: replace with async tasks.
thread::spawn(move || {
let tid = std::thread::current().id();
let span = info_span!("mgmt", thread = format_args!("{tid:?}"));
let _enter = span.enter();
info!("started a new console management API thread");
scopeguard::defer! {
info!("console management API thread is about to finish");
}
if let Err(e) = handle_connection(socket) {
error!("thread failed with an error: {e}");
if let Err(err) = handle_connection(socket) {
error!("{err}");
}
});
}
@@ -50,21 +44,44 @@ fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
pgbackend.run(&mut MgmtHandler)
}
/// Known as `kickResponse` in the console.
#[derive(Debug, Deserialize)]
struct MgmtHandler;
/// Serialized examples:
// {
// "session_id": "71d6d03e6d93d99a",
// "result": {
// "Success": {
// "host": "127.0.0.1",
// "port": 5432,
// "dbname": "stas",
// "user": "stas",
// "password": "mypass"
// }
// }
// }
// {
// "session_id": "71d6d03e6d93d99a",
// "result": {
// "Failure": "oops"
// }
// }
//
// // to test manually by sending a query to mgmt interface:
// psql -h 127.0.0.1 -p 9999 -c '{"session_id":"4f10dde522e14739","result":{"Success":{"host":"127.0.0.1","port":5432,"dbname":"stas","user":"stas","password":"stas"}}}'
#[derive(Deserialize)]
struct PsqlSessionResponse {
session_id: String,
result: PsqlSessionResult,
}
#[derive(Debug, Deserialize)]
#[derive(Deserialize)]
enum PsqlSessionResult {
Success(DatabaseInfo),
Success(auth::DatabaseInfo),
Failure(String),
}
/// A message received by `mgmt` when a compute node is ready.
pub type ComputeReady = Result<DatabaseInfo, String>;
pub type ComputeReady = Result<auth::DatabaseInfo, String>;
impl PsqlSessionResult {
fn into_compute_ready(self) -> ComputeReady {
@@ -75,51 +92,25 @@ impl PsqlSessionResult {
}
}
/// Compute node connection params provided by the console.
/// This struct and its parents are mgmt API implementation
/// detail and thus should remain in this module.
// TODO: restore deserialization tests from git history.
#[derive(Deserialize)]
pub struct DatabaseInfo {
pub host: String,
pub port: u16,
pub dbname: String,
pub user: String,
/// Console always provides a password, but it might
/// be inconvenient for debug with local PG instance.
pub password: Option<String>,
pub project: String,
}
// Manually implement debug to omit sensitive info.
impl std::fmt::Debug for DatabaseInfo {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fmt.debug_struct("DatabaseInfo")
.field("host", &self.host)
.field("port", &self.port)
.field("dbname", &self.dbname)
.field("user", &self.user)
.finish_non_exhaustive()
}
}
// TODO: replace with an http-based protocol.
struct MgmtHandler;
impl postgres_backend::Handler for MgmtHandler {
fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
try_process_query(pgb, query).map_err(|e| {
error!("failed to process response: {e:?}");
e
})
fn process_query(
&mut self,
pgb: &mut PostgresBackend,
query_string: &str,
) -> anyhow::Result<()> {
let res = try_process_query(pgb, query_string);
// intercept and log error message
if res.is_err() {
error!("mgmt query failed: {res:?}");
}
res
}
}
fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
let resp: PsqlSessionResponse = serde_json::from_str(query)?;
fn try_process_query(pgb: &mut PostgresBackend, query_string: &str) -> anyhow::Result<()> {
info!("got mgmt query [redacted]"); // Content contains password, don't print it
let span = info_span!("event", session_id = resp.session_id);
let _enter = span.enter();
info!("got response: {:?}", resp.result);
let resp: PsqlSessionResponse = serde_json::from_str(query_string)?;
match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
Ok(()) => {
@@ -128,50 +119,9 @@ fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<(
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
}
Err(e) => {
error!("failed to deliver response to per-client task");
pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn parse_db_info() -> anyhow::Result<()> {
// with password
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"password": "password",
"project": "hello_world",
}))?;
// without password
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"project": "hello_world",
}))?;
// new field (forward compatibility)
let _: DatabaseInfo = serde_json::from_value(json!({
"host": "localhost",
"port": 5432,
"dbname": "postgres",
"user": "john_doe",
"project": "hello_world",
"N.E.W": "forward compatibility check",
}))?;
Ok(())
}
}

View File

@@ -4,7 +4,7 @@ use crate::config::{ProxyConfig, TlsConfig};
use crate::stream::{MeasuredStream, PqStream, Stream};
use anyhow::{bail, Context};
use futures::TryFutureExt;
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use metrics::{register_int_counter, IntCounter};
use once_cell::sync::Lazy;
use pq_proto::{BeMessage as Be, *};
use std::sync::Arc;
@@ -30,16 +30,10 @@ static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
.unwrap()
});
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_io_bytes_per_client",
"Number of bytes sent/received between client and backend.",
&[
// Received (rx) / sent (tx).
"direction",
// Proxy can keep calling it `project` internally.
"endpoint_id"
]
static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"proxy_io_bytes_total",
"Number of bytes sent/received between any client and backend."
)
.unwrap()
});
@@ -236,17 +230,16 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
application_name: params.get("application_name"),
};
let auth_result = async {
// `&mut stream` doesn't let us merge those 2 lines.
let res = creds.authenticate(&extra, &mut stream).await;
async { res }.or_else(|e| stream.throw_error(e)).await
}
.instrument(info_span!("auth"))
.await?;
// Authenticate and connect to a compute node.
let auth = creds
.authenticate(&extra, &mut stream)
.instrument(info_span!("auth"))
.await;
let node = async { auth }.or_else(|e| stream.throw_error(e)).await?;
let reported_auth_ok = node.reported_auth_ok;
let node = auth_result.value;
let (db, cancel_closure) = node
.config
.connect(params)
.or_else(|e| stream.throw_error(e))
.await?;
@@ -254,9 +247,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
let cancel_key_data = session.enable_query_cancellation(cancel_closure);
// Report authentication success if we haven't done this already.
// Note that we do this only (for the most part) after we've connected
// to a compute (see above) which performs its own authentication.
if !auth_result.reported_auth_ok {
if !reported_auth_ok {
stream
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&BeParameterStatusMessage::encoding())?;
@@ -270,23 +261,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {
.write_message(&BeMessage::ReadyForQuery)
.await?;
// TODO: add more identifiers.
let metric_id = node.project;
let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx", &metric_id]);
let mut client = MeasuredStream::new(stream.into_inner(), |cnt| {
// Number of bytes we sent to the client (outbound).
m_sent.inc_by(cnt as u64);
});
let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx", &metric_id]);
let mut db = MeasuredStream::new(db.stream, |cnt| {
// Number of bytes the client sent to the compute node (inbound).
m_recv.inc_by(cnt as u64);
});
/// This function will be called for writes to either direction.
fn inc_proxied(cnt: usize) {
// Consider inventing something more sophisticated
// if this ever becomes a bottleneck (cacheline bouncing).
NUM_BYTES_PROXIED_COUNTER.inc_by(cnt as u64);
}
// Starting from here we only proxy the client's traffic.
info!("performing the proxy pass...");
let mut db = MeasuredStream::new(db.stream, inc_proxied);
let mut client = MeasuredStream::new(stream.into_inner(), inc_proxied);
let _ = tokio::io::copy_bidirectional(&mut client, &mut db).await?;
Ok(())

View File

@@ -440,35 +440,68 @@ def pytest_terminal_summary(
terminalreporter.section("Benchmark results", "-")
result = []
for test_report in terminalreporter.stats.get("passed", []):
result_entry = []
# TODO group by test report
reports = {
report.head_line: report
for report in terminalreporter.stats.get("passed", [])
}
for _, recorded_property in test_report.user_properties:
terminalreporter.write(
"{}.{}: ".format(test_report.head_line, recorded_property["name"])
)
unit = recorded_property["unit"]
value = recorded_property["value"]
if unit == "MB":
terminalreporter.write("{0:,.0f}".format(value), green=True)
elif unit in ("s", "ms") and isinstance(value, float):
terminalreporter.write("{0:,.3f}".format(value), green=True)
elif isinstance(value, float):
terminalreporter.write("{0:,.4f}".format(value), green=True)
else:
terminalreporter.write(str(value), green=True)
terminalreporter.line(" {}".format(unit))
results = []
for name, report in reports.items():
# terminalreporter.write(f"{name}", green=True)
# terminalreporter.line("")
if "[neon" in name:
vanilla_report = reports.get(name.replace("[neon", "[vanilla"))
if vanilla_report:
for key, prop in report.user_properties:
if prop["unit"] == "s":
neon_value = prop["value"]
vanilla_value = dict(vanilla_report.user_properties)[key]["value"]
try:
ratio = float(neon_value) / vanilla_value
except ZeroDivisionError:
ratio = 99999
result_entry.append(recorded_property)
results.append((ratio, name.replace("[neon", "[neon/vanilla"), prop["name"]))
result.append(
{
"suit": test_report.nodeid,
"total_duration": test_report.duration,
"data": result_entry,
}
)
results.sort(reverse=True)
for ratio, test, prop in results:
terminalreporter.write("{}.{}: ".format(test, prop))
terminalreporter.write("{0:,.3f}".format(ratio), green=True)
terminalreporter.line("")
# result = []
# for test_report in terminalreporter.stats.get("passed", []):
# result_entry = []
# durations = [
# prop
# for _, prop in test_report.user_properties
# if prop["unit"] == "s"
# ]
# for _, recorded_property in test_report.user_properties:
# terminalreporter.write("{}.{}: ".format(test_report.head_line,
# recorded_property["name"]))
# unit = recorded_property["unit"]
# value = recorded_property["value"]
# if unit == "MB":
# terminalreporter.write("{0:,.0f}".format(value), green=True)
# elif unit in ("s", "ms") and isinstance(value, float):
# terminalreporter.write("{0:,.3f}".format(value), green=True)
# elif isinstance(value, float):
# terminalreporter.write("{0:,.4f}".format(value), green=True)
# else:
# terminalreporter.write(str(value), green=True)
# terminalreporter.line(" {}".format(unit))
# result_entry.append(recorded_property)
# result.append({
# "suit": test_report.nodeid,
# "total_duration": test_report.duration,
# "data": result_entry,
# })
out_dir = config.getoption("out_dir")
if out_dir is None:

View File

@@ -784,8 +784,6 @@ class NeonEnvBuilder:
self.cleanup_remote_storage()
self.env.pageserver.assert_no_errors()
class NeonEnv:
"""
@@ -1568,7 +1566,6 @@ class NeonCli(AbstractNeonCli):
def pageserver_start(
self,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "subprocess.CompletedProcess[str]":
start_args = ["pageserver", "start", *overrides]
append_pageserver_param_overrides(
@@ -1578,11 +1575,11 @@ class NeonCli(AbstractNeonCli):
pageserver_config_override=self.env.pageserver.config_override,
)
s3_env_vars = None
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
return self.raw_cli(start_args, extra_env_vars=s3_env_vars)
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop"]
@@ -1726,50 +1723,7 @@ class NeonPageserver(PgProtocol):
self.config_override = config_override
self.version = env.get_pageserver_version()
# After a test finishes, we will scrape the log to see if there are any
# unexpected error messages. If your test expects an error, add it to
# 'allowed_errors' in the test with something like:
#
# env.pageserver.allowed_errors.append(".*could not open garage door.*")
#
# The entries in the list are regular experessions.
self.allowed_errors = [
# All tests print these, when starting up or shutting down
".*wal receiver task finished with an error: walreceiver connection handling failure.*",
".*Shutdown task error: walreceiver connection handling failure.*",
".*Etcd client error: grpc request error: status: Unavailable.*",
".*query handler for .* failed: Connection reset by peer.*",
".*serving compute connection task.*exited with error: Broken pipe.*",
".*Connection aborted: error communicating with the server: Broken pipe.*",
".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
".*Connection aborted: error communicating with the server: Connection reset by peer.*",
".*kill_and_wait_impl.*: wait successful.*",
".*end streaming to Some.*",
# safekeeper connection can fail with this, in the window between timeline creation
# and streaming start
".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
# Tests related to authentication and authorization print these
".*Error processing HTTP request: Forbidden",
# intentional failpoints
".*failpoint ",
# FIXME: there is a race condition between GC and detach, see
# https://github.com/neondatabase/neon/issues/2442
".*could not remove ephemeral file.*No such file or directory.*",
# FIXME: These need investigation
".*gc_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
".*compaction_loop.*Failed to get a tenant .* Tenant .* not found in the local state.*",
".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
".*Removing intermediate uninit mark file.*",
# FIXME: known race condition in TaskHandle: https://github.com/neondatabase/neon/issues/2885
".*sender is dropped while join handle is still alive.*",
]
def start(
self,
overrides: Tuple[str, ...] = (),
extra_env_vars: Optional[Dict[str, str]] = None,
) -> "NeonPageserver":
def start(self, overrides: Tuple[str, ...] = ()) -> "NeonPageserver":
"""
Start the page server.
`overrides` allows to add some config to this pageserver start.
@@ -1777,7 +1731,7 @@ class NeonPageserver(PgProtocol):
"""
assert self.running is False
self.env.neon_cli.pageserver_start(overrides=overrides, extra_env_vars=extra_env_vars)
self.env.neon_cli.pageserver_start(overrides=overrides)
self.running = True
return self
@@ -1817,26 +1771,6 @@ class NeonPageserver(PgProtocol):
is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
)
def assert_no_errors(self):
logfile = open(os.path.join(self.env.repo_dir, "pageserver.log"), "r")
error_or_warn = re.compile("ERROR|WARN")
errors = []
while True:
line = logfile.readline()
if not line:
break
if error_or_warn.search(line):
# It's an ERROR or WARN. Is it in the allow-list?
for a in self.allowed_errors:
if re.match(a, line):
break
else:
errors.append(line)
assert not errors
def append_pageserver_param_overrides(
params_to_update: List[str],
@@ -2080,9 +2014,9 @@ class NeonProxy(PgProtocol):
self,
proxy_port: int,
http_port: int,
mgmt_port: int,
neon_binpath: Path,
auth_endpoint=None,
mgmt_port=None,
):
super().__init__(dsn=auth_endpoint, port=proxy_port)
self.host = "127.0.0.1"
@@ -2096,8 +2030,7 @@ class NeonProxy(PgProtocol):
def start(self):
"""
Starts a proxy with option '--auth-backend postgres' and a postgres instance
already provided though '--auth-endpoint <postgress-instance>'."
Starts a proxy with option '--auth-backend postgres' and a postgres instance already provided though '--auth-endpoint <postgress-instance>'."
"""
assert self._popen is None
assert self.auth_endpoint is not None
@@ -2107,7 +2040,6 @@ class NeonProxy(PgProtocol):
str(self.neon_binpath / "proxy"),
*["--http", f"{self.host}:{self.http_port}"],
*["--proxy", f"{self.host}:{self.proxy_port}"],
*["--mgmt", f"{self.host}:{self.mgmt_port}"],
*["--auth-backend", "postgres"],
*["--auth-endpoint", self.auth_endpoint],
]
@@ -2184,13 +2116,11 @@ def static_proxy(
auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}"
proxy_port = port_distributor.get_port()
mgmt_port = port_distributor.get_port()
http_port = port_distributor.get_port()
with NeonProxy(
proxy_port=proxy_port,
http_port=http_port,
mgmt_port=mgmt_port,
neon_binpath=neon_binpath,
auth_endpoint=auth_endpoint,
) as proxy:
@@ -2731,6 +2661,8 @@ def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[P
yield test_dir
shutil.rmtree(test_dir)
allure_attach_from_dir(test_dir)

View File

@@ -6,7 +6,6 @@ import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import PgCompare
from fixtures.log_helper import log
from pytest_lazyfixture import lazy_fixture # type: ignore
@pytest.mark.parametrize(
@@ -21,24 +20,11 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
pytest.param(10000000, 1, 4),
],
)
@pytest.mark.parametrize(
"env, scale",
[
# Run on all envs. Use 50x larger table on remote cluster to make sure
# it doesn't fit in shared buffers, which are larger on remote than local.
pytest.param(lazy_fixture("neon_compare"), 1, id="neon"),
pytest.param(lazy_fixture("vanilla_compare"), 1, id="vanilla"),
pytest.param(
lazy_fixture("remote_compare"), 50, id="remote", marks=pytest.mark.remote_cluster
),
],
)
def test_seqscans(env: PgCompare, scale: int, rows: int, iters: int, workers: int):
rows = scale * rows
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
env = neon_with_baseline
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("drop table if exists t;")
cur.execute("create table t (i integer);")
cur.execute(f"insert into t values (generate_series(1,{rows}));")

View File

@@ -116,13 +116,6 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()
env.pageserver.allowed_errors.extend(
[
".*invalid branch start lsn: less than latest GC cutoff.*",
".*invalid branch start lsn: less than planned GC cutoff.*",
]
)
# Disable background GC but set the `pitr_interval` to be small, so GC can delete something
tenant, _ = env.neon_cli.create_tenant(
conf={

View File

@@ -13,9 +13,6 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch("test_branch_behind")
pgmain = env.postgres.create_start("test_branch_behind")

View File

@@ -11,17 +11,10 @@ from fixtures.types import TenantId, TimelineId
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# One safekeeper is enough for this test.
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[
".*No timelines to attach received.*",
".*Failed to process timeline dir contents.*",
".*Failed to load delta layer.*",
".*Timeline .* was not found.*",
]
)
tenant_timelines: List[Tuple[TenantId, TimelineId, Postgres]] = []
for n in range(4):
@@ -79,24 +72,23 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# First timeline would not get loaded into pageserver due to corrupt metadata file
with pytest.raises(Exception, match=f"Timeline {tenant1}/{timeline1} was not found") as err:
pg1.start()
log.info(
f"As expected, compute startup failed eagerly for timeline with corrupt metadata: {err}"
)
log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
# Second timeline has no ancestors, only the metadata file and no layer files
# We don't have the remote storage enabled, which means timeline is in an incorrect state,
# it's not loaded at all
with pytest.raises(Exception, match=f"Timeline {tenant2}/{timeline2} was not found") as err:
pg2.start()
log.info(f"As expected, compute startup failed for timeline with missing layers: {err}")
log.info(f"compute startup failed eagerly for timeline with corrupt metadata: {err}")
# Third timeline will also fail during basebackup, because the layer file is corrupt.
# (We don't check layer file contents on startup, when loading the timeline)
with pytest.raises(Exception, match="Failed to load delta layer") as err:
pg3.start()
log.info(
f"As expected, compute startup failed for timeline {tenant3}/{timeline3} with corrupt layers: {err}"
)
# Yet other timelines will fail when their layers will be queried during basebackup: we don't check layer file contents on startup, when loading the timeline
for n in range(3, 4):
(bad_tenant, bad_timeline, pg) = tenant_timelines[n]
with pytest.raises(Exception, match="extracting base backup failed") as err:
pg.start()
log.info(
f"compute startup failed lazily for timeline {bad_tenant}/{bad_timeline} with corrupt layers, during basebackup preparation: {err}"
)
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
@@ -119,13 +111,6 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
env.pageserver.allowed_errors.extend(
[
".*Failed to process timeline dir contents.*Timeline has no ancestor and no layer files.*",
".*Timeline got dropped without initializing, cleaning its files.*",
]
)
tenant_id, _ = env.neon_cli.create_tenant()
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"

View File

@@ -2,7 +2,7 @@ import os
import shutil
import subprocess
from pathlib import Path
from typing import Any, Optional
from typing import Any
import pytest
import toml # TODO: replace with tomllib for Python >= 3.11
@@ -50,12 +50,6 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
env = neon_env_builder.init_start()
pg = env.postgres.create_start("main")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
pg_bin.run(["pgbench", "--initialize", "--scale=10", pg.connstr()])
pg_bin.run(["pgbench", "--time=60", "--progress=2", pg.connstr()])
pg_bin.run(["pg_dumpall", f"--dbname={pg.connstr()}", f"--file={test_output_dir / 'dump.sql'}"])
@@ -160,7 +154,6 @@ def test_forward_compatibility(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
pg_distrib_dir=compatibility_postgres_distrib_dir,
)
breaking_changes_allowed = (
@@ -190,12 +183,7 @@ def test_forward_compatibility(
), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
def prepare_snapshot(
from_dir: Path,
to_dir: Path,
port_distributor: PortDistributor,
pg_distrib_dir: Optional[Path] = None,
):
def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistributor):
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
@@ -220,7 +208,7 @@ def prepare_snapshot(
# Update paths and ports in config files
pageserver_toml = repo_dir / "pageserver.toml"
pageserver_config = toml.load(pageserver_toml)
pageserver_config["remote_storage"]["local_path"] = str(repo_dir / "local_fs_remote_storage")
pageserver_config["remote_storage"]["local_path"] = repo_dir / "local_fs_remote_storage"
pageserver_config["listen_http_addr"] = port_distributor.replace_with_new_port(
pageserver_config["listen_http_addr"]
)
@@ -231,9 +219,6 @@ def prepare_snapshot(
port_distributor.replace_with_new_port(ep) for ep in pageserver_config["broker_endpoints"]
]
if pg_distrib_dir:
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
with pageserver_toml.open("w") as f:
toml.dump(pageserver_config, f)
@@ -253,10 +238,7 @@ def prepare_snapshot(
sk["http_port"] = port_distributor.replace_with_new_port(sk["http_port"])
sk["pg_port"] = port_distributor.replace_with_new_port(sk["pg_port"])
if pg_distrib_dir:
snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
with snapshot_config_toml.open("w") as f:
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
# Ensure that snapshot doesn't contain references to the original path

View File

@@ -179,16 +179,7 @@ def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
# run compute_ctl and wait for 10s
try:
ctl.raw_cli(
[
"--connstr",
"postgres://invalid/",
"--pgdata",
pgdata,
"--spec",
spec,
"--pgbin",
pg_bin_path,
],
["--connstr", ps_connstr, "--pgdata", pgdata, "--spec", spec, "--pgbin", pg_bin_path],
timeout=10,
)
except TimeoutExpired as exc:

View File

@@ -1,4 +1,3 @@
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
@@ -8,14 +7,8 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
# normally restarts after it. Also, there should be GC ERRORs in the log,
# but the fixture checks the log for any unexpected ERRORs after every
# test anyway, so it doesn't need any special attention here.
@pytest.mark.timeout(600)
def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
env = neon_env_builder.init_start()
# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future image layer.*")
env.pageserver.allowed_errors.append(".*found future delta layer.*")
pageserver_http = env.pageserver.http_client()
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
@@ -37,9 +30,10 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
for _ in range(5):
with pytest.raises(Exception):
pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
env.pageserver.stop()
env.pageserver.start()
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
for i in range(5):
try:
pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr])
except Exception:
env.pageserver.stop()
env.pageserver.start()
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))

View File

@@ -76,26 +76,6 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
env = neon_env_builder.init_start()
env.pageserver.http_client().tenant_create(tenant)
env.pageserver.allowed_errors.extend(
[
".*error importing base backup .*",
".*Timeline got dropped without initializing, cleaning its files.*",
".*Removing intermediate uninit mark file.*",
".*InternalServerError.*timeline not found.*",
".*InternalServerError.*Tenant .* not found.*",
".*InternalServerError.*Timeline .* not found.*",
".*InternalServerError.*Cannot delete timeline which has child timelines.*",
]
)
# FIXME: we should clean up pageserver to not print this
env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
def import_tar(base, wal):
env.neon_cli.raw_cli(
[
@@ -142,11 +122,6 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
pg = env.postgres.create_start("test_import_from_pageserver_small")

View File

@@ -67,10 +67,6 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future image layer.*")
env.pageserver.allowed_errors.append(".*found future delta layer.*")
# Use a tiny checkpoint distance, to create a lot of layers quickly.
# That allows us to stress the compaction and layer flushing logic more.
tenant, _ = env.neon_cli.create_tenant(

View File

@@ -1,4 +1,5 @@
import json
import subprocess
from urllib.parse import urlparse
import psycopg2
@@ -7,11 +8,11 @@ from fixtures.log_helper import log
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
def test_proxy_select_1(static_proxy: NeonProxy):
def test_proxy_select_1(static_proxy):
static_proxy.safe_psql("select 1", options="project=generic-project-name")
def test_password_hack(static_proxy: NeonProxy):
def test_password_hack(static_proxy):
user = "borat"
password = "password"
static_proxy.safe_psql(
@@ -23,75 +24,118 @@ def test_password_hack(static_proxy: NeonProxy):
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
# Must also check that invalid magic won't be accepted.
with pytest.raises(psycopg2.OperationalError):
with pytest.raises(psycopg2.errors.OperationalError):
magic = "broken"
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
def get_session_id(uri_prefix, uri_line):
def get_session_id_from_uri_line(uri_prefix, uri_line):
assert uri_prefix in uri_line
url_parts = urlparse(uri_line)
psql_session_id = url_parts.path[1:]
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars."
link_auth_uri_prefix = uri_line[: -len(url_parts.path)]
# invariant: the prefix must match the uri_prefix.
assert (
link_auth_uri_prefix == uri_prefix
), f"Line='{uri_line}' should contain a http auth link of form '{uri_prefix}/<psql_session_id>'."
# invariant: the entire link_auth_uri should be on its own line, module spaces.
assert " ".join(uri_line.split(" ")) == f"{uri_prefix}/{psql_session_id}"
return psql_session_id
async def find_auth_link(link_auth_uri_prefix, proc):
for _ in range(100):
line = (await proc.stderr.readline()).decode("utf-8").strip()
log.info(f"psql line: {line}")
if link_auth_uri_prefix in line:
log.info(f"SUCCESS, found auth url: {line}")
return line
async def activate_link_auth(local_vanilla_pg, link_proxy, psql_session_id):
def create_and_send_db_info(local_vanilla_pg, psql_session_id, mgmt_port):
pg_user = "proxy"
pg_password = "password"
log.info("creating a new user for link auth test")
local_vanilla_pg.start()
local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")
query = f"create user {pg_user} with login superuser password '{pg_password}'"
local_vanilla_pg.safe_psql(query)
db_info = json.dumps(
{
"session_id": psql_session_id,
"result": {
"Success": {
"host": local_vanilla_pg.default_options["host"],
"port": local_vanilla_pg.default_options["port"],
"dbname": local_vanilla_pg.default_options["dbname"],
"user": pg_user,
"project": "irrelevant",
}
},
}
)
port = local_vanilla_pg.default_options["port"]
host = local_vanilla_pg.default_options["host"]
dbname = local_vanilla_pg.default_options["dbname"]
log.info("sending session activation message")
psql = await PSQL(host=link_proxy.host, port=link_proxy.mgmt_port).run(db_info)
out = (await psql.stdout.read()).decode("utf-8").strip()
assert out == "ok"
db_info_dict = {
"session_id": psql_session_id,
"result": {
"Success": {
"host": host,
"port": port,
"dbname": dbname,
"user": pg_user,
"password": pg_password,
}
},
}
db_info_str = json.dumps(db_info_dict)
cmd_args = [
"psql",
"-h",
"127.0.0.1", # localhost
"-p",
f"{mgmt_port}",
"-c",
db_info_str,
]
log.info(f"Sending to proxy the user and db info: {' '.join(cmd_args)}")
p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE)
out, err = p.communicate()
assert "ok" in str(out)
async def get_uri_line_from_process_welcome_notice(link_auth_uri_prefix, proc):
"""
Returns the line from the welcome notice from proc containing link_auth_uri_prefix.
:param link_auth_uri_prefix: the uri prefix used to indicate the line of interest
:param proc: the process to read the welcome message from.
:return: a line containing the full link authentication uri.
"""
max_num_lines_of_welcome_message = 15
for attempt in range(max_num_lines_of_welcome_message):
raw_line = await proc.stderr.readline()
line = raw_line.decode("utf-8").strip()
if link_auth_uri_prefix in line:
return line
assert False, f"did not find line containing '{link_auth_uri_prefix}'"
@pytest.mark.asyncio
async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
psql = await PSQL(host=link_proxy.host, port=link_proxy.proxy_port).run("select 42")
"""
Test copied and modified from: test_project_psql_link_auth test from cloud/tests_e2e/tests/test_project.py
Step 1. establish connection to the proxy
Step 2. retrieve session_id:
Step 2.1: read welcome message
Step 2.2: parse session_id
Step 3. create a vanilla_pg and send user and db info via command line (using Popen) a psql query via mgmt port to proxy.
Step 4. assert that select 1 has been executed correctly.
"""
psql = PSQL(
host=link_proxy.host,
port=link_proxy.proxy_port,
)
proc = await psql.run("select 42")
uri_prefix = link_proxy.link_auth_uri_prefix
link = await find_auth_link(uri_prefix, psql)
line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc)
psql_session_id = get_session_id(uri_prefix, link)
await activate_link_auth(vanilla_pg, link_proxy, psql_session_id)
psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str)
log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.")
assert psql.stdout is not None
out = (await psql.stdout.read()).decode("utf-8").strip()
create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port)
assert proc.stdout is not None
out = (await proc.stdout.read()).decode("utf-8").strip()
assert out == "42"
# Pass extra options to the server.
def test_proxy_options(static_proxy: NeonProxy):
def test_proxy_options(static_proxy):
with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn:
with conn.cursor() as cur:
cur.execute("SHOW proxytest.option")

View File

@@ -143,8 +143,6 @@ def test_read_validation_neg(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_read_validation_neg", "empty")
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
pg = env.postgres.create_start("test_read_validation_neg")
log.info("postgres is running on 'test_read_validation_neg' branch")

View File

@@ -17,8 +17,6 @@ def test_readonly_node(neon_simple_env: NeonEnv):
pgmain = env.postgres.create_start("test_readonly_node")
log.info("postgres is running on 'test_readonly_node' branch")
env.pageserver.allowed_errors.append(".*basebackup .* failed: invalid basebackup lsn.*")
main_pg_conn = pgmain.connect()
main_cur = main_pg_conn.cursor()

View File

@@ -17,10 +17,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
neon_env_builder.start()
# These warnings are expected, when the pageserver is restarted abruptly
env.pageserver.allowed_errors.append(".*found future delta layer.*")
env.pageserver.allowed_errors.append(".*found future image layer.*")
# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")

View File

@@ -56,17 +56,6 @@ def test_remote_storage_backup_and_restore(
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(".*Tenant download is already in progress.*")
env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
env.pageserver.allowed_errors.append(".*No metadata file found in the timeline directory.*")
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")

View File

@@ -1,4 +1,3 @@
import time
from threading import Thread
import pytest
@@ -12,30 +11,15 @@ def do_gc_target(
):
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
try:
log.info("sending gc http request")
pageserver_http.timeline_gc(tenant_id, timeline_id, 0)
except Exception as e:
log.error("do_gc failed: %s", e)
finally:
log.info("gc http thread returning")
@pytest.mark.skip(
reason="""
Commit 'make test_tenant_detach_smoke fail reproducibly' adds failpoint to make this test fail reproducibly.
Fix in https://github.com/neondatabase/neon/pull/2851 will come as part of
https://github.com/neondatabase/neon/pull/2785 .
"""
)
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found in the local state")
# FIXME: we have a race condition between GC and detach. GC might fail with this
# error. Similar to https://github.com/neondatabase/neon/issues/2671
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
# first check for non existing tenant
tenant_id = TenantId.generate()
with pytest.raises(
@@ -44,9 +28,6 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
):
pageserver_http.tenant_detach(tenant_id)
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*Tenant not found for id.*")
# create new nenant
tenant_id, timeline_id = env.neon_cli.create_tenant()
@@ -62,34 +43,32 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
]
)
# gc should not try to even start on a timeline that doesn't exist
# gc should not try to even start
with pytest.raises(
expected_exception=PageserverApiException, match="gc target timeline does not exist"
):
bogus_timeline_id = TimelineId.generate()
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
# Detach while running manual GC.
# It should wait for manual GC to finish (right now it doesn't that's why this test fails sometimes)
pageserver_http.configure_failpoints(
("gc_iteration_internal_after_getting_gc_timelines", "return(2000)")
)
# try to concurrently run gc and detach
gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id))
gc_thread.start()
time.sleep(1)
# By now the gc task is spawned but in sleep for another second due to the failpoint.
log.info("detaching tenant")
pageserver_http.tenant_detach(tenant_id)
log.info("tenant detached without error")
last_error = None
for i in range(3):
try:
pageserver_http.tenant_detach(tenant_id)
except Exception as e:
last_error = e
log.error(f"try {i} error detaching tenant: {e}")
continue
else:
break
# else is called if the loop finished without reaching "break"
else:
pytest.fail(f"could not detach tenant: {last_error}")
log.info("wait for gc thread to return")
gc_thread.join(timeout=10)
assert not gc_thread.is_alive()
log.info("gc thread returned")
# check that nothing is left on disk for deleted tenant
assert not (env.repo_dir / "tenants" / str(tenant_id)).exists()

View File

@@ -259,11 +259,6 @@ def test_tenant_relocation(
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage"

View File

@@ -166,10 +166,6 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
env = neon_env_builder.init_start()
# FIXME: we have a race condition between GC and delete timeline. GC might fail with this
# error. Similar to https://github.com/neondatabase/neon/issues/2671
env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*")
tenant_id = env.initial_tenant
main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0]
@@ -192,8 +188,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
"first-branch", main_branch_name, tenant_id
)
# unsure why this happens, the size difference is more than a page alignment
size_after_first_branch = http_client.tenant_size(tenant_id)
assert size_after_first_branch == size_at_branch
assert size_after_first_branch > size_at_branch
assert size_after_first_branch - size_at_branch == gc_horizon
first_branch_pg = env.postgres.create_start("first-branch", tenant_id=tenant_id)
@@ -219,7 +217,7 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
"second-branch", main_branch_name, tenant_id
)
size_after_second_branch = http_client.tenant_size(tenant_id)
assert size_after_second_branch == size_after_continuing_on_main
assert size_after_second_branch > size_after_continuing_on_main
second_branch_pg = env.postgres.create_start("second-branch", tenant_id=tenant_id)
@@ -265,8 +263,6 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder
except PageserverApiException as e:
# compaction is ok but just retry if this fails; related to #2442
if "cannot lock compaction critical section" in str(e):
# also ignore it in the log
env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*")
time.sleep(1)
continue
raise

View File

@@ -25,13 +25,6 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv):
)
initial_tenant_dirs = [d for d in tenants_dir.iterdir()]
neon_simple_env.pageserver.allowed_errors.extend(
[
".*Failed to create directory structure for tenant .*, cleaning tmp data.*",
".*Failed to fsync removed temporary tenant directory .*",
]
)
pageserver_http = neon_simple_env.pageserver.http_client()
pageserver_http.configure_failpoints(("tenant-creation-before-tmp-rename", "return"))
with pytest.raises(Exception, match="tenant-creation-before-tmp-rename"):
@@ -213,13 +206,6 @@ def test_pageserver_with_empty_tenants(
)
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*Tenant .* has no timelines directory.*")
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
client = env.pageserver.http_client()
tenant_without_timelines_dir = env.initial_tenant

View File

@@ -66,11 +66,6 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Rem
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
tenants_pgs: List[Tuple[TenantId, Postgres]] = []
for _ in range(1, 5):
@@ -122,13 +117,6 @@ def test_tenants_attached_after_download(
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
@@ -221,16 +209,6 @@ def test_tenant_upgrades_index_json_from_v0(
# launch pageserver, populate the default tenants timeline, wait for it to be uploaded,
# then go ahead and modify the "remote" version as if it was downgraded, needing upgrade
env = neon_env_builder.init_start()
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(
".*Failed to get local tenant state: Tenant .* not found in the local state.*"
)
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")
@@ -337,20 +315,6 @@ def test_tenant_redownloads_truncated_file_on_startup(
)
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(
".*Redownloading locally existing .* due to size mismatch.*"
)
env.pageserver.allowed_errors.append(
".*Downloaded layer exists already but layer file metadata mismatches.*"
)
# FIXME: Are these expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
pageserver_http = env.pageserver.http_client()
pg = env.postgres.create_start("main")

View File

@@ -7,11 +7,6 @@ from fixtures.utils import wait_until
def test_timeline_delete(neon_simple_env: NeonEnv):
env = neon_simple_env
env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
env.pageserver.allowed_errors.append(".*timeline not found.*")
env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
env.pageserver.allowed_errors.append(".*Tenant .* not found in the local state.*")
ps_http = env.pageserver.http_client()
# first try to delete non existing timeline

View File

@@ -263,12 +263,6 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_broker", "main")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
pg = env.postgres.create_start("test_broker")
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
@@ -312,11 +306,6 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.neon_cli.create_branch("test_safekeepers_wal_removal")
pg = env.postgres.create_start("test_safekeepers_wal_removal")
@@ -549,7 +538,6 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
)
pg.stop_and_destroy()
ps_cli.timeline_delete(tenant_id, timeline_id)
# Also delete and manually create timeline on safekeepers -- this tests
# scenario of manual recovery on different set of safekeepers.
@@ -574,6 +562,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
shutil.copy(f_partial_saved, f_partial_path)
# recreate timeline on pageserver from scratch
ps_cli.timeline_delete(tenant_id, timeline_id)
ps_cli.timeline_create(tenant_id, timeline_id)
wait_lsn_timeout = 60 * 3
@@ -1092,14 +1081,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
# FIXME: are these expected?
env.pageserver.allowed_errors.extend(
[
".*Failed to process query for timeline .*: Timeline .* was not found in global map.*",
".*end streaming to Some.*",
]
)
# Create two tenants: one will be deleted, other should be preserved.
tenant_id = env.initial_tenant
timeline_id_1 = env.neon_cli.create_branch("br1") # Active, delete explicitly

View File

@@ -22,8 +22,6 @@ def assert_child_processes(pageserver_pid, wal_redo_present=False, defunct_prese
# as a zombie process.
def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# We intentionally test for a non-existent tenant.
env.pageserver.allowed_errors.append(".*Tenant not found.*")
pageserver_http = env.pageserver.http_client()
pagserver_pid = int((env.repo_dir / "pageserver.pid").read_text())