diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml
index 4493985587..dec1f47e47 100644
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -36,14 +36,6 @@ inputs:
     description: 'Region name for real s3 tests'
     required: false
     default: ''
-  real_s3_access_key_id:
-    description: 'Access key id'
-    required: false
-    default: ''
-  real_s3_secret_access_key:
-    description: 'Secret access key'
-    required: false
-    default: ''
   rerun_flaky:
     description: 'Whether to rerun flaky tests'
     required: false
@@ -104,8 +96,6 @@ runs:
        COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
        TEST_OUTPUT: /tmp/test_output
        BUILD_TYPE: ${{ inputs.build_type }}
-       AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
-       AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
        ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 845a21ad0e..b732095f8f 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -346,10 +346,8 @@ jobs:
       test_selection: regress
       needs_postgres_source: true
       run_with_real_s3: true
-      real_s3_bucket: ci-tests-s3
-      real_s3_region: us-west-2
-      real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
-      real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
+      real_s3_bucket: neon-github-ci-tests
+      real_s3_region: eu-central-1
       rerun_flaky: true
       pg_version: ${{ matrix.pg_version }}
     env:
@@ -409,9 +407,7 @@ jobs:
         uses: ./.github/actions/allure-report-generate

       - uses: actions/github-script@v6
-        if: >
-          !cancelled() &&
-          github.event_name == 'pull_request'
+        if: ${{ !cancelled() }}
         with:
           # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
           retries: 5
@@ -421,7 +417,7 @@
             reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
           }

-          const script = require("./scripts/pr-comment-test-report.js")
+          const script = require("./scripts/comment-test-report.js")
           await script({
             github,
             context,
@@ -496,19 +492,24 @@ jobs:
         env:
           COMMIT_URL: ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.event.pull_request.head.sha || github.sha }}
         run: |
-          scripts/coverage \
-            --dir=/tmp/coverage report \
+          scripts/coverage --dir=/tmp/coverage \
+            report \
             --input-objects=/tmp/coverage/binaries.list \
             --commit-url=${COMMIT_URL} \
             --format=github
+
+          scripts/coverage --dir=/tmp/coverage \
+            report \
+            --input-objects=/tmp/coverage/binaries.list \
+            --format=lcov
+
       - name: Upload coverage report
         id: upload-coverage-report
         env:
           BUCKET: neon-github-public-dev
           COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
         run: |
-          aws s3 cp --only-show-errors --recursive /tmp/coverage/report s3://neon-github-public-dev/code-coverage/${COMMIT_SHA}
+          aws s3 cp --only-show-errors --recursive /tmp/coverage/report s3://${BUCKET}/code-coverage/${COMMIT_SHA}

           REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/index.html
           echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
@@ -663,6 +664,9 @@ jobs:
           project: nrdv0s4kcs
           push: true
           tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
+          build-args: |
+            GIT_VERSION=${{ github.sha }}
+            REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com

   compute-tools-image:
     runs-on: [ self-hosted, gen3, large ]
@@ -777,7 +781,7 @@ jobs:
       run:
         shell: sh -eu {0}
     env:
-      VM_BUILDER_VERSION: v0.7.3-alpha3
+      VM_BUILDER_VERSION: v0.8.0

     steps:
       - name: Checkout
@@ -798,7 +802,7 @@ jobs:
       - name: Build vm image
         run: |
-          ./vm-builder -src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+          ./vm-builder -enable-file-cache -src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

       - name: Pushing vm-compute-node image
         run: |
diff --git a/Cargo.lock b/Cargo.lock
index 2223453a08..d390df94e0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -17,17 +17,6 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

-[[package]]
-name = "ahash"
-version = "0.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
-dependencies = [
- "getrandom",
- "once_cell",
- "version_check",
-]
-
 [[package]]
 name = "ahash"
 version = "0.8.3"
@@ -41,9 +30,9 @@ dependencies = [

 [[package]]
 name = "aho-corasick"
-version = "0.7.20"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
 dependencies = [
  "memchr",
 ]
@@ -65,9 +54,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"

 [[package]]
 name = "anstream"
-version = "0.3.0"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371"
+checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -104,9 +93,9 @@ dependencies = [

 [[package]]
 name = "anstyle-wincon"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd"
+checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
 dependencies = [
  "anstyle",
  "windows-sys 0.48.0",
@@ -114,9 +103,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.70"
+version = "1.0.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4"
+checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
 dependencies = [
  "backtrace",
 ]
@@ -188,7 +177,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.15",
+ "syn 2.0.16",
 ]

 [[package]]
@@ -199,7 +188,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.15",
+ "syn 2.0.16",
 ]

 [[package]]
@@ -230,9 +219,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "aws-config"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc00553f5f3c06ffd4510a9d576f92143618706c45ea6ff81e84ad9be9588abd"
+checksum = "bcdcf0d683fe9c23d32cf5b53c9918ea0a500375a9fb20109802552658e576c9"
 dependencies = [
  "aws-credential-types",
  "aws-http",
@@ -256,9 +245,9 @@ dependencies = [

 [[package]]
 name = "aws-credential-types"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4cb57ac6088805821f78d282c0ba8aec809f11cbee10dda19a97b03ab040ccc2"
+checksum = "1fcdb2f7acbc076ff5ad05e7864bdb191ca70a6fd07668dc3a1a8bcd051de5ae"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-types",
@@ -270,9 +259,9 @@ dependencies = [

 [[package]]
 name = "aws-endpoint"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c5f6f84a4f46f95a9bb71d9300b73cd67eb868bc43ae84f66ad34752299f4ac"
+checksum = "8cce1c41a6cfaa726adee9ebb9a56fcd2bbfd8be49fd8a04c5e20fd968330b04"
 dependencies = [
  "aws-smithy-http",
  "aws-smithy-types",
@@ -284,9 +273,9 @@ dependencies = [

 [[package]]
 name = "aws-http"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a754683c322f7dc5167484266489fdebdcd04d26e53c162cad1f3f949f2c5671"
+checksum = "aadbc44e7a8f3e71c8b374e03ecd972869eb91dd2bc89ed018954a52ba84bc44"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-http",
@@ -303,9 +292,9 @@ dependencies = [

 [[package]]
 name = "aws-sdk-s3"
-version = "0.25.1"
+version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "392b9811ca489747ac84349790e49deaa1f16631949e7dd4156000251c260eae"
+checksum = "37c77060408d653d3efa6ea7b66c1389bc35a0342352984c8bf8bcb814a8fc27"
 dependencies = [
  "aws-credential-types",
  "aws-endpoint",
@@ -336,9 +325,9 @@ dependencies = [

 [[package]]
 name = "aws-sdk-sts"
-version = "0.27.0"
+version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d0fbe3c2c342bc8dfea4bb43937405a8ec06f99140a0dcb9c7b59e54dfa93a1"
+checksum = "265fac131fbfc188e5c3d96652ea90ecc676a934e3174eaaee523c6cec040b3b"
 dependencies = [
  "aws-credential-types",
  "aws-endpoint",
@@ -362,9 +351,9 @@ dependencies = [

 [[package]]
 name = "aws-sig-auth"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84dc92a63ede3c2cbe43529cb87ffa58763520c96c6a46ca1ced80417afba845"
+checksum = "3b94acb10af0c879ecd5c7bdf51cda6679a0a4f4643ce630905a77673bfa3c61"
 dependencies = [
  "aws-credential-types",
  "aws-sigv4",
@@ -377,9 +366,9 @@ dependencies = [

 [[package]]
 name = "aws-sigv4"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "392fefab9d6fcbd76d518eb3b1c040b84728ab50f58df0c3c53ada4bea9d327e"
+checksum = "9d2ce6f507be68e968a33485ced670111d1cbad161ddbbab1e313c03d37d8f4c"
 dependencies = [
  "aws-smithy-eventstream",
  "aws-smithy-http",
@@ -398,9 +387,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-async"
-version = "0.55.2"
+version = "0.55.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae23b9fe7a07d0919000116c4c5c0578303fbce6fc8d32efca1f7759d4c20faf"
+checksum = "13bda3996044c202d75b91afeb11a9afae9db9a721c6a7a427410018e286b880"
 dependencies = [
  "futures-util",
"pin-project-lite", @@ -410,9 +399,9 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6367acbd6849b8c7c659e166955531274ae147bf83ab4312885991f6b6706cb" +checksum = "07ed8b96d95402f3f6b8b57eb4e0e45ee365f78b1a924faf20ff6e97abf1eae6" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -431,9 +420,9 @@ dependencies = [ [[package]] name = "aws-smithy-client" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5230d25d244a51339273b8870f0f77874cd4449fb4f8f629b21188ae10cfc0ba" +checksum = "0a86aa6e21e86c4252ad6a0e3e74da9617295d8d6e374d552be7d3059c41cedd" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -444,7 +433,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls", + "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", "rustls 0.20.8", @@ -455,9 +444,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22d2a2bcc16e5c4d949ffd2b851da852b9bbed4bb364ed4ae371b42137ca06d9" +checksum = "460c8da5110835e3d9a717c61f5556b20d03c32a1dec57f8fc559b360f733bb8" dependencies = [ "aws-smithy-types", "bytes", @@ -466,9 +455,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60e2133beb9fe6ffe0b70deca57aaeff0a35ad24a9c6fab2fd3b4f45b99fdb5" +checksum = "2b3b693869133551f135e1f2c77cb0b8277d9e3e17feaf2213f735857c4f0d28" dependencies = [ "aws-smithy-eventstream", "aws-smithy-types", @@ -489,9 +478,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-tower" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a4d94f556c86a0dd916a5d7c39747157ea8cb909ca469703e20fee33e448b67" +checksum = "3ae4f6c5798a247fac98a867698197d9ac22643596dc3777f0c76b91917616b9" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -505,18 +494,18 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3d6e6ebb00b2cce379f079ad5ec508f9bcc3a9510d9b9c1840ed1d6f8af39" +checksum = "23f9f42fbfa96d095194a632fbac19f60077748eba536eb0b9fecc28659807f8" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-query" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d58edfca32ef9bfbc1ca394599e17ea329cb52d6a07359827be74235b64b3298" +checksum = "98819eb0b04020a1c791903533b638534ae6c12e2aceda3e6e6fba015608d51d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -524,9 +513,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58db46fc1f4f26be01ebdb821751b4e2482cd43aa2b64a0348fb89762defaffa" +checksum = "16a3d0bf4f324f4ef9793b86a1701d9700fbcdbd12a846da45eed104c634c6e8" dependencies = [ "base64-simd", "itoa", @@ -537,18 +526,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb557fe4995bd9ec87fb244bbb254666a971dc902a783e9da8b7711610e9664c" +checksum = 
"b1b9d12875731bd07e767be7baad95700c3137b56730ec9ddeedb52a5e5ca63b" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "0.55.2" +version = "0.55.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0869598bfe46ec44ffe17e063ed33336e59df90356ca8ff0e8da6f7c1d994b" +checksum = "6dd209616cc8d7bfb82f87811a5c655dc97537f592689b18743bddf5dc5c4829" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -562,9 +551,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.6.15" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b32c5ea3aabaf4deb5f5ced2d688ec0844c881c9e6c696a8b769a05fc691e62" +checksum = "f8175979259124331c1d7bf6586ee7e0da434155e4b2d48ec2c8386281d8df39" dependencies = [ "async-trait", "axum-core", @@ -634,9 +623,9 @@ checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105" [[package]] name = "base64-simd" @@ -670,13 +659,13 @@ dependencies = [ "lazycell", "log", "peeking_take_while", - "prettyplease 0.2.4", + "prettyplease 0.2.6", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn 2.0.15", + "syn 2.0.16", "which", ] @@ -697,9 +686,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" dependencies = [ "memchr", "once_cell", @@ -709,9 +698,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "byteorder" @@ -780,9 +769,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" dependencies = [ "ciborium-io", "ciborium-ll", @@ -791,15 +780,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" [[package]] name = "ciborium-ll" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" dependencies = [ "ciborium-io", "half", @@ -818,9 +807,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.23" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = 
"4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags", "clap_lex 0.2.4", @@ -830,9 +819,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.2.2" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b802d85aaf3a1cdb02b224ba472ebdea62014fccfcb269b95a4d76443b5ee5a" +checksum = "93aae7a4192245f70fe75dd9157fc7b4a5bf53e88d30bd4396f7d8f9284d5acc" dependencies = [ "clap_builder", "clap_derive", @@ -841,27 +830,27 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.2.2" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14a1a858f532119338887a4b8e1af9c60de8249cd7bafd68036a489e261e37b6" +checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990" dependencies = [ "anstream", "anstyle", "bitflags", - "clap_lex 0.4.1", + "clap_lex 0.5.0", "strsim", ] [[package]] name = "clap_derive" -version = "4.2.0" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +checksum = "191d9573962933b4027f932c600cd252ce27a8ad5979418fe78e43c07996f27b" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -875,9 +864,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" [[package]] name = "close_fds" @@ -889,16 +878,6 @@ dependencies = [ "libc", ] -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - [[package]] name = "colorchoice" version = "1.0.0" @@ -936,7 +915,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.2.2", + "clap 4.3.0", "compute_api", "futures", "hyper", @@ -998,7 +977,7 @@ name = "control_plane" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.2.2", + "clap 4.3.0", "comfy-table", "compute_api", "git-version", @@ -1041,9 +1020,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" dependencies = [ "libc", ] @@ -1076,7 +1055,7 @@ dependencies = [ "atty", "cast", "ciborium", - "clap 3.2.23", + "clap 3.2.25", "criterion-plot", "itertools", "lazy_static", @@ -1186,55 +1165,11 @@ dependencies = [ "typenum", ] -[[package]] -name = "cxx" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - 
"syn 2.0.15", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.15", -] - [[package]] name = "darling" -version = "0.14.4" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "0558d22a7b463ed0241e993f76f09f30b126687447751a8638587b864e4b3944" dependencies = [ "darling_core", "darling_macro", @@ -1242,27 +1177,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.4" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "ab8bfa2e259f8ee1ce5e97824a3c55ec4404a0d772ca7fa96bf19f0752a046eb" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] name = "darling_macro" -version = "0.14.4" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a" dependencies = [ "darling_core", "quote", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -1280,9 +1215,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" +checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" [[package]] name = "debugid" @@ -1310,9 +1245,9 @@ dependencies = [ [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -1321,13 +1256,13 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -1367,23 +1302,23 @@ dependencies = [ [[package]] name = "enumset" -version = "1.0.12" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19be8061a06ab6f3a6cf21106c873578bf01bd42ad15e0311a9c76161cb1c753" +checksum = "e875f1719c16de097dee81ed675e2d9bb63096823ed3f0ca827b7dea3028bbbb" dependencies = [ "enumset_derive", ] [[package]] name = "enumset_derive" -version = "0.6.1" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03e7b551eba279bf0fa88b83a46330168c1560a52a94f5126f892f0b364ab3e0" +checksum = "e08b6c6ab82d70f08844964ba10c7babb716de2ecaeab9be5717918a5177d3af" dependencies = [ "darling", "proc-macro2", "quote", 
- "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -1569,7 +1504,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -1667,9 +1602,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" +checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" dependencies = [ "bytes", "fnv", @@ -1704,9 +1639,6 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.6", -] [[package]] name = "hashbrown" @@ -1714,16 +1646,16 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash 0.8.3", + "ahash", ] [[package]] name = "hashlink" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" +checksum = "0761a1b9491c4f2e3d66aa0f62d0fba0af9a0e2852e4d48ea506632a4b56e6aa" dependencies = [ - "hashbrown 0.12.3", + "hashbrown 0.13.2", ] [[package]] @@ -1892,6 +1824,19 @@ dependencies = [ "tokio-rustls 0.23.4", ] +[[package]] +name = "hyper-rustls" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" +dependencies = [ + "http", + "hyper", + "rustls 0.21.1", + "tokio", + "tokio-rustls 0.24.0", +] + [[package]] name = "hyper-timeout" version = "0.4.1" @@ -1933,12 +1878,11 @@ dependencies = [ [[package]] name = "iana-time-zone-haiku" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "cxx", - "cxx-build", + "cc", ] [[package]] @@ -1999,9 +1943,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi 0.3.1", "libc", @@ -2022,7 +1966,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix 0.37.11", + "rustix 0.37.19", "windows-sys 0.48.0", ] @@ -2043,9 +1987,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" dependencies = [ "wasm-bindgen", ] @@ -2056,7 +2000,7 @@ version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ - "base64 0.21.0", + "base64 0.21.1", "pem", "ring", "serde", @@ -2098,9 +2042,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.141" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libloading" @@ -2112,15 +2056,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "link-cplusplus" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" -dependencies = [ - "cc", -] - [[package]] name = "linux-raw-sys" version = "0.1.4" @@ -2129,9 +2064,9 @@ checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "linux-raw-sys" -version = "0.3.1" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lock_api" @@ -2316,9 +2251,9 @@ dependencies = [ [[package]] name = "notify" -version = "5.1.0" +version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58ea850aa68a06e48fdb069c0ec44d0d64c8dbffa49bf3b6f7f0a901fdea1ba9" +checksum = "729f63e1ca555a43fe3efa4f3efdf4801c479da85b432242a7b726f353c88486" dependencies = [ "bitflags", "crossbeam-channel", @@ -2329,7 +2264,7 @@ dependencies = [ "libc", "mio", "walkdir", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -2435,7 +2370,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -2587,6 +2522,21 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "pagectl" +version = "0.1.0" +dependencies = [ + "anyhow", + "bytes", + "clap 4.3.0", + "git-version", + "pageserver", + "postgres_ffi", + "svg_fmt", + "utils", + "workspace_hack", +] + [[package]] name = "pageserver" version = "0.1.0" @@ -2597,7 +2547,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.2.2", + "clap 4.3.0", "close_fds", "const_format", "consumption_metrics", @@ -2753,22 +2703,22 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.0.12" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" +checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.0.12" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" +checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -2785,9 +2735,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.26" +version = 
"0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = "plotters" @@ -2961,12 +2911,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1" dependencies = [ "proc-macro2", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -2977,9 +2927,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" dependencies = [ "unicode-ident", ] @@ -2994,7 +2944,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix 0.36.12", + "rustix 0.36.14", ] [[package]] @@ -3078,7 +3028,7 @@ dependencies = [ "bstr", "bytes", "chrono", - "clap 4.2.2", + "clap 4.3.0", "consumption_metrics", "futures", "git-version", @@ -3116,7 +3066,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "socket2 0.5.2", + "socket2 0.5.3", "sync_wrapper", "thiserror", "tls-listener", @@ -3139,9 +3089,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" dependencies = [ "proc-macro2", ] @@ -3230,13 +3180,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.2", ] [[package]] @@ -3245,7 +3195,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ - "regex-syntax", + "regex-syntax 0.6.29", ] [[package]] @@ -3254,6 +3204,12 @@ version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +[[package]] +name = "regex-syntax" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" + [[package]] name = "remote_storage" version = "0.1.0" @@ -3283,11 +3239,11 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.16" +version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ - "base64 0.21.0", + "base64 0.21.1", "bytes", "encoding_rs", "futures-core", @@ -3296,7 +3252,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls", + 
"hyper-rustls 0.24.0", "ipnet", "js-sys", "log", @@ -3305,13 +3261,13 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.20.8", + "rustls 0.21.1", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls 0.24.0", "tower-service", "url", "wasm-bindgen", @@ -3323,9 +3279,9 @@ dependencies = [ [[package]] name = "reqwest-middleware" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99c50db2c7ccd815f976473dd7d0bde296f8c3b77c383acf4fc021cdcf10852b" +checksum = "4531c89d50effe1fac90d095c8b133c20c5c714204feee0bfc3fd158e784209d" dependencies = [ "anyhow", "async-trait", @@ -3338,12 +3294,14 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.4.1" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a71d77945a1c5ae9604f0504901e77a1e2e71f2932b1cb8103078179ca62ff8" +checksum = "783e8130d2427ddd7897dd3f814d4a3aea31b05deb42a4fdf8c18258fe5aefd1" dependencies = [ + "anyhow", "async-trait", "getrandom", + "matchit", "opentelemetry", "reqwest", "reqwest-middleware", @@ -3417,9 +3375,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc-hash" @@ -3447,9 +3405,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.12" +version = "0.36.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0af200a3324fa5bcd922e84e9b55a298ea9f431a489f01961acdebc6e908f25" +checksum = "14e4d67015953998ad0eb82887a0eb0129e18a7e2f3b7b0f6c422fddcd503d62" dependencies = [ "bitflags", "errno", @@ -3461,15 +3419,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.11" +version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", - "linux-raw-sys 0.3.1", + "linux-raw-sys 0.3.8", "windows-sys 0.48.0", ] @@ -3487,9 +3445,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" +checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e" dependencies = [ "log", "ring", @@ -3515,7 +3473,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 0.21.0", + "base64 0.21.1", ] [[package]] @@ -3550,7 +3508,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.2.2", + "clap 4.3.0", "const_format", "crc32c", "fs2", @@ -3624,12 +3582,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "scratch" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" - [[package]] name = "sct" 
version = "0.7.0" @@ -3642,9 +3594,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.8.2" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254" +checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" dependencies = [ "bitflags", "core-foundation", @@ -3655,9 +3607,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4" +checksum = "f51d0c0d83bec45f16480d0ce0058397a69e48fcdc52d1dc8855fb68acbd31a7" dependencies = [ "core-foundation-sys", "libc", @@ -3755,22 +3707,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -3786,9 +3738,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +checksum = "93107647184f6027e3b7dcb2e11034cf95ffa1e3a682c67951963ac69c1c007d" dependencies = [ "serde", ] @@ -3807,9 +3759,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331bb8c3bf9b92457ab7abecf07078c13f7d270ba490103e84e8b014490cd0b0" +checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" dependencies = [ "base64 0.13.1", "chrono", @@ -3823,14 +3775,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859011bddcc11f289f07f467cc1fe01c7a941daa4d8f6c40d4d1c92eb6d9319c" +checksum = "881b6f881b17d13214e5d494c939ebab463d01264ce1811e9d4ac3a882e7695f" dependencies = [ "darling", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -3944,9 +3896,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d283f86695ae989d1e18440a943880967156325ba025f05049946bff47bcc2b" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" dependencies = [ "libc", "windows-sys 0.48.0", @@ -3986,7 +3938,7 @@ dependencies = [ "anyhow", "async-stream", "bytes", - "clap 4.2.2", + "clap 4.3.0", "const_format", "futures", "futures-core", @@ -4000,8 +3952,8 @@ dependencies = [ "prost", "tokio", "tokio-stream", - "tonic 0.9.1", - "tonic-build 0.9.1", + "tonic 0.9.2", + "tonic-build 0.9.2", "tracing", "utils", "workspace_hack", @@ -4044,9 +3996,9 @@ dependencies = [ [[package]] name = "subtle" -version = "2.4.1" +version = "2.5.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "svg_fmt" @@ -4067,9 +4019,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" dependencies = [ "proc-macro2", "quote", @@ -4123,7 +4075,7 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix 0.37.11", + "rustix 0.37.19", "windows-sys 0.45.0", ] @@ -4190,7 +4142,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -4205,9 +4157,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.20" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc" dependencies = [ "itoa", "serde", @@ -4217,15 +4169,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" [[package]] name = "time-macros" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" dependencies = [ "time-core", ] @@ -4305,7 +4257,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -4372,15 +4324,15 @@ version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" dependencies = [ - "rustls 0.21.0", + "rustls 0.21.1", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" dependencies = [ "futures-core", "pin-project-lite", @@ -4415,9 +4367,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ "bytes", "futures-core", @@ -4429,9 +4381,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec" dependencies = [ "serde", "serde_spanned", @@ -4441,18 +4393,18 @@ dependencies = [ 
 [[package]]
 name = "toml_datetime"
-version = "0.6.1"
+version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622"
+checksum = "5a76a9312f5ba4c2dec6b9161fdf25d87ad8a09256ccea5a556fef03c706a10f"
 dependencies = [
  "serde",
 ]

 [[package]]
 name = "toml_edit"
-version = "0.19.8"
+version = "0.19.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13"
+checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
 dependencies = [
  "indexmap",
  "serde",
@@ -4495,14 +4447,14 @@ dependencies = [

 [[package]]
 name = "tonic"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38bd8e87955eb13c1986671838177d6792cdc52af9bffced0d2c8a9a7f741ab3"
+checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
 dependencies = [
  "async-stream",
  "async-trait",
  "axum",
- "base64 0.21.0",
+ "base64 0.21.1",
  "bytes",
  "futures-core",
  "futures-util",
@@ -4540,9 +4492,9 @@ dependencies = [

 [[package]]
 name = "tonic-build"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f60a933bbea70c95d633c04c951197ddf084958abaa2ed502a3743bdd8d8dd7"
+checksum = "a6fdaae4c2c638bb70fe42803a26fbd6fc6ac8c72f5c59f67ecc2a2dcabf4b07"
 dependencies = [
  "prettyplease 0.1.25",
  "proc-macro2",
@@ -4588,7 +4540,7 @@ name = "trace"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.2.2",
+ "clap 4.3.0",
  "pageserver_api",
  "utils",
  "workspace_hack",
 ]
@@ -4609,20 +4561,20 @@ dependencies = [

 [[package]]
 name = "tracing-attributes"
-version = "0.1.23"
+version = "0.1.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a"
+checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn 2.0.16",
 ]

 [[package]]
 name = "tracing-core"
-version = "0.1.30"
+version = "0.1.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
+checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
 dependencies = [
  "once_cell",
  "valuable",
@@ -4685,9 +4637,9 @@ dependencies = [

 [[package]]
 name = "tracing-subscriber"
-version = "0.3.16"
+version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70"
+checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
 dependencies = [
  "matchers",
  "nu-ansi-term",
@@ -4777,9 +4729,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"

 [[package]]
 name = "unicode-ident"
-version = "1.0.8"
+version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
+checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"

 [[package]]
 name = "unicode-normalization"
@@ -4899,9 +4851,9 @@ dependencies = [

 [[package]]
 name = "uuid"
-version = "1.3.1"
+version = "1.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb"
+checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2"
 dependencies = [
  "getrandom",
  "serde",
@@ -4936,7 +4888,7 @@ name = "wal_craft"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.2.2",
+ "clap 4.3.0",
  "env_logger",
  "log",
  "once_cell",
@@ -4974,9 +4926,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

 [[package]]
 name = "wasm-bindgen"
-version = "0.2.84"
+version = "0.2.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b"
+checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
 dependencies = [
  "cfg-if",
  "wasm-bindgen-macro",
@@ -4984,24 +4936,24 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.84"
+version = "0.2.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9"
+checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
 dependencies = [
  "bumpalo",
  "log",
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn 2.0.16",
  "wasm-bindgen-shared",
 ]

 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.34"
+version = "0.4.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454"
+checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -5011,9 +4963,9 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.84"
+version = "0.2.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5"
+checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -5021,28 +4973,28 @@ dependencies = [

 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.84"
+version = "0.2.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
+checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn 2.0.16",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]

 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.84"
+version = "0.2.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
+checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"

 [[package]]
 name = "web-sys"
-version = "0.3.61"
+version = "0.3.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97"
+checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -5276,9 +5228,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

 [[package]]
 name = "winnow"
-version = "0.4.1"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28"
+checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
 dependencies = [
  "memchr",
 ]
@@ -5299,7 +5251,7 @@ dependencies = [
  "anyhow",
  "bytes",
  "chrono",
- "clap 4.2.2",
+ "clap 4.3.0",
"clap_builder", "crossbeam-utils", "either", @@ -5310,7 +5262,6 @@ dependencies = [ "futures-executor", "futures-sink", "futures-util", - "hashbrown 0.12.3", "itertools", "libc", "log", @@ -5322,7 +5273,7 @@ dependencies = [ "prost", "rand", "regex", - "regex-syntax", + "regex-syntax 0.7.2", "reqwest", "ring", "rustls 0.20.8", @@ -5331,7 +5282,7 @@ dependencies = [ "serde_json", "socket2 0.4.9", "syn 1.0.109", - "syn 2.0.15", + "syn 2.0.16", "tokio", "tokio-rustls 0.23.4", "tokio-util", diff --git a/Cargo.toml b/Cargo.toml index 7895459841..1cb8d65948 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "compute_tools", "control_plane", "pageserver", + "pageserver/ctl", "proxy", "safekeeper", "storage_broker", @@ -22,7 +23,7 @@ async-stream = "0.3" async-trait = "0.1" atty = "0.2.14" aws-config = { version = "0.55", default-features = false, features=["rustls"] } -aws-sdk-s3 = "0.25" +aws-sdk-s3 = "0.27" aws-smithy-http = "0.55" aws-credential-types = "0.55" aws-types = "0.55" diff --git a/Dockerfile b/Dockerfile index 7364654641..9467e41ae4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,8 +47,7 @@ RUN set -e \ && mold -run cargo build \ --bin pg_sni_router \ --bin pageserver \ - --bin pageserver_binutils \ - --bin draw_timeline_dir \ + --bin pagectl \ --bin safekeeper \ --bin storage_broker \ --bin proxy \ @@ -73,8 +72,7 @@ RUN set -e \ COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin -COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin -COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin +COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 3a3dee8a8a..44e13a6c73 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -517,6 +517,22 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405 cargo pgx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control +######################################################################################### +# +# Layer "pg-pgx-ulid-build" +# Compile "pgx_ulid" extension +# +######################################################################################### + +FROM rust-extensions-build AS pg-pgx-ulid-build + +RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -O pgx_ulid.tar.gz && \ + echo "908b7358e6f846e87db508ae5349fb56a88ee6305519074b12f3d5b0ff09f791 pgx_ulid.tar.gz" | sha256sum --check && \ + mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . 
&& \ + sed -i 's/pgx = "=0.7.3"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ + cargo pgx install --release && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control + ######################################################################################### # # Layer "neon-pg-ext-build" @@ -547,6 +563,7 @@ COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ @@ -556,6 +573,10 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \ make -j $(getconf _NPROCESSORS_ONLN) \ PG_CONFIG=/usr/local/pgsql/bin/pg_config \ -C pgxn/neon_utils \ + -s install && \ + make -j $(getconf _NPROCESSORS_ONLN) \ + PG_CONFIG=/usr/local/pgsql/bin/pg_config \ + -C pgxn/hnsw \ -s install ######################################################################################### @@ -632,6 +653,7 @@ RUN apt update && \ libxml2 \ libxslt1.1 \ libzstd1 \ + libcurl4-openssl-dev \ procps && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 diff --git a/Makefile b/Makefile index 9d78c5d0fc..ae979b8b4c 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,11 @@ neon-pg-ext-%: postgres-% $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ -C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \ -f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install + +@echo "Compiling hnsw $*" + mkdir -p $(POSTGRES_INSTALL_DIR)/build/hnsw-$* + $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ + -C $(POSTGRES_INSTALL_DIR)/build/hnsw-$* \ + -f $(ROOT_PROJECT_DIR)/pgxn/hnsw/Makefile install .PHONY: neon-pg-ext-clean-% neon-pg-ext-clean-%: @@ -153,6 +158,9 @@ neon-pg-ext-clean-%: $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \ -C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \ -f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean + $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \ + -C $(POSTGRES_INSTALL_DIR)/build/hnsw-$* \ + -f $(ROOT_PROJECT_DIR)/pgxn/hnsw/Makefile clean .PHONY: neon-pg-ext neon-pg-ext: \ diff --git a/README.md b/README.md index 8e6f2cda81..efa714e5be 100644 --- a/README.md +++ b/README.md @@ -28,18 +28,19 @@ See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more informati * On Ubuntu or Debian, this set of packages should be sufficient to build the code: ```bash apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \ -libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler +libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \ +libcurl4-openssl-dev ``` * On Fedora, these packages are needed: ```bash dnf install flex bison readline-devel zlib-devel openssl-devel \ libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \ - protobuf-devel + protobuf-devel libcurl-devel ``` * On Arch based systems, these packages are needed: ```bash pacman -S base-devel readline zlib libseccomp openssl clang \ -postgresql-libs cmake postgresql protobuf +postgresql-libs cmake postgresql protobuf curl ``` Building Neon requires 3.15+ version of `protoc` 
(protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases). diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 2f515c9bf1..c6cfde1d1a 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -59,6 +59,9 @@ fn main() -> Result<()> { let matches = cli().get_matches(); + let http_port = *matches + .get_one::<u16>("http-port") + .expect("http-port is required"); let pgdata = matches .get_one::<String>("pgdata") .expect("PGDATA path is required"); @@ -178,7 +181,8 @@ fn main() -> Result<()> { // Launch http service first, so we were able to serve control-plane // requests, while configuration is still in progress. - let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread"); + let _http_handle = + launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread"); if !spec_set { // No spec provided, hang waiting for it. @@ -286,6 +290,14 @@ fn cli() -> clap::Command { let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown"); clap::Command::new("compute_ctl") .version(version) + .arg( + Arg::new("http-port") + .long("http-port") + .value_name("HTTP_PORT") + .default_value("3080") + .value_parser(clap::value_parser!(u16)) + .required(false), + ) .arg( Arg::new("connstr") .short('C') diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index da5ad00da6..617b330704 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,19 +1,3 @@ -// -// XXX: This starts to be scarry similar to the `PostgresNode` from `control_plane`, -// but there are several things that makes `PostgresNode` usage inconvenient in the -// cloud: -// - it inherits from `LocalEnv`, which contains **all-all** the information about -// a complete service running -// - it uses `PageServerNode` with information about http endpoint, which we do not -// need in the cloud again -// - many tiny pieces like, for example, we do not use `pg_ctl` in the cloud -// -// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required -// attributes (not required for the cloud). Yet, it is still tempting to unify these -// `PostgresNode` and `ComputeNode` and use one in both places. -// -// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`. -// use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::Path; @@ -106,26 +90,38 @@ pub struct ParsedSpec { impl TryFrom<ComputeSpec> for ParsedSpec { type Error = String; fn try_from(spec: ComputeSpec) -> Result<Self, Self::Error> { + // Extract the options from the spec file that are needed to connect to + // the storage system. + // + // For backwards-compatibility, the top-level fields in the spec file + // may be empty. In that case, we need to dig them from the GUCs in the + // cluster.settings field. let pageserver_connstr = spec - .cluster - .settings - .find("neon.pageserver_connstring") + .pageserver_connstring + .clone() + .or_else(|| spec.cluster.settings.find("neon.pageserver_connstring")) .ok_or("pageserver connstr should be provided")?; let storage_auth_token = spec.storage_auth_token.clone(); - let tenant_id: TenantId = spec - .cluster - .settings - .find("neon.tenant_id") - .ok_or("tenant id should be provided") - .map(|s| TenantId::from_str(&s))?
- .or(Err("invalid tenant id"))?; - let timeline_id: TimelineId = spec - .cluster - .settings - .find("neon.timeline_id") - .ok_or("timeline id should be provided") - .map(|s| TimelineId::from_str(&s))? - .or(Err("invalid timeline id"))?; + let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id { + tenant_id + } else { + spec.cluster + .settings + .find("neon.tenant_id") + .ok_or("tenant id should be provided") + .map(|s| TenantId::from_str(&s))? + .or(Err("invalid tenant id"))? + }; + let timeline_id: TimelineId = if let Some(timeline_id) = spec.timeline_id { + timeline_id + } else { + spec.cluster + .settings + .find("neon.timeline_id") + .ok_or("timeline id should be provided") + .map(|s| TimelineId::from_str(&s))? + .or(Err("invalid timeline id"))? + }; Ok(ParsedSpec { spec, @@ -295,8 +291,8 @@ impl ComputeNode { update_pg_hba(pgdata_path)?; match spec.mode { - ComputeMode::Primary | ComputeMode::Static(..) => {} - ComputeMode::Replica => { + ComputeMode::Primary => {} + ComputeMode::Replica | ComputeMode::Static(..) => { add_standby_signal(pgdata_path)?; } } @@ -362,6 +358,8 @@ impl ComputeNode { }; // Proceed with post-startup configuration. Note, that order of operations is important. + // Disable DDL forwarding because control plane already knows about these roles/databases. + client.simple_query("SET neon.forward_ddl = false")?; let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; handle_roles(spec, &mut client)?; handle_databases(spec, &mut client)?; @@ -374,7 +372,7 @@ impl ComputeNode { info!( "finished configuration of compute for project {}", - spec.cluster.cluster_id + spec.cluster.cluster_id.as_deref().unwrap_or("None") ); Ok(()) @@ -403,7 +401,9 @@ impl ComputeNode { self.pg_reload_conf(&mut client)?; // Proceed with post-startup configuration. Note, that order of operations is important. + // Disable DDL forwarding because control plane already knows about these roles/databases. if spec.mode == ComputeMode::Primary { + client.simple_query("SET neon.forward_ddl = false")?; handle_roles(&spec, &mut client)?; handle_databases(&spec, &mut client)?; handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; @@ -430,7 +430,7 @@ impl ComputeNode { let spec = compute_state.pspec.as_ref().expect("spec must be set"); info!( "starting compute for project {}, operation {}, tenant {}, timeline {}", - spec.spec.cluster.cluster_id, + spec.spec.cluster.cluster_id.as_deref().unwrap_or("None"), spec.spec.operation_uuid.as_deref().unwrap_or("None"), spec.tenant_id, spec.timeline_id, diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index 1168f3876a..99346433d0 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -5,6 +5,7 @@ use std::path::Path; use anyhow::Result; +use crate::pg_helpers::escape_conf_value; use crate::pg_helpers::PgOptionsSerialize; use compute_api::spec::{ComputeMode, ComputeSpec}; @@ -36,10 +37,44 @@ pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> { // File::create() destroys the file content if it exists. let mut file = File::create(path)?; - writeln!(file, "# Managed by compute_ctl: begin")?; + // Write the postgresql.conf content from the spec file as is. 
+ if let Some(conf) = &spec.cluster.postgresql_conf { + writeln!(file, "{}", conf)?; + } write!(file, "{}", &spec.cluster.settings.as_pg_settings())?; + // Add options for connecting to storage + writeln!(file, "# Neon storage settings")?; + if let Some(s) = &spec.pageserver_connstring { + writeln!( + file, + "neon.pageserver_connstring='{}'", + escape_conf_value(s) + )?; + } + if !spec.safekeeper_connstrings.is_empty() { + writeln!( + file, + "neon.safekeepers='{}'", + escape_conf_value(&spec.safekeeper_connstrings.join(",")) + )?; + } + if let Some(s) = &spec.tenant_id { + writeln!( + file, + "neon.tenant_id='{}'", + escape_conf_value(&s.to_string()) + )?; + } + if let Some(s) = &spec.timeline_id { + writeln!( + file, + "neon.timeline_id='{}'", + escape_conf_value(&s.to_string()) + )?; + } + match spec.mode { ComputeMode::Primary => {} ComputeMode::Static(lsn) => { @@ -53,7 +88,12 @@ pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> { } } - writeln!(file, "# Managed by compute_ctl: end")?; + // If there are any extra options in the 'settings' field, append those + if spec.cluster.settings.is_some() { + writeln!(file, "# Managed by compute_ctl: begin")?; + write!(file, "{}", spec.cluster.settings.as_pg_settings())?; + writeln!(file, "# Managed by compute_ctl: end")?; + } Ok(()) } diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 4468f6f5e4..afd9c2fb54 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -220,8 +220,8 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> { // Main Hyper HTTP server function that runs it and blocks waiting on it forever. #[tokio::main] -async fn serve(state: Arc<ComputeNode>) { - let addr = SocketAddr::from(([0, 0, 0, 0], 3080)); +async fn serve(port: u16, state: Arc<ComputeNode>) { + let addr = SocketAddr::from(([0, 0, 0, 0], port)); let make_service = make_service_fn(move |_conn| { let state = state.clone(); @@ -256,10 +256,10 @@ async fn serve(state: Arc<ComputeNode>) { } /// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`. -pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> { +pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> { let state = Arc::clone(state); Ok(thread::Builder::new() .name("http-endpoint".into()) - .spawn(move || serve(state))?) + .spawn(move || serve(port, state))?) } diff --git a/compute_tools/src/logger.rs b/compute_tools/src/logger.rs index 1b5cf647b0..f6fc882968 100644 --- a/compute_tools/src/logger.rs +++ b/compute_tools/src/logger.rs @@ -33,5 +33,7 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> { .init(); tracing::info!("logging and tracing started"); + utils::logging::replace_panic_hook_with_tracing_panic_hook().forget(); + Ok(()) } diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 40dbea6907..d5c845e9ea 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -23,7 +23,7 @@ fn escape_literal(s: &str) -> String { /// Escape a string so that it can be used in postgresql.conf. /// Same as escape_literal, currently. -fn escape_conf_value(s: &str) -> String { +pub fn escape_conf_value(s: &str) -> String { s.replace('\'', "''").replace('\\', "\\\\") } @@ -121,9 +121,8 @@ impl RoleExt for Role { /// string of arguments. fn to_pg_options(&self) -> String { // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane. - // For now, we do not use generic `options` for roles. Once used, add - `self.options.as_pg_options()` somewhere here. - let mut params: String = "LOGIN".to_string(); + let mut params: String = self.options.as_pg_options(); + params.push_str(" LOGIN"); if let Some(pass) = &self.encrypted_password { // Some time ago we supported only md5 and treated all encrypted_password as md5. diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index bf3c407202..a2a19ae0da 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -62,7 +62,7 @@ fn do_control_plane_request( } } -/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT` +/// Request spec from the control-plane by compute_id. If `NEON_CONTROL_PLANE_TOKEN` /// env variable is set, it will be used for authorization. pub fn get_spec_from_control_plane( base_uri: &str, diff --git a/compute_tools/tests/pg_helpers_tests.rs b/compute_tools/tests/pg_helpers_tests.rs index a63ee038c7..265556d3b9 100644 --- a/compute_tools/tests/pg_helpers_tests.rs +++ b/compute_tools/tests/pg_helpers_tests.rs @@ -16,7 +16,7 @@ mod pg_helpers_tests { ); assert_eq!( spec.cluster.roles.first().unwrap().to_pg_options(), - "LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'" + " LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'" ); } diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 30880565ab..52af936d7b 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -41,7 +41,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1); const DEFAULT_BRANCH_NAME: &str = "main"; project_git_version!(GIT_VERSION); -const DEFAULT_PG_VERSION: &str = "14"; +const DEFAULT_PG_VERSION: &str = "15"; fn default_conf() -> String { format!( @@ -476,10 +476,11 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> { println!("Creating endpoint for imported timeline ..."); cplane.new_endpoint( - tenant_id, name, + tenant_id, timeline_id, None, + None, pg_version, ComputeMode::Primary, )?; @@ -591,7 +592,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { table.add_row([ endpoint_id.as_str(), - &endpoint.address.to_string(), + &endpoint.pg_address.to_string(), &endpoint.timeline_id.to_string(), branch_name, lsn_str.as_str(), @@ -620,8 +621,8 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .get_branch_timeline_id(branch_name, tenant_id) .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?; - let port: Option<u16> = sub_args.get_one::<u16>("port").copied(); - + let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied(); + let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied(); let pg_version = sub_args .get_one::<u32>("pg-version") .copied() (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), }; - cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, port, pg_version, mode)?; + cplane.new_endpoint( + &endpoint_id, + tenant_id, + timeline_id, + pg_port, + http_port, + pg_version, + mode, + )?; } "start" => { - let port: Option<u16> = sub_args.get_one::<u16>("port").copied(); + let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied(); + let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied(); let endpoint_id = sub_args .get_one::<String>("endpoint_id") .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?; + // If --safekeepers argument is given, use only the listed safekeeper nodes. + let safekeepers = + if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") { + let mut safekeepers: Vec<NodeId> = Vec::new(); + for sk_id in safekeepers_str.split(',').map(str::trim) { + let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| { + anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list") + })?); + safekeepers.push(sk_id); + } + safekeepers + } else { + env.safekeepers.iter().map(|sk| sk.id).collect() + }; + let endpoint = cplane.endpoints.get(endpoint_id.as_str()); let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) { @@ -673,7 +698,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { _ => {} } println!("Starting existing endpoint {endpoint_id}..."); - endpoint.start(&auth_token)?; + endpoint.start(&auth_token, safekeepers)?; } else { let branch_name = sub_args .get_one::<String>("branch-name") @@ -709,14 +734,15 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ..."); let ep = cplane.new_endpoint( - tenant_id, endpoint_id, + tenant_id, timeline_id, - port, + pg_port, + http_port, pg_version, mode, )?; - ep.start(&auth_token)?; + ep.start(&auth_token, safekeepers)?; } } "stop" => { @@ -944,11 +970,22 @@ fn cli() -> Command { .value_parser(value_parser!(u32)) .default_value(DEFAULT_PG_VERSION); - let port_arg = Arg::new("port") - .long("port") + let pg_port_arg = Arg::new("pg-port") + .long("pg-port") .required(false) .value_parser(value_parser!(u16)) - .value_name("port"); + .value_name("pg-port"); + + let http_port_arg = Arg::new("http-port") + .long("http-port") + .required(false) + .value_parser(value_parser!(u16)) + .value_name("http-port"); + + let safekeepers_arg = Arg::new("safekeepers") + .long("safekeepers") + .required(false) + .value_name("safekeepers"); let stop_mode_arg = Arg::new("stop-mode") .short('m') @@ -1093,7 +1130,8 @@ fn cli() -> Command { .arg(branch_name_arg.clone()) .arg(tenant_id_arg.clone()) .arg(lsn_arg.clone()) - .arg(port_arg.clone()) + .arg(pg_port_arg.clone()) + .arg(http_port_arg.clone()) .arg( Arg::new("config-only") .help("Don't do basebackup, create endpoint directory with only config files") @@ -1109,9 +1147,11 @@ fn cli() -> Command { .arg(branch_name_arg) .arg(timeline_id_arg) .arg(lsn_arg) - .arg(port_arg) + .arg(pg_port_arg) + .arg(http_port_arg) .arg(pg_version_arg) .arg(hot_standby_arg) + .arg(safekeepers_arg) ) .subcommand( Command::new("stop") diff --git a/control_plane/src/broker.rs b/control_plane/src/broker.rs index 6c0604a076..ad19dfa204 100644 --- a/control_plane/src/broker.rs +++ b/control_plane/src/broker.rs @@ -1,3 +1,9 @@ +//! Code to manage the storage broker +//! +//! In the local test environment, a single storage broker instance relays +//! information between the safekeepers and the pageservers; it keeps no +//! on-disk state of its own. +//! use anyhow::Context; use std::path::PathBuf; diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index cc5a7a4168..b28315a35d 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -1,40 +1,71 @@ +//! Code to manage compute endpoints +//! +//! In the local test environment, the data for each endpoint is stored in +//! +//! .neon/endpoints/<endpoint_id> +//! +//! Some basic information about the endpoint, like the tenant and timeline IDs, +//! is stored in the `endpoint.json` file. The `endpoint.json` file is created +//! when the endpoint is created, and doesn't change afterwards. +//! +//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is +//! started, we launch `compute_ctl`. It synchronizes the safekeepers, downloads +//! the basebackup from the pageserver to initialize the data directory, and +//! finally launches the PostgreSQL process. It watches the PostgreSQL process +//! until it exits. +//! +//! When an endpoint is created, a `postgresql.conf` file is also created in +//! the endpoint's directory. The file can be modified before starting PostgreSQL. +//! However, the `postgresql.conf` file in the endpoint directory is not used directly +//! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another +//! copy of it in the data directory. +//! +//! Directory contents: +//! +//! ```ignore
//! .neon/endpoints/main/
//! compute.log - log output of `compute_ctl` and `postgres`
//! endpoint.json - serialized `EndpointConf` struct
//! postgresql.conf - postgresql settings
//! spec.json - passed to `compute_ctl`
//! pgdata/
//! postgresql.conf - copy of postgresql.conf created by `compute_ctl`
//! zenith.signal
//!
//! ``` +//! use std::collections::BTreeMap; -use std::fs::{self, File}; -use std::io::Write; use std::net::SocketAddr; use std::net::TcpStream; -use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; -use std::process::{Command, Stdio}; -use std::str::FromStr; +use std::process::Command; use std::sync::Arc; use std::time::Duration; -use anyhow::{Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{NodeId, TenantId, TimelineId}; use crate::local_env::LocalEnv; use crate::pageserver::PageServerNode; use crate::postgresql_conf::PostgresConf; -use compute_api::spec::ComputeMode; +use compute_api::responses::{ComputeState, ComputeStatus}; +use compute_api::spec::{Cluster, ComputeMode, ComputeSpec}; // contents of a endpoint.json file #[serde_as] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] pub struct EndpointConf { - name: String, + endpoint_id: String, #[serde_as(as = "DisplayFromStr")] tenant_id: TenantId, #[serde_as(as = "DisplayFromStr")] timeline_id: TimelineId, mode: ComputeMode, - port: u16, + pg_port: u16, + http_port: u16, pg_version: u32, } @@ -57,11 +88,11 @@ impl ComputeControlPlane { let pageserver = Arc::new(PageServerNode::from_env(&env)); let mut endpoints = BTreeMap::default(); - for endpoint_dir in fs::read_dir(env.endpoints_path()) + for endpoint_dir in std::fs::read_dir(env.endpoints_path()) .with_context(|| format!("failed to list {}", env.endpoints_path().display()))? { let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?; - endpoints.insert(ep.name.clone(), Arc::new(ep)); + endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep)); } Ok(ComputeControlPlane { @@ -76,25 +107,28 @@ impl ComputeControlPlane { 1 + self .endpoints .values() - .map(|ep| ep.address.port()) + .map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port())) .max() .unwrap_or(self.base_port) } + #[allow(clippy::too_many_arguments)] pub fn new_endpoint( &mut self, + endpoint_id: &str, tenant_id: TenantId, - name: &str, timeline_id: TimelineId, - port: Option<u16>, + pg_port: Option<u16>, + http_port: Option<u16>, pg_version: u32, mode: ComputeMode, ) -> Result<Arc<Endpoint>> { - let port = port.unwrap_or_else(|| self.get_port()); - + let pg_port = pg_port.unwrap_or_else(|| self.get_port()); + let http_port = http_port.unwrap_or_else(|| self.get_port() + 1); let ep = Arc::new(Endpoint { - name: name.to_owned(), - address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), + endpoint_id: endpoint_id.to_owned(), + pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port), + http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port), env: self.env.clone(), pageserver: Arc::clone(&self.pageserver), timeline_id, @@ -102,21 +136,27 @@ impl ComputeControlPlane { tenant_id, pg_version, }); - ep.create_pgdata()?; + + ep.create_endpoint_dir()?; std::fs::write( ep.endpoint_path().join("endpoint.json"), serde_json::to_string_pretty(&EndpointConf { - name: name.to_string(), + endpoint_id: endpoint_id.to_string(), tenant_id, timeline_id, mode, - port, + http_port, + pg_port, pg_version, })?, )?; - ep.setup_pg_conf()?; + std::fs::write( + ep.endpoint_path().join("postgresql.conf"), + ep.setup_pg_conf()?.to_string(), + )?; - self.endpoints.insert(ep.name.clone(), Arc::clone(&ep)); + self.endpoints + .insert(ep.endpoint_id.clone(), Arc::clone(&ep)); Ok(ep) } @@ -127,13 +167,15 @@ impl ComputeControlPlane { #[derive(Debug)] pub struct Endpoint { /// used as the directory name - name: String, + endpoint_id: String, pub tenant_id: TenantId, pub timeline_id: TimelineId, pub mode: ComputeMode, - // port and address of the Postgres server - pub address: SocketAddr, + // port and address of the Postgres server and `compute_ctl`'s HTTP API + pub pg_address: SocketAddr, + pub http_address: SocketAddr, + // postgres major version in the format: 14, 15, etc.
pg_version: u32, @@ -158,16 +200,16 @@ impl Endpoint { // parse data directory name let fname = entry.file_name(); - let name = fname.to_str().unwrap().to_string(); + let endpoint_id = fname.to_str().unwrap().to_string(); // Read the endpoint.json file let conf: EndpointConf = serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?; - // ok now Ok(Endpoint { - address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.port), - name, + pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port), + http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port), + endpoint_id, env: env.clone(), pageserver: Arc::clone(pageserver), timeline_id: conf.timeline_id, @@ -177,104 +219,17 @@ impl Endpoint { }) } - fn sync_safekeepers(&self, auth_token: &Option, pg_version: u32) -> Result { - let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres"); - let mut cmd = Command::new(pg_path); - - cmd.arg("--sync-safekeepers") - .env_clear() - .env( - "LD_LIBRARY_PATH", - self.env.pg_lib_dir(pg_version)?.to_str().unwrap(), - ) - .env( - "DYLD_LIBRARY_PATH", - self.env.pg_lib_dir(pg_version)?.to_str().unwrap(), - ) - .env("PGDATA", self.pgdata().to_str().unwrap()) - .stdout(Stdio::piped()) - // Comment this to avoid capturing stderr (useful if command hangs) - .stderr(Stdio::piped()); - - if let Some(token) = auth_token { - cmd.env("NEON_AUTH_TOKEN", token); - } - - let sync_handle = cmd - .spawn() - .expect("postgres --sync-safekeepers failed to start"); - - let sync_output = sync_handle - .wait_with_output() - .expect("postgres --sync-safekeepers failed"); - if !sync_output.status.success() { - anyhow::bail!( - "sync-safekeepers failed: '{}'", - String::from_utf8_lossy(&sync_output.stderr) - ); - } - - let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?; - println!("Safekeepers synced on {}", lsn); - Ok(lsn) - } - - /// Get basebackup from the pageserver as a tar archive and extract it - /// to the `self.pgdata()` directory. - fn do_basebackup(&self, lsn: Option) -> Result<()> { - println!( - "Extracting base backup to create postgres instance: path={} port={}", - self.pgdata().display(), - self.address.port() - ); - - let sql = if let Some(lsn) = lsn { - format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn) - } else { - format!("basebackup {} {}", self.tenant_id, self.timeline_id) - }; - - let mut client = self - .pageserver - .page_server_psql_client() - .context("connecting to page server failed")?; - - let copyreader = client - .copy_out(sql.as_str()) - .context("page server 'basebackup' command failed")?; - - // Read the archive directly from the `CopyOutReader` - // - // Set `ignore_zeros` so that unpack() reads all the Copy data and - // doesn't stop at the end-of-archive marker. Otherwise, if the server - // sends an Error after finishing the tarball, we will not notice it. 
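For context on the `set_ignore_zeros` remark in the removed helper above: fetching a basebackup means untarring a stream while still watching for a trailing error from the server. A minimal sketch of that pattern using the same `tar` crate; the function name and the `reader` argument are illustrative stand-ins for the pageserver's CopyOutReader, not the PR's API:

```rust
use std::io::Read;
use std::path::Path;

// `reader` stands in for the stream carrying the basebackup tarball.
fn unpack_basebackup(reader: impl Read, pgdata: &Path) -> anyhow::Result<()> {
    let mut ar = tar::Archive::new(reader);
    // Keep reading past the end-of-archive marker: if the server sends an
    // error after finishing the tarball, stopping early would swallow it.
    ar.set_ignore_zeros(true);
    ar.unpack(pgdata)?;
    Ok(())
}
```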
- let mut ar = tar::Archive::new(copyreader); - ar.set_ignore_zeros(true); - ar.unpack(&self.pgdata()) - .context("extracting base backup failed")?; - - Ok(()) - } - - fn create_pgdata(&self) -> Result<()> { - fs::create_dir_all(self.pgdata()).with_context(|| { + fn create_endpoint_dir(&self) -> Result<()> { + std::fs::create_dir_all(self.endpoint_path()).with_context(|| { format!( - "could not create data directory {}", - self.pgdata().display() + "could not create endpoint directory {}", + self.endpoint_path().display() ) - })?; - fs::set_permissions(self.pgdata().as_path(), fs::Permissions::from_mode(0o700)) - .with_context(|| { - format!( - "could not set permissions in data directory {}", - self.pgdata().display() - ) - }) + }) } - // Write postgresql.conf with default configuration - // and PG_VERSION file to the data directory of a new endpoint. - fn setup_pg_conf(&self) -> Result<()> { + // Generate postgresql.conf with default configuration + fn setup_pg_conf(&self) -> Result { let mut conf = PostgresConf::new(); conf.append("max_wal_senders", "10"); conf.append("wal_log_hints", "off"); @@ -287,25 +242,14 @@ impl Endpoint { // wal_sender_timeout is the maximum time to wait for WAL replication. // It also defines how often the walreciever will send a feedback message to the wal sender. conf.append("wal_sender_timeout", "5s"); - conf.append("listen_addresses", &self.address.ip().to_string()); - conf.append("port", &self.address.port().to_string()); + conf.append("listen_addresses", &self.pg_address.ip().to_string()); + conf.append("port", &self.pg_address.port().to_string()); conf.append("wal_keep_size", "0"); // walproposer panics when basebackup is invalid, it is pointless to restart in this case. conf.append("restart_after_crash", "off"); - // Configure the Neon Postgres extension to fetch pages from pageserver - let pageserver_connstr = { - let config = &self.pageserver.pg_connection_config; - let (host, port) = (config.host(), config.port()); - - // NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere. - format!("postgresql://no_user@{host}:{port}") - }; + // Load the 'neon' extension conf.append("shared_preload_libraries", "neon"); - conf.append_line(""); - conf.append("neon.pageserver_connstring", &pageserver_connstr); - conf.append("neon.tenant_id", &self.tenant_id.to_string()); - conf.append("neon.timeline_id", &self.timeline_id.to_string()); conf.append_line(""); // Replication-related configurations, such as WAL sending @@ -390,46 +334,11 @@ impl Endpoint { } } - let mut file = File::create(self.pgdata().join("postgresql.conf"))?; - file.write_all(conf.to_string().as_bytes())?; - - let mut file = File::create(self.pgdata().join("PG_VERSION"))?; - file.write_all(self.pg_version.to_string().as_bytes())?; - - Ok(()) - } - - fn load_basebackup(&self, auth_token: &Option) -> Result<()> { - let backup_lsn = match &self.mode { - ComputeMode::Primary => { - if !self.env.safekeepers.is_empty() { - // LSN 0 means that it is bootstrap and we need to download just - // latest data from the pageserver. That is a bit clumsy but whole bootstrap - // procedure evolves quite actively right now, so let's think about it again - // when things would be more stable (TODO). 
- let lsn = self.sync_safekeepers(auth_token, self.pg_version)?; - if lsn == Lsn(0) { - None - } else { - Some(lsn) - } - } else { - None - } - } - ComputeMode::Static(lsn) => Some(*lsn), - ComputeMode::Replica => { - None // Take the latest snapshot available to start with - } - }; - - self.do_basebackup(backup_lsn)?; - - Ok(()) + Ok(conf) } pub fn endpoint_path(&self) -> PathBuf { - self.env.endpoints_path().join(&self.name) + self.env.endpoints_path().join(&self.endpoint_id) } pub fn pgdata(&self) -> PathBuf { @@ -439,7 +348,7 @@ impl Endpoint { pub fn status(&self) -> &str { let timeout = Duration::from_millis(300); let has_pidfile = self.pgdata().join("postmaster.pid").exists(); - let can_connect = TcpStream::connect_timeout(&self.address, timeout).is_ok(); + let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok(); match (has_pidfile, can_connect) { (true, true) => "running", @@ -457,8 +366,6 @@ impl Endpoint { &[ "-D", self.pgdata().to_str().unwrap(), - "-l", - self.pgdata().join("pg.log").to_str().unwrap(), "-w", //wait till pg_ctl actually does what was asked ], args, @@ -494,36 +401,183 @@ impl Endpoint { Ok(()) } - pub fn start(&self, auth_token: &Option<String>) -> Result<()> { + pub fn start(&self, auth_token: &Option<String>, safekeepers: Vec<NodeId>) -> Result<()> { if self.status() == "running" { anyhow::bail!("The endpoint is already running"); } - // 1. We always start Postgres from scratch, so - // if old dir exists, preserve 'postgresql.conf' and drop the directory - let postgresql_conf_path = self.pgdata().join("postgresql.conf"); - let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| { - format!( - "failed to read config file in {}", - postgresql_conf_path.to_str().unwrap() - ) - })?; - fs::remove_dir_all(self.pgdata())?; - self.create_pgdata()?; + // Slurp the endpoints/<endpoint_id>/postgresql.conf file into + // memory. We will include it in the spec file that we pass to + // `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf + // in the data directory. + let postgresql_conf_path = self.endpoint_path().join("postgresql.conf"); + let postgresql_conf = match std::fs::read(&postgresql_conf_path) { + Ok(content) => String::from_utf8(content)?, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => "".to_string(), + Err(e) => { + return Err(anyhow::Error::new(e).context(format!( + "failed to read config file in {}", + postgresql_conf_path.to_str().unwrap() + ))) + } + }; - // 2. Bring back config files - fs::write(&postgresql_conf_path, postgresql_conf)?; - - // 3. Load basebackup - self.load_basebackup(auth_token)?; - - if self.mode != ComputeMode::Primary { - File::create(self.pgdata().join("standby.signal"))?; + // We always start the compute node from scratch, so if the Postgres + // data dir exists from a previous launch, remove it first. + if self.pgdata().exists() { + std::fs::remove_dir_all(self.pgdata())?; } - // 4. Finally start postgres - println!("Starting postgres at '{}'", self.connstr()); - self.pg_ctl(&["start"], auth_token) + let pageserver_connstring = { + let config = &self.pageserver.pg_connection_config; + let (host, port) = (config.host(), config.port()); + + // NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere. + format!("postgresql://no_user@{host}:{port}") + }; + let mut safekeeper_connstrings = Vec::new(); + if self.mode == ComputeMode::Primary { + for sk_id in safekeepers { + let sk = self + .env + .safekeepers + .iter() + .find(|node| node.id == sk_id) + .ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?; + safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.pg_port)); + } + } + + // Create spec file + let spec = ComputeSpec { + format_version: 1.0, + operation_uuid: None, + cluster: Cluster { + cluster_id: None, // project ID: not used + name: None, // project name: not used + state: None, + roles: vec![], + databases: vec![], + settings: None, + postgresql_conf: Some(postgresql_conf), + }, + delta_operations: None, + tenant_id: Some(self.tenant_id), + timeline_id: Some(self.timeline_id), + mode: self.mode, + pageserver_connstring: Some(pageserver_connstring), + safekeeper_connstrings, + storage_auth_token: auth_token.clone(), + }; + let spec_path = self.endpoint_path().join("spec.json"); + std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?; + + // Open log file. We'll redirect the stdout and stderr of `compute_ctl` to it. + let logfile = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(self.endpoint_path().join("compute.log"))?; + + // Launch compute_ctl + println!("Starting postgres node at '{}'", self.connstr()); + let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl")); + cmd.args(["--http-port", &self.http_address.port().to_string()]) + .args(["--pgdata", self.pgdata().to_str().unwrap()]) + .args(["--connstr", &self.connstr()]) + .args([ + "--spec-path", + self.endpoint_path().join("spec.json").to_str().unwrap(), + ]) + .args([ + "--pgbin", + self.env + .pg_bin_dir(self.pg_version)? + .join("postgres") + .to_str() + .unwrap(), + ]) + .stdin(std::process::Stdio::null()) + .stderr(logfile.try_clone()?) + .stdout(logfile); + let _child = cmd.spawn()?; + + // Wait for it to start + let mut attempt = 0; + const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100); + const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s + loop { + attempt += 1; + match self.get_status() { + Ok(state) => { + match state.status { + ComputeStatus::Init => { + if attempt == MAX_ATTEMPTS { + bail!("compute startup timed out; still in Init state"); + } + // keep retrying + } + ComputeStatus::Running => { + // All good! + break; + } + ComputeStatus::Failed => { + bail!( + "compute startup failed: {}", + state + .error + .as_deref() + .unwrap_or("") + ); + } + ComputeStatus::Empty + | ComputeStatus::ConfigurationPending + | ComputeStatus::Configuration => { + bail!("unexpected compute status: {:?}", state.status) + } + } + } + Err(e) => { + if attempt == MAX_ATTEMPTS { + return Err(e).context( + "timed out waiting to connect to compute_ctl HTTP", + ); + } + } + } + std::thread::sleep(ATTEMPT_INTERVAL); + } + + Ok(()) + } + + // Call the /status HTTP API + pub fn get_status(&self) -> Result<ComputeState> { + let client = reqwest::blocking::Client::new(); + + let response = client + .request( + reqwest::Method::GET, + format!( + "http://{}:{}/status", + self.http_address.ip(), + self.http_address.port() + ), + ) + .send()?; + + // Interpret the response + let status = response.status(); + if !(status.is_client_error() || status.is_server_error()) { + Ok(response.json()?)
+ } else { + // reqwest does not export its error construction utility functions, so let's craft the message ourselves + let url = response.url().to_owned(); + let msg = match response.text() { + Ok(err_body) => format!("Error: {}", err_body), + Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url), + }; + Err(anyhow::anyhow!(msg)) + } } pub fn stop(&self, destroy: bool) -> Result<()> { @@ -540,7 +594,7 @@ impl Endpoint { "Destroying postgres data directory '{}'", self.pgdata().to_str().unwrap() ); - fs::remove_dir_all(self.endpoint_path())?; + std::fs::remove_dir_all(self.endpoint_path())?; } else { self.pg_ctl(&["stop"], &None)?; } @@ -549,10 +603,10 @@ impl Endpoint { pub fn connstr(&self) -> String { format!( - "host={} port={} user={} dbname={}", - self.address.ip(), - self.address.port(), + "postgresql://{}@{}:{}/{}", "cloud_admin", + self.pg_address.ip(), + self.pg_address.port(), "postgres" ) } diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 2b1eec7c4b..df70cb3139 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -24,7 +24,7 @@ use utils::{ use crate::safekeeper::SafekeeperNode; -pub const DEFAULT_PG_VERSION: u32 = 14; +pub const DEFAULT_PG_VERSION: u32 = 15; // // This data structures represents neon_local CLI config @@ -37,7 +37,7 @@ pub const DEFAULT_PG_VERSION: u32 = 14; #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] pub struct LocalEnv { // Base directory for all the nodes (the pageserver, safekeepers and - // compute nodes). + // compute endpoints). // // This is not stored in the config file. Rather, this is the path where the // config file itself is. It is read from the NEON_REPO_DIR env variable or diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 6309494b71..2ff09021e5 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -1,3 +1,9 @@ +//! Code to manage pageservers +//! +//! In the local test environment, the pageserver stores its data directly in +//! +//! .neon/ +//! 
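The start sequence above boils down to a small client protocol: poll `compute_ctl`'s `/status` endpoint every 100 ms, for up to 30 seconds, until it reports running or failed. A rough standalone sketch of such a poller, assuming reqwest's `blocking` and `json` features; `StatusBody` is a simplified stand-in for `ComputeState`, and the statuses shown are the snake_case serializations from `compute_api`:

```rust
use std::{thread, time::Duration};

// Simplified stand-in for compute_api::responses::ComputeState.
#[derive(serde::Deserialize)]
struct StatusBody {
    status: String,
    error: Option<String>,
}

// Poll /status until the compute reports "running", mirroring the
// 30-second budget of the wait loop above.
fn wait_until_running(http_port: u16) -> anyhow::Result<()> {
    for _ in 0..300 {
        // Ignore connection errors: compute_ctl may not be listening yet.
        if let Ok(resp) = reqwest::blocking::get(format!("http://127.0.0.1:{http_port}/status")) {
            let state: StatusBody = resp.json()?;
            match state.status.as_str() {
                "running" => return Ok(()),
                "failed" => anyhow::bail!("compute startup failed: {:?}", state.error),
                _ => {} // "init" etc.: keep waiting
            }
        }
        thread::sleep(Duration::from_millis(100));
    }
    anyhow::bail!("compute startup timed out")
}
```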
use std::borrow::Cow; use std::collections::HashMap; use std::fs::File; @@ -369,7 +375,16 @@ impl PageServerNode { evictions_low_residence_duration_metric_threshold: settings .remove("evictions_low_residence_duration_metric_threshold") .map(|x| x.to_string()), + gc_feedback: settings + .remove("gc_feedback") + .map(|x| x.parse::<bool>()) + .transpose() + .context("Failed to parse 'gc_feedback' as bool")?, }; + + // If tenant ID was not specified, generate one + let new_tenant_id = new_tenant_id.unwrap_or(TenantId::generate()); + let request = models::TenantCreateRequest { new_tenant_id, config, @@ -459,6 +474,11 @@ impl PageServerNode { evictions_low_residence_duration_metric_threshold: settings .remove("evictions_low_residence_duration_metric_threshold") .map(|x| x.to_string()), + gc_feedback: settings + .remove("gc_feedback") + .map(|x| x.parse::<bool>()) + .transpose() + .context("Failed to parse 'gc_feedback' as bool")?, } }; @@ -495,6 +515,9 @@ impl PageServerNode { ancestor_timeline_id: Option<TimelineId>, pg_version: Option<u32>, ) -> anyhow::Result<TimelineInfo> { + // If timeline ID was not specified, generate one + let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate()); + self.http_request( Method::POST, format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id), diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs index d358f73343..9e053ff1f1 100644 --- a/control_plane/src/safekeeper.rs +++ b/control_plane/src/safekeeper.rs @@ -1,3 +1,9 @@ +//! Code to manage safekeepers +//! +//! In the local test environment, the data for each safekeeper is stored in +//! +//! .neon/safekeepers/<safekeeper id> +//! use std::io::Write; use std::path::PathBuf; use std::process::Child; diff --git a/docker-compose/compute_wrapper/shell/compute.sh b/docker-compose/compute_wrapper/shell/compute.sh index cef2b485f3..22660a63ce 100755 --- a/docker-compose/compute_wrapper/shell/compute.sh +++ b/docker-compose/compute_wrapper/shell/compute.sh @@ -1,6 +1,14 @@ #!/bin/bash set -eux +# Generate a random tenant or timeline ID +# +# Takes a variable name as argument. The result is stored in that variable. +generate_id() { + local -n resvar=$1 + printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM +} + PG_VERSION=${PG_VERSION:-14} SPEC_FILE_ORG=/var/db/postgres/specs/spec.json @@ -13,29 +21,29 @@ done echo "Page server is ready." echo "Create a tenant and timeline" +generate_id tenant_id PARAMS=( -sb -X POST -H "Content-Type: application/json" - -d "{}" + -d "{\"new_tenant_id\": \"${tenant_id}\"}" http://pageserver:9898/v1/tenant/ ) -tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g') +result=$(curl "${PARAMS[@]}") +echo $result | jq . +generate_id timeline_id PARAMS=( -sb -X POST -H "Content-Type: application/json" - -d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}" + -d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}" "http://pageserver:9898/v1/tenant/${tenant_id}/timeline/" ) result=$(curl "${PARAMS[@]}") echo $result | jq .
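The shell `generate_id` above produces a random 32-character hex string from four `$SRANDOM` draws, which is the format the pageserver's HTTP API expects for tenant and timeline IDs now that the client must supply them. A rough Rust equivalent, using the `rand` crate (illustrative only, not part of the diff):

```rust
use rand::Rng;

// Four random 32-bit values, each formatted as 8 hex digits, giving a
// 32-character ID like "9ef87a305f253d85e239bbb32e0eb284".
fn generate_id() -> String {
    let mut rng = rand::thread_rng();
    (0..4).map(|_| format!("{:08x}", rng.gen::<u32>())).collect()
}

fn main() {
    println!("{}", generate_id());
}
```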
echo "Overwrite tenant id and timeline id in spec file" -tenant_id=$(echo ${result} | jq -r .tenant_id) -timeline_id=$(echo ${result} | jq -r .timeline_id) - sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE} sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE} diff --git a/docs/pageserver-thread-mgmt.md b/docs/pageserver-thread-mgmt.md index 0cc897f154..b911933528 100644 --- a/docs/pageserver-thread-mgmt.md +++ b/docs/pageserver-thread-mgmt.md @@ -52,9 +52,7 @@ completion, or shield the rest of the code from surprise cancellations by spawning a separate task. The code that handles incoming HTTP requests, for example, spawns a separate task for each request, because Hyper will drop the request-handling Future if the HTTP -connection is lost. (FIXME: our HTTP handlers do not do that -currently, but we should fix that. See [issue -3478](https://github.com/neondatabase/neon/issues/3478)). +connection is lost. #### How to cancel, then? diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index d181c018b1..ce73dda08a 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -5,13 +5,13 @@ use serde::{Deserialize, Serialize, Serializer}; use crate::spec::ComputeSpec; -#[derive(Serialize, Debug)] +#[derive(Serialize, Debug, Deserialize)] pub struct GenericAPIError { pub error: String, } /// Response of the /status API -#[derive(Serialize, Debug)] +#[derive(Serialize, Debug, Deserialize)] #[serde(rename_all = "snake_case")] pub struct ComputeStatusResponse { pub start_time: DateTime, @@ -23,7 +23,7 @@ pub struct ComputeStatusResponse { pub error: Option, } -#[derive(Serialize)] +#[derive(Deserialize, Serialize)] #[serde(rename_all = "snake_case")] pub struct ComputeState { pub status: ComputeStatus, @@ -33,7 +33,7 @@ pub struct ComputeState { pub error: Option, } -#[derive(Serialize, Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum ComputeStatus { // Spec wasn't provided at start, waiting for it to be diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 6072980ed8..4014774a7e 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -5,6 +5,7 @@ //! and connect it to the storage nodes. use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; +use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; /// String type alias representing Postgres identifier and @@ -14,7 +15,7 @@ pub type PgIdent = String; /// Cluster spec or configuration represented as an optional number of /// delta operations + final cluster state description. #[serde_as] -#[derive(Clone, Debug, Default, Deserialize)] +#[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct ComputeSpec { pub format_version: f32, @@ -26,9 +27,32 @@ pub struct ComputeSpec { pub cluster: Cluster, pub delta_operations: Option>, + // Information needed to connect to the storage layer. + // + // `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed. + // + // Depending on `mode`, this can be a primary read-write node, a read-only + // replica, or a read-only node pinned at an older LSN. + // `safekeeper_connstrings` must be set for a primary. + // + // For backwards compatibility, the control plane may leave out all of + // these, and instead set the "neon.tenant_id", "neon.timeline_id", + // etc. GUCs in cluster.settings. 
TODO: Once the control plane has been + // updated to fill these fields, we can make these non optional. + #[serde_as(as = "Option")] + pub tenant_id: Option, + #[serde_as(as = "Option")] + pub timeline_id: Option, + #[serde_as(as = "Option")] + pub pageserver_connstring: Option, + #[serde(default)] + pub safekeeper_connstrings: Vec, + #[serde(default)] pub mode: ComputeMode, + /// If set, 'storage_auth_token' is used as the password to authenticate to + /// the pageserver and safekeepers. pub storage_auth_token: Option, } @@ -47,13 +71,19 @@ pub enum ComputeMode { Replica, } -#[derive(Clone, Debug, Default, Deserialize)] +#[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct Cluster { - pub cluster_id: String, - pub name: String, + pub cluster_id: Option, + pub name: Option, pub state: Option, pub roles: Vec, pub databases: Vec, + + /// Desired contents of 'postgresql.conf' file. (The 'compute_ctl' + /// tool may add additional settings to the final file.) + pub postgresql_conf: Option, + + /// Additional settings that will be appended to the 'postgresql.conf' file. pub settings: GenericOptions, } @@ -63,7 +93,7 @@ pub struct Cluster { /// - DROP ROLE /// - ALTER ROLE name RENAME TO new_name /// - ALTER DATABASE name RENAME TO new_name -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct DeltaOp { pub action: String, pub name: PgIdent, @@ -72,7 +102,7 @@ pub struct DeltaOp { /// Rust representation of Postgres role info with only those fields /// that matter for us. -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct Role { pub name: PgIdent, pub encrypted_password: Option, @@ -81,7 +111,7 @@ pub struct Role { /// Rust representation of Postgres database info with only those fields /// that matter for us. -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct Database { pub name: PgIdent, pub owner: PgIdent, @@ -91,7 +121,7 @@ pub struct Database { /// Common type representing both SQL statement params with or without value, /// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config /// options like `wal_level = logical`. -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct GenericOption { pub name: String, pub value: Option, diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 3d98fd63a8..162bf6b294 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -18,7 +18,29 @@ use crate::reltag::RelTag; use anyhow::bail; use bytes::{BufMut, Bytes, BytesMut}; -/// A state of a tenant in pageserver's memory. +/// The state of a tenant in this pageserver. 
+/// +/// ```mermaid +/// stateDiagram-v2 +/// +/// [*] --> Loading: spawn_load() +/// [*] --> Attaching: spawn_attach() +/// +/// Loading --> Activating: activate() +/// Attaching --> Activating: activate() +/// Activating --> Active: infallible +/// +/// Loading --> Broken: load() failure +/// Attaching --> Broken: attach() failure +/// +/// Active --> Stopping: set_stopping(), part of shutdown & detach +/// Stopping --> Broken: late error in remove_tenant_from_memory +/// +/// Broken --> [*]: ignore / detach / shutdown +/// Stopping --> [*]: remove_from_memory complete +/// +/// Active --> Broken: cfg(testing)-only tenant break point +/// ``` #[derive( Clone, PartialEq, @@ -26,43 +48,63 @@ use bytes::{BufMut, Bytes, BytesMut}; serde::Serialize, serde::Deserialize, strum_macros::Display, - strum_macros::EnumString, strum_macros::EnumVariantNames, strum_macros::AsRefStr, strum_macros::IntoStaticStr, )] #[serde(tag = "slug", content = "data")] pub enum TenantState { - /// This tenant is being loaded from local disk + /// This tenant is being loaded from local disk. + /// + /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass. Loading, - /// This tenant is being downloaded from cloud storage. + /// This tenant is being attached to the pageserver. + /// + /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass. Attaching, /// The tenant is transitioning from Loading/Attaching to Active. - Activating, - /// Tenant is fully operational + /// + /// While in this state, the individual timelines are being activated. + /// + /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass. + Activating(ActivatingFrom), + /// The tenant has finished activating and is open for business. + /// + /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`. Active, - /// A tenant is recognized by pageserver, but it is being detached or the + /// The tenant is recognized by pageserver, but it is being detached or the /// system is being shut down. + /// + /// Transitions out of this state are possible through `set_broken()`. Stopping, - /// A tenant is recognized by the pageserver, but can no longer be used for - /// any operations, because it failed to be activated. + /// The tenant is recognized by the pageserver, but can no longer be used for + /// any operations. + /// + /// If the tenant fails to load or attach, it will transition to this state + /// and it is guaranteed that no background tasks are running in its name. + /// + /// The other way to transition into this state is from `Stopping` state + /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens + /// if the cleanup future executed by `remove_tenant_from_memory()` fails. Broken { reason: String, backtrace: String }, } impl TenantState { pub fn attachment_status(&self) -> TenantAttachmentStatus { use TenantAttachmentStatus::*; + + // Below TenantState::Activating is used as "transient" or "transparent" state for + // attachment_status determining. match self { // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map. // So, technically, we can return Attached here. // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check. // But, our attach task might still be fetching the remote timelines, etc. // So, return `Maybe` while Attaching, making Console wait for the attach task to finish. 
- Self::Attaching => Maybe, + Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe, // tenant mgr startup distinguishes attaching from loading via marker file. // If it's loading, there is no attach marker file, i.e., attach had finished in the past. - Self::Loading => Attached, - Self::Activating => todo!(), + Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached, // We only reach Active after successful load / attach. // So, call atttachment status Attached. Self::Active => Attached, @@ -101,6 +143,15 @@ impl std::fmt::Debug for TenantState { } } +/// The only [`TenantState`] variants we could be `TenantState::Activating` from. +#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub enum ActivatingFrom { + /// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`] + Loading, + /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`] + Attaching, +} + /// A state of a timeline in pageserver's memory. #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum TimelineState { @@ -121,9 +172,8 @@ pub enum TimelineState { #[serde_as] #[derive(Serialize, Deserialize)] pub struct TimelineCreateRequest { - #[serde(default)] - #[serde_as(as = "Option")] - pub new_timeline_id: Option, + #[serde_as(as = "DisplayFromStr")] + pub new_timeline_id: TimelineId, #[serde(default)] #[serde_as(as = "Option")] pub ancestor_timeline_id: Option, @@ -134,12 +184,11 @@ pub struct TimelineCreateRequest { } #[serde_as] -#[derive(Serialize, Deserialize, Debug, Default)] +#[derive(Serialize, Deserialize, Debug)] #[serde(deny_unknown_fields)] pub struct TenantCreateRequest { - #[serde(default)] - #[serde_as(as = "Option")] - pub new_tenant_id: Option, + #[serde_as(as = "DisplayFromStr")] + pub new_tenant_id: TenantId, #[serde(flatten)] pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it } @@ -174,6 +223,7 @@ pub struct TenantConfig { pub eviction_policy: Option, pub min_resident_size_override: Option, pub evictions_low_residence_duration_metric_threshold: Option, + pub gc_feedback: Option, } #[serde_as] @@ -187,10 +237,10 @@ pub struct StatusResponse { } impl TenantCreateRequest { - pub fn new(new_tenant_id: Option) -> TenantCreateRequest { + pub fn new(new_tenant_id: TenantId) -> TenantCreateRequest { TenantCreateRequest { new_tenant_id, - ..Default::default() + config: TenantConfig::default(), } } } @@ -232,6 +282,7 @@ impl TenantConfigRequest { eviction_policy: None, min_resident_size_override: None, evictions_low_residence_duration_metric_threshold: None, + gc_feedback: None, }; TenantConfigRequest { tenant_id, config } } @@ -834,4 +885,55 @@ mod tests { err ); } + + #[test] + fn tenantstatus_activating_serde() { + let states = [ + TenantState::Activating(ActivatingFrom::Loading), + TenantState::Activating(ActivatingFrom::Attaching), + ]; + let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]"; + + let actual = serde_json::to_string(&states).unwrap(); + + assert_eq!(actual, expected); + + let parsed = serde_json::from_str::>(&actual).unwrap(); + + assert_eq!(states.as_slice(), &parsed); + } + + #[test] + fn tenantstatus_activating_strum() { + // tests added, because we use these for metrics + let examples = [ + (line!(), TenantState::Loading, "Loading"), + (line!(), TenantState::Attaching, "Attaching"), + ( + line!(), + TenantState::Activating(ActivatingFrom::Loading), + 
"Activating", + ), + ( + line!(), + TenantState::Activating(ActivatingFrom::Attaching), + "Activating", + ), + (line!(), TenantState::Active, "Active"), + (line!(), TenantState::Stopping, "Stopping"), + ( + line!(), + TenantState::Broken { + reason: "Example".into(), + backtrace: "Looooong backtrace".into(), + }, + "Broken", + ), + ]; + + for (line, rendered, expected) in examples { + let actual: &'static str = rendered.into(); + assert_eq!(actual, expected, "example on {line}"); + } + } } diff --git a/libs/utils/src/completion.rs b/libs/utils/src/completion.rs new file mode 100644 index 0000000000..2cdaee548e --- /dev/null +++ b/libs/utils/src/completion.rs @@ -0,0 +1,33 @@ +use std::sync::Arc; + +use tokio::sync::{mpsc, Mutex}; + +/// While a reference is kept around, the associated [`Barrier::wait`] will wait. +/// +/// Can be cloned, moved and kept around in futures as "guard objects". +#[derive(Clone)] +pub struct Completion(mpsc::Sender<()>); + +/// Barrier will wait until all clones of [`Completion`] have been dropped. +#[derive(Clone)] +pub struct Barrier(Arc>>); + +impl Barrier { + pub async fn wait(self) { + self.0.lock().await.recv().await; + } + + pub async fn maybe_wait(barrier: Option) { + if let Some(b) = barrier { + b.wait().await + } + } +} + +/// Create new Guard and Barrier pair. +pub fn channel() -> (Completion, Barrier) { + let (tx, rx) = mpsc::channel::<()>(1); + let rx = Mutex::new(rx); + let rx = Arc::new(rx); + (Completion(tx), Barrier(rx)) +} diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index 4bfb5bf994..7cb96d9094 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -1,5 +1,5 @@ use crate::auth::{Claims, JwtAuth}; -use crate::http::error; +use crate::http::error::{api_error_handler, route_error_handler, ApiError}; use anyhow::{anyhow, Context}; use hyper::header::{HeaderName, AUTHORIZATION}; use hyper::http::HeaderValue; @@ -16,8 +16,6 @@ use std::future::Future; use std::net::TcpListener; use std::str::FromStr; -use super::error::ApiError; - static SERVE_METRICS_COUNT: Lazy = Lazy::new(|| { register_int_counter!( "libmetrics_metric_handler_requests_total", @@ -35,8 +33,18 @@ struct RequestId(String); /// Adds a tracing info_span! instrumentation around the handler events, /// logs the request start and end events for non-GET requests and non-200 responses. /// +/// Usage: Replace `my_handler` with `|r| request_span(r, my_handler)` +/// /// Use this to distinguish between logs of different HTTP requests: every request handler wrapped -/// in this type will get request info logged in the wrapping span, including the unique request ID. +/// with this will get request info logged in the wrapping span, including the unique request ID. +/// +/// This also handles errors, logging them and converting them to an HTTP error response. +/// +/// NB: If the client disconnects, Hyper will drop the Future, without polling it to +/// completion. In other words, the handler must be async cancellation safe! request_span +/// prints a warning to the log when that happens, so that you have some trace of it in +/// the log. +/// /// /// There could be other ways to implement similar functionality: /// @@ -54,60 +62,56 @@ struct RequestId(String); /// tries to achive with its `.instrument` used in the current approach. /// /// If needed, a declarative macro to substitute the |r| ... closure boilerplate could be introduced. 
-pub struct RequestSpan(pub H) +pub async fn request_span(request: Request, handler: H) -> R::Output where - E: Into> + 'static, - R: Future, E>> + Send + 'static, - H: Fn(Request) -> R + Send + Sync + 'static; - -impl RequestSpan -where - E: Into> + 'static, - R: Future, E>> + Send + 'static, - H: Fn(Request) -> R + Send + Sync + 'static, + R: Future, ApiError>> + Send + 'static, + H: FnOnce(Request) -> R + Send + Sync + 'static, { - /// Creates a tracing span around inner request handler and executes the request handler in the contex of that span. - /// Use as `|r| RequestSpan(my_handler).handle(r)` instead of `my_handler` as the request handler to get the span enabled. - pub async fn handle(self, request: Request) -> Result, E> { - let request_id = request.context::().unwrap_or_default().0; - let method = request.method(); - let path = request.uri().path(); - let request_span = info_span!("request", %method, %path, %request_id); + let request_id = request.context::().unwrap_or_default().0; + let method = request.method(); + let path = request.uri().path(); + let request_span = info_span!("request", %method, %path, %request_id); - let log_quietly = method == Method::GET; - async move { - let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding(); - if log_quietly { - debug!("Handling request"); - } else { - info!("Handling request"); - } - - // Note that we reuse `error::handler` here and not returning and error at all, - // yet cannot use `!` directly in the method signature due to `routerify::RouterBuilder` limitation. - // Usage of the error handler also means that we expect only the `ApiError` errors to be raised in this call. - // - // Panics are not handled separately, there's a `tracing_panic_hook` from another module to do that globally. - let res = (self.0)(request).await; - - cancellation_guard.disarm(); - - match res { - Ok(response) => { - let response_status = response.status(); - if log_quietly && response_status.is_success() { - debug!("Request handled, status: {response_status}"); - } else { - info!("Request handled, status: {response_status}"); - } - Ok(response) - } - Err(e) => Ok(error::handler(e.into()).await), - } + let log_quietly = method == Method::GET; + async move { + let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding(); + if log_quietly { + debug!("Handling request"); + } else { + info!("Handling request"); + } + + // No special handling for panics here. There's a `tracing_panic_hook` from another + // module to do that globally. + let res = handler(request).await; + + cancellation_guard.disarm(); + + // Log the result if needed. + // + // We also convert any errors into an Ok response with HTTP error code here. + // `make_router` sets a last-resort error handler that would do the same, but + // we prefer to do it here, before we exit the request span, so that the error + // is still logged with the span. + // + // (Because we convert errors to Ok response, we never actually return an error, + // and we could declare the function to return the never type (`!`). However, + // using `routerify::RouterBuilder` requires a proper error type.) 
+ match res { + Ok(response) => { + let response_status = response.status(); + if log_quietly && response_status.is_success() { + debug!("Request handled, status: {response_status}"); + } else { + info!("Request handled, status: {response_status}"); + } + Ok(response) + } + Err(err) => Ok(api_error_handler(err)), } - .instrument(request_span) - .await } + .instrument(request_span) + .await } /// Drop guard to WARN in case the request was dropped before completion. @@ -207,10 +211,8 @@ pub fn make_router() -> RouterBuilder { .middleware(Middleware::post_with_info( add_request_id_header_to_response, )) - .get("/metrics", |r| { - RequestSpan(prometheus_metrics_handler).handle(r) - }) - .err_handler(error::handler) + .get("/metrics", |r| request_span(r, prometheus_metrics_handler)) + .err_handler(route_error_handler) } pub fn attach_openapi_ui( @@ -220,12 +222,14 @@ pub fn attach_openapi_ui( ui_mount_path: &'static str, ) -> RouterBuilder { router_builder - .get(spec_mount_path, move |r| { - RequestSpan(move |_| async move { Ok(Response::builder().body(Body::from(spec)).unwrap()) }) - .handle(r) - }) - .get(ui_mount_path, move |r| RequestSpan( move |_| async move { - Ok(Response::builder().body(Body::from(format!(r#" + .get(spec_mount_path, + move |r| request_span(r, move |_| async move { + Ok(Response::builder().body(Body::from(spec)).unwrap()) + }) + ) + .get(ui_mount_path, + move |r| request_span(r, move |_| async move { + Ok(Response::builder().body(Body::from(format!(r#" @@ -255,7 +259,8 @@ pub fn attach_openapi_ui( "#, spec_mount_path))).unwrap()) - }).handle(r)) + }) + ) } fn parse_token(header_value: &str) -> Result<&str, ApiError> { diff --git a/libs/utils/src/http/error.rs b/libs/utils/src/http/error.rs index 3c6023eb80..4eff16b6a3 100644 --- a/libs/utils/src/http/error.rs +++ b/libs/utils/src/http/error.rs @@ -83,13 +83,24 @@ impl HttpErrorBody { } } -pub async fn handler(err: routerify::RouteError) -> Response { - let api_error = err - .downcast::() - .expect("handler should always return api error"); +pub async fn route_error_handler(err: routerify::RouteError) -> Response { + match err.downcast::() { + Ok(api_error) => api_error_handler(*api_error), + Err(other_error) => { + // We expect all the request handlers to return an ApiError, so this should + // not be reached. But just in case. + error!("Error processing HTTP request: {other_error:?}"); + HttpErrorBody::response_from_msg_and_status( + other_error.to_string(), + StatusCode::INTERNAL_SERVER_ERROR, + ) + } + } +} +pub fn api_error_handler(api_error: ApiError) -> Response { // Print a stack trace for Internal Server errors - if let ApiError::InternalServerError(_) = api_error.as_ref() { + if let ApiError::InternalServerError(_) = api_error { error!("Error processing HTTP request: {api_error:?}"); } else { error!("Error processing HTTP request: {api_error:#}"); diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 4e4f79ab6b..69d3a1b9f2 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -60,6 +60,9 @@ pub mod tracing_span_assert; pub mod rate_limit; +/// Simple once-barrier and a guard which keeps barrier awaiting. 
+pub mod completion; + mod failpoint_macro_helpers { /// use with fail::cfg("$name", "return(2000)") diff --git a/pageserver/ctl/Cargo.toml b/pageserver/ctl/Cargo.toml new file mode 100644 index 0000000000..89e0d0486e --- /dev/null +++ b/pageserver/ctl/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "pagectl" +version = "0.1.0" +edition.workspace = true +license.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow.workspace = true +bytes.workspace = true +clap = { workspace = true, features = ["string"] } +git-version.workspace = true +pageserver = { path = ".." } +postgres_ffi.workspace = true +utils.workspace = true +svg_fmt.workspace = true +workspace_hack.workspace = true diff --git a/pageserver/src/bin/draw_timeline_dir.rs b/pageserver/ctl/src/draw_timeline_dir.rs similarity index 97% rename from pageserver/src/bin/draw_timeline_dir.rs rename to pageserver/ctl/src/draw_timeline_dir.rs index da13ee452c..bfde5ba054 100644 --- a/pageserver/src/bin/draw_timeline_dir.rs +++ b/pageserver/ctl/src/draw_timeline_dir.rs @@ -12,7 +12,7 @@ //! Example use: //! ``` //! $ ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \ -//! $ grep "__" | cargo run --release --bin draw_timeline_dir > out.svg +//! $ grep "__" | cargo run --release --bin pagectl draw-timeline-dir > out.svg //! $ firefox out.svg //! ``` //! @@ -62,7 +62,7 @@ fn parse_filename(name: &str) -> (Range, Range) { (keys, lsns) } -fn main() -> Result<()> { +pub fn main() -> Result<()> { // Parse layer filenames from stdin let mut ranges: Vec<(Range, Range)> = vec![]; let stdin = io::stdin(); diff --git a/pageserver/src/bin/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs similarity index 92% rename from pageserver/src/bin/layer_map_analyzer.rs rename to pageserver/ctl/src/layer_map_analyzer.rs index e740879458..f2ced6154f 100644 --- a/pageserver/src/bin/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -6,7 +6,7 @@ use anyhow::Result; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::ops::Range; -use std::{env, fs, path::Path, path::PathBuf, str, str::FromStr}; +use std::{fs, path::Path, str}; use pageserver::page_cache::PAGE_SZ; use pageserver::repository::{Key, KEY_SIZE}; @@ -18,12 +18,14 @@ use pageserver::virtual_file::VirtualFile; use utils::{bin_ser::BeSer, lsn::Lsn}; +use crate::AnalyzeLayerMapCmd; + const MIN_HOLE_LENGTH: i128 = (128 * 1024 * 1024 / PAGE_SZ) as i128; const DEFAULT_MAX_HOLES: usize = 10; /// Wrapper for key range to provide reverse ordering by range length for BinaryHeap #[derive(PartialEq, Eq)] -struct Hole(Range); +pub struct Hole(Range); impl Ord for Hole { fn cmp(&self, other: &Self) -> Ordering { @@ -39,11 +41,11 @@ impl PartialOrd for Hole { } } -struct LayerFile { - key_range: Range, - lsn_range: Range, - is_delta: bool, - holes: Vec, +pub(crate) struct LayerFile { + pub key_range: Range, + pub lsn_range: Range, + pub is_delta: bool, + pub holes: Vec, } impl LayerFile { @@ -67,7 +69,7 @@ impl LayerFile { } } -fn parse_filename(name: &str) -> Option { +pub(crate) fn parse_filename(name: &str) -> Option { let split: Vec<&str> = name.split("__").collect(); if split.len() != 2 { return None; @@ -127,18 +129,9 @@ fn get_holes(path: &Path, max_holes: usize) -> Result> { Ok(holes) } -fn main() -> Result<()> { - let args: Vec = env::args().collect(); - if args.len() < 2 { - println!("Usage: layer_map_analyzer PAGESERVER_DATA_DIR 
[MAX_HOLES]"); - return Ok(()); - } - let storage_path = PathBuf::from_str(&args[1])?; - let max_holes = if args.len() > 2 { - args[2].parse::().unwrap() - } else { - DEFAULT_MAX_HOLES - }; +pub(crate) fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> { + let storage_path = &cmd.path; + let max_holes = cmd.max_holes.unwrap_or(DEFAULT_MAX_HOLES); // Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree. pageserver::virtual_file::init(10); diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs new file mode 100644 index 0000000000..d77cf0908c --- /dev/null +++ b/pageserver/ctl/src/layers.rs @@ -0,0 +1,169 @@ +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use clap::Subcommand; +use pageserver::tenant::block_io::BlockCursor; +use pageserver::tenant::disk_btree::DiskBtreeReader; +use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary}; +use pageserver::{page_cache, virtual_file}; +use pageserver::{ + repository::{Key, KEY_SIZE}, + tenant::{ + block_io::FileBlockReader, disk_btree::VisitDirection, + storage_layer::delta_layer::DELTA_KEY_SIZE, + }, + virtual_file::VirtualFile, +}; +use std::fs; +use utils::bin_ser::BeSer; + +use crate::layer_map_analyzer::parse_filename; + +#[derive(Subcommand)] +pub(crate) enum LayerCmd { + /// List all tenants and timelines under the pageserver path + /// + /// Example: `cargo run --bin pagectl layer list .neon/` + List { path: PathBuf }, + /// List all layers of a given tenant and timeline + /// + /// Example: `cargo run --bin pagectl layer list .neon/` + ListLayer { + path: PathBuf, + tenant: String, + timeline: String, + }, + /// Dump all information of a layer file + DumpLayer { + path: PathBuf, + tenant: String, + timeline: String, + /// The id from list-layer command + id: usize, + }, +} + +fn read_delta_file(path: impl AsRef) -> Result<()> { + use pageserver::tenant::blob_io::BlobCursor; + use pageserver::tenant::block_io::BlockReader; + + let path = path.as_ref(); + virtual_file::init(10); + page_cache::init(100); + let file = FileBlockReader::new(VirtualFile::open(path)?); + let summary_blk = file.read_blk(0)?; + let actual_summary = Summary::des_prefix(summary_blk.as_ref())?; + let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new( + actual_summary.index_start_blk, + actual_summary.index_root_blk, + &file, + ); + // TODO(chi): dedup w/ `delta_layer.rs` by exposing the API. + let mut all = vec![]; + tree_reader.visit( + &[0u8; DELTA_KEY_SIZE], + VisitDirection::Forwards, + |key, value_offset| { + let curr = Key::from_slice(&key[..KEY_SIZE]); + all.push((curr, BlobRef(value_offset))); + true + }, + )?; + let mut cursor = BlockCursor::new(&file); + for (k, v) in all { + let value = cursor.read_blob(v.pos())?; + println!("key:{} value_len:{}", k, value.len()); + } + // TODO(chi): special handling for last key? + Ok(()) +} + +pub(crate) fn main(cmd: &LayerCmd) -> Result<()> { + match cmd { + LayerCmd::List { path } => { + for tenant in fs::read_dir(path.join("tenants"))? { + let tenant = tenant?; + if !tenant.file_type()?.is_dir() { + continue; + } + println!("tenant {}", tenant.file_name().to_string_lossy()); + for timeline in fs::read_dir(tenant.path().join("timelines"))? 
+                {
+                    let timeline = timeline?;
+                    if !timeline.file_type()?.is_dir() {
+                        continue;
+                    }
+                    println!("- timeline {}", timeline.file_name().to_string_lossy());
+                }
+            }
+        }
+        LayerCmd::ListLayer {
+            path,
+            tenant,
+            timeline,
+        } => {
+            let timeline_path = path
+                .join("tenants")
+                .join(tenant)
+                .join("timelines")
+                .join(timeline);
+            let mut idx = 0;
+            for layer in fs::read_dir(timeline_path)? {
+                let layer = layer?;
+                if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
+                {
+                    println!(
+                        "[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
+                        idx,
+                        layer_file.key_range.start,
+                        layer_file.key_range.end,
+                        layer_file.lsn_range.start,
+                        layer_file.lsn_range.end,
+                        layer_file.is_delta,
+                    );
+                    idx += 1;
+                }
+            }
+        }
+        LayerCmd::DumpLayer {
+            path,
+            tenant,
+            timeline,
+            id,
+        } => {
+            let timeline_path = path
+                .join("tenants")
+                .join(tenant)
+                .join("timelines")
+                .join(timeline);
+            let mut idx = 0;
+            for layer in fs::read_dir(timeline_path)? {
+                let layer = layer?;
+                if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
+                {
+                    if *id == idx {
+                        // TODO(chi): dedup code
+                        println!(
+                            "[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
+                            idx,
+                            layer_file.key_range.start,
+                            layer_file.key_range.end,
+                            layer_file.lsn_range.start,
+                            layer_file.lsn_range.end,
+                            layer_file.is_delta,
+                        );
+
+                        if layer_file.is_delta {
+                            read_delta_file(layer.path())?;
+                        } else {
+                            anyhow::bail!("not supported yet :(");
+                        }
+
+                        break;
+                    }
+                    idx += 1;
+                }
+            }
+        }
+    }
+    Ok(())
+}
diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs
new file mode 100644
index 0000000000..55db9eb7e7
--- /dev/null
+++ b/pageserver/ctl/src/main.rs
@@ -0,0 +1,179 @@
+//! A helper tool to manage pageserver binary files.
+//! Accepts a file as an argument, attempts to parse it in all possible ways,
+//! and prints the interpreted contents.
+//!
+//! A separate `metadata` subcommand allows printing and updating pageserver's metadata file.
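The CLI below is built with clap's derive API; the `#[command(subcommand)]` attribute on the `Layer` variant is what nests `LayerCmd`, so invocations read `pagectl layer list <path>`. A stripped-down, runnable sketch of that nesting (hypothetical `demo` binary, clap 4 derive assumed, not the full pagectl definition):

    use clap::{Parser, Subcommand};
    use std::path::PathBuf;

    #[derive(Parser)]
    struct Cli {
        #[command(subcommand)]
        command: Commands,
    }

    #[derive(Subcommand)]
    enum Commands {
        /// Nested: `demo layer <SUBCOMMAND>`
        #[command(subcommand)]
        Layer(LayerCmd),
    }

    #[derive(Subcommand)]
    enum LayerCmd {
        /// `demo layer list <PATH>`
        List { path: PathBuf },
    }

    fn main() {
        match Cli::parse().command {
            Commands::Layer(LayerCmd::List { path }) => {
                println!("would list tenants under {}", path.display());
            }
        }
    }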
+ +mod draw_timeline_dir; +mod layer_map_analyzer; +mod layers; + +use clap::{Parser, Subcommand}; +use layers::LayerCmd; +use pageserver::{ + context::{DownloadBehavior, RequestContext}, + page_cache, + task_mgr::TaskKind, + tenant::{dump_layerfile_from_path, metadata::TimelineMetadata}, + virtual_file, +}; +use postgres_ffi::ControlFileData; +use std::path::{Path, PathBuf}; +use utils::{lsn::Lsn, project_git_version}; + +project_git_version!(GIT_VERSION); + +#[derive(Parser)] +#[command( + version = GIT_VERSION, + about = "Neon Pageserver binutils", + long_about = "Reads pageserver (and related) binary files management utility" +)] +#[command(propagate_version = true)] +struct CliOpts { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + Metadata(MetadataCmd), + PrintLayerFile(PrintLayerFileCmd), + DrawTimeline {}, + AnalyzeLayerMap(AnalyzeLayerMapCmd), + #[command(subcommand)] + Layer(LayerCmd), +} + +/// Read and update pageserver metadata file +#[derive(Parser)] +struct MetadataCmd { + /// Input metadata file path + metadata_path: PathBuf, + /// Replace disk consistent Lsn + disk_consistent_lsn: Option, + /// Replace previous record Lsn + prev_record_lsn: Option, + /// Replace latest gc cuttoff + latest_gc_cuttoff: Option, +} + +#[derive(Parser)] +struct PrintLayerFileCmd { + /// Pageserver data path + path: PathBuf, +} + +#[derive(Parser)] +struct AnalyzeLayerMapCmd { + /// Pageserver data path + path: PathBuf, + /// Max holes + max_holes: Option, +} + +fn main() -> anyhow::Result<()> { + let cli = CliOpts::parse(); + + match cli.command { + Commands::Layer(cmd) => { + layers::main(&cmd)?; + } + Commands::Metadata(cmd) => { + handle_metadata(&cmd)?; + } + Commands::DrawTimeline {} => { + draw_timeline_dir::main()?; + } + Commands::AnalyzeLayerMap(cmd) => { + layer_map_analyzer::main(&cmd)?; + } + Commands::PrintLayerFile(cmd) => { + if let Err(e) = read_pg_control_file(&cmd.path) { + println!( + "Failed to read input file as a pg control one: {e:#}\n\ + Attempting to read it as layer file" + ); + print_layerfile(&cmd.path)?; + } + } + }; + Ok(()) +} + +fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> { + let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?; + println!("{control_file:?}"); + let control_file_initdb = Lsn(control_file.checkPoint); + println!( + "pg_initdb_lsn: {}, aligned: {}", + control_file_initdb, + control_file_initdb.align() + ); + Ok(()) +} + +fn print_layerfile(path: &Path) -> anyhow::Result<()> { + // Basic initialization of things that don't change after startup + virtual_file::init(10); + page_cache::init(100); + let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); + dump_layerfile_from_path(path, true, &ctx) +} + +fn handle_metadata( + MetadataCmd { + metadata_path: path, + disk_consistent_lsn, + prev_record_lsn, + latest_gc_cuttoff, + }: &MetadataCmd, +) -> Result<(), anyhow::Error> { + let metadata_bytes = std::fs::read(path)?; + let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?; + println!("Current metadata:\n{meta:?}"); + let mut update_meta = false; + if let Some(disk_consistent_lsn) = disk_consistent_lsn { + meta = TimelineMetadata::new( + *disk_consistent_lsn, + meta.prev_record_lsn(), + meta.ancestor_timeline(), + meta.ancestor_lsn(), + meta.latest_gc_cutoff_lsn(), + meta.initdb_lsn(), + meta.pg_version(), + ); + update_meta = true; + } + if let Some(prev_record_lsn) = prev_record_lsn { + meta = TimelineMetadata::new( + 
meta.disk_consistent_lsn(), + Some(*prev_record_lsn), + meta.ancestor_timeline(), + meta.ancestor_lsn(), + meta.latest_gc_cutoff_lsn(), + meta.initdb_lsn(), + meta.pg_version(), + ); + update_meta = true; + } + if let Some(latest_gc_cuttoff) = latest_gc_cuttoff { + meta = TimelineMetadata::new( + meta.disk_consistent_lsn(), + meta.prev_record_lsn(), + meta.ancestor_timeline(), + meta.ancestor_lsn(), + *latest_gc_cuttoff, + meta.initdb_lsn(), + meta.pg_version(), + ); + update_meta = true; + } + + if update_meta { + let metadata_bytes = meta.to_bytes()?; + std::fs::write(path, metadata_bytes)?; + } + + Ok(()) +} diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 9da3a519a2..1fa5e4ab3b 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -275,6 +275,7 @@ fn start_pageserver( let pageserver_listener = tcp_listener::bind(pg_addr)?; // Launch broker client + // The storage_broker::connect call needs to happen inside a tokio runtime thread. let broker_client = WALRECEIVER_RUNTIME .block_on(async { // Note: we do not attempt connecting here (but validate endpoints sanity). @@ -334,13 +335,119 @@ fn start_pageserver( // Set up remote storage client let remote_storage = create_remote_storage_client(conf)?; + // Startup staging or optimizing: + // + // We want to minimize downtime for `page_service` connections, and trying not to overload + // BACKGROUND_RUNTIME by doing initial compactions and initial logical sizes at the same time. + // + // init_done_rx will notify when all initial load operations have completed. + // + // background_jobs_can_start (same name used to hold off background jobs from starting at + // consumer side) will be dropped once we can start the background jobs. Currently it is behind + // completing all initial logical size calculations (init_logical_size_done_rx) and a timeout + // (background_task_maximum_delay). + let (init_done_tx, init_done_rx) = utils::completion::channel(); + + let (init_logical_size_done_tx, init_logical_size_done_rx) = utils::completion::channel(); + + let (background_jobs_can_start, background_jobs_barrier) = utils::completion::channel(); + + let order = pageserver::InitializationOrder { + initial_tenant_load: Some(init_done_tx), + initial_logical_size_can_start: init_done_rx.clone(), + initial_logical_size_attempt: init_logical_size_done_tx, + background_jobs_can_start: background_jobs_barrier.clone(), + }; + // Scan the local 'tenants/' directory and start loading the tenants + let init_started_at = std::time::Instant::now(); + let shutdown_pageserver = tokio_util::sync::CancellationToken::new(); + BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr( conf, broker_client.clone(), remote_storage.clone(), + order, ))?; + BACKGROUND_RUNTIME.spawn({ + let init_done_rx = init_done_rx; + let shutdown_pageserver = shutdown_pageserver.clone(); + let drive_init = async move { + // NOTE: unlike many futures in pageserver, this one is cancellation-safe + let guard = scopeguard::guard_on_success((), |_| tracing::info!("Cancelled before initial load completed")); + + init_done_rx.wait().await; + // initial logical sizes can now start, as they were waiting on init_done_rx. 
+ + scopeguard::ScopeGuard::into_inner(guard); + + let init_done = std::time::Instant::now(); + let elapsed = init_done - init_started_at; + + tracing::info!( + elapsed_millis = elapsed.as_millis(), + "Initial load completed" + ); + + let mut init_sizes_done = std::pin::pin!(init_logical_size_done_rx.wait()); + + let timeout = conf.background_task_maximum_delay; + + let guard = scopeguard::guard_on_success((), |_| tracing::info!("Cancelled before initial logical sizes completed")); + + let init_sizes_done = tokio::select! { + _ = &mut init_sizes_done => { + let now = std::time::Instant::now(); + tracing::info!( + from_init_done_millis = (now - init_done).as_millis(), + from_init_millis = (now - init_started_at).as_millis(), + "Initial logical sizes completed" + ); + None + } + _ = tokio::time::sleep(timeout) => { + tracing::info!( + timeout_millis = timeout.as_millis(), + "Initial logical size timeout elapsed; starting background jobs" + ); + Some(init_sizes_done) + } + }; + + scopeguard::ScopeGuard::into_inner(guard); + + // allow background jobs to start + drop(background_jobs_can_start); + + if let Some(init_sizes_done) = init_sizes_done { + // ending up here is not a bug; at the latest logical sizes will be queried by + // consumption metrics. + let guard = scopeguard::guard_on_success((), |_| tracing::info!("Cancelled before initial logical sizes completed")); + init_sizes_done.await; + + scopeguard::ScopeGuard::into_inner(guard); + + let now = std::time::Instant::now(); + tracing::info!( + from_init_done_millis = (now - init_done).as_millis(), + from_init_millis = (now - init_started_at).as_millis(), + "Initial logical sizes completed after timeout (background jobs already started)" + ); + + } + }; + + async move { + let mut drive_init = std::pin::pin!(drive_init); + // just race these tasks + tokio::select! { + _ = shutdown_pageserver.cancelled() => {}, + _ = &mut drive_init => {}, + } + } + }); + // shared state between the disk-usage backed eviction background task and the http endpoint // that allows triggering disk-usage based eviction manually. note that the http endpoint // is still accessible even if background task is not configured as long as remote storage has @@ -352,6 +459,7 @@ fn start_pageserver( conf, remote_storage.clone(), disk_usage_eviction_state.clone(), + background_jobs_barrier.clone(), )?; } @@ -389,6 +497,7 @@ fn start_pageserver( ); if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint { + let background_jobs_barrier = background_jobs_barrier; let metrics_ctx = RequestContext::todo_child( TaskKind::MetricsCollection, // This task itself shouldn't download anything. @@ -404,6 +513,18 @@ fn start_pageserver( "consumption metrics collection", true, async move { + // first wait until background jobs are cleared to launch. + // + // this is because we only process active tenants and timelines, and the + // Timeline::get_current_logical_size will spawn the logical size calculation, + // which will not be rate-limited. + let cancel = task_mgr::shutdown_token(); + + tokio::select! { + _ = cancel.cancelled() => { return Ok(()); }, + _ = background_jobs_barrier.wait() => {} + }; + pageserver::consumption_metrics::collect_metrics( metric_collection_endpoint, conf.metric_collection_interval, @@ -452,6 +573,8 @@ fn start_pageserver( ); } + let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard()); + // All started up! Now just sit and wait for shutdown signal. 
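The select-with-timeout above is the heart of the startup ordering: background jobs are released either when initial logical size calculations finish or when `background_task_maximum_delay` elapses, whichever comes first. A condensed, self-contained sketch of the same pattern (standalone toy example, not the actual pageserver wiring; assumes the `utils::completion` module introduced earlier):

    use std::time::Duration;
    use utils::completion;

    #[tokio::main]
    async fn main() {
        let (background_jobs_can_start, barrier) = completion::channel();

        // A background job holds a Barrier clone and runs only once the
        // Completion side has been dropped.
        let job = tokio::spawn({
            let barrier = barrier.clone();
            async move {
                barrier.wait().await;
                println!("background job starting");
            }
        });

        // Race the init work against a maximum delay.
        let init_work = tokio::time::sleep(Duration::from_millis(50)); // stand-in for init
        tokio::select! {
            _ = init_work => println!("init finished first"),
            _ = tokio::time::sleep(Duration::from_secs(10)) => println!("timeout elapsed first"),
        }

        // Either way, release the background jobs now.
        drop(background_jobs_can_start);
        job.await.unwrap();
    }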
ShutdownSignals::handle(|signal| match signal { Signal::Quit => { @@ -467,6 +590,11 @@ fn start_pageserver( "Got {}. Terminating gracefully in fast shutdown mode", signal.name() ); + + // This cancels the `shutdown_pageserver` cancellation tree. + // Right now that tree doesn't reach very far, and `task_mgr` is used instead. + // The plan is to change that over time. + shutdown_pageserver.take(); BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0)); unreachable!() } diff --git a/pageserver/src/bin/pageserver_binutils.rs b/pageserver/src/bin/pageserver_binutils.rs deleted file mode 100644 index 5e2d39d685..0000000000 --- a/pageserver/src/bin/pageserver_binutils.rs +++ /dev/null @@ -1,174 +0,0 @@ -//! A helper tool to manage pageserver binary files. -//! Accepts a file as an argument, attempts to parse it with all ways possible -//! and prints its interpreted context. -//! -//! Separate, `metadata` subcommand allows to print and update pageserver's metadata file. -use std::{ - path::{Path, PathBuf}, - str::FromStr, -}; - -use anyhow::Context; -use clap::{value_parser, Arg, Command}; - -use pageserver::{ - context::{DownloadBehavior, RequestContext}, - page_cache, - task_mgr::TaskKind, - tenant::{dump_layerfile_from_path, metadata::TimelineMetadata}, - virtual_file, -}; -use postgres_ffi::ControlFileData; -use utils::{lsn::Lsn, project_git_version}; - -project_git_version!(GIT_VERSION); - -const METADATA_SUBCOMMAND: &str = "metadata"; - -fn main() -> anyhow::Result<()> { - let arg_matches = cli().get_matches(); - - match arg_matches.subcommand() { - Some((subcommand_name, subcommand_matches)) => { - let path = subcommand_matches - .get_one::("metadata_path") - .context("'metadata_path' argument is missing")? - .to_path_buf(); - anyhow::ensure!( - subcommand_name == METADATA_SUBCOMMAND, - "Unknown subcommand {subcommand_name}" - ); - handle_metadata(&path, subcommand_matches)?; - } - None => { - let path = arg_matches - .get_one::("path") - .context("'path' argument is missing")? 
- .to_path_buf(); - println!( - "No subcommand specified, attempting to guess the format for file {}", - path.display() - ); - if let Err(e) = read_pg_control_file(&path) { - println!( - "Failed to read input file as a pg control one: {e:#}\n\ - Attempting to read it as layer file" - ); - print_layerfile(&path)?; - } - } - }; - Ok(()) -} - -fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> { - let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?; - println!("{control_file:?}"); - let control_file_initdb = Lsn(control_file.checkPoint); - println!( - "pg_initdb_lsn: {}, aligned: {}", - control_file_initdb, - control_file_initdb.align() - ); - Ok(()) -} - -fn print_layerfile(path: &Path) -> anyhow::Result<()> { - // Basic initialization of things that don't change after startup - virtual_file::init(10); - page_cache::init(100); - let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); - dump_layerfile_from_path(path, true, &ctx) -} - -fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), anyhow::Error> { - let metadata_bytes = std::fs::read(path)?; - let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?; - println!("Current metadata:\n{meta:?}"); - let mut update_meta = false; - if let Some(disk_consistent_lsn) = arg_matches.get_one::("disk_consistent_lsn") { - meta = TimelineMetadata::new( - Lsn::from_str(disk_consistent_lsn)?, - meta.prev_record_lsn(), - meta.ancestor_timeline(), - meta.ancestor_lsn(), - meta.latest_gc_cutoff_lsn(), - meta.initdb_lsn(), - meta.pg_version(), - ); - update_meta = true; - } - if let Some(prev_record_lsn) = arg_matches.get_one::("prev_record_lsn") { - meta = TimelineMetadata::new( - meta.disk_consistent_lsn(), - Some(Lsn::from_str(prev_record_lsn)?), - meta.ancestor_timeline(), - meta.ancestor_lsn(), - meta.latest_gc_cutoff_lsn(), - meta.initdb_lsn(), - meta.pg_version(), - ); - update_meta = true; - } - if let Some(latest_gc_cuttoff) = arg_matches.get_one::("latest_gc_cuttoff") { - meta = TimelineMetadata::new( - meta.disk_consistent_lsn(), - meta.prev_record_lsn(), - meta.ancestor_timeline(), - meta.ancestor_lsn(), - Lsn::from_str(latest_gc_cuttoff)?, - meta.initdb_lsn(), - meta.pg_version(), - ); - update_meta = true; - } - - if update_meta { - let metadata_bytes = meta.to_bytes()?; - std::fs::write(path, metadata_bytes)?; - } - - Ok(()) -} - -fn cli() -> Command { - Command::new("Neon Pageserver binutils") - .about("Reads pageserver (and related) binary files management utility") - .version(GIT_VERSION) - .arg( - Arg::new("path") - .help("Input file path") - .value_parser(value_parser!(PathBuf)) - .required(false), - ) - .subcommand( - Command::new(METADATA_SUBCOMMAND) - .about("Read and update pageserver metadata file") - .arg( - Arg::new("metadata_path") - .help("Input metadata file path") - .value_parser(value_parser!(PathBuf)) - .required(false), - ) - .arg( - Arg::new("disk_consistent_lsn") - .long("disk_consistent_lsn") - .help("Replace disk consistent Lsn"), - ) - .arg( - Arg::new("prev_record_lsn") - .long("prev_record_lsn") - .help("Replace previous record Lsn"), - ) - .arg( - Arg::new("latest_gc_cuttoff") - .long("latest_gc_cuttoff") - .help("Replace latest gc cuttoff"), - ), - ) -} - -#[test] -fn verify_cli() { - cli().debug_assert(); -} diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 88a7f15b21..17e6e3fb2a 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -63,6 +63,7 @@ pub mod defaults { pub const 
DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "1 hour"; pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option = None; pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min"; + pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s"; /// /// Default built-in configuration file. @@ -91,9 +92,10 @@ pub mod defaults { #cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}' #synthetic_size_calculation_interval = '{DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL}' - #disk_usage_based_eviction = {{ max_usage_pct = .., min_avail_bytes = .., period = "10s"}} +#background_task_maximum_delay = '{DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY}' + # [tenant_config] #checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes #checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT} @@ -108,7 +110,7 @@ pub mod defaults { #min_resident_size_override = .. # in bytes #evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}' - +#gc_feedback = false # [remote_storage] "### @@ -187,6 +189,15 @@ pub struct PageServerConf { pub test_remote_failures: u64, pub ondemand_download_behavior_treat_error_as_warn: bool, + + /// How long will background tasks be delayed at most after initial load of tenants. + /// + /// Our largest initialization completions are in the range of 100-200s, so perhaps 10s works + /// as we now isolate initial loading, initial logical size calculation and background tasks. + /// Smaller nodes will have background tasks "not running" for this long unless every timeline + /// has it's initial logical size calculated. Not running background tasks for some seconds is + /// not terrible. + pub background_task_maximum_delay: Duration, } /// We do not want to store this in a PageServerConf because the latter may be logged @@ -259,6 +270,8 @@ struct PageServerConfigBuilder { test_remote_failures: BuilderValue, ondemand_download_behavior_treat_error_as_warn: BuilderValue, + + background_task_maximum_delay: BuilderValue, } impl Default for PageServerConfigBuilder { @@ -316,6 +329,11 @@ impl Default for PageServerConfigBuilder { test_remote_failures: Set(0), ondemand_download_behavior_treat_error_as_warn: Set(false), + + background_task_maximum_delay: Set(humantime::parse_duration( + DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY, + ) + .unwrap()), } } } @@ -440,6 +458,10 @@ impl PageServerConfigBuilder { BuilderValue::Set(ondemand_download_behavior_treat_error_as_warn); } + pub fn background_task_maximum_delay(&mut self, delay: Duration) { + self.background_task_maximum_delay = BuilderValue::Set(delay); + } + pub fn build(self) -> anyhow::Result { let concurrent_tenant_size_logical_size_queries = self .concurrent_tenant_size_logical_size_queries @@ -522,6 +544,9 @@ impl PageServerConfigBuilder { .ok_or(anyhow!( "missing ondemand_download_behavior_treat_error_as_warn" ))?, + background_task_maximum_delay: self + .background_task_maximum_delay + .ok_or(anyhow!("missing background_task_maximum_delay"))?, }) } } @@ -710,6 +735,7 @@ impl PageServerConf { ) }, "ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?), + "background_task_maximum_delay" => builder.background_task_maximum_delay(parse_toml_duration(key, item)?), _ => bail!("unrecognized pageserver option '{key}'"), } } @@ -828,6 +854,14 @@ impl PageServerConf { )?); } + if let Some(gc_feedback) = item.get("gc_feedback") { + t_conf.gc_feedback = Some( + gc_feedback + .as_bool() + 
.with_context(|| "configure option gc_feedback is not a bool".to_string())?, + ); + } + Ok(t_conf) } @@ -869,6 +903,7 @@ impl PageServerConf { disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, + background_task_maximum_delay: Duration::ZERO, } } } @@ -1028,6 +1063,7 @@ metric_collection_endpoint = 'http://localhost:80/metrics' synthetic_size_calculation_interval = '333 s' log_format = 'json' +background_task_maximum_delay = '334 s' "#; @@ -1086,6 +1122,9 @@ log_format = 'json' disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, + background_task_maximum_delay: humantime::parse_duration( + defaults::DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY + )?, }, "Correct defaults should be used when no config values are provided" ); @@ -1140,6 +1179,7 @@ log_format = 'json' disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, + background_task_maximum_delay: Duration::from_secs(334), }, "Should be able to parse all basic config values correctly" ); diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index e826d28e6d..f53b7736ab 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -88,6 +88,7 @@ use crate::task_mgr::TaskKind; // The main structure of this module, see module-level comment. +#[derive(Clone, Debug)] pub struct RequestContext { task_kind: TaskKind, download_behavior: DownloadBehavior, @@ -95,7 +96,7 @@ pub struct RequestContext { /// Desired behavior if the operation requires an on-demand download /// to proceed. -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum DownloadBehavior { /// Download the layer file. It can take a while. Download, diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 9e5f644759..7869d019b1 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -54,6 +54,7 @@ use serde::{Deserialize, Serialize}; use tokio::time::Instant; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, instrument, warn, Instrument}; +use utils::completion; use utils::serde_percent::Percent; use crate::{ @@ -82,6 +83,7 @@ pub fn launch_disk_usage_global_eviction_task( conf: &'static PageServerConf, storage: GenericRemoteStorage, state: Arc, + background_jobs_barrier: completion::Barrier, ) -> anyhow::Result<()> { let Some(task_config) = &conf.disk_usage_based_eviction else { info!("disk usage based eviction task not configured"); @@ -98,14 +100,16 @@ pub fn launch_disk_usage_global_eviction_task( "disk usage based eviction", false, async move { - disk_usage_eviction_task( - &state, - task_config, - storage, - &conf.tenants_path(), - task_mgr::shutdown_token(), - ) - .await; + let cancel = task_mgr::shutdown_token(); + + // wait until initial load is complete, because we cannot evict from loading tenants. + tokio::select! 
{ + _ = cancel.cancelled() => { return Ok(()); }, + _ = background_jobs_barrier.wait() => { } + }; + + disk_usage_eviction_task(&state, task_config, storage, &conf.tenants_path(), cancel) + .await; info!("disk usage based eviction task finishing"); Ok(()) }, diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index e23d3f3a20..0d912c95e0 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -678,6 +678,8 @@ paths: application/json: schema: type: object + required: + - new_timeline_id properties: new_timeline_id: type: string @@ -936,6 +938,8 @@ components: allOf: - $ref: '#/components/schemas/TenantConfig' - type: object + required: + - new_tenant_id properties: new_tenant_id: type: string diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 2e9f230ace..280e5327fc 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1,3 +1,6 @@ +//! +//! Management HTTP API +//! use std::collections::HashMap; use std::sync::Arc; @@ -11,7 +14,7 @@ use storage_broker::BrokerClientChannel; use tenant_size_model::{SizeResult, StorageModel}; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::http::endpoint::RequestSpan; +use utils::http::endpoint::request_span; use utils::http::json::json_request_or_empty_body; use utils::http::request::{get_request_param, must_get_query_param, parse_query_param}; @@ -25,7 +28,9 @@ use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL}; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::task_mgr::TaskKind; use crate::tenant::config::TenantConfOpt; -use crate::tenant::mgr::{TenantMapInsertError, TenantStateError}; +use crate::tenant::mgr::{ + GetTenantError, SetNewTenantConfigError, TenantMapInsertError, TenantStateError, +}; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::LayerAccessStatsReset; use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline}; @@ -44,7 +49,6 @@ use utils::{ }; // Imports only used for testing APIs -#[cfg(feature = "testing")] use super::models::ConfigureFailpointsRequest; struct State { @@ -144,6 +148,36 @@ impl From for ApiError { } } +impl From for ApiError { + fn from(tse: GetTenantError) -> ApiError { + match tse { + GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)), + e @ GetTenantError::NotActive(_) => { + // Why is this not `ApiError::NotFound`? + // Because we must be careful to never return 404 for a tenant if it does + // in fact exist locally. If we did, the caller could draw the conclusion + // that it can attach the tenant to another PS and we'd be in split-brain. + // + // (We can produce this variant only in `mgr::get_tenant(..., active=true)` calls). 
+ ApiError::InternalServerError(anyhow::Error::new(e)) + } + } + } +} + +impl From for ApiError { + fn from(e: SetNewTenantConfigError) -> ApiError { + match e { + SetNewTenantConfigError::GetTenant(tid) => { + ApiError::NotFound(anyhow!("tenant {}", tid)) + } + e @ SetNewTenantConfigError::Persist(_) => { + ApiError::InternalServerError(anyhow::Error::new(e)) + } + } + } +} + impl From for ApiError { fn from(value: crate::tenant::DeleteTimelineError) -> Self { use crate::tenant::DeleteTimelineError::*; @@ -163,7 +197,7 @@ impl From for ApiError { match value { // Report Precondition failed so client can distinguish between // "tenant is missing" case from "timeline is missing" - Tenant(TenantStateError::NotFound(..)) => { + Tenant(GetTenantError::NotFound(..)) => { ApiError::PreconditionFailed("Requested tenant is missing") } Tenant(t) => ApiError::from(t), @@ -258,20 +292,24 @@ async fn build_timeline_info_common( } // healthcheck handler -async fn status_handler(request: Request) -> Result, ApiError> { +async fn status_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { check_permission(&request, None)?; let config = get_config(&request); json_response(StatusCode::OK, StatusResponse { id: config.id }) } -async fn timeline_create_handler(mut request: Request) -> Result, ApiError> { +async fn timeline_create_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let request_data: TimelineCreateRequest = json_request(&mut request).await?; check_permission(&request, Some(tenant_id))?; - let new_timeline_id = request_data - .new_timeline_id - .unwrap_or_else(TimelineId::generate); + let new_timeline_id = request_data.new_timeline_id; let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error); @@ -299,11 +337,14 @@ async fn timeline_create_handler(mut request: Request) -> Result Err(ApiError::InternalServerError(err)), } } - .instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version)) + .instrument(info_span!("timeline_create", tenant = %tenant_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version)) .await } -async fn timeline_list_handler(request: Request) -> Result, ApiError> { +async fn timeline_list_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let include_non_incremental_logical_size: Option = parse_query_param(&request, "include-non-incremental-logical-size")?; @@ -337,7 +378,10 @@ async fn timeline_list_handler(request: Request) -> Result, json_response(StatusCode::OK, response_data) } -async fn timeline_detail_handler(request: Request) -> Result, ApiError> { +async fn timeline_detail_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let include_non_incremental_logical_size: Option = @@ -372,7 +416,10 @@ async fn timeline_detail_handler(request: Request) -> Result) -> Result, ApiError> { +async fn get_lsn_by_timestamp_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = 
parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -396,7 +443,10 @@ async fn get_lsn_by_timestamp_handler(request: Request) -> Result) -> Result, ApiError> { +async fn tenant_attach_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -432,7 +482,10 @@ async fn tenant_attach_handler(mut request: Request) -> Result) -> Result, ApiError> { +async fn timeline_delete_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_id))?; @@ -446,7 +499,10 @@ async fn timeline_delete_handler(request: Request) -> Result) -> Result, ApiError> { +async fn tenant_detach_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; let detach_ignored: Option = parse_query_param(&request, "detach_ignored")?; @@ -460,7 +516,10 @@ async fn tenant_detach_handler(request: Request) -> Result, json_response(StatusCode::OK, ()) } -async fn tenant_load_handler(request: Request) -> Result, ApiError> { +async fn tenant_load_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -480,7 +539,10 @@ async fn tenant_load_handler(request: Request) -> Result, A json_response(StatusCode::ACCEPTED, ()) } -async fn tenant_ignore_handler(request: Request) -> Result, ApiError> { +async fn tenant_ignore_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -493,7 +555,10 @@ async fn tenant_ignore_handler(request: Request) -> Result, json_response(StatusCode::OK, ()) } -async fn tenant_list_handler(request: Request) -> Result, ApiError> { +async fn tenant_list_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { check_permission(&request, None)?; let response_data = mgr::list_tenants() @@ -513,7 +578,10 @@ async fn tenant_list_handler(request: Request) -> Result, A json_response(StatusCode::OK, response_data) } -async fn tenant_status(request: Request) -> Result, ApiError> { +async fn tenant_status( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -527,7 +595,7 @@ async fn tenant_status(request: Request) -> Result, ApiErro } let state = tenant.current_state(); - Ok(TenantInfo { + Result::<_, ApiError>::Ok(TenantInfo { id: tenant_id, state: state.clone(), current_physical_size: Some(current_physical_size), @@ -535,8 +603,7 @@ async fn tenant_status(request: Request) -> Result, ApiErro }) } .instrument(info_span!("tenant_status_handler", tenant = %tenant_id)) - .await - .map_err(ApiError::InternalServerError)?; + .await?; json_response(StatusCode::OK, tenant_info) } @@ -554,7 +621,10 @@ async fn tenant_status(request: Request) -> Result, ApiErro /// Note: we don't update the cached size and prometheus metric here. 
/// The retention period might be different, and it's nice to have a method to just calculate it /// without modifying anything anyway. -async fn tenant_size_handler(request: Request) -> Result, ApiError> { +async fn tenant_size_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; let inputs_only: Option = parse_query_param(&request, "inputs_only")?; @@ -619,7 +689,10 @@ async fn tenant_size_handler(request: Request) -> Result, A ) } -async fn layer_map_info_handler(request: Request) -> Result, ApiError> { +async fn layer_map_info_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let reset: LayerAccessStatsReset = @@ -633,7 +706,10 @@ async fn layer_map_info_handler(request: Request) -> Result json_response(StatusCode::OK, layer_map_info) } -async fn layer_download_handler(request: Request) -> Result, ApiError> { +async fn layer_download_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -656,7 +732,10 @@ async fn layer_download_handler(request: Request) -> Result } } -async fn evict_timeline_layer_handler(request: Request) -> Result, ApiError> { +async fn evict_timeline_layer_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -734,7 +813,12 @@ pub fn html_response(status: StatusCode, data: String) -> Result, Ok(response) } -async fn tenant_create_handler(mut request: Request) -> Result, ApiError> { +async fn tenant_create_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { + let request_data: TenantCreateRequest = json_request(&mut request).await?; + let target_tenant_id = request_data.new_tenant_id; check_permission(&request, None)?; let _timer = STORAGE_TIME_GLOBAL @@ -742,17 +826,10 @@ async fn tenant_create_handler(mut request: Request) -> Result) -> Result) -> Result, ApiError> { +async fn get_tenant_config_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -812,6 +892,7 @@ async fn get_tenant_config_handler(request: Request) -> Result, + _cancel: CancellationToken, ) -> Result, ApiError> { let request_data: TenantConfigRequest = json_request(&mut request).await?; let tenant_id = request_data.tenant_id; @@ -829,8 +910,10 @@ async fn update_tenant_config_handler( } /// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`]. 
-#[cfg(feature = "testing")] -async fn handle_tenant_break(r: Request) -> Result, ApiError> { +async fn handle_tenant_break( + r: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?; let tenant = crate::tenant::mgr::get_tenant(tenant_id, true) @@ -842,8 +925,10 @@ async fn handle_tenant_break(r: Request) -> Result, ApiErro json_response(StatusCode::OK, ()) } -#[cfg(feature = "testing")] -async fn failpoints_handler(mut request: Request) -> Result, ApiError> { +async fn failpoints_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { if !fail::has_failpoints() { return Err(ApiError::BadRequest(anyhow!( "Cannot manage failpoints because pageserver was compiled without failpoints support" @@ -876,7 +961,10 @@ async fn failpoints_handler(mut request: Request) -> Result } // Run GC immediately on given timeline. -async fn timeline_gc_handler(mut request: Request) -> Result, ApiError> { +async fn timeline_gc_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_id))?; @@ -895,8 +983,10 @@ async fn timeline_gc_handler(mut request: Request) -> Result) -> Result, ApiError> { +async fn timeline_compact_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_id))?; @@ -917,8 +1007,10 @@ async fn timeline_compact_handler(request: Request) -> Result) -> Result, ApiError> { +async fn timeline_checkpoint_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_id))?; @@ -942,6 +1034,7 @@ async fn timeline_checkpoint_handler(request: Request) -> Result, + _cancel: CancellationToken, ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -957,6 +1050,7 @@ async fn timeline_download_remote_layers_handler_post( async fn timeline_download_remote_layers_handler_get( request: Request, + _cancel: CancellationToken, ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; @@ -981,7 +1075,10 @@ async fn active_timeline_of_active_tenant( .map_err(ApiError::NotFound) } -async fn always_panic_handler(req: Request) -> Result, ApiError> { +async fn always_panic_handler( + req: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { // Deliberately cause a panic to exercise the panic hook registered via std::panic::set_hook(). // For pageserver, the relevant panic hook is `tracing_panic_hook` , and the `sentry` crate's wrapper around it. // Use catch_unwind to ensure that tokio nor hyper are distracted by our panic. 
@@ -992,7 +1089,10 @@ async fn always_panic_handler(req: Request) -> Result, ApiE json_response(StatusCode::NO_CONTENT, ()) } -async fn disk_usage_eviction_run(mut r: Request) -> Result, ApiError> { +async fn disk_usage_eviction_run( + mut r: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { check_permission(&r, None)?; #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] @@ -1082,8 +1182,10 @@ async fn handler_404(_: Request) -> Result, ApiError> { ) } -#[cfg(feature = "testing")] -async fn post_tracing_event_handler(mut r: Request) -> Result, ApiError> { +async fn post_tracing_event_handler( + mut r: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { #[derive(Debug, serde::Deserialize)] #[serde(rename_all = "lowercase")] enum Level { @@ -1113,6 +1215,85 @@ async fn post_tracing_event_handler(mut r: Request) -> Result(request: Request, handler: H) -> Result, ApiError> +where + R: std::future::Future, ApiError>> + Send + 'static, + H: FnOnce(Request, CancellationToken) -> R + Send + Sync + 'static, +{ + // Spawn a new task to handle the request, to protect the handler from unexpected + // async cancellations. Most pageserver functions are not async cancellation safe. + // We arm a drop-guard, so that if Hyper drops the Future, we signal the task + // with the cancellation token. + let token = CancellationToken::new(); + let cancel_guard = token.clone().drop_guard(); + let result = request_span(request, move |r| async { + let handle = tokio::spawn( + async { + let token_cloned = token.clone(); + let result = handler(r, token).await; + if token_cloned.is_cancelled() { + info!("Cancelled request finished"); + } + result + } + .in_current_span(), + ); + + match handle.await { + Ok(result) => result, + Err(e) => { + // The handler task panicked. We have a global panic handler that logs the + // panic with its backtrace, so no need to log that here. Only log a brief + // message to make it clear that we returned the error to the client. + error!("HTTP request handler task panicked: {e:#}"); + + // Don't return an Error here, because then fallback error handler that was + // installed in make_router() will print the error. Instead, construct the + // HTTP error response and return that. + Ok( + ApiError::InternalServerError(anyhow!("HTTP request handler task panicked")) + .into_response(), + ) + } + } + }) + .await; + + cancel_guard.disarm(); + + result +} + +/// Like api_handler, but returns an error response if the server is built without +/// the 'testing' feature. +async fn testing_api_handler( + desc: &str, + request: Request, + handler: H, +) -> Result, ApiError> +where + R: std::future::Future, ApiError>> + Send + 'static, + H: FnOnce(Request, CancellationToken) -> R + Send + Sync + 'static, +{ + if cfg!(feature = "testing") { + api_handler(request, handler).await + } else { + std::future::ready(Err(ApiError::BadRequest(anyhow!( + "Cannot {desc} because pageserver was compiled without testing APIs", + )))) + .await + } +} + pub fn make_router( conf: &'static PageServerConf, launch_ts: &'static LaunchTimestamp, @@ -1142,26 +1323,6 @@ pub fn make_router( .expect("construct launch timestamp header middleware"), ); - macro_rules! testing_api { - ($handler_desc:literal, $handler:path $(,)?) 
=> {{ - #[cfg(not(feature = "testing"))] - async fn cfg_disabled(_req: Request) -> Result, ApiError> { - Err(ApiError::BadRequest(anyhow!(concat!( - "Cannot ", - $handler_desc, - " because pageserver was compiled without testing APIs", - )))) - } - - #[cfg(feature = "testing")] - let handler = $handler; - #[cfg(not(feature = "testing"))] - let handler = cfg_disabled; - - move |r| RequestSpan(handler).handle(r) - }}; - } - Ok(router .data(Arc::new( State::new( @@ -1173,96 +1334,88 @@ pub fn make_router( ) .context("Failed to initialize router state")?, )) - .get("/v1/status", |r| RequestSpan(status_handler).handle(r)) - .put( - "/v1/failpoints", - testing_api!("manage failpoints", failpoints_handler), - ) - .get("/v1/tenant", |r| RequestSpan(tenant_list_handler).handle(r)) - .post("/v1/tenant", |r| { - RequestSpan(tenant_create_handler).handle(r) - }) - .get("/v1/tenant/:tenant_id", |r| { - RequestSpan(tenant_status).handle(r) + .get("/v1/status", |r| api_handler(r, status_handler)) + .put("/v1/failpoints", |r| { + testing_api_handler("manage failpoints", r, failpoints_handler) }) + .get("/v1/tenant", |r| api_handler(r, tenant_list_handler)) + .post("/v1/tenant", |r| api_handler(r, tenant_create_handler)) + .get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status)) .get("/v1/tenant/:tenant_id/synthetic_size", |r| { - RequestSpan(tenant_size_handler).handle(r) + api_handler(r, tenant_size_handler) }) .put("/v1/tenant/config", |r| { - RequestSpan(update_tenant_config_handler).handle(r) + api_handler(r, update_tenant_config_handler) }) .get("/v1/tenant/:tenant_id/config", |r| { - RequestSpan(get_tenant_config_handler).handle(r) + api_handler(r, get_tenant_config_handler) }) .get("/v1/tenant/:tenant_id/timeline", |r| { - RequestSpan(timeline_list_handler).handle(r) + api_handler(r, timeline_list_handler) }) .post("/v1/tenant/:tenant_id/timeline", |r| { - RequestSpan(timeline_create_handler).handle(r) + api_handler(r, timeline_create_handler) }) .post("/v1/tenant/:tenant_id/attach", |r| { - RequestSpan(tenant_attach_handler).handle(r) + api_handler(r, tenant_attach_handler) }) .post("/v1/tenant/:tenant_id/detach", |r| { - RequestSpan(tenant_detach_handler).handle(r) + api_handler(r, tenant_detach_handler) }) .post("/v1/tenant/:tenant_id/load", |r| { - RequestSpan(tenant_load_handler).handle(r) + api_handler(r, tenant_load_handler) }) .post("/v1/tenant/:tenant_id/ignore", |r| { - RequestSpan(tenant_ignore_handler).handle(r) + api_handler(r, tenant_ignore_handler) }) .get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| { - RequestSpan(timeline_detail_handler).handle(r) + api_handler(r, timeline_detail_handler) }) .get( "/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp", - |r| RequestSpan(get_lsn_by_timestamp_handler).handle(r), + |r| api_handler(r, get_lsn_by_timestamp_handler), ) .put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| { - RequestSpan(timeline_gc_handler).handle(r) + api_handler(r, timeline_gc_handler) + }) + .put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| { + testing_api_handler("run timeline compaction", r, timeline_compact_handler) }) - .put( - "/v1/tenant/:tenant_id/timeline/:timeline_id/compact", - testing_api!("run timeline compaction", timeline_compact_handler), - ) .put( "/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint", - testing_api!("run timeline checkpoint", timeline_checkpoint_handler), + |r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler), ) .post( 
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers", - |r| RequestSpan(timeline_download_remote_layers_handler_post).handle(r), + |r| api_handler(r, timeline_download_remote_layers_handler_post), ) .get( "/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers", - |r| RequestSpan(timeline_download_remote_layers_handler_get).handle(r), + |r| api_handler(r, timeline_download_remote_layers_handler_get), ) .delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| { - RequestSpan(timeline_delete_handler).handle(r) + api_handler(r, timeline_delete_handler) }) .get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| { - RequestSpan(layer_map_info_handler).handle(r) + api_handler(r, layer_map_info_handler) }) .get( "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name", - |r| RequestSpan(layer_download_handler).handle(r), + |r| api_handler(r, layer_download_handler), ) .delete( "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name", - |r| RequestSpan(evict_timeline_layer_handler).handle(r), + |r| api_handler(r, evict_timeline_layer_handler), ) .put("/v1/disk_usage_eviction/run", |r| { - RequestSpan(disk_usage_eviction_run).handle(r) + api_handler(r, disk_usage_eviction_run) + }) + .put("/v1/tenant/:tenant_id/break", |r| { + testing_api_handler("set tenant state to broken", r, handle_tenant_break) + }) + .get("/v1/panic", |r| api_handler(r, always_panic_handler)) + .post("/v1/tracing/event", |r| { + testing_api_handler("emit a tracing event", r, post_tracing_event_handler) }) - .put( - "/v1/tenant/:tenant_id/break", - testing_api!("set tenant state to broken", handle_tenant_break), - ) - .get("/v1/panic", |r| RequestSpan(always_panic_handler).handle(r)) - .post( - "/v1/tracing/event", - testing_api!("emit a tracing event", post_tracing_event_handler), - ) .any(handler_404)) } diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 4349f0e2ea..5831091098 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -35,7 +35,7 @@ use tracing::info; /// backwards-compatible changes to the metadata format. pub const STORAGE_FORMAT_VERSION: u16 = 3; -pub const DEFAULT_PG_VERSION: u32 = 14; +pub const DEFAULT_PG_VERSION: u32 = 15; // Magic constants used to identify different kinds of files pub const IMAGE_FILE_MAGIC: u16 = 0x5A60; @@ -45,6 +45,7 @@ static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]); pub use crate::metrics::preinitialize_metrics; +#[tracing::instrument] pub async fn shutdown_pageserver(exit_code: i32) { // Shut down the libpq endpoint task. This prevents new connections from // being accepted. @@ -57,12 +58,6 @@ pub async fn shutdown_pageserver(exit_code: i32) { // the checkpoint and GC tasks. tenant::mgr::shutdown_all_tenants().await; - // Stop syncing with remote storage. - // - // FIXME: Does this wait for the sync tasks to finish syncing what's queued up? - // Should it? - task_mgr::shutdown_tasks(Some(TaskKind::RemoteUploadTask), None, None).await; - // Shut down the HTTP endpoint last, so that you can still check the server's // status while it's shutting down. // FIXME: We should probably stop accepting commands like attach/detach earlier. @@ -137,6 +132,29 @@ pub fn is_uninit_mark(path: &Path) -> bool { } } +/// During pageserver startup, we need to order operations not to exhaust tokio worker threads by +/// blocking. +/// +/// The instances of this value exist only during startup, otherwise `None` is provided, meaning no +/// delaying is needed. 
+#[derive(Clone)] +pub struct InitializationOrder { + /// Each initial tenant load task carries this until completion. + pub initial_tenant_load: Option, + + /// Barrier for when we can start initial logical size calculations. + pub initial_logical_size_can_start: utils::completion::Barrier, + + /// Each timeline owns a clone of this to be consumed on the initial logical size calculation + /// attempt. It is important to drop this once the attempt has completed. + pub initial_logical_size_attempt: utils::completion::Completion, + + /// Barrier for when we can start any background jobs. + /// + /// This can be broken up later on, but right now there is just one class of a background job. + pub background_jobs_can_start: utils::completion::Barrier, +} + #[cfg(test)] mod backoff_defaults_tests { use super::*; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 75bea9dbab..cc444c479a 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -84,6 +84,16 @@ pub static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static READ_NUM_FS_LAYERS: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_read_num_fs_layers", + "Number of persistent layers accessed for processing a read request, including those in the cache", + &["tenant_id", "timeline_id"], + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0], + ) + .expect("failed to define a metric") +}); + // Metrics collected on operations on the storage repository. static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { register_histogram_vec!( @@ -95,6 +105,25 @@ static RECONSTRUCT_TIME: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_materialized_cache_hits_direct_total", + "Number of cache hits from materialized page cache without redo", + &["tenant_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + +static GET_RECONSTRUCT_DATA_TIME: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_getpage_get_reconstruct_data_seconds", + "Time spent in get_reconstruct_value_data", + &["tenant_id", "timeline_id"], + CRITICAL_OP_BUCKETS.into(), + ) + .expect("failed to define a metric") +}); + static MATERIALIZED_PAGE_CACHE_HIT: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_materialized_cache_hits_total", @@ -354,6 +383,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[ 0.001000, // 1000 usec 0.030, // 30 ms 1.000, // 1000 ms + 30.000, // 30000 ms ]; const STORAGE_IO_TIME_OPERATIONS: &[&str] = &[ @@ -622,7 +652,7 @@ pub static WAL_REDO_TIME: Lazy = Lazy::new(|| { pub static WAL_REDO_WAIT_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_wait_seconds", - "Time spent waiting for access to the WAL redo process", + "Time spent waiting for access to the Postgres WAL redo process", redo_histogram_time_buckets!(), ) .expect("failed to define a metric") @@ -631,7 +661,7 @@ pub static WAL_REDO_WAIT_TIME: Lazy = Lazy::new(|| { pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_records_histogram", - "Histogram of number of records replayed per redo", + "Histogram of number of records replayed per redo in the Postgres WAL redo process", redo_histogram_count_buckets!(), ) .expect("failed to define a metric") @@ -640,7 +670,7 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { pub static WAL_REDO_BYTES_HISTOGRAM: Lazy = Lazy::new(|| { 
register_histogram!( "pageserver_wal_redo_bytes_histogram", - "Histogram of number of records replayed per redo", + "Histogram of number of records replayed per redo sent to Postgres", redo_bytes_histogram_count_buckets!(), ) .expect("failed to define a metric") @@ -723,7 +753,9 @@ pub struct TimelineMetrics { tenant_id: String, timeline_id: String, pub reconstruct_time_histo: Histogram, + pub get_reconstruct_data_time_histo: Histogram, pub materialized_page_cache_hit_counter: GenericCounter, + pub materialized_page_cache_hit_upon_request_counter: GenericCounter, pub flush_time_histo: StorageTimeMetrics, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, @@ -734,6 +766,7 @@ pub struct TimelineMetrics { pub last_record_gauge: IntGauge, pub wait_lsn_time_histo: Histogram, pub resident_physical_size_gauge: UIntGauge, + pub read_num_fs_layers: Histogram, /// copy of LayeredTimeline.current_logical_size pub current_logical_size_gauge: UIntGauge, pub num_persistent_files_created: IntCounter, @@ -753,6 +786,9 @@ impl TimelineMetrics { let reconstruct_time_histo = RECONSTRUCT_TIME .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); + let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); @@ -794,6 +830,12 @@ impl TimelineMetrics { let evictions = EVICTIONS .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); + let read_num_fs_layers = READ_NUM_FS_LAYERS + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); + let materialized_page_cache_hit_upon_request_counter = MATERIALIZED_PAGE_CACHE_HIT_DIRECT + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id); @@ -801,7 +843,9 @@ impl TimelineMetrics { tenant_id, timeline_id, reconstruct_time_histo, + get_reconstruct_data_time_histo, materialized_page_cache_hit_counter, + materialized_page_cache_hit_upon_request_counter, flush_time_histo, compact_time_histo, create_images_time_histo, @@ -819,6 +863,7 @@ impl TimelineMetrics { evictions_with_low_residence_duration: std::sync::RwLock::new( evictions_with_low_residence_duration, ), + read_num_fs_layers, } } } @@ -828,7 +873,9 @@ impl Drop for TimelineMetrics { let tenant_id = &self.tenant_id; let timeline_id = &self.timeline_id; let _ = RECONSTRUCT_TIME.remove_label_values(&[tenant_id, timeline_id]); + let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]); let _ = MATERIALIZED_PAGE_CACHE_HIT.remove_label_values(&[tenant_id, timeline_id]); + let _ = MATERIALIZED_PAGE_CACHE_HIT_DIRECT.remove_label_values(&[tenant_id, timeline_id]); let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]); let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]); let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]); @@ -836,6 +883,8 @@ impl Drop for TimelineMetrics { let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]); let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]); let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]); + let _ = READ_NUM_FS_LAYERS.remove_label_values(&[tenant_id, timeline_id]); + 
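// The TimelineMetrics changes above follow a consistent lifecycle: resolve each
// labelled child once at construction, and remove the label set on Drop so the
// exporter stops emitting series for deleted tenants and timelines. A reduced
// sketch of that lifecycle, assuming the prometheus crate directly (the
// pageserver goes through its own `metrics` wrapper; all names here are
// illustrative):
use once_cell::sync::Lazy;
use prometheus::{register_histogram_vec, Histogram, HistogramVec};

static EXAMPLE_SECONDS: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "example_seconds",
        "Example per-timeline histogram",
        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

struct ExampleMetrics {
    tenant_id: String,
    timeline_id: String,
    histo: Histogram,
}

impl ExampleMetrics {
    fn new(tenant_id: String, timeline_id: String) -> Self {
        // Resolve the labelled child once; looking it up per observation is slower.
        let histo = EXAMPLE_SECONDS
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
        ExampleMetrics {
            tenant_id,
            timeline_id,
            histo,
        }
    }

    fn observe_seconds(&self, secs: f64) {
        self.histo.observe(secs);
    }
}

impl Drop for ExampleMetrics {
    fn drop(&mut self) {
        // Without this, stale series for dead timelines linger in the registry forever.
        let _ = EXAMPLE_SECONDS.remove_label_values(&[&self.tenant_id, &self.timeline_id]);
    }
}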
self.evictions_with_low_residence_duration .write() .unwrap() diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 03799553a0..d25463fe3e 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -50,7 +50,9 @@ use crate::import_datadir::import_wal_from_tar; use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME}; use crate::task_mgr; use crate::task_mgr::TaskKind; +use crate::tenant; use crate::tenant::mgr; +use crate::tenant::mgr::GetTenantError; use crate::tenant::{Tenant, Timeline}; use crate::trace::Tracer; @@ -1150,7 +1152,9 @@ enum GetActiveTenantError { wait_time: Duration, }, #[error(transparent)] - Other(#[from] anyhow::Error), + NotFound(GetTenantError), + #[error(transparent)] + WaitTenantActive(tenant::WaitToBecomeActiveError), } impl From for QueryError { @@ -1159,7 +1163,8 @@ impl From for QueryError { GetActiveTenantError::WaitForActiveTimeout { .. } => QueryError::Disconnected( ConnectionError::Io(io::Error::new(io::ErrorKind::TimedOut, e.to_string())), ), - GetActiveTenantError::Other(e) => QueryError::Other(e), + GetActiveTenantError::WaitTenantActive(e) => QueryError::Other(anyhow::Error::new(e)), + GetActiveTenantError::NotFound(e) => QueryError::Other(anyhow::Error::new(e)), } } } @@ -1175,13 +1180,16 @@ async fn get_active_tenant_with_timeout( ) -> Result, GetActiveTenantError> { let tenant = match mgr::get_tenant(tenant_id, false).await { Ok(tenant) => tenant, - Err(e) => return Err(GetActiveTenantError::Other(e.into())), + Err(e @ GetTenantError::NotFound(_)) => return Err(GetActiveTenantError::NotFound(e)), + Err(GetTenantError::NotActive(_)) => { + unreachable!("we're calling get_tenant with active=false") + } }; let wait_time = Duration::from_secs(30); match tokio::time::timeout(wait_time, tenant.wait_to_become_active()).await { Ok(Ok(())) => Ok(tenant), // no .context(), the error message is good enough and some tests depend on it - Ok(Err(wait_error)) => Err(GetActiveTenantError::Other(wait_error)), + Ok(Err(e)) => Err(GetActiveTenantError::WaitTenantActive(e)), Err(_) => { let latest_state = tenant.current_state(); if latest_state == TenantState::Active { @@ -1196,13 +1204,35 @@ async fn get_active_tenant_with_timeout( } } +#[derive(Debug, thiserror::Error)] +enum GetActiveTimelineError { + #[error(transparent)] + Tenant(GetActiveTenantError), + #[error(transparent)] + Timeline(anyhow::Error), +} + +impl From for QueryError { + fn from(e: GetActiveTimelineError) -> Self { + match e { + GetActiveTimelineError::Tenant(e) => e.into(), + GetActiveTimelineError::Timeline(e) => QueryError::Other(e), + } + } +} + /// Shorthand for getting a reference to a Timeline of an Active tenant. 
async fn get_active_tenant_timeline( tenant_id: TenantId, timeline_id: TimelineId, ctx: &RequestContext, -) -> Result, GetActiveTenantError> { - let tenant = get_active_tenant_with_timeout(tenant_id, ctx).await?; - let timeline = tenant.get_timeline(timeline_id, true).await?; +) -> Result, GetActiveTimelineError> { + let tenant = get_active_tenant_with_timeout(tenant_id, ctx) + .await + .map_err(GetActiveTimelineError::Tenant)?; + let timeline = tenant + .get_timeline(timeline_id, true) + .await + .map_err(GetActiveTimelineError::Timeline)?; Ok(timeline) } diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs index 82aebc6c07..4df0e4e6f2 100644 --- a/pageserver/src/task_mgr.rs +++ b/pageserver/src/task_mgr.rs @@ -476,18 +476,35 @@ pub async fn shutdown_tasks( && (timeline_id.is_none() || task_mut.timeline_id == timeline_id) { task.cancel.cancel(); - victim_tasks.push(Arc::clone(task)); + victim_tasks.push(( + Arc::clone(task), + task.kind, + task_mut.tenant_id, + task_mut.timeline_id, + )); } } } - for task in victim_tasks { + let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none(); + + for (task, task_kind, tenant_id, timeline_id) in victim_tasks { let join_handle = { let mut task_mut = task.mutable.lock().unwrap(); task_mut.join_handle.take() }; if let Some(mut join_handle) = join_handle { + if log_all { + if tenant_id.is_none() { + // there are quite few of these + info!(name = task.name, kind = ?task_kind, "stopping global task"); + } else { + // warn to catch these in tests; there shouldn't be any + warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over"); + } + } let completed = tokio::select! { + biased; _ = &mut join_handle => { true }, _ = tokio::time::sleep(std::time::Duration::from_secs(1)) => { // allow some time to elapse before logging to cut down the number of log diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7f589cb971..94376b2ac8 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -20,6 +20,7 @@ use storage_broker::BrokerClientChannel; use tokio::sync::watch; use tokio::task::JoinSet; use tracing::*; +use utils::completion; use utils::crashsafe::path_with_suffix_extension; use std::cmp::min; @@ -63,6 +64,7 @@ use crate::tenant::remote_timeline_client::PersistIndexPartWithDeletedFlagError; use crate::tenant::storage_layer::DeltaLayer; use crate::tenant::storage_layer::ImageLayer; use crate::tenant::storage_layer::Layer; +use crate::InitializationOrder; use crate::virtual_file::VirtualFile; use crate::walredo::PostgresRedoManager; @@ -77,7 +79,7 @@ use utils::{ lsn::{Lsn, RecordLsn}, }; -mod blob_io; +pub mod blob_io; pub mod block_io; pub mod disk_btree; pub(crate) mod ephemeral_file; @@ -268,7 +270,7 @@ impl UninitializedTimeline<'_> { let tl = self .initialize_with_lock(ctx, &mut timelines, false) .await?; - tl.activate(broker_client, ctx); + tl.activate(broker_client, None, ctx); Ok(tl) } @@ -449,11 +451,48 @@ pub enum DeleteTimelineError { Other(#[from] anyhow::Error), } +pub enum SetStoppingError { + AlreadyStopping, + Broken, +} + struct RemoteStartupData { index_part: IndexPart, remote_metadata: TimelineMetadata, } +#[derive(Debug, thiserror::Error)] +pub(crate) enum WaitToBecomeActiveError { + WillNotBecomeActive { + tenant_id: TenantId, + state: TenantState, + }, + TenantDropped { + tenant_id: TenantId, + }, +} + +impl std::fmt::Display for WaitToBecomeActiveError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { + match self { + WaitToBecomeActiveError::WillNotBecomeActive { tenant_id, state } => { + write!( + f, + "Tenant {} will not become active. Current state: {:?}", + tenant_id, state + ) + } + WaitToBecomeActiveError::TenantDropped { tenant_id } => { + write!(f, "Tenant {tenant_id} will not become active (dropped)") + } + } + } +} + +pub(crate) enum ShutdownError { + AlreadyStopping, +} + impl Tenant { /// Yet another helper for timeline initialization. /// Contains the common part of `load_local_timeline` and `load_remote_timeline`. @@ -474,6 +513,7 @@ impl Tenant { local_metadata: Option, ancestor: Option>, first_save: bool, + init_order: Option<&InitializationOrder>, ctx: &RequestContext, ) -> anyhow::Result<()> { let tenant_id = self.tenant_id; @@ -499,6 +539,7 @@ impl Tenant { up_to_date_metadata, ancestor.clone(), remote_client, + init_order, )?; let timeline = UninitializedTimeline { @@ -527,6 +568,7 @@ impl Tenant { up_to_date_metadata, ancestor.clone(), None, + None, ) .with_context(|| { format!("creating broken timeline data for {tenant_id}/{timeline_id}") @@ -622,16 +664,17 @@ impl Tenant { "attach tenant", false, async move { - let doit = async { - tenant_clone.attach(&ctx).await?; - tenant_clone.activate(broker_client, &ctx).await?; - anyhow::Ok(()) - }; - match doit.await { - Ok(_) => {} + match tenant_clone.attach(&ctx).await { + Ok(()) => { + info!("attach finished, activating"); + tenant_clone.activate(broker_client, None, &ctx).await; + } Err(e) => { - tenant_clone.set_broken(e.to_string()).await; - error!("error attaching tenant: {:?}", e); + error!("attach failed, setting tenant state to Broken: {:?}", e); + tenant_clone.state.send_modify(|state| { + assert_eq!(*state, TenantState::Attaching, "the attach task owns the tenant state until activation is complete"); + *state = TenantState::broken_from_reason(e.to_string()); + }); } } Ok(()) @@ -648,6 +691,8 @@ impl Tenant { /// /// Background task that downloads all data for a tenant and brings it to Active state. /// + /// No background tasks are started as part of this routine. + /// async fn attach(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { debug_assert_current_span_has_tenant_id(); @@ -822,6 +867,7 @@ impl Tenant { local_metadata, ancestor, true, + None, ctx, ) .await @@ -847,7 +893,6 @@ impl Tenant { )) } - /// /// Load a tenant that's available on local disk /// /// This is used at pageserver startup, to rebuild the in-memory @@ -857,15 +902,17 @@ impl Tenant { /// /// If the loading fails for some reason, the Tenant will go into Broken /// state. - /// - #[instrument(skip(conf, remote_storage, ctx), fields(tenant_id=%tenant_id))] + #[instrument(skip_all, fields(tenant_id=%tenant_id))] pub fn spawn_load( conf: &'static PageServerConf, tenant_id: TenantId, broker_client: storage_broker::BrokerClientChannel, remote_storage: Option, + init_order: Option, ctx: &RequestContext, ) -> Arc { + debug_assert_current_span_has_tenant_id(); + let tenant_conf = match Self::load_tenant_config(conf, tenant_id) { Ok(conf) => conf, Err(e) => { @@ -897,20 +944,27 @@ impl Tenant { "initial tenant load", false, async move { - let doit = async { - tenant_clone.load(&ctx).await?; - tenant_clone.activate(broker_client, &ctx).await?; - anyhow::Ok(()) - }; - match doit.await { - Ok(()) => {} + let mut init_order = init_order; + + // take the completion because initial tenant loading will complete when all of + // these tasks complete. 
+ let _completion = init_order.as_mut().and_then(|x| x.initial_tenant_load.take()); + + match tenant_clone.load(init_order.as_ref(), &ctx).await { + Ok(()) => { + debug!("load finished, activating"); + let background_jobs_can_start = init_order.as_ref().map(|x| &x.background_jobs_can_start); + tenant_clone.activate(broker_client, background_jobs_can_start, &ctx).await; + } Err(err) => { - tenant_clone.set_broken(err.to_string()).await; - error!("could not load tenant {tenant_id}: {err:?}"); + error!("load failed, setting tenant state to Broken: {err:?}"); + tenant_clone.state.send_modify(|state| { + assert_eq!(*state, TenantState::Loading, "the loading task owns the tenant state until activation is complete"); + *state = TenantState::broken_from_reason(err.to_string()); + }); } } - info!("initial load for tenant {tenant_id} finished!"); - Ok(()) + Ok(()) } .instrument({ let span = tracing::info_span!(parent: None, "load", tenant_id=%tenant_id); @@ -919,8 +973,6 @@ impl Tenant { }), ); - info!("spawned load into background"); - tenant } @@ -928,10 +980,15 @@ impl Tenant { /// Background task to load in-memory data structures for this tenant, from /// files on disk. Used at pageserver startup. /// - async fn load(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { + /// No background tasks are started as part of this routine. + async fn load( + self: &Arc, + init_order: Option<&InitializationOrder>, + ctx: &RequestContext, + ) -> anyhow::Result<()> { debug_assert_current_span_has_tenant_id(); - info!("loading tenant task"); + debug!("loading tenant task"); utils::failpoint_sleep_millis_async!("before-loading-tenant"); @@ -941,112 +998,119 @@ impl Tenant { // // Scan the directory, peek into the metadata file of each timeline, and // collect a list of timelines and their ancestors. - let mut timelines_to_load: HashMap = HashMap::new(); - let timelines_dir = self.conf.timelines_path(&self.tenant_id); - for entry in std::fs::read_dir(&timelines_dir).with_context(|| { - format!( - "Failed to list timelines directory for tenant {}", - self.tenant_id - ) - })? { - let entry = entry.with_context(|| { - format!("cannot read timeline dir entry for {}", self.tenant_id) - })?; - let timeline_dir = entry.path(); + let tenant_id = self.tenant_id; + let conf = self.conf; + let span = info_span!("blocking"); - if crate::is_temporary(&timeline_dir) { - info!( - "Found temporary timeline directory, removing: {}", - timeline_dir.display() - ); - if let Err(e) = std::fs::remove_dir_all(&timeline_dir) { - error!( - "Failed to remove temporary directory '{}': {:?}", - timeline_dir.display(), - e + let sorted_timelines: Vec<(_, _)> = tokio::task::spawn_blocking(move || { + let _g = span.entered(); + let mut timelines_to_load: HashMap = HashMap::new(); + let timelines_dir = conf.timelines_path(&tenant_id); + + for entry in + std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")? 
+ { + let entry = entry.context("read timeline dir entry")?; + let timeline_dir = entry.path(); + + if crate::is_temporary(&timeline_dir) { + info!( + "Found temporary timeline directory, removing: {}", + timeline_dir.display() ); - } - } else if is_uninit_mark(&timeline_dir) { - let timeline_uninit_mark_file = &timeline_dir; - info!( - "Found an uninit mark file {}, removing the timeline and its uninit mark", - timeline_uninit_mark_file.display() - ); - let timeline_id = timeline_uninit_mark_file - .file_stem() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( + if let Err(e) = std::fs::remove_dir_all(&timeline_dir) { + error!( + "Failed to remove temporary directory '{}': {:?}", + timeline_dir.display(), + e + ); + } + } else if is_uninit_mark(&timeline_dir) { + let timeline_uninit_mark_file = &timeline_dir; + info!( + "Found an uninit mark file {}, removing the timeline and its uninit mark", + timeline_uninit_mark_file.display() + ); + let timeline_id = timeline_uninit_mark_file + .file_stem() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( "Could not parse timeline id out of the timeline uninit mark name {}", timeline_uninit_mark_file.display() ) - })?; - let timeline_dir = self.conf.timeline_path(&timeline_id, &self.tenant_id); - if let Err(e) = - remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) - { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - } else { - let timeline_id = timeline_dir - .file_name() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline dir name {}", - timeline_dir.display() - ) - })?; - let timeline_uninit_mark_file = self - .conf - .timeline_uninit_mark_file_path(self.tenant_id, timeline_id); - if timeline_uninit_mark_file.exists() { - info!( - "Found an uninit mark file for timeline {}/{}, removing the timeline and its uninit mark", - self.tenant_id, timeline_id - ); + })?; + let timeline_dir = conf.timeline_path(&timeline_id, &tenant_id); if let Err(e) = - remove_timeline_and_uninit_mark(&timeline_dir, &timeline_uninit_mark_file) + remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) { error!("Failed to clean up uninit marked timeline: {e:?}"); } - continue; - } - - let file_name = entry.file_name(); - if let Ok(timeline_id) = - file_name.to_str().unwrap_or_default().parse::() - { - let metadata = load_metadata(self.conf, timeline_id, self.tenant_id) - .context("failed to load metadata")?; - timelines_to_load.insert(timeline_id, metadata); } else { - // A file or directory that doesn't look like a timeline ID - warn!( - "unexpected file or directory in timelines directory: {}", - file_name.to_string_lossy() - ); + let timeline_id = timeline_dir + .file_name() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline dir name {}", + timeline_dir.display() + ) + })?; + let timeline_uninit_mark_file = + conf.timeline_uninit_mark_file_path(tenant_id, timeline_id); + if timeline_uninit_mark_file.exists() { + info!( + %timeline_id, + "Found an uninit mark file, removing the timeline and its uninit mark", + ); + if let Err(e) = remove_timeline_and_uninit_mark( + &timeline_dir, + &timeline_uninit_mark_file, + ) { + error!("Failed to clean up uninit marked timeline: {e:?}"); + } + continue; + } + + let file_name = entry.file_name(); + if 
let Ok(timeline_id) = + file_name.to_str().unwrap_or_default().parse::() + { + let metadata = load_metadata(conf, timeline_id, tenant_id) + .context("failed to load metadata")?; + timelines_to_load.insert(timeline_id, metadata); + } else { + // A file or directory that doesn't look like a timeline ID + warn!( + "unexpected file or directory in timelines directory: {}", + file_name.to_string_lossy() + ); + } } } - } - // Sort the array of timeline IDs into tree-order, so that parent comes before - // all its children. - let sorted_timelines = tree_sort_timelines(timelines_to_load)?; + // Sort the array of timeline IDs into tree-order, so that parent comes before + // all its children. + tree_sort_timelines(timelines_to_load) + }) + .await + .context("load spawn_blocking") + .and_then(|res| res)?; + // FIXME original collect_timeline_files contained one more check: // 1. "Timeline has no ancestor and no layer files" for (timeline_id, local_metadata) in sorted_timelines { - self.load_local_timeline(timeline_id, local_metadata, ctx) + self.load_local_timeline(timeline_id, local_metadata, init_order, ctx) .await .with_context(|| format!("load local timeline {timeline_id}"))?; } - info!("Done"); + trace!("Done"); Ok(()) } @@ -1059,6 +1123,7 @@ impl Tenant { &self, timeline_id: TimelineId, local_metadata: TimelineMetadata, + init_order: Option<&InitializationOrder>, ctx: &RequestContext, ) -> anyhow::Result<()> { debug_assert_current_span_has_tenant_id(); @@ -1128,6 +1193,7 @@ impl Tenant { Some(local_metadata), ancestor, false, + init_order, ctx, ) .await @@ -1316,7 +1382,7 @@ impl Tenant { } }; - loaded_timeline.activate(broker_client, ctx); + loaded_timeline.activate(broker_client, None, ctx); if let Some(remote_client) = loaded_timeline.remote_client.as_ref() { // Wait for the upload of the 'index_part.json` file to finish, so that when we return @@ -1352,6 +1418,7 @@ impl Tenant { pitr: Duration, ctx: &RequestContext, ) -> anyhow::Result { + // there is a global allowed_error for this anyhow::ensure!( self.is_active(), "Cannot run GC iteration on inactive tenant" @@ -1395,31 +1462,70 @@ impl Tenant { Ok(()) } - /// Flush all in-memory data to disk. + /// Flush all in-memory data to disk and remote storage, if any. /// /// Used at graceful shutdown. - /// - pub async fn freeze_and_flush(&self) -> anyhow::Result<()> { - // Scan through the hashmap and collect a list of all the timelines, - // while holding the lock. Then drop the lock and actually perform the - // flushing. We don't want to block everything else while the - // flushing is performed. - let timelines_to_flush = { + async fn freeze_and_flush_on_shutdown(&self) { + let mut js = tokio::task::JoinSet::new(); + + // execute on each timeline on the JoinSet, join after. + let per_timeline = |timeline_id: TimelineId, timeline: Arc| { + async move { + debug_assert_current_span_has_tenant_and_timeline_id(); + + match timeline.freeze_and_flush().await { + Ok(()) => {} + Err(e) => { + warn!("failed to freeze and flush: {e:#}"); + return; + } + } + + let res = if let Some(client) = timeline.remote_client.as_ref() { + // if we did not wait for completion here, it might be our shutdown process + // didn't wait for remote uploads to complete at all, as new tasks can forever + // be spawned. + // + // what is problematic is the shutting down of RemoteTimelineClient, because + // obviously it does not make sense to stop while we wait for it, but what + // about corner cases like s3 suddenly hanging up? 
+ client.wait_completion().await + } else { + Ok(()) + }; + + if let Err(e) = res { + warn!("failed to await for frozen and flushed uploads: {e:#}"); + } + } + .instrument(tracing::info_span!("freeze_and_flush_on_shutdown", %timeline_id)) + }; + + { let timelines = self.timelines.lock().await; timelines .iter() - .map(|(_id, timeline)| Arc::clone(timeline)) - .collect::>() + .map(|(id, tl)| (*id, Arc::clone(tl))) + .for_each(|(timeline_id, timeline)| { + js.spawn(per_timeline(timeline_id, timeline)); + }) }; - for timeline in &timelines_to_flush { - timeline.freeze_and_flush().await?; + while let Some(res) = js.join_next().await { + match res { + Ok(()) => {} + Err(je) if je.is_cancelled() => unreachable!("no cancelling used"), + Err(je) if je.is_panic() => { /* logged already */ } + Err(je) => warn!("unexpected JoinError: {je:?}"), + } } - - Ok(()) } - /// Removes timeline-related in-memory data + /// Shuts down a timeline's tasks, removes its in-memory structures, and deletes its + /// data from disk. + /// + /// This doesn't currently delete all data from S3, but sets a flag in its + /// index_part.json file to mark it as deleted. pub async fn delete_timeline( &self, timeline_id: TimelineId, @@ -1429,7 +1535,11 @@ impl Tenant { // Transition the timeline into TimelineState::Stopping. // This should prevent new operations from starting. - let timeline = { + // + // Also grab the Timeline's delete_lock to prevent another deletion from starting. + let timeline; + let mut delete_lock_guard; + { let mut timelines = self.timelines.lock().await; // Ensure that there are no child timelines **attached to that pageserver**, @@ -1447,20 +1557,36 @@ impl Tenant { Entry::Vacant(_) => return Err(DeleteTimelineError::NotFound), }; - let timeline = Arc::clone(timeline_entry.get()); + timeline = Arc::clone(timeline_entry.get()); + + // Prevent two tasks from trying to delete the timeline at the same time. + // + // XXX: We should perhaps return an HTTP "202 Accepted" to signal that the caller + // needs to poll until the operation has finished. But for now, we return an + // error, because the control plane knows to retry errors. + delete_lock_guard = timeline.delete_lock.try_lock().map_err(|_| { + DeleteTimelineError::Other(anyhow::anyhow!( + "timeline deletion is already in progress" + )) + })?; + + // If another task finished the deletion just before we acquired the lock, + // return success. + if *delete_lock_guard { + return Ok(()); + } + timeline.set_state(TimelineState::Stopping); drop(timelines); - timeline - }; + } // Now that the Timeline is in Stopping state, request all the related tasks to // shut down. // - // NB: If you call delete_timeline multiple times concurrently, they will - // all go through the motions here. Make sure the code here is idempotent, - // and don't error out if some of the shutdown tasks have already been - // completed! + // NB: If this fails half-way through, and is retried, the retry will go through + // all the same steps again. Make sure the code here is idempotent, and don't + // error out if some of the shutdown tasks have already been completed! // Stop the walreceiver first. 
debug!("waiting for wal receiver to shutdown"); @@ -1501,6 +1627,10 @@ impl Tenant { // If we (now, or already) marked it successfully as deleted, we can proceed Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (), // Bail out otherwise + // + // AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents + // two tasks from performing the deletion at the same time. The first task + // that starts deletion should run it to completion. Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_)) | Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => { return Err(DeleteTimelineError::Other(anyhow::anyhow!(e))); @@ -1511,14 +1641,12 @@ impl Tenant { { // Grab the layer_removal_cs lock, and actually perform the deletion. // - // This lock prevents multiple concurrent delete_timeline calls from - // stepping on each other's toes, while deleting the files. It also - // prevents GC or compaction from running at the same time. + // This lock prevents prevents GC or compaction from running at the same time. + // The GC task doesn't register itself with the timeline it's operating on, + // so it might still be running even though we called `shutdown_tasks`. // // Note that there are still other race conditions between - // GC, compaction and timeline deletion. GC task doesn't - // register itself properly with the timeline it's - // operating on. See + // GC, compaction and timeline deletion. See // https://github.com/neondatabase/neon/issues/2671 // // No timeout here, GC & Compaction should be responsive to the @@ -1580,37 +1708,27 @@ impl Tenant { }); // Remove the timeline from the map. - let mut timelines = self.timelines.lock().await; - let children_exist = timelines - .iter() - .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id)); - // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`. - // We already deleted the layer files, so it's probably best to panic. - // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart) - if children_exist { - panic!("Timeline grew children while we removed layer files"); + { + let mut timelines = self.timelines.lock().await; + + let children_exist = timelines + .iter() + .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id)); + // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`. + // We already deleted the layer files, so it's probably best to panic. + // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart) + if children_exist { + panic!("Timeline grew children while we removed layer files"); + } + + timelines.remove(&timeline_id).expect( + "timeline that we were deleting was concurrently removed from 'timelines' map", + ); } - let removed_timeline = timelines.remove(&timeline_id); - if removed_timeline.is_none() { - // This can legitimately happen if there's a concurrent call to this function. - // T1 T2 - // lock - // unlock - // lock - // unlock - // remove files - // lock - // remove from map - // unlock - // return - // remove files - // lock - // remove from map observes empty map - // unlock - // return - debug!("concurrent call to this function won the race"); - } - drop(timelines); + + // All done! 
Mark the deletion as completed and release the delete_lock + *delete_lock_guard = true; + drop(delete_lock_guard); Ok(()) } @@ -1624,49 +1742,36 @@ impl Tenant { } /// Changes tenant status to active, unless shutdown was already requested. + /// + /// `background_jobs_can_start` is an optional barrier set to a value during pageserver startup + /// to delay background jobs. Background jobs can be started right away when None is given. async fn activate( - &self, + self: &Arc, broker_client: BrokerClientChannel, + background_jobs_can_start: Option<&completion::Barrier>, ctx: &RequestContext, - ) -> anyhow::Result<()> { + ) { debug_assert_current_span_has_tenant_id(); - let mut result = Ok(()); let mut activating = false; self.state.send_modify(|current_state| { + use pageserver_api::models::ActivatingFrom; match &*current_state { - TenantState::Activating => { - // activate() was called on an already Activating tenant. Shouldn't happen. - result = Err(anyhow::anyhow!("Tenant is already activating")); + TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping => { + panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state); } - TenantState::Active => { - // activate() was called on an already Active tenant. Shouldn't happen. - result = Err(anyhow::anyhow!("Tenant is already active")); + TenantState::Loading => { + *current_state = TenantState::Activating(ActivatingFrom::Loading); } - TenantState::Broken { reason, .. } => { - // This shouldn't happen either - result = Err(anyhow::anyhow!( - "Could not activate tenant because it is in broken state due to: {reason}", - )); - } - TenantState::Stopping => { - // The tenant was detached, or system shutdown was requested, while we were - // loading or attaching the tenant. - info!("Tenant is already in Stopping state, skipping activation"); - } - TenantState::Loading | TenantState::Attaching => { - *current_state = TenantState::Activating; - debug!(tenant_id = %self.tenant_id, "Activating tenant"); - activating = true; - // Continue outside the closure. We need to grab timelines.lock() - // and we plan to turn it into a tokio::sync::Mutex in a future patch. + TenantState::Attaching => { + *current_state = TenantState::Activating(ActivatingFrom::Attaching); } } + debug!(tenant_id = %self.tenant_id, "Activating tenant"); + activating = true; + // Continue outside the closure. We need to grab timelines.lock() + // and we plan to turn it into a tokio::sync::Mutex in a future patch. }); - if let Err(e) = result { - assert!(!activating, "transition into Activating is infallible"); - return Err(e); - } if activating { let timelines_accessor = self.timelines.lock().await; @@ -1676,18 +1781,18 @@ impl Tenant { // Spawn gc and compaction loops. The loops will shut themselves // down when they notice that the tenant is inactive. 
-        tasks::start_background_loops(self.tenant_id);
+        tasks::start_background_loops(self, background_jobs_can_start);
 
         let mut activated_timelines = 0;
 
         for timeline in not_broken_timelines {
-            timeline.activate(broker_client.clone(), ctx);
+            timeline.activate(broker_client.clone(), background_jobs_can_start, ctx);
             activated_timelines += 1;
         }
 
         self.state.send_modify(move |current_state| {
             assert!(
-                *current_state == TenantState::Activating,
+                matches!(current_state, TenantState::Activating(_)),
                 "set_stopping and set_broken wait for us to leave Activating state",
             );
             *current_state = TenantState::Active;
@@ -1706,96 +1811,188 @@ impl Tenant {
                 "activation attempt finished"
             );
         });
+        }
+    }
+
+    /// Shutdown the tenant and join all of the spawned tasks.
+    ///
+    /// The method caters for all use-cases:
+    /// - pageserver shutdown (freeze_and_flush == true)
+    /// - detach + ignore (freeze_and_flush == false)
+    ///
+    /// This will attempt to shut down even if the tenant is broken.
+    pub(crate) async fn shutdown(&self, freeze_and_flush: bool) -> Result<(), ShutdownError> {
+        debug_assert_current_span_has_tenant_id();
+        // Set tenant (and its timelines) to Stopping state.
+        //
+        // Since we can only transition into Stopping state after activation is complete,
+        // run it in a JoinSet so all tenants have a chance to stop before we get SIGKILLed.
+        //
+        // Transitioning tenants to Stopping state has a couple of non-obvious side effects:
+        // 1. Lock out any new requests to the tenants.
+        // 2. Signal cancellation to WAL receivers (we wait on it below).
+        // 3. Signal cancellation for other tenant background loops.
+        // 4. ???
+        //
+        // The waiting for the cancellation is not done uniformly.
+        // We certainly wait for WAL receivers to shut down.
+        // That is necessary so that no new data comes in before the freeze_and_flush.
+        // But the tenant background loops are joined-on in our caller.
+        // It's messed up.
+        // We just ignore the failure to stop.
+        match self.set_stopping().await {
+            Ok(()) => {}
+            Err(SetStoppingError::Broken) => {
+                // assume that this is acceptable
+            }
+            Err(SetStoppingError::AlreadyStopping) => return Err(ShutdownError::AlreadyStopping),
+        };
+
+        if freeze_and_flush {
+            // Walreceivers have already begun to shut down with TenantState::Stopping,
+            // but we need to wait for them to stop.
+            task_mgr::shutdown_tasks(
+                Some(TaskKind::WalReceiverManager),
+                Some(self.tenant_id),
+                None,
+            )
+            .await;
+
+            // This will wait for uploads to complete; in the past, it was done outside tenant
+            // shutdown in pageserver::shutdown_pageserver.
+            self.freeze_and_flush_on_shutdown().await;
+        }
+
+        // Shut down all tenant and timeline tasks: gc, compaction, page service.
+        // No new tasks will be started for this tenant because it's in `Stopping` state.
+        //
+        // This will additionally shut down and await all timeline tasks.
+        task_mgr::shutdown_tasks(None, Some(self.tenant_id), None).await;
+        Ok(())
     }
 
     /// Change tenant status to Stopping, to mark that it is being shut down.
     ///
+    /// This function waits for the tenant to become active if it isn't already, before transitioning it into Stopping state.
+    ///
     /// This function is not cancel-safe!
-    pub async fn set_stopping(&self) {
-        // Get the rx before checking state inside send_if_modified.
-        // This way, when we later rx.changed().await, we won't have missed
-        // any state changes.
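// The rewritten set_stopping/set_broken below lean on two tokio watch-channel
// operations: Receiver::wait_for parks until activation has finished, and
// Sender::send_if_modified performs the transition exactly once even when
// several tasks race. A self-contained sketch of that pattern with a toy State
// enum (assumes a tokio version that provides Receiver::wait_for):
use tokio::sync::watch;

#[derive(Clone, Debug, PartialEq)]
enum State {
    Activating,
    Active,
    Stopping,
}

/// Returns true iff this caller won the race to move Active -> Stopping.
async fn stop_once(tx: &watch::Sender<State>) -> bool {
    let mut rx = tx.subscribe();
    // Park until activation is over; the predicate is re-checked on every change.
    rx.wait_for(|s| *s != State::Activating)
        .await
        .expect("sender is kept alive by the caller");
    tx.send_if_modified(|s| {
        if *s == State::Active {
            *s = State::Stopping;
            true // value changed: receivers get notified
        } else {
            false // lost the race (already Stopping): leave the value untouched
        }
    })
}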
+ async fn set_stopping(&self) -> Result<(), SetStoppingError> { let mut rx = self.state.subscribe(); - while *rx.borrow() == TenantState::Activating { - rx.changed() - .await - .expect("we're a method on Tenant, so, we're keeping self.state alive here"); - } - let mut stopping = false; - self.state.send_modify(|current_state| { - match current_state { - TenantState::Activating => unreachable!("we checked above and never transition back into Activating state"), - // FIXME: If the tenant is still Loading or Attaching, new timelines - // might be created after this. That's harmless, as the Timelines - // won't be accessible to anyone, when the Tenant is in Stopping - // state. - TenantState::Active | TenantState::Loading | TenantState::Attaching => { - *current_state = TenantState::Stopping; - stopping = true; - // Continue outside the closure. We need to grab timelines.lock() - // and we plan to turn it into a tokio::sync::Mutex in a future patch. - } - TenantState::Broken { reason, .. } => { - info!("Cannot set tenant to Stopping state, it is in Broken state due to: {reason}"); - } - TenantState::Stopping => { - // The tenant was detached, or system shutdown was requested, while we were - // loading or attaching the tenant. - info!("Tenant is already in Stopping state"); - } + + // cannot stop before we're done activating, so wait out until we're done activating + rx.wait_for(|state| match state { + TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => { + info!( + "waiting for {} to turn Active|Broken|Stopping", + <&'static str>::from(state) + ); + false + } + TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping {} => true, + }) + .await + .expect("cannot drop self.state while on a &self method"); + + // we now know we're done activating, let's see whether this task is the winner to transition into Stopping + let mut err = None; + let stopping = self.state.send_if_modified(|current_state| match current_state { + TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => { + unreachable!("we ensured above that we're done with activation, and, there is no re-activation") + } + TenantState::Active => { + // FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines + // are created after the transition to Stopping. That's harmless, as the Timelines + // won't be accessible to anyone afterwards, because the Tenant is in Stopping state. + *current_state = TenantState::Stopping; + // Continue stopping outside the closure. We need to grab timelines.lock() + // and we plan to turn it into a tokio::sync::Mutex in a future patch. + true + } + TenantState::Broken { reason, .. 
+            } => {
+                info!(
+                    "Cannot set tenant to Stopping state, it is in Broken state due to: {reason}"
+                );
+                err = Some(SetStoppingError::Broken);
+                false
+            }
+            TenantState::Stopping => {
+                info!("Tenant is already in Stopping state");
+                err = Some(SetStoppingError::AlreadyStopping);
+                false
+            }
         });
-
-        if stopping {
-            let timelines_accessor = self.timelines.lock().await;
-            let not_broken_timelines = timelines_accessor
-                .values()
-                .filter(|timeline| timeline.current_state() != TimelineState::Broken);
-            for timeline in not_broken_timelines {
-                timeline.set_state(TimelineState::Stopping);
-            }
+        match (stopping, err) {
+            (true, None) => {} // continue
+            (false, Some(err)) => return Err(err),
+            (true, Some(_)) => unreachable!(
+                "send_if_modified closure must error out if not transitioning to Stopping"
+            ),
+            (false, None) => unreachable!(
+                "send_if_modified closure must return true if transitioning to Stopping"
+            ),
         }
+
+        let timelines_accessor = self.timelines.lock().await;
+        let not_broken_timelines = timelines_accessor
+            .values()
+            .filter(|timeline| timeline.current_state() != TimelineState::Broken);
+        for timeline in not_broken_timelines {
+            timeline.set_state(TimelineState::Stopping);
+        }
+        Ok(())
     }
 
-    pub async fn set_broken(&self, reason: String) {
+    /// Method for tenant::mgr to transition us into Broken state in case of a late failure in
+    /// `remove_tenant_from_memory`
+    ///
+    /// This function waits for the tenant to become active if it isn't already, before transitioning it into Broken state.
+    ///
+    /// In tests, we also use this to set tenants to Broken state on purpose.
+    pub(crate) async fn set_broken(&self, reason: String) {
         let mut rx = self.state.subscribe();
-        while *rx.borrow() == TenantState::Activating {
-            rx.changed()
-                .await
-                .expect("we're a method on Tenant, so, we're keeping self.state alive here");
-        }
+
+        // The load & attach routines own the tenant state until it has reached `Active`.
+        // So, wait until it's done.
+        rx.wait_for(|state| match state {
+            TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
+                info!(
+                    "waiting for {} to turn Active|Broken|Stopping",
+                    <&'static str>::from(state)
+                );
+                false
+            }
+            TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping {} => true,
+        })
+        .await
+        .expect("cannot drop self.state while on a &self method");
+
+        // we now know we're done activating, let's see whether this task is the winner to transition into Broken
         self.state.send_modify(|current_state| {
             match *current_state {
-                TenantState::Activating => {
-                    unreachable!("we checked above and never transition back into Activating state")
+                TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
+                    unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
                 }
                 TenantState::Active => {
-                    // Broken tenants can currently only used for fatal errors that happen
-                    // while loading or attaching a tenant. A tenant that has already been
-                    // activated should never be marked as broken. We cope with it the best
-                    // we can, but it shouldn't happen.
-                    warn!("Changing Active tenant to Broken state, reason: {}", reason);
-                    *current_state = TenantState::broken_from_reason(reason);
+                    if cfg!(feature = "testing") {
+                        warn!("Changing Active tenant to Broken state, reason: {}", reason);
+                        *current_state = TenantState::broken_from_reason(reason);
+                    } else {
+                        unreachable!("not allowed to call set_broken on Active tenants in non-testing builds")
+                    }
                 }
                 TenantState::Broken { ..
} => { - // This shouldn't happen either warn!("Tenant is already in Broken state"); } + // This is the only "expected" path, any other path is a bug. TenantState::Stopping => { - // This shouldn't happen either warn!( "Marking Stopping tenant as Broken state, reason: {}", reason ); *current_state = TenantState::broken_from_reason(reason); } - TenantState::Loading | TenantState::Attaching => { - info!("Setting tenant as Broken state, reason: {}", reason); - *current_state = TenantState::broken_from_reason(reason); - } - } + } }); } @@ -1803,25 +2000,30 @@ impl Tenant { self.state.subscribe() } - pub async fn wait_to_become_active(&self) -> anyhow::Result<()> { + pub(crate) async fn wait_to_become_active(&self) -> Result<(), WaitToBecomeActiveError> { let mut receiver = self.state.subscribe(); loop { let current_state = receiver.borrow_and_update().clone(); match current_state { - TenantState::Loading | TenantState::Attaching | TenantState::Activating => { + TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => { // in these states, there's a chance that we can reach ::Active - receiver.changed().await?; + receiver.changed().await.map_err( + |_e: tokio::sync::watch::error::RecvError| { + WaitToBecomeActiveError::TenantDropped { + tenant_id: self.tenant_id, + } + }, + )?; } TenantState::Active { .. } => { return Ok(()); } TenantState::Broken { .. } | TenantState::Stopping => { // There's no chance the tenant can transition back into ::Active - anyhow::bail!( - "Tenant {} will not become active. Current state: {:?}", - self.tenant_id, - ¤t_state, - ); + return Err(WaitToBecomeActiveError::WillNotBecomeActive { + tenant_id: self.tenant_id, + state: current_state, + }); } } } @@ -1975,6 +2177,7 @@ impl Tenant { new_metadata: &TimelineMetadata, ancestor: Option>, remote_client: Option, + init_order: Option<&InitializationOrder>, ) -> anyhow::Result> { if let Some(ancestor_timeline_id) = new_metadata.ancestor_timeline() { anyhow::ensure!( @@ -1983,6 +2186,9 @@ impl Tenant { ) } + let initial_logical_size_can_start = init_order.map(|x| &x.initial_logical_size_can_start); + let initial_logical_size_attempt = init_order.map(|x| &x.initial_logical_size_attempt); + let pg_version = new_metadata.pg_version(); Ok(Timeline::new( self.conf, @@ -1994,6 +2200,8 @@ impl Tenant { Arc::clone(&self.walredo_mgr), remote_client, pg_version, + initial_logical_size_can_start.cloned(), + initial_logical_size_attempt.cloned(), )) } @@ -2675,7 +2883,7 @@ impl Tenant { remote_client: Option, ) -> anyhow::Result> { let timeline_data = self - .create_timeline_data(new_timeline_id, new_metadata, ancestor, remote_client) + .create_timeline_data(new_timeline_id, new_metadata, ancestor, remote_client, None) .context("Failed to create timeline data structure")?; crashsafe::create_dir_all(timeline_path).context("Failed to create timeline directory")?; @@ -3134,6 +3342,7 @@ pub mod harness { evictions_low_residence_duration_metric_threshold: Some( tenant_conf.evictions_low_residence_duration_metric_threshold, ), + gc_feedback: Some(tenant_conf.gc_feedback), } } } @@ -3242,7 +3451,7 @@ pub mod harness { timelines_to_load.insert(timeline_id, timeline_metadata); } tenant - .load(ctx) + .load(None, ctx) .instrument(info_span!("try_load", tenant_id=%self.tenant_id)) .await?; tenant.state.send_replace(TenantState::Active); diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 50de316bc4..80d153661a 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs 
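// The config.rs hunk below extends the per-tenant override pattern with
// `gc_feedback`: every optional field in TenantConfOpt falls back to the global
// default when unset. A reduced sketch of that merge rule (struct and field
// names here are illustrative, not the real definitions):
#[derive(Clone, Copy)]
struct GlobalConf {
    gc_feedback: bool,
}

#[derive(Default, Clone, Copy)]
struct ConfOpt {
    gc_feedback: Option<bool>,
}

impl ConfOpt {
    fn merge(&self, global: &GlobalConf) -> GlobalConf {
        GlobalConf {
            // Some(x) is an explicit per-tenant override; None inherits the global value.
            gc_feedback: self.gc_feedback.unwrap_or(global.gc_feedback),
        }
    }
}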
@@ -99,6 +99,7 @@ pub struct TenantConf { // See the corresponding metric's help string. #[serde(with = "humantime_serde")] pub evictions_low_residence_duration_metric_threshold: Duration, + pub gc_feedback: bool, } /// Same as TenantConf, but this struct preserves the information about @@ -175,6 +176,10 @@ pub struct TenantConfOpt { #[serde(with = "humantime_serde")] #[serde(default)] pub evictions_low_residence_duration_metric_threshold: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default)] + pub gc_feedback: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -242,6 +247,7 @@ impl TenantConfOpt { evictions_low_residence_duration_metric_threshold: self .evictions_low_residence_duration_metric_threshold .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold), + gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback), } } } @@ -278,6 +284,7 @@ impl Default for TenantConf { DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, ) .expect("cannot parse default evictions_low_residence_duration_metric_threshold"), + gc_feedback: false, } } } @@ -372,6 +379,7 @@ impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt { ))?, ); } + tenant_conf.gc_feedback = request_data.gc_feedback; Ok(tenant_conf) } diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index b63c361314..49dcbc63c2 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -204,6 +204,35 @@ fn test_off_by_one() { assert_eq!(version.image_coverage.query(5), None); } +/// White-box regression test, checking for incorrect removal of node at key.end +#[test] +fn test_regression() { + let mut map = HistoricLayerCoverage::::new(); + map.insert( + LayerKey { + key: 0..5, + lsn: 0..5, + is_image: false, + }, + "Layer 1".to_string(), + ); + map.insert( + LayerKey { + key: 0..5, + lsn: 1..2, + is_image: false, + }, + "Layer 2".to_string(), + ); + + // If an insertion operation improperly deletes the endpoint of a previous layer + // (which is more likely to happen with layers that collide on key.end), we will + // end up with an infinite layer, covering the entire keyspace. Here we assert + // that there's no layer at key 100 because we didn't insert any layer there. + let version = map.get_version(100).unwrap(); + assert_eq!(version.delta_coverage.query(100), None); +} + /// Cover edge cases where layers begin or end on the same key #[test] fn test_key_collision() { diff --git a/pageserver/src/tenant/layer_map/layer_coverage.rs b/pageserver/src/tenant/layer_map/layer_coverage.rs index 4e3b4516dc..47aace97a5 100644 --- a/pageserver/src/tenant/layer_map/layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/layer_coverage.rs @@ -1,8 +1,8 @@ use std::ops::Range; -// TODO the `im` crate has 20x more downloads and also has -// persistent/immutable BTree. It also runs a bit faster but -// results are not the same on some tests. +// NOTE the `im` crate has 20x more downloads and also has +// persistent/immutable BTree. 
But it's bugged so rpds is a +// better choice https://github.com/neondatabase/neon/issues/3395 use rpds::RedBlackTreeMapSync; /// Data structure that can efficiently: @@ -10,19 +10,22 @@ use rpds::RedBlackTreeMapSync; /// - iterate the latest layers in a key range /// - insert layers in non-decreasing lsn.start order /// -/// The struct is parameterized over Value for easier -/// testing, but in practice it's some sort of layer. +/// For a detailed explanation and justification of this approach, see: +/// https://neon.tech/blog/persistent-structures-in-neons-wal-indexing +/// +/// NOTE The struct is parameterized over Value for easier +/// testing, but in practice it's some sort of layer. pub struct LayerCoverage { /// For every change in coverage (as we sweep the key space) /// we store (lsn.end, value). /// - /// We use an immutable/persistent tree so that we can keep historic - /// versions of this coverage without cloning the whole thing and - /// incurring quadratic memory cost. See HistoricLayerCoverage. + /// NOTE We use an immutable/persistent tree so that we can keep historic + /// versions of this coverage without cloning the whole thing and + /// incurring quadratic memory cost. See HistoricLayerCoverage. /// - /// We use the Sync version of the map because we want Self to - /// be Sync. Using nonsync might be faster, if we can work with - /// that. + /// NOTE We use the Sync version of the map because we want Self to + /// be Sync. Using nonsync might be faster, if we can work with + /// that. nodes: RedBlackTreeMapSync>, } @@ -41,6 +44,13 @@ impl LayerCoverage { /// Helper function to subdivide the key range without changing any values /// + /// This operation has no semantic effect by itself. It only helps us pin in + /// place the part of the coverage we don't want to change when inserting. + /// + /// As an analogy, think of a polygon. If you add a vertex along one of the + /// segments, the polygon is still the same, but it behaves differently when + /// we move or delete one of the other points. + /// /// Complexity: O(log N) fn add_node(&mut self, key: i128) { let value = match self.nodes.range(..=key).last() { @@ -74,7 +84,7 @@ impl LayerCoverage { let mut to_update = Vec::new(); let mut to_remove = Vec::new(); let mut prev_covered = false; - for (k, node) in self.nodes.range(key.clone()) { + for (k, node) in self.nodes.range(key) { let needs_cover = match node { None => true, Some((h, _)) => h < &lsn.end, @@ -87,9 +97,8 @@ impl LayerCoverage { } prev_covered = needs_cover; } - if !prev_covered { - to_remove.push(key.end); - } + // TODO check if the nodes inserted at key.start and key.end are safe + // to remove. It's fine to keep them but they could be redundant. 
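To make the change-point representation above concrete, here is a minimal, self-contained sketch of the same idea using a plain `BTreeMap`. The real `LayerCoverage` keeps this shape in a persistent `rpds` map precisely so `HistoricLayerCoverage` can snapshot old versions cheaply; `ToyCoverage`, the `String` layer names, and the `u64` LSNs here are illustration-only stand-ins, and the insert logic is deliberately naive.

```rust
use std::collections::BTreeMap;

/// Toy, mutable coverage index: each change point in the key space maps to
/// the layer (if any) covering keys from that point onward.
struct ToyCoverage {
    nodes: BTreeMap<i128, Option<(u64, String)>>,
}

impl ToyCoverage {
    fn new() -> Self {
        ToyCoverage { nodes: BTreeMap::new() }
    }

    /// The latest change point at or before `key` decides the covering layer: O(log N).
    fn query(&self, key: i128) -> Option<String> {
        self.nodes
            .range(..=key)
            .next_back()
            .and_then(|(_, v)| v.as_ref())
            .map(|(_, layer)| layer.clone())
    }

    /// Insert a layer over [start, end), preserving whatever previously covered `end`.
    fn insert(&mut self, start: i128, end: i128, lsn_end: u64, layer: String) {
        // Pin the coverage state at `end` so keys past the range are unaffected
        // (the "extra vertex on the polygon" from the comment above).
        let at_end = self.nodes.range(..=end).next_back().and_then(|(_, v)| v.clone());
        self.nodes.insert(end, at_end);
        // Naive version: overwrite every change point inside the range. The real
        // code only overwrites nodes whose lsn.end is below the new layer's.
        let inside: Vec<i128> = self.nodes.range(start..end).map(|(k, _)| *k).collect();
        for k in inside {
            self.nodes.insert(k, Some((lsn_end, layer.clone())));
        }
        self.nodes.insert(start, Some((lsn_end, layer)));
    }
}
```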
for k in to_update { self.nodes.insert_mut(k, Some((lsn.end, value.clone()))); } diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index b3be6061b3..5786db72f6 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -10,6 +10,7 @@ use tokio::fs; use anyhow::Context; use once_cell::sync::Lazy; use tokio::sync::RwLock; +use tokio::task::JoinSet; use tracing::*; use remote_storage::GenericRemoteStorage; @@ -20,7 +21,7 @@ use crate::context::{DownloadBehavior, RequestContext}; use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::TenantConfOpt; use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState}; -use crate::IGNORED_TENANT_FILE_NAME; +use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME}; use utils::fs_ext::PathExt; use utils::id::{TenantId, TimelineId}; @@ -63,6 +64,7 @@ pub async fn init_tenant_mgr( conf: &'static PageServerConf, broker_client: storage_broker::BrokerClientChannel, remote_storage: Option, + init_order: InitializationOrder, ) -> anyhow::Result<()> { // Scan local filesystem for attached tenants let tenants_dir = conf.tenants_path(); @@ -119,6 +121,7 @@ pub async fn init_tenant_mgr( &tenant_dir_path, broker_client.clone(), remote_storage.clone(), + Some(init_order.clone()), &ctx, ) { Ok(tenant) => { @@ -154,6 +157,7 @@ pub fn schedule_local_tenant_processing( tenant_path: &Path, broker_client: storage_broker::BrokerClientChannel, remote_storage: Option, + init_order: Option, ctx: &RequestContext, ) -> anyhow::Result> { anyhow::ensure!( @@ -207,7 +211,14 @@ pub fn schedule_local_tenant_processing( } else { info!("tenant {tenant_id} is assumed to be loadable, starting load operation"); // Start loading the tenant into memory. It will initially be in Loading state. - Tenant::spawn_load(conf, tenant_id, broker_client, remote_storage, ctx) + Tenant::spawn_load( + conf, + tenant_id, + broker_client, + remote_storage, + init_order, + ctx, + ) }; Ok(tenant) } @@ -222,6 +233,7 @@ pub fn schedule_local_tenant_processing( /// That could be easily misinterpreted by control plane, the consumer of the /// management API. For example, it could attach the tenant on a different pageserver. /// We would then be in split-brain once this pageserver restarts. +#[instrument] pub async fn shutdown_all_tenants() { // Prevent new tenants from being created. let tenants_to_shut_down = { @@ -238,39 +250,51 @@ pub async fn shutdown_all_tenants() { tenants_clone } TenantsMap::ShuttingDown(_) => { + // TODO: it is possible that detach and shutdown happen at the same time. as a + // result, during shutdown we do not wait for detach. 
error!("already shutting down, this function isn't supposed to be called more than once"); return; } } }; - let mut tenants_to_freeze_and_flush = Vec::with_capacity(tenants_to_shut_down.len()); - for (_, tenant) in tenants_to_shut_down { - if tenant.is_active() { - // updates tenant state, forbidding new GC and compaction iterations from starting - tenant.set_stopping().await; - tenants_to_freeze_and_flush.push(tenant); + let mut join_set = JoinSet::new(); + for (tenant_id, tenant) in tenants_to_shut_down { + join_set.spawn( + async move { + let freeze_and_flush = true; + + match tenant.shutdown(freeze_and_flush).await { + Ok(()) => debug!("tenant successfully stopped"), + Err(super::ShutdownError::AlreadyStopping) => { + warn!("tenant was already shutting down") + } + } + } + .instrument(info_span!("shutdown", %tenant_id)), + ); + } + + let mut panicked = 0; + + while let Some(res) = join_set.join_next().await { + match res { + Ok(()) => {} + Err(join_error) if join_error.is_cancelled() => { + unreachable!("we are not cancelling any of the futures"); + } + Err(join_error) if join_error.is_panic() => { + // cannot really do anything, as this panic is likely a bug + panicked += 1; + } + Err(join_error) => { + warn!("unknown kind of JoinError: {join_error}"); + } } } - // Shut down all existing walreceiver connections and stop accepting the new ones. - task_mgr::shutdown_tasks(Some(TaskKind::WalReceiverManager), None, None).await; - - // Ok, no background tasks running anymore. Flush any remaining data in - // memory to disk. - // - // We assume that any incoming connections that might request pages from - // the tenant have already been terminated by the caller, so there - // should be no more activity in any of the repositories. - // - // On error, log it but continue with the shutdown for other tenants. - for tenant in tenants_to_freeze_and_flush { - let tenant_id = tenant.tenant_id(); - debug!("shutdown tenant {tenant_id}"); - - if let Err(err) = tenant.freeze_and_flush().await { - error!("Could not checkpoint tenant {tenant_id} during shutdown: {err:?}"); - } + if panicked > 0 { + warn!(panicked, "observed panicks while shutting down tenants"); } } @@ -291,7 +315,7 @@ pub async fn create_tenant( // See https://github.com/neondatabase/neon/issues/4233 let created_tenant = - schedule_local_tenant_processing(conf, &tenant_directory, broker_client, remote_storage, ctx)?; + schedule_local_tenant_processing(conf, &tenant_directory, broker_client, remote_storage, None, ctx)?; // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here. 
@@ -304,11 +328,19 @@ pub async fn create_tenant(
    }).await
}

+#[derive(Debug, thiserror::Error)]
+pub enum SetNewTenantConfigError {
+    #[error(transparent)]
+    GetTenant(#[from] GetTenantError),
+    #[error(transparent)]
+    Persist(anyhow::Error),
+}
+
pub async fn set_new_tenant_config(
    conf: &'static PageServerConf,
    new_tenant_conf: TenantConfOpt,
    tenant_id: TenantId,
-) -> Result<(), TenantStateError> {
+) -> Result<(), SetNewTenantConfigError> {
    info!("configuring tenant {tenant_id}");
    let tenant = get_tenant(tenant_id, true).await?;

@@ -318,23 +350,32 @@ pub async fn set_new_tenant_config(
        &tenant_config_path,
        new_tenant_conf,
        false,
-    )?;
+    )
+    .map_err(SetNewTenantConfigError::Persist)?;
    tenant.set_new_tenant_config(new_tenant_conf).await;
    Ok(())
}

+#[derive(Debug, thiserror::Error)]
+pub enum GetTenantError {
+    #[error("Tenant {0} not found")]
+    NotFound(TenantId),
+    #[error("Tenant {0} is not active")]
+    NotActive(TenantId),
+}
+
/// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
/// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
pub async fn get_tenant(
    tenant_id: TenantId,
    active_only: bool,
-) -> Result<Arc<Tenant>, TenantStateError> {
+) -> Result<Arc<Tenant>, GetTenantError> {
    let m = TENANTS.read().await;
    let tenant = m
        .get(&tenant_id)
-        .ok_or(TenantStateError::NotFound(tenant_id))?;
+        .ok_or(GetTenantError::NotFound(tenant_id))?;
    if active_only && !tenant.is_active() {
-        Err(TenantStateError::NotActive(tenant_id))
+        Err(GetTenantError::NotActive(tenant_id))
    } else {
        Ok(Arc::clone(tenant))
    }
@@ -343,7 +384,7 @@ pub async fn get_tenant(
#[derive(Debug, thiserror::Error)]
pub enum DeleteTimelineError {
    #[error("Tenant {0}")]
-    Tenant(#[from] TenantStateError),
+    Tenant(#[from] GetTenantError),

    #[error("Timeline {0}")]
    Timeline(#[from] crate::tenant::DeleteTimelineError),
@@ -420,7 +461,7 @@ pub async fn load_tenant(
            .with_context(|| format!("Failed to remove tenant ignore mark {tenant_ignore_mark:?} during tenant loading"))?;
    }

-    let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, broker_client, remote_storage, ctx)
+    let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, broker_client, remote_storage, None, ctx)
        .with_context(|| {
            format!("Failed to schedule tenant processing in path {tenant_path:?}")
        })?;
@@ -493,7 +534,7 @@ pub async fn attach_tenant(
        .context("check for attach marker file existence")?;
    anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file");

-    let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, broker_client, Some(remote_storage), ctx)?;
+    let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, broker_client, Some(remote_storage), None, ctx)?;
    // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
    // See https://github.com/neondatabase/neon/issues/4233
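The `SetNewTenantConfigError`/`GetTenantError` split above follows a common `thiserror` pattern: small, purpose-specific enums with `#[from]` conversions so `?` keeps working across layers. A standalone sketch of that pattern, with hypothetical names and `u64` in place of `TenantId`:

```rust
#[derive(Debug, thiserror::Error)]
pub enum LookupError {
    #[error("Tenant {0} not found")]
    NotFound(u64),
    #[error("Tenant {0} is not active")]
    NotActive(u64),
}

#[derive(Debug, thiserror::Error)]
pub enum ConfigureError {
    // `#[from]` lets `?` convert a LookupError automatically;
    // `transparent` forwards Display and source() to the inner error.
    #[error(transparent)]
    Lookup(#[from] LookupError),
    #[error(transparent)]
    Persist(anyhow::Error),
}

fn lookup(id: u64) -> Result<(), LookupError> {
    Err(LookupError::NotFound(id))
}

pub fn configure(id: u64) -> Result<(), ConfigureError> {
    lookup(id)?; // LookupError -> ConfigureError via #[from]
    Ok(())
}
```

Narrow enums like these let the HTTP layer map each variant to a precise status code instead of pattern-matching on a catch-all error type.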
@@ -569,26 +610,26 @@ where
    // The exclusive lock here ensures we don't miss the tenant state updates before trying another removal.
    // tenant-wide cleanup operations may take some time (removing the entire tenant directory), so we want to
    // avoid holding the lock for the entire process.
-    {
-        let tenants_accessor = TENANTS.write().await;
-        match tenants_accessor.get(&tenant_id) {
-            Some(tenant) => match tenant.current_state() {
-                TenantState::Attaching
-                | TenantState::Loading
-                | TenantState::Activating
-                | TenantState::Broken { .. }
-                | TenantState::Active => tenant.set_stopping().await,
-                TenantState::Stopping => return Err(TenantStateError::IsStopping(tenant_id)),
-            },
-            None => return Err(TenantStateError::NotFound(tenant_id)),
+    let tenant = {
+        TENANTS
+            .write()
+            .await
+            .get(&tenant_id)
+            .cloned()
+            .ok_or(TenantStateError::NotFound(tenant_id))?
+    };
+
+    let freeze_and_flush = false;
+
+    // shutdown is sure to transition tenant to stopping, and wait for all tasks to complete, so
+    // that we can continue safely to cleanup.
+    match tenant.shutdown(freeze_and_flush).await {
+        Ok(()) => {}
+        Err(super::ShutdownError::AlreadyStopping) => {
+            return Err(TenantStateError::IsStopping(tenant_id))
        }
    }

-    // shutdown all tenant and timeline tasks: gc, compaction, page service)
-    // No new tasks will be started for this tenant because it's in `Stopping` state.
-    // Hence, once we're done here, the `tenant_cleanup` callback can mutate tenant on-disk state freely.
-    task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
-
    match tenant_cleanup
        .await
        .with_context(|| format!("Failed to run cleanup for tenant {tenant_id}"))
@@ -670,7 +711,6 @@ pub async fn immediate_gc(
    Ok(wait_task_done)
}

-#[cfg(feature = "testing")]
pub async fn immediate_compact(
    tenant_id: TenantId,
    timeline_id: TimelineId,
diff --git a/pageserver/src/tenant/par_fsync.rs b/pageserver/src/tenant/par_fsync.rs
index 0b0217ab58..3cbcfe8774 100644
--- a/pageserver/src/tenant/par_fsync.rs
+++ b/pageserver/src/tenant/par_fsync.rs
@@ -19,14 +19,8 @@ fn parallel_worker(paths: &[PathBuf], next_path_idx: &AtomicUsize) -> io::Result
    Ok(())
}

-pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
-    const PARALLEL_PATH_THRESHOLD: usize = 1;
-    if paths.len() <= PARALLEL_PATH_THRESHOLD {
-        for path in paths {
-            fsync_path(path)?;
-        }
-        return Ok(());
-    }
+fn fsync_in_thread_pool(paths: &[PathBuf]) -> io::Result<()> {
+    // TODO: remove this function in favor of `par_fsync_async` once we asyncify everything.

    /// Use at most this number of threads.
    /// Increasing this limit will
@@ -36,11 +30,11 @@ pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
    let num_threads = paths.len().min(MAX_NUM_THREADS);
    let next_path_idx = AtomicUsize::new(0);

-    crossbeam_utils::thread::scope(|s| -> io::Result<()> {
+    std::thread::scope(|s| -> io::Result<()> {
        let mut handles = vec![];
        // Spawn `num_threads - 1`, as the current thread is also a worker.
        for _ in 1..num_threads {
-            handles.push(s.spawn(|_| parallel_worker(paths, &next_path_idx)));
+            handles.push(s.spawn(|| parallel_worker(paths, &next_path_idx)));
        }

        parallel_worker(paths, &next_path_idx)?;
@@ -51,5 +45,41 @@ pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
        Ok(())
    })
-    .unwrap()
+}
+
+/// Parallel fsync all files. Can be used in non-async contexts; it runs the
+/// fsyncs on a scoped thread pool.
+pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
+    if paths.len() == 1 {
+        fsync_path(&paths[0])?;
+        return Ok(());
+    }
+
+    fsync_in_thread_pool(paths)
+}
+
+/// Parallel fsync asynchronously. Each fsync runs on the blocking thread pool via
+/// `spawn_blocking`, with at most `MAX_CONCURRENT_FSYNC` of them in flight at a time.
+pub async fn par_fsync_async(paths: &[PathBuf]) -> io::Result<()> { + const MAX_CONCURRENT_FSYNC: usize = 64; + let mut next = paths.iter().peekable(); + let mut js = tokio::task::JoinSet::new(); + loop { + while js.len() < MAX_CONCURRENT_FSYNC && next.peek().is_some() { + let next = next.next().expect("just peeked"); + let next = next.to_owned(); + js.spawn_blocking(move || fsync_path(&next)); + } + + // now the joinset has been filled up, wait for next to complete + if let Some(res) = js.join_next().await { + res??; + } else { + // last item had already completed + assert!( + next.peek().is_none(), + "joinset emptied, we shouldn't have more work" + ); + return Ok(()); + } + } } diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index d30d6c5c6e..7c071463de 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -4,6 +4,7 @@ pub mod delta_layer; mod filename; mod image_layer; mod inmemory_layer; +mod layer_desc; mod remote_layer; use crate::config::PageServerConf; @@ -37,6 +38,7 @@ pub use delta_layer::{DeltaLayer, DeltaLayerWriter}; pub use filename::{DeltaFileName, ImageFileName, LayerFileName}; pub use image_layer::{ImageLayer, ImageLayerWriter}; pub use inmemory_layer::InMemoryLayer; +pub use layer_desc::PersistentLayerDesc; pub use remote_layer::RemoteLayer; use super::layer_map::BatchedUpdates; @@ -406,14 +408,23 @@ pub type LayerKeyIter<'i> = Box + 'i>; /// An image layer is a snapshot of all the data in a key-range, at a single /// LSN. pub trait PersistentLayer: Layer { - fn get_tenant_id(&self) -> TenantId; + /// Get the layer descriptor. + fn layer_desc(&self) -> &PersistentLayerDesc; + + fn get_tenant_id(&self) -> TenantId { + self.layer_desc().tenant_id + } /// Identify the timeline this layer belongs to - fn get_timeline_id(&self) -> TimelineId; + fn get_timeline_id(&self) -> TimelineId { + self.layer_desc().timeline_id + } /// File name used for this layer, both in the pageserver's local filesystem /// state as well as in the remote storage. - fn filename(&self) -> LayerFileName; + fn filename(&self) -> LayerFileName { + self.layer_desc().filename() + } // Path to the layer file in the local filesystem. // `None` for `RemoteLayer`. @@ -542,7 +553,7 @@ impl From for LayerDescriptor { /// /// This is used by DeltaLayer and ImageLayer. Normally, this holds a reference to the /// global config, and paths to layer files are constructed using the tenant/timeline -/// path from the config. But in the 'pageserver_binutils' binary, we need to construct a Layer +/// path from the config. But in the 'pagectl' binary, we need to construct a Layer /// struct for a file on disk, without having a page server running, so that we have no /// config. In that case, we use the Path variant to hold the full path to the file on /// disk. 
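The `PersistentLayer` change above turns `get_tenant_id`, `get_timeline_id`, and `filename` into default methods derived from a single required `layer_desc()` accessor, so each layer type only has to store and return its descriptor. A toy, self-contained sketch of that pattern (all names and types here are hypothetical stand-ins):

```rust
#[derive(Clone, Debug)]
struct Desc {
    tenant: u64,
    timeline: u64,
    name: String,
}

trait PersistentThing {
    // The one required method: hand out the descriptor.
    fn desc(&self) -> &Desc;

    // Everything else becomes a default method, removing per-impl boilerplate.
    fn tenant(&self) -> u64 {
        self.desc().tenant
    }
    fn timeline(&self) -> u64 {
        self.desc().timeline
    }
    fn file_name(&self) -> String {
        self.desc().name.clone()
    }
}

struct OnDiskLayer {
    desc: Desc,
}

impl PersistentThing for OnDiskLayer {
    fn desc(&self) -> &Desc {
        &self.desc
    }
}
```

The payoff is the same as in the diff: delta, image, and remote layers can no longer disagree about how a file name or id is derived, because there is exactly one code path.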
diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index ba3ab6dd4c..5f2fb1ebea 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -56,8 +56,8 @@ use utils::{ }; use super::{ - DeltaFileName, Layer, LayerAccessStats, LayerAccessStatsReset, LayerFileName, LayerIter, - LayerKeyIter, PathOrConf, + DeltaFileName, Layer, LayerAccessStats, LayerAccessStatsReset, LayerIter, LayerKeyIter, + PathOrConf, PersistentLayerDesc, }; /// @@ -89,10 +89,10 @@ impl From<&DeltaLayer> for Summary { magic: DELTA_FILE_MAGIC, format_version: STORAGE_FORMAT_VERSION, - tenant_id: layer.tenant_id, - timeline_id: layer.timeline_id, - key_range: layer.key_range.clone(), - lsn_range: layer.lsn_range.clone(), + tenant_id: layer.desc.tenant_id, + timeline_id: layer.desc.timeline_id, + key_range: layer.desc.key_range.clone(), + lsn_range: layer.desc.lsn_range.clone(), index_start_blk: 0, index_root_blk: 0, @@ -110,7 +110,7 @@ const WILL_INIT: u64 = 1; /// reading/deserializing records themselves. /// #[derive(Debug, Serialize, Deserialize, Copy, Clone)] -struct BlobRef(u64); +pub struct BlobRef(pub u64); impl BlobRef { pub fn will_init(&self) -> bool { @@ -180,10 +180,7 @@ impl DeltaKey { pub struct DeltaLayer { path_or_conf: PathOrConf, - pub tenant_id: TenantId, - pub timeline_id: TimelineId, - pub key_range: Range, - pub lsn_range: Range, + pub desc: PersistentLayerDesc, pub file_size: u64, @@ -197,8 +194,8 @@ impl std::fmt::Debug for DeltaLayer { use super::RangeDisplayDebug; f.debug_struct("DeltaLayer") - .field("key_range", &RangeDisplayDebug(&self.key_range)) - .field("lsn_range", &self.lsn_range) + .field("key_range", &RangeDisplayDebug(&self.desc.key_range)) + .field("lsn_range", &self.desc.lsn_range) .field("file_size", &self.file_size) .field("inner", &self.inner) .finish() @@ -228,30 +225,16 @@ impl std::fmt::Debug for DeltaLayerInner { } impl Layer for DeltaLayer { - fn get_key_range(&self) -> Range { - self.key_range.clone() - } - - fn get_lsn_range(&self) -> Range { - self.lsn_range.clone() - } - fn is_incremental(&self) -> bool { - true - } - - fn short_id(&self) -> String { - self.filename().file_name() - } /// debugging function to print out the contents of the layer fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> { println!( "----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} ----", - self.tenant_id, - self.timeline_id, - self.key_range.start, - self.key_range.end, - self.lsn_range.start, - self.lsn_range.end + self.desc.tenant_id, + self.desc.timeline_id, + self.desc.key_range.start, + self.desc.key_range.end, + self.desc.lsn_range.start, + self.desc.lsn_range.end ); if !verbose { @@ -324,10 +307,10 @@ impl Layer for DeltaLayer { reconstruct_state: &mut ValueReconstructState, ctx: &RequestContext, ) -> anyhow::Result { - ensure!(lsn_range.start >= self.lsn_range.start); + ensure!(lsn_range.start >= self.desc.lsn_range.start); let mut need_image = true; - ensure!(self.key_range.contains(&key)); + ensure!(self.desc.key_range.contains(&key)); { // Open the file and lock the metadata in memory @@ -402,19 +385,31 @@ impl Layer for DeltaLayer { Ok(ValueReconstructResult::Complete) } } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn get_key_range(&self) -> Range { + self.layer_desc().key_range.clone() + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. 
+ fn get_lsn_range(&self) -> Range { + self.layer_desc().lsn_range.clone() + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn is_incremental(&self) -> bool { + self.layer_desc().is_incremental + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn short_id(&self) -> String { + self.layer_desc().short_id() + } } impl PersistentLayer for DeltaLayer { - fn get_tenant_id(&self) -> TenantId { - self.tenant_id - } - - fn get_timeline_id(&self) -> TimelineId { - self.timeline_id - } - - fn filename(&self) -> LayerFileName { - self.layer_name().into() + fn layer_desc(&self) -> &PersistentLayerDesc { + &self.desc } fn local_path(&self) -> Option { @@ -602,10 +597,12 @@ impl DeltaLayer { ) -> DeltaLayer { DeltaLayer { path_or_conf: PathOrConf::Conf(conf), - timeline_id, - tenant_id, - key_range: filename.key_range.clone(), - lsn_range: filename.lsn_range.clone(), + desc: PersistentLayerDesc::new_delta( + tenant_id, + timeline_id, + filename.key_range.clone(), + filename.lsn_range.clone(), + ), file_size, access_stats, inner: RwLock::new(DeltaLayerInner { @@ -619,7 +616,7 @@ impl DeltaLayer { /// Create a DeltaLayer struct representing an existing file on disk. /// - /// This variant is only used for debugging purposes, by the 'pageserver_binutils' binary. + /// This variant is only used for debugging purposes, by the 'pagectl' binary. pub fn new_for_path(path: &Path, file: File) -> Result { let mut summary_buf = Vec::new(); summary_buf.resize(PAGE_SZ, 0); @@ -632,10 +629,12 @@ impl DeltaLayer { Ok(DeltaLayer { path_or_conf: PathOrConf::Path(path.to_path_buf()), - timeline_id: summary.timeline_id, - tenant_id: summary.tenant_id, - key_range: summary.key_range, - lsn_range: summary.lsn_range, + desc: PersistentLayerDesc::new_delta( + summary.tenant_id, + summary.timeline_id, + summary.key_range, + summary.lsn_range, + ), file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), inner: RwLock::new(DeltaLayerInner { @@ -648,18 +647,14 @@ impl DeltaLayer { } fn layer_name(&self) -> DeltaFileName { - DeltaFileName { - key_range: self.key_range.clone(), - lsn_range: self.lsn_range.clone(), - } + self.desc.delta_file_name() } - /// Path to the layer file in pageserver workdir. pub fn path(&self) -> PathBuf { Self::path_for( &self.path_or_conf, - self.timeline_id, - self.tenant_id, + self.desc.timeline_id, + self.desc.tenant_id, &self.layer_name(), ) } @@ -803,10 +798,12 @@ impl DeltaLayerWriterInner { // set inner.file here. The first read will have to re-open it. 
let layer = DeltaLayer { path_or_conf: PathOrConf::Conf(self.conf), - tenant_id: self.tenant_id, - timeline_id: self.timeline_id, - key_range: self.key_start..key_end, - lsn_range: self.lsn_range.clone(), + desc: PersistentLayerDesc::new_delta( + self.tenant_id, + self.timeline_id, + self.key_start..key_end, + self.lsn_range.clone(), + ), file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), inner: RwLock::new(DeltaLayerInner { diff --git a/pageserver/src/tenant/storage_layer/filename.rs b/pageserver/src/tenant/storage_layer/filename.rs index e2112fc388..5dcd54689e 100644 --- a/pageserver/src/tenant/storage_layer/filename.rs +++ b/pageserver/src/tenant/storage_layer/filename.rs @@ -9,6 +9,8 @@ use std::str::FromStr; use utils::lsn::Lsn; +use super::PersistentLayerDesc; + // Note: Timeline::load_layer_map() relies on this sort order #[derive(PartialEq, Eq, Clone, Hash)] pub struct DeltaFileName { @@ -153,7 +155,7 @@ impl Ord for ImageFileName { impl ImageFileName { pub fn lsn_as_range(&self) -> Range { // Saves from having to copypaste this all over - self.lsn..(self.lsn + 1) + PersistentLayerDesc::image_layer_lsn_range(self.lsn) } } diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index d298b3e852..b55dd08a6d 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -52,8 +52,8 @@ use utils::{ lsn::Lsn, }; -use super::filename::{ImageFileName, LayerFileName}; -use super::{Layer, LayerAccessStatsReset, LayerIter, PathOrConf}; +use super::filename::ImageFileName; +use super::{Layer, LayerAccessStatsReset, LayerIter, PathOrConf, PersistentLayerDesc}; /// /// Header stored in the beginning of the file @@ -84,9 +84,9 @@ impl From<&ImageLayer> for Summary { Self { magic: IMAGE_FILE_MAGIC, format_version: STORAGE_FORMAT_VERSION, - tenant_id: layer.tenant_id, - timeline_id: layer.timeline_id, - key_range: layer.key_range.clone(), + tenant_id: layer.desc.tenant_id, + timeline_id: layer.desc.timeline_id, + key_range: layer.desc.key_range.clone(), lsn: layer.lsn, index_start_blk: 0, @@ -104,14 +104,13 @@ impl From<&ImageLayer> for Summary { /// and it needs to be loaded before using it in queries. 
pub struct ImageLayer { path_or_conf: PathOrConf, - pub tenant_id: TenantId, - pub timeline_id: TimelineId, - pub key_range: Range, - pub file_size: u64, - // This entry contains an image of all pages as of this LSN + pub desc: PersistentLayerDesc, + // This entry contains an image of all pages as of this LSN, should be the same as desc.lsn pub lsn: Lsn, + pub file_size: u64, + access_stats: LayerAccessStats, inner: RwLock, @@ -122,7 +121,7 @@ impl std::fmt::Debug for ImageLayer { use super::RangeDisplayDebug; f.debug_struct("ImageLayer") - .field("key_range", &RangeDisplayDebug(&self.key_range)) + .field("key_range", &RangeDisplayDebug(&self.desc.key_range)) .field("file_size", &self.file_size) .field("lsn", &self.lsn) .field("inner", &self.inner) @@ -153,27 +152,15 @@ impl std::fmt::Debug for ImageLayerInner { } impl Layer for ImageLayer { - fn get_key_range(&self) -> Range { - self.key_range.clone() - } - - fn get_lsn_range(&self) -> Range { - // End-bound is exclusive - self.lsn..(self.lsn + 1) - } - fn is_incremental(&self) -> bool { - false - } - - fn short_id(&self) -> String { - self.filename().file_name() - } - /// debugging function to print out the contents of the layer fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> { println!( "----- image layer for ten {} tli {} key {}-{} at {} ----", - self.tenant_id, self.timeline_id, self.key_range.start, self.key_range.end, self.lsn + self.desc.tenant_id, + self.desc.timeline_id, + self.desc.key_range.start, + self.desc.key_range.end, + self.lsn ); if !verbose { @@ -203,7 +190,7 @@ impl Layer for ImageLayer { reconstruct_state: &mut ValueReconstructState, ctx: &RequestContext, ) -> anyhow::Result { - assert!(self.key_range.contains(&key)); + assert!(self.desc.key_range.contains(&key)); assert!(lsn_range.start >= self.lsn); assert!(lsn_range.end >= self.lsn); @@ -230,24 +217,37 @@ impl Layer for ImageLayer { Ok(ValueReconstructResult::Missing) } } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn get_key_range(&self) -> Range { + self.layer_desc().key_range.clone() + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn get_lsn_range(&self) -> Range { + self.layer_desc().lsn_range.clone() + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn is_incremental(&self) -> bool { + self.layer_desc().is_incremental + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn short_id(&self) -> String { + self.layer_desc().short_id() + } } impl PersistentLayer for ImageLayer { - fn filename(&self) -> LayerFileName { - self.layer_name().into() + fn layer_desc(&self) -> &PersistentLayerDesc { + &self.desc } fn local_path(&self) -> Option { Some(self.path()) } - fn get_tenant_id(&self) -> TenantId { - self.tenant_id - } - - fn get_timeline_id(&self) -> TimelineId { - self.timeline_id - } fn iter(&self, _ctx: &RequestContext) -> Result> { unimplemented!(); } @@ -405,9 +405,13 @@ impl ImageLayer { ) -> ImageLayer { ImageLayer { path_or_conf: PathOrConf::Conf(conf), - timeline_id, - tenant_id, - key_range: filename.key_range.clone(), + desc: PersistentLayerDesc::new_img( + tenant_id, + timeline_id, + filename.key_range.clone(), + filename.lsn, + false, + ), // Now we assume image layer ALWAYS covers the full range. This may change in the future. 
lsn: filename.lsn, file_size, access_stats, @@ -422,7 +426,7 @@ impl ImageLayer { /// Create an ImageLayer struct representing an existing file on disk. /// - /// This variant is only used for debugging purposes, by the 'pageserver_binutils' binary. + /// This variant is only used for debugging purposes, by the 'pagectl' binary. pub fn new_for_path(path: &Path, file: File) -> Result { let mut summary_buf = Vec::new(); summary_buf.resize(PAGE_SZ, 0); @@ -433,9 +437,13 @@ impl ImageLayer { .context("get file metadata to determine size")?; Ok(ImageLayer { path_or_conf: PathOrConf::Path(path.to_path_buf()), - timeline_id: summary.timeline_id, - tenant_id: summary.tenant_id, - key_range: summary.key_range, + desc: PersistentLayerDesc::new_img( + summary.tenant_id, + summary.timeline_id, + summary.key_range, + summary.lsn, + false, + ), // Now we assume image layer ALWAYS covers the full range. This may change in the future. lsn: summary.lsn, file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), @@ -449,18 +457,15 @@ impl ImageLayer { } fn layer_name(&self) -> ImageFileName { - ImageFileName { - key_range: self.key_range.clone(), - lsn: self.lsn, - } + self.desc.image_file_name() } /// Path to the layer file in pageserver workdir. pub fn path(&self) -> PathBuf { Self::path_for( &self.path_or_conf, - self.timeline_id, - self.tenant_id, + self.desc.timeline_id, + self.desc.tenant_id, &self.layer_name(), ) } @@ -484,6 +489,7 @@ struct ImageLayerWriterInner { tenant_id: TenantId, key_range: Range, lsn: Lsn, + is_incremental: bool, blob_writer: WriteBlobWriter, tree: DiskBtreeBuilder, @@ -499,6 +505,7 @@ impl ImageLayerWriterInner { tenant_id: TenantId, key_range: &Range, lsn: Lsn, + is_incremental: bool, ) -> anyhow::Result { // Create the file initially with a temporary filename. // We'll atomically rename it to the final name when we're done. @@ -533,6 +540,7 @@ impl ImageLayerWriterInner { lsn, tree: tree_builder, blob_writer, + is_incremental, }; Ok(writer) @@ -570,6 +578,14 @@ impl ImageLayerWriterInner { file.write_all(buf.as_ref())?; } + let desc = PersistentLayerDesc::new_img( + self.tenant_id, + self.timeline_id, + self.key_range.clone(), + self.lsn, + self.is_incremental, // for now, image layer ALWAYS covers the full range + ); + // Fill in the summary on blk 0 let summary = Summary { magic: IMAGE_FILE_MAGIC, @@ -593,9 +609,7 @@ impl ImageLayerWriterInner { // set inner.file here. The first read will have to re-open it. 
let layer = ImageLayer {
            path_or_conf: PathOrConf::Conf(self.conf),
-            timeline_id: self.timeline_id,
-            tenant_id: self.tenant_id,
-            key_range: self.key_range.clone(),
+            desc,
            lsn: self.lsn,
            file_size: metadata.len(),
            access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
@@ -667,6 +681,7 @@ impl ImageLayerWriter {
        tenant_id: TenantId,
        key_range: &Range<Key>,
        lsn: Lsn,
+        is_incremental: bool,
    ) -> anyhow::Result<Self> {
        Ok(Self {
            inner: Some(ImageLayerWriterInner::new(
@@ -675,6 +690,7 @@ impl ImageLayerWriter {
                tenant_id,
                key_range,
                lsn,
+                is_incremental,
            )?),
        })
    }
diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs
new file mode 100644
index 0000000000..a9859681d3
--- /dev/null
+++ b/pageserver/src/tenant/storage_layer/layer_desc.rs
@@ -0,0 +1,109 @@
+use std::ops::Range;
+use utils::{
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
+};
+
+use crate::repository::Key;
+
+use super::{DeltaFileName, ImageFileName, LayerFileName};
+
+/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
+/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
+/// a unified way to generate layer information like file name.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct PersistentLayerDesc {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub key_range: Range<Key>,
+    /// For image layer, this is `[lsn, lsn+1)`.
+    pub lsn_range: Range<Lsn>,
+    /// Whether this is a delta layer.
+    pub is_delta: bool,
+    /// Whether this layer only contains page images for part of the keys in the range. In the current implementation, this should
+    /// always be equal to `is_delta`. If we land the partial image layer PR someday, image layer could also be
+    /// incremental.
+    pub is_incremental: bool,
+}
+
+impl PersistentLayerDesc {
+    pub fn short_id(&self) -> String {
+        self.filename().file_name()
+    }
+
+    pub fn new_img(
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        key_range: Range<Key>,
+        lsn: Lsn,
+        is_incremental: bool,
+    ) -> Self {
+        Self {
+            tenant_id,
+            timeline_id,
+            key_range,
+            lsn_range: Self::image_layer_lsn_range(lsn),
+            is_delta: false,
+            is_incremental,
+        }
+    }
+
+    pub fn new_delta(
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        key_range: Range<Key>,
+        lsn_range: Range<Lsn>,
+    ) -> Self {
+        Self {
+            tenant_id,
+            timeline_id,
+            key_range,
+            lsn_range,
+            is_delta: true,
+            is_incremental: true,
+        }
+    }
+
+    /// Get the LSN that the image layer covers.
+    pub fn image_layer_lsn(&self) -> Lsn {
+        assert!(!self.is_delta);
+        assert!(self.lsn_range.start + 1 == self.lsn_range.end);
+        self.lsn_range.start
+    }
+
+    /// Get the LSN range corresponding to a single image layer LSN.
+    pub fn image_layer_lsn_range(lsn: Lsn) -> Range<Lsn> {
+        lsn..(lsn + 1)
+    }
+
+    /// Get a delta file name for this layer.
+    ///
+    /// Panic: if this is not a delta layer.
+    pub fn delta_file_name(&self) -> DeltaFileName {
+        assert!(self.is_delta);
+        DeltaFileName {
+            key_range: self.key_range.clone(),
+            lsn_range: self.lsn_range.clone(),
+        }
+    }
+
+    /// Get an image file name for this layer.
+ /// + /// Panic: if this is not an image layer, or the lsn range is invalid + pub fn image_file_name(&self) -> ImageFileName { + assert!(!self.is_delta); + assert!(self.lsn_range.start + 1 == self.lsn_range.end); + ImageFileName { + key_range: self.key_range.clone(), + lsn: self.lsn_range.start, + } + } + + pub fn filename(&self) -> LayerFileName { + if self.is_delta { + self.delta_file_name().into() + } else { + self.image_file_name().into() + } + } +} diff --git a/pageserver/src/tenant/storage_layer/remote_layer.rs b/pageserver/src/tenant/storage_layer/remote_layer.rs index 2106587ab2..ff0f44da92 100644 --- a/pageserver/src/tenant/storage_layer/remote_layer.rs +++ b/pageserver/src/tenant/storage_layer/remote_layer.rs @@ -18,11 +18,10 @@ use utils::{ lsn::Lsn, }; -use super::filename::{DeltaFileName, ImageFileName, LayerFileName}; -use super::image_layer::ImageLayer; +use super::filename::{DeltaFileName, ImageFileName}; use super::{ - DeltaLayer, LayerAccessStats, LayerAccessStatsReset, LayerIter, LayerKeyIter, - LayerResidenceStatus, PersistentLayer, + DeltaLayer, ImageLayer, LayerAccessStats, LayerAccessStatsReset, LayerIter, LayerKeyIter, + LayerResidenceStatus, PersistentLayer, PersistentLayerDesc, }; /// RemoteLayer is a not yet downloaded [`ImageLayer`] or @@ -34,19 +33,10 @@ use super::{ /// /// See: [`crate::context::RequestContext`] for authorization to download pub struct RemoteLayer { - tenantid: TenantId, - timelineid: TimelineId, - key_range: Range, - lsn_range: Range, - - pub file_name: LayerFileName, + pub desc: PersistentLayerDesc, pub layer_metadata: LayerFileMetadata, - is_delta: bool, - - is_incremental: bool, - access_stats: LayerAccessStats, pub(crate) ongoing_download: Arc, @@ -66,22 +56,14 @@ pub struct RemoteLayer { impl std::fmt::Debug for RemoteLayer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("RemoteLayer") - .field("file_name", &self.file_name) + .field("file_name", &self.desc.filename()) .field("layer_metadata", &self.layer_metadata) - .field("is_incremental", &self.is_incremental) + .field("is_incremental", &self.desc.is_incremental) .finish() } } impl Layer for RemoteLayer { - fn get_key_range(&self) -> Range { - self.key_range.clone() - } - - fn get_lsn_range(&self) -> Range { - self.lsn_range.clone() - } - fn get_value_reconstruct_data( &self, _key: Key, @@ -95,53 +77,45 @@ impl Layer for RemoteLayer { ); } - fn is_incremental(&self) -> bool { - self.is_incremental - } - /// debugging function to print out the contents of the layer fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> { println!( "----- remote layer for ten {} tli {} keys {}-{} lsn {}-{} ----", - self.tenantid, - self.timelineid, - self.key_range.start, - self.key_range.end, - self.lsn_range.start, - self.lsn_range.end + self.desc.tenant_id, + self.desc.timeline_id, + self.desc.key_range.start, + self.desc.key_range.end, + self.desc.lsn_range.start, + self.desc.lsn_range.end ); Ok(()) } + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn get_key_range(&self) -> Range { + self.layer_desc().key_range.clone() + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. + fn get_lsn_range(&self) -> Range { + self.layer_desc().lsn_range.clone() + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. 
+ fn is_incremental(&self) -> bool { + self.layer_desc().is_incremental + } + + /// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers. fn short_id(&self) -> String { - self.filename().file_name() + self.layer_desc().short_id() } } impl PersistentLayer for RemoteLayer { - fn get_tenant_id(&self) -> TenantId { - self.tenantid - } - - fn get_timeline_id(&self) -> TimelineId { - self.timelineid - } - - fn filename(&self) -> LayerFileName { - if self.is_delta { - DeltaFileName { - key_range: self.key_range.clone(), - lsn_range: self.lsn_range.clone(), - } - .into() - } else { - ImageFileName { - key_range: self.key_range.clone(), - lsn: self.lsn_range.start, - } - .into() - } + fn layer_desc(&self) -> &PersistentLayerDesc { + &self.desc } fn local_path(&self) -> Option { @@ -176,7 +150,7 @@ impl PersistentLayer for RemoteLayer { let layer_file_name = self.filename().file_name(); let lsn_range = self.get_lsn_range(); - if self.is_delta { + if self.desc.is_delta { HistoricLayerInfo::Delta { layer_file_name, layer_file_size: self.layer_metadata.file_size(), @@ -210,13 +184,13 @@ impl RemoteLayer { access_stats: LayerAccessStats, ) -> RemoteLayer { RemoteLayer { - tenantid, - timelineid, - key_range: fname.key_range.clone(), - lsn_range: fname.lsn_as_range(), - is_delta: false, - is_incremental: false, - file_name: fname.to_owned().into(), + desc: PersistentLayerDesc::new_img( + tenantid, + timelineid, + fname.key_range.clone(), + fname.lsn, + false, + ), layer_metadata: layer_metadata.clone(), ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)), download_replacement_failure: std::sync::atomic::AtomicBool::default(), @@ -232,13 +206,12 @@ impl RemoteLayer { access_stats: LayerAccessStats, ) -> RemoteLayer { RemoteLayer { - tenantid, - timelineid, - key_range: fname.key_range.clone(), - lsn_range: fname.lsn_range.clone(), - is_delta: true, - is_incremental: true, - file_name: fname.to_owned().into(), + desc: PersistentLayerDesc::new_delta( + tenantid, + timelineid, + fname.key_range.clone(), + fname.lsn_range.clone(), + ), layer_metadata: layer_metadata.clone(), ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)), download_replacement_failure: std::sync::atomic::AtomicBool::default(), @@ -256,15 +229,12 @@ impl RemoteLayer { where L: ?Sized + Layer, { - if self.is_delta { - let fname = DeltaFileName { - key_range: self.key_range.clone(), - lsn_range: self.lsn_range.clone(), - }; + if self.desc.is_delta { + let fname = self.desc.delta_file_name(); Arc::new(DeltaLayer::new( conf, - self.timelineid, - self.tenantid, + self.desc.timeline_id, + self.desc.tenant_id, &fname, file_size, self.access_stats.clone_for_residence_change( @@ -273,14 +243,11 @@ impl RemoteLayer { ), )) } else { - let fname = ImageFileName { - key_range: self.key_range.clone(), - lsn: self.lsn_range.start, - }; + let fname = self.desc.image_file_name(); Arc::new(ImageLayer::new( conf, - self.timelineid, - self.tenantid, + self.desc.timeline_id, + self.desc.tenant_id, &fname, file_size, self.access_stats.clone_for_residence_change( diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 6bf26f1da1..360818b5a7 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -9,13 +9,17 @@ use crate::context::{DownloadBehavior, RequestContext}; use crate::metrics::TENANT_TASK_EVENTS; use crate::task_mgr; use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME}; -use crate::tenant::mgr; use crate::tenant::{Tenant, TenantState}; use 
tokio_util::sync::CancellationToken; use tracing::*; -use utils::id::TenantId; +use utils::completion; -pub fn start_background_loops(tenant_id: TenantId) { +/// Start per tenant background loops: compaction and gc. +pub fn start_background_loops( + tenant: &Arc, + background_jobs_can_start: Option<&completion::Barrier>, +) { + let tenant_id = tenant.tenant_id; task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::Compaction, @@ -23,11 +27,20 @@ pub fn start_background_loops(tenant_id: TenantId) { None, &format!("compactor for tenant {tenant_id}"), false, - async move { - compaction_loop(tenant_id) - .instrument(info_span!("compaction_loop", tenant_id = %tenant_id)) - .await; - Ok(()) + { + let tenant = Arc::clone(tenant); + let background_jobs_can_start = background_jobs_can_start.cloned(); + async move { + let cancel = task_mgr::shutdown_token(); + tokio::select! { + _ = cancel.cancelled() => { return Ok(()) }, + _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {} + }; + compaction_loop(tenant, cancel) + .instrument(info_span!("compaction_loop", tenant_id = %tenant_id)) + .await; + Ok(()) + } }, ); task_mgr::spawn( @@ -37,11 +50,20 @@ pub fn start_background_loops(tenant_id: TenantId) { None, &format!("garbage collector for tenant {tenant_id}"), false, - async move { - gc_loop(tenant_id) - .instrument(info_span!("gc_loop", tenant_id = %tenant_id)) - .await; - Ok(()) + { + let tenant = Arc::clone(tenant); + let background_jobs_can_start = background_jobs_can_start.cloned(); + async move { + let cancel = task_mgr::shutdown_token(); + tokio::select! { + _ = cancel.cancelled() => { return Ok(()) }, + _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {} + }; + gc_loop(tenant, cancel) + .instrument(info_span!("gc_loop", tenant_id = %tenant_id)) + .await; + Ok(()) + } }, ); } @@ -49,27 +71,26 @@ pub fn start_background_loops(tenant_id: TenantId) { /// /// Compaction task's main loop /// -async fn compaction_loop(tenant_id: TenantId) { +async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { let wait_duration = Duration::from_secs(2); info!("starting"); TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); async { - let cancel = task_mgr::shutdown_token(); let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download); let mut first = true; loop { trace!("waking up"); - let tenant = tokio::select! { + tokio::select! { _ = cancel.cancelled() => { info!("received cancellation request"); return; }, - tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result { + tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result { ControlFlow::Break(()) => return, - ControlFlow::Continue(tenant) => tenant, + ControlFlow::Continue(()) => (), }, - }; + } let period = tenant.get_compaction_period(); @@ -119,29 +140,29 @@ async fn compaction_loop(tenant_id: TenantId) { /// /// GC task's main loop /// -async fn gc_loop(tenant_id: TenantId) { +async fn gc_loop(tenant: Arc, cancel: CancellationToken) { let wait_duration = Duration::from_secs(2); info!("starting"); TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); async { - let cancel = task_mgr::shutdown_token(); // GC might require downloading, to find the cutoff LSN that corresponds to the // cutoff specified as time. 
- let ctx = RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download); + let ctx = + RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download); let mut first = true; loop { trace!("waking up"); - let tenant = tokio::select! { + tokio::select! { _ = cancel.cancelled() => { info!("received cancellation request"); return; }, - tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result { + tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result { ControlFlow::Break(()) => return, - ControlFlow::Continue(tenant) => tenant, + ControlFlow::Continue(()) => (), }, - }; + } let period = tenant.get_gc_period(); @@ -161,7 +182,9 @@ async fn gc_loop(tenant_id: TenantId) { Duration::from_secs(10) } else { // Run gc - let res = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx).await; + let res = tenant + .gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx) + .await; if let Err(e) = res { error!("Gc failed, retrying in {:?}: {e:?}", wait_duration); wait_duration @@ -187,23 +210,10 @@ async fn gc_loop(tenant_id: TenantId) { trace!("GC loop stopped."); } -async fn wait_for_active_tenant( - tenant_id: TenantId, - wait: Duration, -) -> ControlFlow<(), Arc> { - let tenant = loop { - match mgr::get_tenant(tenant_id, false).await { - Ok(tenant) => break tenant, - Err(e) => { - error!("Failed to get a tenant {tenant_id}: {e:#}"); - tokio::time::sleep(wait).await; - } - } - }; - +async fn wait_for_active_tenant(tenant: &Arc) -> ControlFlow<()> { // if the tenant has a proper status already, no need to wait for anything if tenant.current_state() == TenantState::Active { - ControlFlow::Continue(tenant) + ControlFlow::Continue(()) } else { let mut tenant_state_updates = tenant.subscribe_for_state_updates(); loop { @@ -213,7 +223,7 @@ async fn wait_for_active_tenant( match new_state { TenantState::Active => { debug!("Tenant state changed to active, continuing the task loop"); - return ControlFlow::Continue(tenant); + return ControlFlow::Continue(()); } state => { debug!("Not running the task loop, tenant is not active: {state:?}"); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index ecb01708ac..1d603494ee 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -57,6 +57,7 @@ use pageserver_api::reltag::RelTag; use postgres_connection::PgConnectionConfig; use postgres_ffi::to_pg_timestamp; use utils::{ + completion, id::{TenantId, TimelineId}, lsn::{AtomicLsn, Lsn, RecordLsn}, seqwait::SeqWait, @@ -119,7 +120,7 @@ pub struct Timeline { pub pg_version: u32, - pub(crate) layers: tokio::sync::RwLock>, + pub(crate) layers: Arc>>, /// Set of key ranges which should be covered by image layers to /// allow GC to remove old layers. This set is created by GC and its cutoff LSN is also stored. @@ -195,8 +196,9 @@ pub struct Timeline { /// Layer removal lock. /// A lock to ensure that no layer of the timeline is removed concurrently by other tasks. /// This lock is acquired in [`Timeline::gc`], [`Timeline::compact`], - /// and [`Tenant::delete_timeline`]. - pub(super) layer_removal_cs: tokio::sync::Mutex<()>, + /// and [`Tenant::delete_timeline`]. This is an `Arc` lock because we need an owned + /// lock guard in functions that will be spawned to tokio I/O pool (which requires `'static`). 
+ pub(super) layer_removal_cs: Arc>, // Needed to ensure that we can't create a branch at a point that was already garbage collected pub latest_gc_cutoff_lsn: Rcu, @@ -235,7 +237,18 @@ pub struct Timeline { state: watch::Sender, + /// Prevent two tasks from deleting the timeline at the same time. If held, the + /// timeline is being deleted. If 'true', the timeline has already been deleted. + pub delete_lock: tokio::sync::Mutex, + eviction_task_timeline_state: tokio::sync::Mutex, + + /// Barrier to wait before doing initial logical size calculation. Used only during startup. + initial_logical_size_can_start: Option, + + /// Completion shared between all timelines loaded during startup; used to delay heavier + /// background tasks until some logical sizes have been calculated. + initial_logical_size_attempt: Mutex>, } type LayerMapWriteLockGuard<'t> = tokio::sync::RwLockWriteGuard<'t, LayerMap>; @@ -522,7 +535,12 @@ impl Timeline { Some((cached_lsn, cached_img)) => { match cached_lsn.cmp(&lsn) { Ordering::Less => {} // there might be WAL between cached_lsn and lsn, we need to check - Ordering::Equal => return Ok(cached_img), // exact LSN match, return the image + Ordering::Equal => { + self.metrics + .materialized_page_cache_hit_upon_request_counter + .inc(); + return Ok(cached_img); // exact LSN match, return the image + } Ordering::Greater => { unreachable!("the returned lsn should never be after the requested lsn") } @@ -537,8 +555,10 @@ impl Timeline { img: cached_page_img, }; + let timer = self.metrics.get_reconstruct_data_time_histo.start_timer(); self.get_reconstruct_data(key, lsn, &mut reconstruct_state, ctx) .await?; + timer.stop_and_record(); self.metrics .reconstruct_time_histo @@ -624,7 +644,7 @@ impl Timeline { { Ok(()) => Ok(()), Err(e) => { - // walreceiver.status() locks internally, don't count that towards the wait_lsn_time_histo + // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo drop(_timer); let walreceiver_status = { match &*self.walreceiver.lock().unwrap() { @@ -671,7 +691,7 @@ impl Timeline { } /// Outermost timeline compaction operation; downloads needed layers. - pub async fn compact(&self, ctx: &RequestContext) -> anyhow::Result<()> { + pub async fn compact(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { const ROUNDS: usize = 2; let last_record_lsn = self.get_last_record_lsn(); @@ -760,7 +780,7 @@ impl Timeline { } /// Compaction which might need to be retried after downloading remote layers. - async fn compact_inner(&self, ctx: &RequestContext) -> Result<(), CompactionError> { + async fn compact_inner(self: &Arc, ctx: &RequestContext) -> Result<(), CompactionError> { // // High level strategy for compaction / image creation: // @@ -795,7 +815,7 @@ impl Timeline { // Below are functions compact_level0() and create_image_layers() // but they are a bit ad hoc and don't quite work like it's explained // above. Rewrite it. - let layer_removal_cs = self.layer_removal_cs.lock().await; + let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await); // Is the timeline being deleted? let state = *self.state.borrow(); if state == TimelineState::Stopping { @@ -829,7 +849,7 @@ impl Timeline { // 3. 
Compact
            let timer = self.metrics.compact_time_histo.start_timer();
-            self.compact_level0(&layer_removal_cs, target_file_size, ctx)
+            self.compact_level0(layer_removal_cs.clone(), target_file_size, ctx)
                .await?;
            timer.stop_and_record();
        }
@@ -918,10 +938,15 @@
        Ok(())
    }

-    pub fn activate(self: &Arc<Self>, broker_client: BrokerClientChannel, ctx: &RequestContext) {
+    pub fn activate(
+        self: &Arc<Self>,
+        broker_client: BrokerClientChannel,
+        background_jobs_can_start: Option<&completion::Barrier>,
+        ctx: &RequestContext,
+    ) {
        self.launch_wal_receiver(ctx, broker_client);
        self.set_state(TimelineState::Active);
-        self.launch_eviction_task();
+        self.launch_eviction_task(background_jobs_can_start);
    }

    pub fn set_state(&self, new_state: TimelineState) {
@@ -939,6 +964,14 @@
                error!("Not activating a Stopping timeline");
            }
            (_, new_state) => {
+                if matches!(new_state, TimelineState::Stopping | TimelineState::Broken) {
+                    // drop the completion guard, if any; it might be holding off the completion
+                    // forever needlessly
+                    self.initial_logical_size_attempt
+                        .lock()
+                        .unwrap_or_else(|e| e.into_inner())
+                        .take();
+                }
                self.state.send_replace(new_state);
            }
        }
@@ -1288,6 +1321,13 @@ impl Timeline {
            .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold)
    }

+    fn get_gc_feedback(&self) -> bool {
+        let tenant_conf = self.tenant_conf.read().unwrap();
+        tenant_conf
+            .gc_feedback
+            .unwrap_or(self.conf.default_tenant_conf.gc_feedback)
+    }
+
    pub(super) fn tenant_conf_updated(&self) {
        // NB: Most tenant conf options are read by background loops, so,
        // changes will automatically be picked up.
@@ -1322,6 +1362,8 @@ impl Timeline {
        walredo_mgr: Arc,
        remote_client: Option<RemoteTimelineClient>,
        pg_version: u32,
+        initial_logical_size_can_start: Option<completion::Barrier>,
+        initial_logical_size_attempt: Option<completion::Completion>,
    ) -> Arc<Self> {
        let disk_consistent_lsn = metadata.disk_consistent_lsn();
        let (state, _) = watch::channel(TimelineState::Loading);
@@ -1346,7 +1388,7 @@ impl Timeline {
            timeline_id,
            tenant_id,
            pg_version,
-            layers: tokio::sync::RwLock::new(LayerMap::default()),
+            layers: Arc::new(tokio::sync::RwLock::new(LayerMap::default())),
            wanted_image_layers: Mutex::new(None),

            walredo_mgr,
@@ -1415,6 +1457,10 @@ impl Timeline {
            eviction_task_timeline_state: tokio::sync::Mutex::new(
                EvictionTaskTimelineState::default(),
            ),
+            delete_lock: tokio::sync::Mutex::new(false),
+
+            initial_logical_size_can_start,
+            initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt),
        };
        result.repartition_threshold = result.get_checkpoint_distance() / 10;
        result
@@ -1903,9 +1949,30 @@ impl Timeline {
            false,
            // NB: don't log errors here, task_mgr will do that.
            async move {
-                // no cancellation here, because nothing really waits for this to complete compared
+
+                let cancel = task_mgr::shutdown_token();
+
+                // in case we were created during pageserver initialization, wait for
+                // initialization to complete before proceeding. startup time init runs on the same
+                // runtime.
+                tokio::select! {
+                    _ = cancel.cancelled() => { return Ok(()); },
+                    _ = completion::Barrier::maybe_wait(self_clone.initial_logical_size_can_start.clone()) => {}
+                };
+
+                // hold off background tasks from starting until all timelines get to try initial
+                // logical size calculation at least once; though a retry will rarely be useful.
+                // holding off is done because heavier tasks execute blockingly on the same
+                // runtime.
+ // + // dropping this at every outcome is probably better than trying to cling on to it, + // delay will be terminated by a timeout regardless. + let _completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() }; + + // no extra cancellation here, because nothing really waits for this to complete compared // to spawn_ondemand_logical_size_calculation. let cancel = CancellationToken::new(); + let calculated_size = match self_clone .logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx, cancel) .await @@ -2170,7 +2237,7 @@ impl Timeline { fn delete_historic_layer( &self, // we cannot remove layers otherwise, since gc and compaction will race - _layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>, + _layer_removal_cs: Arc>, layer: Arc, updates: &mut BatchedUpdates<'_, dyn PersistentLayer>, ) -> anyhow::Result<()> { @@ -2249,6 +2316,9 @@ impl Timeline { let mut timeline_owned; let mut timeline = self; + let mut read_count = + scopeguard::guard(0, |cnt| self.metrics.read_num_fs_layers.observe(cnt as f64)); + // For debugging purposes, collect the path of layers that we traversed // through. It's included in the error message if we fail to find the key. let mut traversal_path = Vec::::new(); @@ -2383,6 +2453,7 @@ impl Timeline { Err(e) => return Err(PageReconstructError::from(e)), }; cont_lsn = lsn_floor; + // metrics: open_layer does not count as fs access, so we are not updating `read_count` traversal_path.push(( result, cont_lsn, @@ -2409,6 +2480,7 @@ impl Timeline { Err(e) => return Err(PageReconstructError::from(e)), }; cont_lsn = lsn_floor; + // metrics: open_layer does not count as fs access, so we are not updating `read_count` traversal_path.push(( result, cont_lsn, @@ -2443,6 +2515,7 @@ impl Timeline { Err(e) => return Err(PageReconstructError::from(e)), }; cont_lsn = lsn_floor; + *read_count += 1; traversal_path.push(( result, cont_lsn, @@ -2508,7 +2581,7 @@ impl Timeline { (DownloadBehavior::Error, false) => { return Err(PageReconstructError::NeedsDownload( TenantTimelineId::new(self.tenant_id, self.timeline_id), - remote_layer.file_name.clone(), + remote_layer.filename(), )) } } @@ -2654,7 +2727,7 @@ impl Timeline { /// Layer flusher task's main loop. async fn flush_loop( - &self, + self: &Arc, mut layer_flush_start_rx: tokio::sync::watch::Receiver, ctx: &RequestContext, ) { @@ -2743,9 +2816,9 @@ impl Timeline { } /// Flush one frozen in-memory layer to disk, as a new delta layer. - #[instrument(skip(self, frozen_layer, ctx), fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%frozen_layer.short_id()))] + #[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%frozen_layer.short_id()))] async fn flush_frozen_layer( - &self, + self: &Arc, frozen_layer: Arc, ctx: &RequestContext, ) -> anyhow::Result<()> { @@ -2869,26 +2942,41 @@ impl Timeline { // Write out the given frozen in-memory layer as a new L0 delta file async fn create_delta_layer( - &self, + self: &Arc, frozen_layer: &InMemoryLayer, ) -> anyhow::Result<(LayerFileName, LayerFileMetadata)> { - // Write it out - let new_delta = frozen_layer.write_to_disk()?; - let new_delta_path = new_delta.path(); - let new_delta_filename = new_delta.filename(); + // TODO figure out how to use spawn_blocking. 
Can't use it because frozen_layer is not 'static + let (new_delta, sz): (DeltaLayer, _) = tokio::task::block_in_place({ + let self_clone = Arc::clone(self); + move || { + // Write it out + let new_delta = frozen_layer.write_to_disk()?; + let new_delta_path = new_delta.path(); - // Sync it to disk. - // - // We must also fsync the timeline dir to ensure the directory entries for - // new layer files are durable - // - // TODO: If we're running inside 'flush_frozen_layers' and there are multiple - // files to flush, it might be better to first write them all, and then fsync - // them all in parallel. - par_fsync::par_fsync(&[ - new_delta_path.clone(), - self.conf.timeline_path(&self.timeline_id, &self.tenant_id), - ])?; + // Sync it to disk. + // + // We must also fsync the timeline dir to ensure the directory entries for + // new layer files are durable + // + // TODO: If we're running inside 'flush_frozen_layers' and there are multiple + // files to flush, it might be better to first write them all, and then fsync + // them all in parallel. + + // First sync the delta layer. We still use par_fsync here to keep everything consistent. Feel free to replace + // this with a single fsync in future refactors. + par_fsync::par_fsync(&[new_delta_path.clone()]).context("fsync of delta layer")?; + // Then sync the parent directory. + par_fsync::par_fsync(&[self_clone + .conf + .timeline_path(&self_clone.timeline_id, &self_clone.tenant_id)]) + .context("fsync of timeline dir")?; + + let sz = new_delta_path.metadata()?.len(); + + anyhow::Ok((new_delta, sz)) + } + })?; + let new_delta_name = new_delta.filename(); // Add it to the layer map let l = Arc::new(new_delta); @@ -2903,14 +2991,12 @@ impl Timeline { batch_updates.flush(); // update the timeline's physical size - let sz = new_delta_path.metadata()?.len(); - self.metrics.resident_physical_size_gauge.add(sz); // update metrics self.metrics.num_persistent_files_created.inc_by(1); self.metrics.persistent_bytes_written.inc_by(sz); - Ok((new_delta_filename, LayerFileMetadata::new(sz))) + Ok((new_delta_name, LayerFileMetadata::new(sz))) } async fn repartition( @@ -3053,6 +3139,7 @@ impl Timeline { self.tenant_id, &img_range, lsn, + false, // image layer always covers the full range )?; fail_point!("image-layer-writer-fail-before-finish", |_| { @@ -3116,17 +3203,22 @@ impl Timeline { let all_paths = image_layers .iter() .map(|layer| layer.path()) - .chain(std::iter::once( - self.conf.timeline_path(&self.timeline_id, &self.tenant_id), - )) .collect::>(); - par_fsync::par_fsync(&all_paths).context("fsync of newly created layer files")?; + + par_fsync::par_fsync_async(&all_paths) + .await + .context("fsync of newly created layer files")?; + + par_fsync::par_fsync_async(&[self.conf.timeline_path(&self.timeline_id, &self.tenant_id)]) + .await + .context("fsync of timeline dir")?; let mut layer_paths_to_upload = HashMap::with_capacity(image_layers.len()); let mut layers = self.layers.write().await; let mut updates = layers.batch_update(); let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id); + for l in image_layers { let path = l.filename(); let metadata = timeline_path @@ -3185,13 +3277,13 @@ impl Timeline { /// This method takes the `_layer_removal_cs` guard to highlight that it is required; downloads /// that turn out to be required are returned as an error. If the `layer_removal_cs` boundary is /// changed so that it is not taken at the start of level 0 file compaction, the on-demand download /// should be revisited as well.
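The sync-ification below takes the other route around that restriction: instead of borrowing, it hands `spawn_blocking` owned values, using `read_owned()` on an `Arc<tokio::sync::RwLock<...>>` so the guard itself is `'static`. A minimal sketch of the pattern (assuming only `tokio` and `anyhow`; the `Vec<u64>` payload is a placeholder):

```rust
use std::sync::Arc;
use tokio::sync::RwLock;

// An OwnedRwLockReadGuard keeps the Arc alive instead of borrowing the lock,
// which is what lets it cross into the 'static spawn_blocking closure.
async fn blocking_phase(layers: Arc<RwLock<Vec<u64>>>) -> anyhow::Result<usize> {
    let guard = layers.read_owned().await;
    let n = tokio::task::spawn_blocking(move || {
        // CPU-bound work runs on the blocking pool with the read lock held.
        guard.len()
    })
    .await?; // a JoinError converts into anyhow::Error via `?`
    Ok(n)
}
```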
- async fn compact_level0_phase1( + fn compact_level0_phase1( &self, - _layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>, + _layer_removal_cs: Arc>, + layers: tokio::sync::OwnedRwLockReadGuard>, target_file_size: u64, ctx: &RequestContext, ) -> Result { - let layers = self.layers.read().await; let mut level0_deltas = layers.get_level0_deltas()?; // Only compact if enough layers have accumulated. @@ -3498,13 +3590,13 @@ impl Timeline { if !new_layers.is_empty() { let mut layer_paths: Vec = new_layers.iter().map(|l| l.path()).collect(); - // also sync the directory - layer_paths.push(self.conf.timeline_path(&self.timeline_id, &self.tenant_id)); - // Fsync all the layer files and directory using multiple threads to // minimize latency. par_fsync::par_fsync(&layer_paths).context("fsync all new layers")?; + par_fsync::par_fsync(&[self.conf.timeline_path(&self.timeline_id, &self.tenant_id)]) + .context("fsync of timeline dir")?; + layer_paths.pop().unwrap(); } @@ -3521,17 +3613,27 @@ impl Timeline { /// as Level 1 files. /// async fn compact_level0( - &self, - layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>, + self: &Arc, + layer_removal_cs: Arc>, target_file_size: u64, ctx: &RequestContext, ) -> Result<(), CompactionError> { + let this = self.clone(); + let ctx_inner = ctx.clone(); + let layer_removal_cs_inner = layer_removal_cs.clone(); + let layers = Arc::clone(&self.layers).read_owned().await; + let span = tracing::info_span!("blocking"); let CompactLevel0Phase1Result { new_layers, deltas_to_compact, - } = self - .compact_level0_phase1(layer_removal_cs, target_file_size, ctx) - .await?; + } = tokio::task::spawn_blocking(move || { + let _g = span.entered(); + this.compact_level0_phase1(layer_removal_cs_inner, layers, target_file_size, &ctx_inner) + }) + .await + .context("compact_level0_phase1 spawn_blocking") + .map_err(CompactionError::Other) + .and_then(|res| res)?; if new_layers.is_empty() && deltas_to_compact.is_empty() { // nothing to do @@ -3589,7 +3691,7 @@ impl Timeline { let mut layer_names_to_delete = Vec::with_capacity(deltas_to_compact.len()); for l in deltas_to_compact { layer_names_to_delete.push(l.filename()); - self.delete_historic_layer(layer_removal_cs, l, &mut updates)?; + self.delete_historic_layer(layer_removal_cs.clone(), l, &mut updates)?; } updates.flush(); drop(layers); @@ -3709,10 +3811,11 @@ impl Timeline { fail_point!("before-timeline-gc"); - let layer_removal_cs = self.layer_removal_cs.lock().await; + let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await); // Is the timeline being deleted? let state = *self.state.borrow(); if state == TimelineState::Stopping { + // there's a global allowed_error for this anyhow::bail!("timeline is Stopping"); } @@ -3729,7 +3832,7 @@ impl Timeline { let res = self .gc_timeline( - &layer_removal_cs, + layer_removal_cs.clone(), horizon_cutoff, pitr_cutoff, retain_lsns, @@ -3748,7 +3851,7 @@ impl Timeline { async fn gc_timeline( &self, - layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>, + layer_removal_cs: Arc>, horizon_cutoff: Lsn, pitr_cutoff: Lsn, retain_lsns: Vec, @@ -3888,7 +3991,7 @@ impl Timeline { // delta layers. Image layers can form "stairs" preventing old images from being deleted. // But image layers are in any case less sparse than delta layers. Also we need some // protection from replacing recent image layers with a new one after each GC iteration.
- if l.is_incremental() && !LayerMap::is_l0(&*l) { + if self.get_gc_feedback() && l.is_incremental() && !LayerMap::is_l0(&*l) { wanted_image_layers.add_range(l.get_key_range()); } result.layers_not_updated += 1; @@ -3921,7 +4024,11 @@ impl Timeline { { for doomed_layer in layers_to_remove { layer_names_to_delete.push(doomed_layer.filename()); - self.delete_historic_layer(layer_removal_cs, doomed_layer, &mut updates)?; // FIXME: schedule succeeded deletions before returning? + self.delete_historic_layer( + layer_removal_cs.clone(), + doomed_layer, + &mut updates, + )?; // FIXME: schedule succeeded deletions before returning? result.layers_removed += 1; } } @@ -4093,7 +4200,7 @@ impl Timeline { // Does retries + exponential back-off internally. // When this fails, don't layer further retry attempts here. let result = remote_client - .download_layer_file(&remote_layer.file_name, &remote_layer.layer_metadata) + .download_layer_file(&remote_layer.filename(), &remote_layer.layer_metadata) .await; if let Ok(size) = &result { diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index a7f24c52ed..80c5210211 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -34,6 +34,8 @@ use crate::{ }, }; +use utils::completion; + use super::Timeline; #[derive(Default)] @@ -47,8 +49,12 @@ pub struct EvictionTaskTenantState { } impl Timeline { - pub(super) fn launch_eviction_task(self: &Arc) { + pub(super) fn launch_eviction_task( + self: &Arc, + background_tasks_can_start: Option<&completion::Barrier>, + ) { let self_clone = Arc::clone(self); + let background_tasks_can_start = background_tasks_can_start.cloned(); task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::Eviction, @@ -57,7 +63,13 @@ impl Timeline { &format!("layer eviction for {}/{}", self.tenant_id, self.timeline_id), false, async move { - self_clone.eviction_task(task_mgr::shutdown_token()).await; + let cancel = task_mgr::shutdown_token(); + tokio::select! { + _ = cancel.cancelled() => { return Ok(()); } + _ = completion::Barrier::maybe_wait(background_tasks_can_start) => {} + }; + + self_clone.eviction_task(cancel).await; info!("eviction task finishing"); Ok(()) }, diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index 7ebf3cf172..ccff735c3c 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -25,6 +25,7 @@ mod walreceiver_connection; use crate::context::{DownloadBehavior, RequestContext}; use crate::task_mgr::{self, TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::timeline::walreceiver::connection_manager::{ connection_manager_loop_step, ConnectionManagerState, }; @@ -85,7 +86,8 @@ impl WalReceiver { &format!("walreceiver for timeline {tenant_id}/{timeline_id}"), false, async move { - info!("WAL receiver manager started, connecting to broker"); + debug_assert_current_span_has_tenant_and_timeline_id(); + debug!("WAL receiver manager started, connecting to broker"); let mut connection_manager_state = ConnectionManagerState::new( timeline, conf, @@ -93,7 +95,7 @@ impl WalReceiver { loop { select! 
{ _ = task_mgr::shutdown_watcher() => { - info!("WAL receiver shutdown requested, shutting down"); + trace!("WAL receiver shutdown requested, shutting down"); break; }, loop_step_result = connection_manager_loop_step( @@ -104,7 +106,7 @@ impl WalReceiver { ) => match loop_step_result { ControlFlow::Continue(()) => continue, ControlFlow::Break(()) => { - info!("Connection manager loop ended, shutting down"); + trace!("Connection manager loop ended, shutting down"); break; } }, @@ -115,7 +117,7 @@ impl WalReceiver { *loop_status.write().unwrap() = None; Ok(()) } - .instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id)) + .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_id, timeline_id = %timeline_id)) ); Self { @@ -198,29 +200,19 @@ impl TaskHandle { TaskEvent::End(match self.join_handle.as_mut() { Some(jh) => { if !jh.is_finished() { - // Barring any implementation errors in this module, we can - // only arrive here while the task that executes the future - // passed to `Self::spawn()` is still execution. Cf the comment - // in Self::spawn(). - // - // This was logging at warning level in earlier versions, presumably - // to leave some breadcrumbs in case we had an implementation - // error that would would make us get stuck in `jh.await`. - // - // There hasn't been such a bug so far. - // But in a busy system, e.g., during pageserver restart, - // we arrive here often enough that the warning-level logs - // became a distraction. - // So, tone them down to info-level. - // - // XXX: rewrite this module to eliminate the race condition. - info!("sender is dropped while join handle is still alive"); + // See: https://github.com/neondatabase/neon/issues/2885 + trace!("sender is dropped while join handle is still alive"); } - let res = jh - .await - .map_err(|e| anyhow::anyhow!("Failed to join task: {e}")) - .and_then(|x| x); + let res = match jh.await { + Ok(res) => res, + Err(je) if je.is_cancelled() => unreachable!("not used"), + Err(je) if je.is_panic() => { + // already logged + Ok(()) + } + Err(je) => Err(anyhow::Error::new(je).context("join walreceiver task")), + }; // For cancellation-safety, drop join_handle only after successful .await. 
self.join_handle = None; @@ -243,12 +235,12 @@ impl TaskHandle { match jh.await { Ok(Ok(())) => debug!("Shutdown success"), Ok(Err(e)) => error!("Shutdown task error: {e:?}"), - Err(join_error) => { - if join_error.is_cancelled() { - error!("Shutdown task was cancelled"); - } else { - error!("Shutdown task join error: {join_error}") - } + Err(je) if je.is_cancelled() => unreachable!("not used"), + Err(je) if je.is_panic() => { + // already logged + } + Err(je) => { + error!("Shutdown task join error: {je}") } } } diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index c25eea1b70..dd2bd200ac 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -18,7 +18,7 @@ use crate::metrics::{ WALRECEIVER_CANDIDATES_REMOVED, WALRECEIVER_SWITCHES, }; use crate::task_mgr::TaskKind; -use crate::tenant::Timeline; +use crate::tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline}; use anyhow::Context; use chrono::{NaiveDateTime, Utc}; use pageserver_api::models::TimelineState; @@ -55,8 +55,11 @@ pub(super) async fn connection_manager_loop_step( .await { Ok(()) => {} - Err(_) => { - info!("Timeline dropped state updates sender before becoming active, stopping wal connection manager loop"); + Err(new_state) => { + debug!( + ?new_state, + "state changed, stopping wal connection manager loop" + ); return ControlFlow::Break(()); } } @@ -79,7 +82,7 @@ pub(super) async fn connection_manager_loop_step( // with other streams on this client (other connection managers). When // object goes out of scope, stream finishes in drop() automatically. let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id).await; - info!("Subscribed for broker timeline updates"); + debug!("Subscribed for broker timeline updates"); loop { let time_until_next_retry = connection_manager_state.time_until_next_retry(); @@ -151,12 +154,12 @@ pub(super) async fn connection_manager_loop_step( // we're already active as walreceiver, no need to reactivate TimelineState::Active => continue, TimelineState::Broken | TimelineState::Stopping => { - info!("timeline entered terminal state {new_state:?}, stopping wal connection manager loop"); + debug!("timeline entered terminal state {new_state:?}, stopping wal connection manager loop"); return ControlFlow::Break(()); } TimelineState::Loading => { warn!("timeline transitioned back to Loading state, that should not happen"); - return ControlFlow::Continue(new_state); + return ControlFlow::Continue(()); } } } @@ -164,12 +167,11 @@ pub(super) async fn connection_manager_loop_step( } } } => match new_event { - ControlFlow::Continue(new_state) => { - info!("observed timeline state change, new state is {new_state:?}"); + ControlFlow::Continue(()) => { return ControlFlow::Continue(()); } ControlFlow::Break(()) => { - info!("Timeline dropped state updates sender, stopping wal connection manager loop"); + debug!("Timeline is no longer active, stopping wal connection manager loop"); return ControlFlow::Break(()); } }, @@ -390,7 +392,6 @@ impl ConnectionManagerState { self.drop_old_connection(true).await; - let id = self.id; let node_id = new_sk.safekeeper_id; let connect_timeout = self.conf.wal_connect_timeout; let timeline = Arc::clone(&self.timeline); @@ -398,9 +399,13 @@ impl ConnectionManagerState { TaskKind::WalReceiverConnectionHandler, DownloadBehavior::Download, ); + + let span = 
info_span!("connection", %node_id); let connection_handle = TaskHandle::spawn(move |events_sender, cancellation| { async move { - super::walreceiver_connection::handle_walreceiver_connection( + debug_assert_current_span_has_tenant_and_timeline_id(); + + let res = super::walreceiver_connection::handle_walreceiver_connection( timeline, new_sk.wal_source_connconf, events_sender, @@ -409,12 +414,23 @@ cancellation, ctx, node_id, ) - .await - .context("walreceiver connection handling failure") + .await; + + match res { + Ok(()) => Ok(()), + Err(e) => { + use super::walreceiver_connection::ExpectedError; + if e.is_expected() { + info!("walreceiver connection handling ended: {e:#}"); + Ok(()) + } else { + // give out an error to have task_mgr give it really verbose logging + Err(e).context("walreceiver connection handling failure") + } + } + } } - .instrument( - info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, %node_id), - ) + .instrument(span) }); let now = Utc::now().naive_utc(); diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 91ff60603a..1c1fe87305 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -21,16 +21,16 @@ use postgres_types::PgLsn; use tokio::{select, sync::watch, time}; use tokio_postgres::{replication::ReplicationStream, Client}; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, trace, warn, Instrument}; use super::TaskStateUpdate; -use crate::metrics::LIVE_CONNECTIONS_COUNT; -use crate::{context::RequestContext, metrics::WALRECEIVER_STARTED_CONNECTIONS}; use crate::{ + context::RequestContext, + metrics::{LIVE_CONNECTIONS_COUNT, WALRECEIVER_STARTED_CONNECTIONS}, task_mgr, task_mgr::TaskKind, task_mgr::WALRECEIVER_RUNTIME, - tenant::{Timeline, WalReceiverInfo}, + tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline, WalReceiverInfo}, walingest::WalIngest, walrecord::DecodedWALRecord, }; @@ -81,13 +81,8 @@ pub(super) async fn handle_walreceiver_connection( config.application_name("pageserver"); config.replication_mode(tokio_postgres::config::ReplicationMode::Physical); match time::timeout(connect_timeout, config.connect(postgres::NoTls)).await { - Ok(Ok(client_and_conn)) => client_and_conn, - Ok(Err(conn_err)) => { - let expected_error = ignore_expected_errors(conn_err)?; - info!("DB connection stream finished: {expected_error}"); - return Ok(()); - } - Err(_) => { + Ok(client_and_conn) => client_and_conn?, + Err(_elapsed) => { // Timing out while connecting to a safekeeper node can happen for many // reasons that the pageserver cannot control. // Do not produce an error, but make it visible that timeouts happen, by logging the event. @@ -97,7 +92,7 @@ pub(super) async fn handle_walreceiver_connection( } }; - info!("connected!"); + debug!("connected!"); let mut connection_status = WalConnectionStatus { is_connected: true, has_processed_wal: false, @@ -127,20 +122,25 @@ pub(super) async fn handle_walreceiver_connection( "walreceiver connection", false, async move { + debug_assert_current_span_has_tenant_and_timeline_id(); + select!
{ connection_result = connection => match connection_result { - Ok(()) => info!("Walreceiver db connection closed"), + Ok(()) => debug!("Walreceiver db connection closed"), Err(connection_error) => { - if let Err(e) = ignore_expected_errors(connection_error) { - warn!("Connection aborted: {e:#}") + if connection_error.is_expected() { + // silence, because most likely we've already exited the outer call + // with a similar error. + } else { + warn!("Connection aborted: {connection_error:#}") } } }, - // Future: replace connection_cancellation with connection_ctx cancellation - _ = connection_cancellation.cancelled() => info!("Connection cancelled"), + _ = connection_cancellation.cancelled() => debug!("Connection cancelled"), } Ok(()) - }, + } + .instrument(tracing::info_span!("poller")), ); // Immediately increment the gauge, then create a job to decrement it on task exit. @@ -203,20 +203,13 @@ pub(super) async fn handle_walreceiver_connection( while let Some(replication_message) = { select! { _ = cancellation.cancelled() => { - info!("walreceiver interrupted"); + debug!("walreceiver interrupted"); None } replication_message = physical_stream.next() => replication_message, } } { - let replication_message = match replication_message { - Ok(message) => message, - Err(replication_error) => { - let expected_error = ignore_expected_errors(replication_error)?; - info!("Replication stream finished: {expected_error}"); - return Ok(()); - } - }; + let replication_message = replication_message?; let now = Utc::now().naive_utc(); let last_rec_lsn_before_msg = last_rec_lsn; @@ -261,8 +254,6 @@ pub(super) async fn handle_walreceiver_connection( let mut decoded = DecodedWALRecord::default(); let mut modification = timeline.begin_modification(endlsn); while let Some((lsn, recdata)) = waldecoder.poll_decode()? { - // let _enter = info_span!("processing record", lsn = %lsn).entered(); - // It is important to deal with the aligned records as lsn in getPage@LSN is // aligned and can be several bytes bigger. Without this alignment we are // at risk of hitting a deadlock. @@ -424,31 +415,50 @@ async fn identify_system(client: &mut Client) -> anyhow::Result } } -/// We don't want to report connectivity problems as real errors towards connection manager because -/// 1. they happen frequently enough to make server logs hard to read and -/// 2. the connection manager can retry other safekeeper. -/// -/// If this function returns `Ok(pg_error)`, it's such an error. -/// The caller should log it at info level and then report to connection manager that we're done handling this connection. -/// Connection manager will then handle reconnections. -/// -/// If this function returns an `Err()`, the caller can bubble it up using `?`. -/// The connection manager will log the error at ERROR level. -fn ignore_expected_errors(pg_error: postgres::Error) -> anyhow::Result { - if pg_error.is_closed() - || pg_error - .source() - .and_then(|source| source.downcast_ref::()) - .map(is_expected_io_error) - .unwrap_or(false) - { - return Ok(pg_error); - } else if let Some(db_error) = pg_error.as_db_error() { - if db_error.code() == &SqlState::SUCCESSFUL_COMPLETION - && db_error.message().contains("ending streaming") - { - return Ok(pg_error); - } - } - Err(pg_error).context("connection error")
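The `ExpectedError` trait introduced below generalizes this function, and it rests on one trick: walking an `anyhow::Error`'s source chain and downcasting each cause. A toy version of just that trick (an illustrative classifier, not the walreceiver's actual rules):

```rust
// An anyhow::Error counts as "benign" here if it, or any error in its source
// chain, is an io::Error of a connection-ish kind. `chain()` yields the
// outermost error first, then each underlying cause.
fn is_benign(err: &anyhow::Error) -> bool {
    err.chain()
        .filter_map(|cause| cause.downcast_ref::<std::io::Error>())
        .any(|io| {
            matches!(
                io.kind(),
                std::io::ErrorKind::ConnectionReset | std::io::ErrorKind::BrokenPipe
            )
        })
}
```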
+/// Trait for avoiding reports of walreceiver-specific expected ("normal", "ok") errors. +pub(super) trait ExpectedError { + /// Test if this error is an ok error. + /// + /// We don't want to report connectivity problems as real errors towards connection manager because + /// 1. they happen frequently enough to make server logs hard to read and + /// 2. the connection manager can retry other safekeeper. + /// + /// If this function returns `true`, it's such an error. + /// The caller should log it at info level and then report to connection manager that we're done handling this connection. + /// Connection manager will then handle reconnections. + /// + /// If this function returns `false`, the error should be propagated and the connection manager + /// will log the error at ERROR level. + fn is_expected(&self) -> bool; +} + +impl ExpectedError for postgres::Error { + fn is_expected(&self) -> bool { + self.is_closed() + || self + .source() + .and_then(|source| source.downcast_ref::()) + .map(is_expected_io_error) + .unwrap_or(false) + || self + .as_db_error() + .filter(|db_error| { + db_error.code() == &SqlState::SUCCESSFUL_COMPLETION + && db_error.message().contains("ending streaming") + }) + .is_some() + } +} + +impl ExpectedError for anyhow::Error { + fn is_expected(&self) -> bool { + let head = self.downcast_ref::(); + + let tail = self + .chain() + .filter_map(|e| e.downcast_ref::()); + + // check if self or any of the chained/sourced errors are expected + head.into_iter().chain(tail).any(|e| e.is_expected()) + } } diff --git a/pgxn/hnsw/Makefile b/pgxn/hnsw/Makefile new file mode 100644 index 0000000000..66436b5920 --- /dev/null +++ b/pgxn/hnsw/Makefile @@ -0,0 +1,26 @@ +EXTENSION = hnsw +EXTVERSION = 0.1.0 + +MODULE_big = hnsw +DATA = $(wildcard *--*.sql) +OBJS = hnsw.o hnswalg.o + +TESTS = $(wildcard test/sql/*.sql) +REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS)) +REGRESS_OPTS = --inputdir=test --load-extension=hnsw + +# For auto-vectorization: +# - GCC (needs -ftree-vectorize OR -O3) - https://gcc.gnu.org/projects/tree-ssa/vectorization.html +PG_CFLAGS += -O3 +PG_CXXFLAGS += -O3 -std=c++11 +PG_LDFLAGS += -lstdc++ + +all: $(EXTENSION)--$(EXTVERSION).sql + +PG_CONFIG ?= pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +dist: + mkdir -p dist + git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master diff --git a/pgxn/hnsw/README.md b/pgxn/hnsw/README.md new file mode 100644 index 0000000000..bc9c8d571c --- /dev/null +++ b/pgxn/hnsw/README.md @@ -0,0 +1,25 @@ +# Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors + +This ANN extension of Postgres is based +on the [ivf-hnsw](https://github.com/dbaranchuk/ivf-hnsw.git) implementation of [HNSW](https://www.pinecone.io/learn/hnsw), +the code for the current state-of-the-art billion-scale nearest neighbor search system presented in the paper: + +[Revisiting the Inverted Indices for Billion-Scale Approximate Nearest Neighbors](http://openaccess.thecvf.com/content_ECCV_2018/html/Dmitry_Baranchuk_Revisiting_the_Inverted_ECCV_2018_paper.html),
+Dmitry Baranchuk, Artem Babenko, Yury Malkov + +# Postgres extension + +The HNSW index is held in memory (built on demand) and its maximal size is limited +by the `maxelements` index parameter. Another required parameter is the number of dimensions (if it is not specified in the column type). +The optional parameter `ef` specifies the number of neighbors considered during index construction and search (corresponding to the `efConstruction` and `efSearch` parameters +described in the article). + +# Example of usage: + +``` +create extension hnsw; +create table embeddings(id integer primary key, payload real[]); +create index on embeddings using hnsw(payload) with (maxelements=1000000, dims=100, m=32); +select id from embeddings order by payload <-> array[1.0, 2.0,...] limit 100; +``` \ No newline at end of file diff --git a/pgxn/hnsw/hnsw--0.1.0.sql b/pgxn/hnsw/hnsw--0.1.0.sql new file mode 100644 index 0000000000..ebf424326d --- /dev/null +++ b/pgxn/hnsw/hnsw--0.1.0.sql @@ -0,0 +1,29 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION hnsw" to load this file. \quit + +-- functions + +CREATE FUNCTION l2_distance(real[], real[]) RETURNS real + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- operators + +CREATE OPERATOR <-> ( + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +-- access method + +CREATE FUNCTION hnsw_handler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnsw_handler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +-- opclasses + +CREATE OPERATOR CLASS knn_ops + DEFAULT FOR TYPE real[] USING hnsw AS + OPERATOR 1 <-> (real[], real[]) FOR ORDER BY float_ops; diff --git a/pgxn/hnsw/hnsw.c b/pgxn/hnsw/hnsw.c new file mode 100644 index 0000000000..434f4986f8 --- /dev/null +++ b/pgxn/hnsw/hnsw.c @@ -0,0 +1,551 @@ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/generic_xlog.h" +#include "access/relation.h" +#include "access/reloptions.h" +#include "access/tableam.h" +#include "catalog/index.h" +#include "commands/vacuum.h" +#include "nodes/execnodes.h" +#include "storage/bufmgr.h" +#include "utils/guc.h" +#include "utils/selfuncs.h" + +#include +#include + +#include "hnsw.h" + +PG_MODULE_MAGIC; + +typedef struct { + int32 vl_len_; /* varlena header (do not touch directly!)
*/ + int dims; + int maxelements; + int efConstruction; + int efSearch; + int M; +} HnswOptions; + +static relopt_kind hnsw_relopt_kind; + +typedef struct { + HierarchicalNSW* hnsw; + size_t curr; + size_t n_results; + ItemPointer results; +} HnswScanOpaqueData; + +typedef HnswScanOpaqueData* HnswScanOpaque; + +typedef struct { + Oid relid; + uint32 status; + HierarchicalNSW* hnsw; +} HnswHashEntry; + + +#define SH_PREFIX hnsw_index +#define SH_ELEMENT_TYPE HnswHashEntry +#define SH_KEY_TYPE Oid +#define SH_KEY relid +#define SH_STORE_HASH +#define SH_GET_HASH(tb, a) ((a)->relid) +#define SH_HASH_KEY(tb, key) (key) +#define SH_EQUAL(tb, a, b) ((a) == (b)) +#define SH_SCOPE static inline +#define SH_DEFINE +#define SH_DECLARE +#include "lib/simplehash.h" + +#define INDEX_HASH_SIZE 11 + +#define DEFAULT_EF_SEARCH 64 + +PGDLLEXPORT void _PG_init(void); + +static hnsw_index_hash *hnsw_indexes; + +/* + * Initialize index options and variables + */ +void +_PG_init(void) +{ + hnsw_relopt_kind = add_reloption_kind(); + add_int_reloption(hnsw_relopt_kind, "dims", "Number of dimensions", + 0, 0, INT_MAX, AccessExclusiveLock); + add_int_reloption(hnsw_relopt_kind, "maxelements", "Maximal number of elements", + 0, 0, INT_MAX, AccessExclusiveLock); + add_int_reloption(hnsw_relopt_kind, "m", "Number of neighbors of each vertex", + 100, 0, INT_MAX, AccessExclusiveLock); + add_int_reloption(hnsw_relopt_kind, "efconstruction", "Number of inspected neighbors during index construction", + 16, 1, INT_MAX, AccessExclusiveLock); + add_int_reloption(hnsw_relopt_kind, "efsearch", "Number of inspected neighbors during index search", + 64, 1, INT_MAX, AccessExclusiveLock); + hnsw_indexes = hnsw_index_create(TopMemoryContext, INDEX_HASH_SIZE, NULL); +} + + +static void +hnsw_build_callback(Relation index, ItemPointer tid, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + HierarchicalNSW* hnsw = (HierarchicalNSW*) state; + ArrayType* array; + int n_items; + label_t label = 0; + + /* Skip nulls */ + if (isnull[0]) + return; + + array = DatumGetArrayTypeP(values[0]); + n_items = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)); + if (n_items != hnsw_dimensions(hnsw)) + { + elog(ERROR, "Wrong number of dimensions: %d instead of %d expected", + n_items, hnsw_dimensions(hnsw)); + } + + memcpy(&label, tid, sizeof(*tid)); + hnsw_add_point(hnsw, (coord_t*)ARR_DATA_PTR(array), label); +} + +static void +hnsw_populate(HierarchicalNSW* hnsw, Relation indexRel, Relation heapRel) +{ + IndexInfo* indexInfo = BuildIndexInfo(indexRel); + Assert(indexInfo->ii_NumIndexAttrs == 1); + table_index_build_scan(heapRel, indexRel, indexInfo, + true, true, hnsw_build_callback, (void *) hnsw, NULL); +} + +static HierarchicalNSW* +hnsw_get_index(Relation indexRel, Relation heapRel) +{ + HierarchicalNSW* hnsw; + Oid indexoid = RelationGetRelid(indexRel); + HnswHashEntry* entry = hnsw_index_lookup(hnsw_indexes, indexoid); + if (entry == NULL) + { + size_t dims, maxelements; + size_t M; + size_t maxM; + size_t size_links_level0; + size_t size_data_per_element; + size_t data_size; + dsm_handle handle = indexoid << 1; /* make it even */ + void* impl_private = NULL; + void* mapped_address = NULL; + Size mapped_size = 0; + Size shmem_size; + bool exists = true; + bool found; + HnswOptions *opts = (HnswOptions *) indexRel->rd_options; + if (opts == NULL || opts->maxelements == 0 || opts->dims == 0) { + elog(ERROR, "HNSW index requires 'maxelements' and 'dims' to be specified"); + } + dims = opts->dims; + maxelements = 
opts->maxelements; + M = opts->M; + maxM = M * 2; + data_size = dims * sizeof(coord_t); + size_links_level0 = (maxM + 1) * sizeof(idx_t); + size_data_per_element = size_links_level0 + data_size + sizeof(label_t); + shmem_size = hnsw_sizeof() + maxelements * size_data_per_element; + + /* first try to attach to an existing index */ + if (!dsm_impl_op(DSM_OP_ATTACH, handle, 0, &impl_private, + &mapped_address, &mapped_size, DEBUG1)) + { + /* the index doesn't exist: try to create it */ + if (!dsm_impl_op(DSM_OP_CREATE, handle, shmem_size, &impl_private, + &mapped_address, &mapped_size, DEBUG1)) + { + /* We do this under a shared lock, so some other backend may also be + * trying to initialize the index. If creation failed because the index was + * already created by somebody else, try to attach to it once again + */ + if (!dsm_impl_op(DSM_OP_ATTACH, handle, 0, &impl_private, + &mapped_address, &mapped_size, ERROR)) + { + return NULL; + } + } + else + { + exists = false; + } + } + Assert(mapped_size == shmem_size); + hnsw = (HierarchicalNSW*)mapped_address; + + if (!exists) + { + hnsw_init(hnsw, dims, maxelements, M, maxM, opts->efConstruction); + hnsw_populate(hnsw, indexRel, heapRel); + } + entry = hnsw_index_insert(hnsw_indexes, indexoid, &found); + Assert(!found); + entry->hnsw = hnsw; + } + else + { + hnsw = entry->hnsw; + } + return hnsw; +} + +/* + * Start or restart an index scan + */ +static IndexScanDesc +hnsw_beginscan(Relation index, int nkeys, int norderbys) +{ + IndexScanDesc scan = RelationGetIndexScan(index, nkeys, norderbys); + HnswScanOpaque so = (HnswScanOpaque) palloc(sizeof(HnswScanOpaqueData)); + Relation heap = relation_open(index->rd_index->indrelid, NoLock); + so->hnsw = hnsw_get_index(index, heap); + relation_close(heap, NoLock); + so->curr = 0; + so->n_results = 0; + so->results = NULL; + scan->opaque = so; + return scan; +} + +/* + * Start or restart an index scan + */ +static void +hnsw_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + if (so->results) + { + pfree(so->results); + so->results = NULL; + } + so->curr = 0; + if (orderbys && scan->numberOfOrderBys > 0) + memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); +} + +/* + * Fetch the next tuple in the given scan + */ +static bool +hnsw_gettuple(IndexScanDesc scan, ScanDirection dir) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + */ + Assert(ScanDirectionIsForward(dir)); + + if (so->curr == 0) + { + Datum value; + ArrayType* array; + int n_items; + size_t n_results; + label_t* results; + HnswOptions *opts = (HnswOptions *) scan->indexRelation->rd_options; + size_t efSearch = opts ?
opts->efSearch : DEFAULT_EF_SEARCH; + + /* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan HNSW index without order"); + + /* No items will match if null */ + if (scan->orderByData->sk_flags & SK_ISNULL) + return false; + + value = scan->orderByData->sk_argument; + array = DatumGetArrayTypeP(value); + n_items = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)); + if (n_items != hnsw_dimensions(so->hnsw)) + { + elog(ERROR, "Wrong number of dimensions: %d instead of %d expected", + n_items, hnsw_dimensions(so->hnsw)); + } + + if (!hnsw_search(so->hnsw, (coord_t*)ARR_DATA_PTR(array), efSearch, &n_results, &results)) + elog(ERROR, "HNSW index search failed"); + so->results = (ItemPointer)palloc(n_results*sizeof(ItemPointerData)); + so->n_results = n_results; + for (size_t i = 0; i < n_results; i++) + { + memcpy(&so->results[i], &results[i], sizeof(so->results[i])); + } + free(results); + } + if (so->curr >= so->n_results) + { + return false; + } + else + { + scan->xs_heaptid = so->results[so->curr++]; + scan->xs_recheckorderby = false; + return true; + } +} + +/* + * End a scan and release resources + */ +static void +hnsw_endscan(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + if (so->results) + pfree(so->results); + pfree(so); + scan->opaque = NULL; +} + + +/* + * Estimate the cost of an index scan + */ +static void +hnsw_costestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation + ,double *indexPages +) +{ + GenericCosts costs; + + /* Never use index without order */ + if (path->indexorderbys == NULL) + { + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 0; + *indexCorrelation = 0; + *indexPages = 0; + return; + } + + MemSet(&costs, 0, sizeof(costs)); + + genericcostestimate(root, path, loop_count, &costs); + + /* Startup cost and total cost are same */ + *indexStartupCost = costs.indexTotalCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; +} + +/* + * Parse and validate the reloptions + */ +static bytea * +hnsw_options(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"dims", RELOPT_TYPE_INT, offsetof(HnswOptions, dims)}, + {"maxelements", RELOPT_TYPE_INT, offsetof(HnswOptions, maxelements)}, + {"efconstruction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)}, + {"efsearch", RELOPT_TYPE_INT, offsetof(HnswOptions, efSearch)}, + {"m", RELOPT_TYPE_INT, offsetof(HnswOptions, M)} + }; + + return (bytea *) build_reloptions(reloptions, validate, + hnsw_relopt_kind, + sizeof(HnswOptions), + tab, lengthof(tab)); +} + +/* + * Validate catalog entries for the specified operator class + */ +static bool +hnsw_validate(Oid opclassoid) +{ + return true; +} + +/* + * Build the index for a logged table + */ +static IndexBuildResult * +hnsw_build(Relation heap, Relation index, IndexInfo *indexInfo) +{ + HierarchicalNSW* hnsw = hnsw_get_index(index, heap); + IndexBuildResult* result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + result->heap_tuples = result->index_tuples = hnsw_count(hnsw); + + return result; +} + +/* + * Insert a tuple into the index + */ +static bool +hnsw_insert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, + Relation heap, IndexUniqueCheck checkUnique, + bool indexUnchanged, + IndexInfo 
*indexInfo) +{ + HierarchicalNSW* hnsw = hnsw_get_index(index, heap); + Datum value; + ArrayType* array; + int n_items; + label_t label = 0; + + /* Skip nulls */ + if (isnull[0]) + return false; + + /* Detoast value */ + value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + array = DatumGetArrayTypeP(value); + n_items = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)); + if (n_items != hnsw_dimensions(hnsw)) + { + elog(ERROR, "Wrong number of dimensions: %d instead of %d expected", + n_items, hnsw_dimensions(hnsw)); + } + memcpy(&label, heap_tid, sizeof(*heap_tid)); + if (!hnsw_add_point(hnsw, (coord_t*)ARR_DATA_PTR(array), label)) + elog(ERROR, "HNSW index insert failed"); + return true; +} + +/* + * Build the index for an unlogged table + */ +static void +hnsw_buildempty(Relation index) +{ + /* index will be constructed on demand when accessed */ +} + +/* + * Clean up after a VACUUM operation + */ +static IndexBulkDeleteResult * +hnsw_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation rel = info->index; + + if (stats == NULL) + return NULL; + + stats->num_pages = RelationGetNumberOfBlocks(rel); + + return stats; +} + +/* + * Bulk delete tuples from the index + */ +static IndexBulkDeleteResult * +hnsw_bulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state) +{ + if (stats == NULL) + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + return stats; +} + +/* + * Define index handler + * + * See https://www.postgresql.org/docs/current/index-api.html + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_handler); +Datum +hnsw_handler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = 0; + amroutine->amoptsprocnum = 0; + amroutine->amcanorder = false; + amroutine->amcanorderbyop = true; + amroutine->amcanbackward = false; /* can change direction mid-scan */ + amroutine->amcanunique = false; + amroutine->amcanmulticol = false; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcaninclude = false; + amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */ + amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL; + amroutine->amkeytype = InvalidOid; + + /* Interface functions */ + amroutine->ambuild = hnsw_build; + amroutine->ambuildempty = hnsw_buildempty; + amroutine->aminsert = hnsw_insert; + amroutine->ambulkdelete = hnsw_bulkdelete; + amroutine->amvacuumcleanup = hnsw_vacuumcleanup; + amroutine->amcanreturn = NULL; /* tuple not included in heapsort */ + amroutine->amcostestimate = hnsw_costestimate; + amroutine->amoptions = hnsw_options; + amroutine->amproperty = NULL; /* TODO AMPROP_DISTANCE_ORDERABLE */ + amroutine->ambuildphasename = NULL; + amroutine->amvalidate = hnsw_validate; + amroutine->amadjustmembers = NULL; + amroutine->ambeginscan = hnsw_beginscan; + amroutine->amrescan = hnsw_rescan; + amroutine->amgettuple = hnsw_gettuple; + amroutine->amgetbitmap = NULL; + amroutine->amendscan = hnsw_endscan; + amroutine->ammarkpos = NULL; + amroutine->amrestrpos = NULL; + + /* Interface functions to support parallel index scans */ + amroutine->amestimateparallelscan = NULL; + amroutine->aminitparallelscan = NULL; + amroutine->amparallelrescan = NULL; + 
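+ /* The callbacks left NULL above are genuinely unsupported paths: this AM
+ * offers no bitmap scans, no mark/restore, and no parallel scans; all
+ * access goes through ordered forward amgettuple scans driven by <->. */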
PG_RETURN_POINTER(amroutine); +} + +/* + * Get the L2 distance between vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_distance); +Datum +l2_distance(PG_FUNCTION_ARGS) +{ + ArrayType *a = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *b = PG_GETARG_ARRAYTYPE_P(1); + int a_dim = ArrayGetNItems(ARR_NDIM(a), ARR_DIMS(a)); + int b_dim = ArrayGetNItems(ARR_NDIM(b), ARR_DIMS(b)); + dist_t distance = 0.0; + dist_t diff; + coord_t *ax = (coord_t*)ARR_DATA_PTR(a); + coord_t *bx = (coord_t*)ARR_DATA_PTR(b); + + if (a_dim != b_dim) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("different array dimensions %d and %d", a_dim, b_dim))); + } + + for (int i = 0; i < a_dim; i++) + { + diff = ax[i] - bx[i]; + distance += diff * diff; + } + + PG_RETURN_FLOAT4((dist_t)sqrt(distance)); +} diff --git a/pgxn/hnsw/hnsw.control b/pgxn/hnsw/hnsw.control new file mode 100644 index 0000000000..b292b96026 --- /dev/null +++ b/pgxn/hnsw/hnsw.control @@ -0,0 +1,5 @@ +comment = 'HNSW index' +default_version = '0.1.0' +module_pathname = '$libdir/hnsw' +relocatable = true +trusted = true diff --git a/pgxn/hnsw/hnsw.h b/pgxn/hnsw/hnsw.h new file mode 100644 index 0000000000..d4065ab8fe --- /dev/null +++ b/pgxn/hnsw/hnsw.h @@ -0,0 +1,15 @@ +#pragma once + +typedef float coord_t; +typedef float dist_t; +typedef uint32_t idx_t; +typedef uint64_t label_t; + +typedef struct HierarchicalNSW HierarchicalNSW; + +bool hnsw_search(HierarchicalNSW* hnsw, const coord_t *point, size_t efSearch, size_t* n_results, label_t** results); +bool hnsw_add_point(HierarchicalNSW* hnsw, const coord_t *point, label_t label); +void hnsw_init(HierarchicalNSW* hnsw, size_t dim, size_t maxelements, size_t M, size_t maxM, size_t efConstruction); +int hnsw_dimensions(HierarchicalNSW* hnsw); +size_t hnsw_count(HierarchicalNSW* hnsw); +size_t hnsw_sizeof(void); diff --git a/pgxn/hnsw/hnswalg.cpp b/pgxn/hnsw/hnswalg.cpp new file mode 100644 index 0000000000..f6de3b8314 --- /dev/null +++ b/pgxn/hnsw/hnswalg.cpp @@ -0,0 +1,379 @@ +#include "hnswalg.h" + +#if defined(__GNUC__) +#define PORTABLE_ALIGN32 __attribute__((aligned(32))) +#define PREFETCH(addr,hint) __builtin_prefetch(addr, 0, hint) +#else +#define PORTABLE_ALIGN32 __declspec(align(32)) +#define PREFETCH(addr,hint) +#endif + +HierarchicalNSW::HierarchicalNSW(size_t dim_, size_t maxelements_, size_t M_, size_t maxM_, size_t efConstruction_) +{ + dim = dim_; + data_size = dim * sizeof(coord_t); + + efConstruction = efConstruction_; + + maxelements = maxelements_; + M = M_; + maxM = maxM_; + size_links_level0 = (maxM + 1) * sizeof(idx_t); + size_data_per_element = size_links_level0 + data_size + sizeof(label_t); + offset_data = size_links_level0; + offset_label = offset_data + data_size; + + enterpoint_node = 0; + cur_element_count = 0; +#ifdef __x86_64__ + use_avx2 = __builtin_cpu_supports("avx2"); +#endif +} + +std::priority_queue> HierarchicalNSW::searchBaseLayer(const coord_t *point, size_t ef) +{ + std::vector visited; + visited.resize((cur_element_count + 31) >> 5); + + std::priority_queue> topResults; + std::priority_queue> candidateSet; + + dist_t dist = fstdistfunc(point, getDataByInternalId(enterpoint_node)); + + topResults.emplace(dist, enterpoint_node); + candidateSet.emplace(-dist, enterpoint_node); + visited[enterpoint_node >> 5] = 1 << (enterpoint_node & 31); + dist_t lowerBound = dist; + + while (!candidateSet.empty()) + { + std::pair curr_el_pair = candidateSet.top(); + if (-curr_el_pair.first > lowerBound) + break; + + candidateSet.pop(); + idx_t curNodeNum =
curr_el_pair.second; + + idx_t* data = get_linklist0(curNodeNum); + size_t size = *data++; + + PREFETCH(getDataByInternalId(*data), 0); + + for (size_t j = 0; j < size; ++j) { + size_t tnum = *(data + j); + + PREFETCH(getDataByInternalId(*(data + j + 1)), 0); + + if (!(visited[tnum >> 5] & (1 << (tnum & 31)))) { + visited[tnum >> 5] |= 1 << (tnum & 31); + + dist = fstdistfunc(point, getDataByInternalId(tnum)); + + if (topResults.top().first > dist || topResults.size() < ef) { + candidateSet.emplace(-dist, tnum); + + PREFETCH(get_linklist0(candidateSet.top().second), 0); + topResults.emplace(dist, tnum); + + if (topResults.size() > ef) + topResults.pop(); + + lowerBound = topResults.top().first; + } + } + } + } + return topResults; +} + + +void HierarchicalNSW::getNeighborsByHeuristic(std::priority_queue> &topResults, size_t NN) +{ + if (topResults.size() < NN) + return; + + std::priority_queue> resultSet; + std::vector> returnlist; + + while (topResults.size() > 0) { + resultSet.emplace(-topResults.top().first, topResults.top().second); + topResults.pop(); + } + + while (resultSet.size()) { + if (returnlist.size() >= NN) + break; + std::pair curen = resultSet.top(); + dist_t dist_to_query = -curen.first; + resultSet.pop(); + bool good = true; + for (std::pair curen2 : returnlist) { + dist_t curdist = fstdistfunc(getDataByInternalId(curen2.second), + getDataByInternalId(curen.second)); + if (curdist < dist_to_query) { + good = false; + break; + } + } + if (good) returnlist.push_back(curen); + } + for (std::pair elem : returnlist) + topResults.emplace(-elem.first, elem.second); +} + +void HierarchicalNSW::mutuallyConnectNewElement(const coord_t *point, idx_t cur_c, + std::priority_queue> topResults) +{ + getNeighborsByHeuristic(topResults, M); + + std::vector res; + res.reserve(M); + while (topResults.size() > 0) { + res.push_back(topResults.top().second); + topResults.pop(); + } + { + idx_t* data = get_linklist0(cur_c); + if (*data) + throw std::runtime_error("Should be blank"); + + *data++ = res.size(); + + for (size_t idx = 0; idx < res.size(); idx++) { + if (data[idx]) + throw std::runtime_error("Should be blank"); + data[idx] = res[idx]; + } + } + for (size_t idx = 0; idx < res.size(); idx++) { + if (res[idx] == cur_c) + throw std::runtime_error("Connection to the same element"); + + size_t resMmax = maxM; + idx_t *ll_other = get_linklist0(res[idx]); + idx_t sz_link_list_other = *ll_other; + + if (sz_link_list_other > resMmax || sz_link_list_other < 0) + throw std::runtime_error("Bad sz_link_list_other"); + + if (sz_link_list_other < resMmax) { + idx_t *data = ll_other + 1; + data[sz_link_list_other] = cur_c; + *ll_other = sz_link_list_other + 1; + } else { + // finding the "weakest" element to replace it with the new one + idx_t *data = ll_other + 1; + dist_t d_max = fstdistfunc(getDataByInternalId(cur_c), getDataByInternalId(res[idx])); + // Heuristic: + std::priority_queue> candidates; + candidates.emplace(d_max, cur_c); + + for (size_t j = 0; j < sz_link_list_other; j++) + candidates.emplace(fstdistfunc(getDataByInternalId(data[j]), getDataByInternalId(res[idx])), data[j]); + + getNeighborsByHeuristic(candidates, resMmax); + + size_t indx = 0; + while (!candidates.empty()) { + data[indx] = candidates.top().second; + candidates.pop(); + indx++; + } + *ll_other = indx; + } + } +} + +void HierarchicalNSW::addPoint(const coord_t *point, label_t label) +{ + if (cur_element_count >= maxelements) { + throw std::runtime_error("The number of elements exceeds the specified limit"); + } + 
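+ /* Append the new element at slot cur_c: zero its level-0 link block, copy
+ * in the coordinates and external label, then (for every element after the
+ * first) search the base layer with ef = efConstruction and mutually link
+ * the new point to the surviving candidates; mutuallyConnectNewElement
+ * trims them to M with the neighbor heuristic. */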
idx_t cur_c = cur_element_count++; + memset((char *) get_linklist0(cur_c), 0, size_data_per_element); + memcpy(getDataByInternalId(cur_c), point, data_size); + memcpy(getExternalLabel(cur_c), &label, sizeof label); + + // Do nothing for the first element + if (cur_c != 0) { + std::priority_queue > topResults = searchBaseLayer(point, efConstruction); + mutuallyConnectNewElement(point, cur_c, topResults); + } +}; + +std::priority_queue> HierarchicalNSW::searchKnn(const coord_t *query, size_t k) +{ + std::priority_queue> topResults; + auto topCandidates = searchBaseLayer(query, k); + while (topCandidates.size() > k) { + topCandidates.pop(); + } + while (!topCandidates.empty()) { + std::pair rez = topCandidates.top(); + label_t label; + memcpy(&label, getExternalLabel(rez.second), sizeof(label)); + topResults.push(std::pair(rez.first, label)); + topCandidates.pop(); + } + + return topResults; +}; + +dist_t fstdistfunc_scalar(const coord_t *x, const coord_t *y, size_t n) +{ + dist_t distance = 0.0; + + for (size_t i = 0; i < n; i++) + { + dist_t diff = x[i] - y[i]; + distance += diff * diff; + } + return distance; + +} + +#ifdef __x86_64__ +#include + +__attribute__((target("avx2"))) +dist_t fstdistfunc_avx2(const coord_t *x, const coord_t *y, size_t n) +{ + const size_t TmpResSz = sizeof(__m256) / sizeof(float); + float PORTABLE_ALIGN32 TmpRes[TmpResSz]; + size_t qty16 = n / 16; + const float *pEnd1 = x + (qty16 * 16); + __m256 diff, v1, v2; + __m256 sum = _mm256_set1_ps(0); + + while (x < pEnd1) { + v1 = _mm256_loadu_ps(x); + x += 8; + v2 = _mm256_loadu_ps(y); + y += 8; + diff = _mm256_sub_ps(v1, v2); + sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); + + v1 = _mm256_loadu_ps(x); + x += 8; + v2 = _mm256_loadu_ps(y); + y += 8; + diff = _mm256_sub_ps(v1, v2); + sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff)); + } + _mm256_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; + return (res); +} + +dist_t fstdistfunc_sse(const coord_t *x, const coord_t *y, size_t n) +{ + const size_t TmpResSz = sizeof(__m128) / sizeof(float); + float PORTABLE_ALIGN32 TmpRes[TmpResSz]; + size_t qty16 = n / 16; + const float *pEnd1 = x + (qty16 * 16); + + __m128 diff, v1, v2; + __m128 sum = _mm_set1_ps(0); + + while (x < pEnd1) { + v1 = _mm_loadu_ps(x); + x += 4; + v2 = _mm_loadu_ps(y); + y += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + + v1 = _mm_loadu_ps(x); + x += 4; + v2 = _mm_loadu_ps(y); + y += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + + v1 = _mm_loadu_ps(x); + x += 4; + v2 = _mm_loadu_ps(y); + y += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + + v1 = _mm_loadu_ps(x); + x += 4; + v2 = _mm_loadu_ps(y); + y += 4; + diff = _mm_sub_ps(v1, v2); + sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff)); + } + _mm_store_ps(TmpRes, sum); + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; + return res; +} +#endif + +dist_t HierarchicalNSW::fstdistfunc(const coord_t *x, const coord_t *y) +{ +#ifndef __x86_64__ + return fstdistfunc_scalar(x, y, dim); +#else + if(use_avx2) + return fstdistfunc_avx2(x, y, dim); + + return fstdistfunc_sse(x, y, dim); +#endif +} + +bool hnsw_search(HierarchicalNSW* hnsw, const coord_t *point, size_t efSearch, size_t* n_results, label_t** results) +{ + try + { + auto result = hnsw->searchKnn(point, efSearch); + size_t nResults = result.size(); + *results = 
(label_t*)malloc(nResults*sizeof(label_t)); + for (size_t i = nResults; i-- != 0;) + { + (*results)[i] = result.top().second; + result.pop(); + } + *n_results = nResults; + return true; + } + catch (std::exception& x) + { + return false; + } +} + +bool hnsw_add_point(HierarchicalNSW* hnsw, const coord_t *point, label_t label) +{ + try + { + hnsw->addPoint(point, label); + return true; + } + catch (std::exception& x) + { + fprintf(stderr, "Catch %s\n", x.what()); + return false; + } +} + +void hnsw_init(HierarchicalNSW* hnsw, size_t dims, size_t maxelements, size_t M, size_t maxM, size_t efConstruction) +{ + new ((void*)hnsw) HierarchicalNSW(dims, maxelements, M, maxM, efConstruction); +} + + +int hnsw_dimensions(HierarchicalNSW* hnsw) +{ + return (int)hnsw->dim; +} + +size_t hnsw_count(HierarchicalNSW* hnsw) +{ + return hnsw->cur_element_count; +} + +size_t hnsw_sizeof(void) +{ + return sizeof(HierarchicalNSW); +} diff --git a/pgxn/hnsw/hnswalg.h b/pgxn/hnsw/hnswalg.h new file mode 100644 index 0000000000..f38aeac362 --- /dev/null +++ b/pgxn/hnsw/hnswalg.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "hnsw.h" +} + +struct HierarchicalNSW +{ + size_t maxelements; + size_t cur_element_count; + + idx_t enterpoint_node; + + size_t dim; + size_t data_size; + size_t offset_data; + size_t offset_label; + size_t size_data_per_element; + size_t M; + size_t maxM; + size_t size_links_level0; + size_t efConstruction; + +#ifdef __x86_64__ + bool use_avx2; +#endif + + char data_level0_memory[0]; // varying size + + public: + HierarchicalNSW(size_t dim, size_t maxelements, size_t M, size_t maxM, size_t efConstruction); + ~HierarchicalNSW(); + + + inline coord_t *getDataByInternalId(idx_t internal_id) const { + return (coord_t *)&data_level0_memory[internal_id * size_data_per_element + offset_data]; + } + + inline idx_t *get_linklist0(idx_t internal_id) const { + return (idx_t*)&data_level0_memory[internal_id * size_data_per_element]; + } + + inline label_t *getExternalLabel(idx_t internal_id) const { + return (label_t *)&data_level0_memory[internal_id * size_data_per_element + offset_label]; + } + + std::priority_queue> searchBaseLayer(const coord_t *x, size_t ef); + + void getNeighborsByHeuristic(std::priority_queue> &topResults, size_t NN); + + void mutuallyConnectNewElement(const coord_t *x, idx_t id, std::priority_queue> topResults); + + void addPoint(const coord_t *point, label_t label); + + std::priority_queue> searchKnn(const coord_t *query_data, size_t k); + + dist_t fstdistfunc(const coord_t *x, const coord_t *y); +}; diff --git a/pgxn/hnsw/test/expected/knn.out b/pgxn/hnsw/test/expected/knn.out new file mode 100644 index 0000000000..a1cee4525e --- /dev/null +++ b/pgxn/hnsw/test/expected/knn.out @@ -0,0 +1,28 @@ +SET enable_seqscan = off; +CREATE TABLE t (val real[]); +INSERT INTO t (val) VALUES ('{0,0,0}'), ('{1,2,3}'), ('{1,1,1}'), (NULL); +CREATE INDEX ON t USING hnsw (val) WITH (maxelements = 10, dims=3, m=3); +INSERT INTO t (val) VALUES (array[1,2,4]); +explain SELECT * FROM t ORDER BY val <-> array[3,3,3]; + QUERY PLAN +-------------------------------------------------------------------- + Index Scan using t_val_idx on t (cost=4.02..8.06 rows=3 width=36) + Order By: (val <-> '{3,3,3}'::real[]) +(2 rows) + +SELECT * FROM t ORDER BY val <-> array[3,3,3]; + val +--------- + {1,2,3} + {1,2,4} + {1,1,1} + {0,0,0} +(4 rows) + +SELECT COUNT(*) FROM t; + count +------- + 5 +(1 row) + 
+DROP TABLE t; diff --git a/pgxn/hnsw/test/sql/knn.sql b/pgxn/hnsw/test/sql/knn.sql new file mode 100644 index 0000000000..0635bda4a2 --- /dev/null +++ b/pgxn/hnsw/test/sql/knn.sql @@ -0,0 +1,13 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val real[]); +INSERT INTO t (val) VALUES ('{0,0,0}'), ('{1,2,3}'), ('{1,1,1}'), (NULL); +CREATE INDEX ON t USING hnsw (val) WITH (maxelements = 10, dims=3, m=3); + +INSERT INTO t (val) VALUES (array[1,2,4]); + +explain SELECT * FROM t ORDER BY val <-> array[3,3,3]; +SELECT * FROM t ORDER BY val <-> array[3,3,3]; +SELECT COUNT(*) FROM t; + +DROP TABLE t; diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile index ec377dbb1e..1948023472 100644 --- a/pgxn/neon/Makefile +++ b/pgxn/neon/Makefile @@ -11,10 +11,12 @@ OBJS = \ pagestore_smgr.o \ relsize_cache.o \ walproposer.o \ - walproposer_utils.o + walproposer_utils.o \ + control_plane_connector.o PG_CPPFLAGS = -I$(libpq_srcdir) SHLIB_LINK_INTERNAL = $(libpq) +SHLIB_LINK = -lcurl EXTENSION = neon DATA = neon--1.0.sql diff --git a/pgxn/neon/control_plane_connector.c b/pgxn/neon/control_plane_connector.c new file mode 100644 index 0000000000..82e4af4b4a --- /dev/null +++ b/pgxn/neon/control_plane_connector.c @@ -0,0 +1,830 @@ +/*------------------------------------------------------------------------- + * + * control_plane_connector.c + * Captures updates to roles/databases using ProcessUtility_hook and + * sends them to the control plane. The changes are sent + * via HTTP to the URL specified by the GUC neon.console_url when the + * transaction commits. Forwarding may be disabled temporarily by + * setting neon.forward_ddl to false. + * + * Currently, the transaction may abort AFTER + * changes have already been forwarded, and that case is not handled. + * Subtransactions are handled using a stack of hash tables, which + * accumulate changes. On subtransaction commit, the top of the stack + * is merged with the table below it. + * + * IDENTIFICATION + * contrib/neon/control_plane_connector.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "tcop/pquery.h" +#include "tcop/utility.h" +#include "access/xact.h" +#include "utils/hsearch.h" +#include "utils/memutils.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "utils/acl.h" +#include "fmgr.h" +#include "utils/guc.h" +#include "port.h" +#include +#include "utils/jsonb.h" + +static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; + +/* GUCs */ +static char *ConsoleURL = NULL; +static bool ForwardDDL = true; + +/* Curl structures for sending the HTTP requests */ +static CURL * CurlHandle; +static struct curl_slist *ContentHeader = NULL; + +/* + * CURL docs say that this buffer must exist until we call curl_easy_cleanup + * (which we never do), so we make this a static + */ +static char CurlErrorBuf[CURL_ERROR_SIZE]; + +typedef enum +{ + Op_Set, /* An upsert: Either a creation or an alter */ + Op_Delete, +} OpType; + +typedef struct +{ + char name[NAMEDATALEN]; + Oid owner; + char old_name[NAMEDATALEN]; + OpType type; +} DbEntry; + +typedef struct +{ + char name[NAMEDATALEN]; + char old_name[NAMEDATALEN]; + const char *password; + OpType type; +} RoleEntry; + +/* + * We keep one of these for each subtransaction in a stack. When a subtransaction + * commits, we merge the top of the stack into the table below it. It is allocated in the + * subtransaction's context.
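+ * For example (illustrative names): under SAVEPOINT s1, CREATE ROLE alice
+ * records an entry in the top-of-stack table; releasing s1 merges that
+ * entry into the table below it, and the merged set is what gets forwarded
+ * when the top-level transaction commits.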
+ */ +typedef struct DdlHashTable +{ + struct DdlHashTable *prev_table; + HTAB *db_table; + HTAB *role_table; +} DdlHashTable; + +static DdlHashTable RootTable; +static DdlHashTable * CurrentDdlTable = &RootTable; + +static void +PushKeyValue(JsonbParseState **state, char *key, char *value) +{ + JsonbValue k, + v; + + k.type = jbvString; + k.val.string.len = strlen(key); + k.val.string.val = key; + v.type = jbvString; + v.val.string.len = strlen(value); + v.val.string.val = value; + pushJsonbValue(state, WJB_KEY, &k); + pushJsonbValue(state, WJB_VALUE, &v); +} + +static char * +ConstructDeltaMessage() +{ + JsonbParseState *state = NULL; + + pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); + if (RootTable.db_table) + { + JsonbValue dbs; + + dbs.type = jbvString; + dbs.val.string.val = "dbs"; + dbs.val.string.len = strlen(dbs.val.string.val); + pushJsonbValue(&state, WJB_KEY, &dbs); + pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL); + + HASH_SEQ_STATUS status; + DbEntry *entry; + + hash_seq_init(&status, RootTable.db_table); + while ((entry = hash_seq_search(&status)) != NULL) + { + pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); + PushKeyValue(&state, "op", entry->type == Op_Set ? "set" : "del"); + PushKeyValue(&state, "name", entry->name); + if (entry->owner != InvalidOid) + { + PushKeyValue(&state, "owner", GetUserNameFromId(entry->owner, false)); + } + if (entry->old_name[0] != '\0') + { + PushKeyValue(&state, "old_name", entry->old_name); + } + pushJsonbValue(&state, WJB_END_OBJECT, NULL); + } + pushJsonbValue(&state, WJB_END_ARRAY, NULL); + } + + if (RootTable.role_table) + { + JsonbValue roles; + + roles.type = jbvString; + roles.val.string.val = "roles"; + roles.val.string.len = strlen(roles.val.string.val); + pushJsonbValue(&state, WJB_KEY, &roles); + pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL); + + HASH_SEQ_STATUS status; + RoleEntry *entry; + + hash_seq_init(&status, RootTable.role_table); + while ((entry = hash_seq_search(&status)) != NULL) + { + pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); + PushKeyValue(&state, "op", entry->type == Op_Set ? 
"set" : "del"); + PushKeyValue(&state, "name", entry->name); + if (entry->password) + { + PushKeyValue(&state, "password", (char *) entry->password); + } + if (entry->old_name[0] != '\0') + { + PushKeyValue(&state, "old_name", entry->old_name); + } + pushJsonbValue(&state, WJB_END_OBJECT, NULL); + } + pushJsonbValue(&state, WJB_END_ARRAY, NULL); + } + JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL); + Jsonb *jsonb = JsonbValueToJsonb(result); + + return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ ); +} + +#define ERROR_SIZE 1024 + +typedef struct +{ + char str[ERROR_SIZE]; + size_t size; +} ErrorString; + +static size_t +ErrorWriteCallback(char *ptr, size_t size, size_t nmemb, void *userdata) +{ + /* Docs say size is always 1 */ + ErrorString *str = userdata; + + size_t to_write = nmemb; + + /* +1 for null terminator */ + if (str->size + nmemb + 1 >= ERROR_SIZE) + to_write = ERROR_SIZE - str->size - 1; + + /* Ignore everyrthing past the first ERROR_SIZE bytes */ + if (to_write == 0) + return nmemb; + memcpy(str->str + str->size, ptr, to_write); + str->size += to_write; + str->str[str->size] = '\0'; + return nmemb; +} + +static void +SendDeltasToControlPlane() +{ + if (!RootTable.db_table && !RootTable.role_table) + return; + if (!ConsoleURL) + { + elog(LOG, "ConsoleURL not set, skipping forwarding"); + return; + } + if (!ForwardDDL) + return; + + char *message = ConstructDeltaMessage(); + ErrorString str = {}; + + curl_easy_setopt(CurlHandle, CURLOPT_CUSTOMREQUEST, "PATCH"); + curl_easy_setopt(CurlHandle, CURLOPT_HTTPHEADER, ContentHeader); + curl_easy_setopt(CurlHandle, CURLOPT_POSTFIELDS, message); + curl_easy_setopt(CurlHandle, CURLOPT_URL, ConsoleURL); + curl_easy_setopt(CurlHandle, CURLOPT_ERRORBUFFER, CurlErrorBuf); + curl_easy_setopt(CurlHandle, CURLOPT_TIMEOUT, 3L /* seconds */ ); + curl_easy_setopt(CurlHandle, CURLOPT_WRITEDATA, &str); + curl_easy_setopt(CurlHandle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback); + + const int num_retries = 5; + int curl_status; + + for (int i = 0; i < num_retries; i++) + { + if ((curl_status = curl_easy_perform(CurlHandle)) == 0) + break; + elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf); + pg_usleep(1000 * 1000); + } + if (curl_status != 0) + { + elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf); + } + else + { + long response_code; + + if (curl_easy_getinfo(CurlHandle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION) + { + bool error_exists = str.size != 0; + + if (response_code != 200) + { + if (error_exists) + { + elog(ERROR, + "Received HTTP code %ld from control plane: %s", + response_code, + str.str); + } + else + { + elog(ERROR, + "Received HTTP code %ld from control plane", + response_code); + } + } + } + } +} + +static void +InitDbTableIfNeeded() +{ + if (!CurrentDdlTable->db_table) + { + HASHCTL db_ctl = {}; + + db_ctl.keysize = NAMEDATALEN; + db_ctl.entrysize = sizeof(DbEntry); + db_ctl.hcxt = CurTransactionContext; + CurrentDdlTable->db_table = hash_create( + "Dbs Created", + 4, + &db_ctl, + HASH_ELEM | HASH_STRINGS | HASH_CONTEXT); + } +} + +static void +InitRoleTableIfNeeded() +{ + if (!CurrentDdlTable->role_table) + { + HASHCTL role_ctl = {}; + + role_ctl.keysize = NAMEDATALEN; + role_ctl.entrysize = sizeof(RoleEntry); + role_ctl.hcxt = CurTransactionContext; + CurrentDdlTable->role_table = hash_create( + "Roles Created", + 4, + &role_ctl, + HASH_ELEM | HASH_STRINGS | HASH_CONTEXT); + } +} + +static void +PushTable() +{ + DdlHashTable *new_table = 
MemoryContextAlloc(CurTransactionContext, sizeof(DdlHashTable)); + + new_table->prev_table = CurrentDdlTable; + new_table->role_table = NULL; + new_table->db_table = NULL; + CurrentDdlTable = new_table; +} + +static void +MergeTable() +{ + DdlHashTable *old_table = CurrentDdlTable; + + CurrentDdlTable = old_table->prev_table; + + if (old_table->db_table) + { + InitDbTableIfNeeded(); + DbEntry *entry; + HASH_SEQ_STATUS status; + + hash_seq_init(&status, old_table->db_table); + while ((entry = hash_seq_search(&status)) != NULL) + { + DbEntry *to_write = hash_search( + CurrentDdlTable->db_table, + entry->name, + HASH_ENTER, + NULL); + + to_write->type = entry->type; + if (entry->owner != InvalidOid) + to_write->owner = entry->owner; + strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN); + if (entry->old_name[0] != '\0') + { + bool found_old = false; + DbEntry *old = hash_search( + CurrentDdlTable->db_table, + entry->old_name, + HASH_FIND, + &found_old); + + if (found_old) + { + if (old->old_name[0] != '\0') + strlcpy(to_write->old_name, old->old_name, NAMEDATALEN); + else + strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN); + hash_search( + CurrentDdlTable->db_table, + entry->old_name, + HASH_REMOVE, + NULL); + } + } + } + hash_destroy(old_table->db_table); + } + + if (old_table->role_table) + { + InitRoleTableIfNeeded(); + RoleEntry *entry; + HASH_SEQ_STATUS status; + + hash_seq_init(&status, old_table->role_table); + while ((entry = hash_seq_search(&status)) != NULL) + { + RoleEntry *to_write = hash_search( + CurrentDdlTable->role_table, + entry->name, + HASH_ENTER, + NULL); + + to_write->type = entry->type; + if (entry->password) + to_write->password = entry->password; + strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN); + if (entry->old_name[0] != '\0') + { + bool found_old = false; + RoleEntry *old = hash_search( + CurrentDdlTable->role_table, + entry->old_name, + HASH_FIND, + &found_old); + + if (found_old) + { + if (old->old_name[0] != '\0') + strlcpy(to_write->old_name, old->old_name, NAMEDATALEN); + else + strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN); + hash_search(CurrentDdlTable->role_table, + entry->old_name, + HASH_REMOVE, + NULL); + } + } + } + hash_destroy(old_table->role_table); + } +} + +static void +PopTable() +{ + /* + * Current table gets freed because it is allocated in aborted + * subtransaction's memory context. 
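+ * (The same holds for the password strings copied with
+ * MemoryContextStrdup elsewhere in this file: they live in that same
+ * context, so no explicit cleanup is needed here.)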
+ */ + CurrentDdlTable = CurrentDdlTable->prev_table; +} + +static void +NeonSubXactCallback( + SubXactEvent event, + SubTransactionId mySubid, + SubTransactionId parentSubid, + void *arg) +{ + switch (event) + { + case SUBXACT_EVENT_START_SUB: + return PushTable(); + case SUBXACT_EVENT_COMMIT_SUB: + return MergeTable(); + case SUBXACT_EVENT_ABORT_SUB: + return PopTable(); + default: + return; + } +} + +static void +NeonXactCallback(XactEvent event, void *arg) +{ + if (event == XACT_EVENT_PRE_COMMIT || event == XACT_EVENT_PARALLEL_PRE_COMMIT) + { + SendDeltasToControlPlane(); + } + RootTable.role_table = NULL; + RootTable.db_table = NULL; + Assert(CurrentDdlTable == &RootTable); +} + +static void +HandleCreateDb(CreatedbStmt *stmt) +{ + InitDbTableIfNeeded(); + DefElem *downer = NULL; + ListCell *option; + + foreach(option, stmt->options) + { + DefElem *defel = lfirst(option); + + if (strcmp(defel->defname, "owner") == 0) + downer = defel; + } + bool found = false; + DbEntry *entry = hash_search( + CurrentDdlTable->db_table, + stmt->dbname, + HASH_ENTER, + &found); + + if (!found) + memset(entry->old_name, 0, sizeof(entry->old_name)); + + entry->type = Op_Set; + if (downer && downer->arg) + entry->owner = get_role_oid(defGetString(downer), false); + else + entry->owner = GetUserId(); +} + +static void +HandleAlterOwner(AlterOwnerStmt *stmt) +{ + if (stmt->objectType != OBJECT_DATABASE) + return; + InitDbTableIfNeeded(); + const char *name = strVal(stmt->object); + bool found = false; + DbEntry *entry = hash_search( + CurrentDdlTable->db_table, + name, + HASH_ENTER, + &found); + + if (!found) + memset(entry->old_name, 0, sizeof(entry->old_name)); + + entry->owner = get_role_oid(get_rolespec_name(stmt->newowner), false); + entry->type = Op_Set; +} + +static void +HandleDbRename(RenameStmt *stmt) +{ + Assert(stmt->renameType == OBJECT_DATABASE); + InitDbTableIfNeeded(); + bool found = false; + DbEntry *entry = hash_search( + CurrentDdlTable->db_table, + stmt->subname, + HASH_FIND, + &found); + DbEntry *entry_for_new_name = hash_search( + CurrentDdlTable->db_table, + stmt->newname, + HASH_ENTER, + NULL); + + entry_for_new_name->type = Op_Set; + if (found) + { + if (entry->old_name[0] != '\0') + strlcpy(entry_for_new_name->old_name, entry->old_name, NAMEDATALEN); + else + strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN); + entry_for_new_name->owner = entry->owner; + hash_search( + CurrentDdlTable->db_table, + stmt->subname, + HASH_REMOVE, + NULL); + } + else + { + strlcpy(entry_for_new_name->old_name, stmt->subname, NAMEDATALEN); + entry_for_new_name->owner = InvalidOid; + } +} + +static void +HandleDropDb(DropdbStmt *stmt) +{ + InitDbTableIfNeeded(); + bool found = false; + DbEntry *entry = hash_search( + CurrentDdlTable->db_table, + stmt->dbname, + HASH_ENTER, + &found); + + entry->type = Op_Delete; + entry->owner = InvalidOid; + if (!found) + memset(entry->old_name, 0, sizeof(entry->old_name)); +} + +static void +HandleCreateRole(CreateRoleStmt *stmt) +{ + InitRoleTableIfNeeded(); + bool found = false; + RoleEntry *entry = hash_search( + CurrentDdlTable->role_table, + stmt->role, + HASH_ENTER, + &found); + DefElem *dpass = NULL; + ListCell *option; + + foreach(option, stmt->options) + { + DefElem *defel = lfirst(option); + + if (strcmp(defel->defname, "password") == 0) + dpass = defel; + } + if (!found) + memset(entry->old_name, 0, sizeof(entry->old_name)); + if (dpass && dpass->arg) + entry->password = MemoryContextStrdup(CurTransactionContext, strVal(dpass->arg)); + else + 
entry->password = NULL;
+	entry->type = Op_Set;
+}
+
+static void
+HandleAlterRole(AlterRoleStmt *stmt)
+{
+	InitRoleTableIfNeeded();
+	DefElem    *dpass = NULL;
+	ListCell   *option;
+
+	foreach(option, stmt->options)
+	{
+		DefElem    *defel = lfirst(option);
+
+		if (strcmp(defel->defname, "password") == 0)
+			dpass = defel;
+	}
+	/* We only care about updates to the password */
+	if (!dpass)
+		return;
+	bool		found = false;
+	RoleEntry  *entry = hash_search(
+		CurrentDdlTable->role_table,
+		stmt->role->rolename,
+		HASH_ENTER,
+		&found);
+
+	if (!found)
+		memset(entry->old_name, 0, sizeof(entry->old_name));
+	if (dpass->arg)
+		entry->password = MemoryContextStrdup(CurTransactionContext, strVal(dpass->arg));
+	else
+		entry->password = NULL;
+	entry->type = Op_Set;
+}
+
+static void
+HandleRoleRename(RenameStmt *stmt)
+{
+	InitRoleTableIfNeeded();
+	Assert(stmt->renameType == OBJECT_ROLE);
+	bool		found = false;
+	RoleEntry  *entry = hash_search(
+		CurrentDdlTable->role_table,
+		stmt->subname,
+		HASH_FIND,
+		&found);
+
+	RoleEntry  *entry_for_new_name = hash_search(
+		CurrentDdlTable->role_table,
+		stmt->newname,
+		HASH_ENTER,
+		NULL);
+
+	entry_for_new_name->type = Op_Set;
+	if (found)
+	{
+		if (entry->old_name[0] != '\0')
+			strlcpy(entry_for_new_name->old_name, entry->old_name, NAMEDATALEN);
+		else
+			strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
+		entry_for_new_name->password = entry->password;
+		hash_search(
+			CurrentDdlTable->role_table,
+			entry->name,
+			HASH_REMOVE,
+			NULL);
+	}
+	else
+	{
+		strlcpy(entry_for_new_name->old_name, stmt->subname, NAMEDATALEN);
+		entry_for_new_name->password = NULL;
+	}
+}
+
+static void
+HandleDropRole(DropRoleStmt *stmt)
+{
+	InitRoleTableIfNeeded();
+	ListCell   *item;
+
+	foreach(item, stmt->roles)
+	{
+		RoleSpec   *spec = lfirst(item);
+		bool		found = false;
+		RoleEntry  *entry = hash_search(
+			CurrentDdlTable->role_table,
+			spec->rolename,
+			HASH_ENTER,
+			&found);
+
+		entry->type = Op_Delete;
+		entry->password = NULL;
+		if (!found)
+			memset(entry->old_name, 0, sizeof(entry->old_name));
+	}
+}
+
+static void
+HandleRename(RenameStmt *stmt)
+{
+	if (stmt->renameType == OBJECT_DATABASE)
+		return HandleDbRename(stmt);
+	else if (stmt->renameType == OBJECT_ROLE)
+		return HandleRoleRename(stmt);
+}
+
+static void
+NeonProcessUtility(
+	PlannedStmt *pstmt,
+	const char *queryString,
+	bool readOnlyTree,
+	ProcessUtilityContext context,
+	ParamListInfo params,
+	QueryEnvironment *queryEnv,
+	DestReceiver *dest,
+	QueryCompletion *qc)
+{
+	Node	   *parseTree = pstmt->utilityStmt;
+
+	switch (nodeTag(parseTree))
+	{
+		case T_CreatedbStmt:
+			HandleCreateDb(castNode(CreatedbStmt, parseTree));
+			break;
+		case T_AlterOwnerStmt:
+			HandleAlterOwner(castNode(AlterOwnerStmt, parseTree));
+			break;
+		case T_RenameStmt:
+			HandleRename(castNode(RenameStmt, parseTree));
+			break;
+		case T_DropdbStmt:
+			HandleDropDb(castNode(DropdbStmt, parseTree));
+			break;
+		case T_CreateRoleStmt:
+			HandleCreateRole(castNode(CreateRoleStmt, parseTree));
+			break;
+		case T_AlterRoleStmt:
+			HandleAlterRole(castNode(AlterRoleStmt, parseTree));
+			break;
+		case T_DropRoleStmt:
+			HandleDropRole(castNode(DropRoleStmt, parseTree));
+			break;
+		default:
+			break;
+	}
+
+	if (PreviousProcessUtilityHook)
+	{
+		PreviousProcessUtilityHook(
+			pstmt,
+			queryString,
+			readOnlyTree,
+			context,
+			params,
+			queryEnv,
+			dest,
+			qc);
+	}
+	else
+	{
+		standard_ProcessUtility(
+			pstmt,
+			queryString,
+			readOnlyTree,
+			context,
+			params,
+			queryEnv,
+			dest,
+			qc);
+	}
+}
+
+extern void
+InitControlPlaneConnector()
+{
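+	/*
+	 * Standard hook chaining: we stash whatever ProcessUtility_hook was
+	 * installed before us, and NeonProcessUtility above delegates to it
+	 * (or to standard_ProcessUtility when we are first in the chain).
+	 *
+	 * For reference, ConstructDeltaMessage() produces a JSON document
+	 * shaped like this (names and values are illustrative only):
+	 *
+	 *   {"dbs":   [{"op": "set", "name": "db1", "owner": "alice"}],
+	 *    "roles": [{"op": "del", "name": "r1"}]}
+	 */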
+	PreviousProcessUtilityHook = ProcessUtility_hook;
+	ProcessUtility_hook = NeonProcessUtility;
+	RegisterXactCallback(NeonXactCallback, NULL);
+	RegisterSubXactCallback(NeonSubXactCallback, NULL);
+
+	DefineCustomStringVariable(
+		"neon.console_url",
+		"URL of the Neon Console, to which changes to dbs and roles will be forwarded",
+		NULL,
+		&ConsoleURL,
+		NULL,
+		PGC_POSTMASTER,
+		0,
+		NULL,
+		NULL,
+		NULL);
+
+	DefineCustomBoolVariable(
+		"neon.forward_ddl",
+		"Controls whether to forward DDL to the control plane",
+		NULL,
+		&ForwardDDL,
+		true,
+		PGC_SUSET,
+		0,
+		NULL,
+		NULL,
+		NULL);
+
+	const char *jwt_token = getenv("NEON_CONTROL_PLANE_TOKEN");
+
+	if (!jwt_token)
+	{
+		elog(LOG, "Missing NEON_CONTROL_PLANE_TOKEN environment variable, forwarding will not be authenticated");
+	}
+
+	if (curl_global_init(CURL_GLOBAL_DEFAULT))
+	{
+		elog(ERROR, "Failed to initialize curl");
+	}
+	if ((CurlHandle = curl_easy_init()) == NULL)
+	{
+		elog(ERROR, "Failed to initialize curl handle");
+	}
+	if ((ContentHeader = curl_slist_append(ContentHeader, "Content-Type: application/json")) == NULL)
+	{
+		elog(ERROR, "Failed to initialize content header");
+	}
+
+	if (jwt_token)
+	{
+		char		auth_header[8192];
+
+		snprintf(auth_header, sizeof(auth_header), "Authorization: Bearer %s", jwt_token);
+		if ((ContentHeader = curl_slist_append(ContentHeader, auth_header)) == NULL)
+		{
+			elog(ERROR, "Failed to initialize authorization header");
+		}
+	}
+}
diff --git a/pgxn/neon/control_plane_connector.h b/pgxn/neon/control_plane_connector.h
new file mode 100644
index 0000000000..12d6a97562
--- /dev/null
+++ b/pgxn/neon/control_plane_connector.h
@@ -0,0 +1,6 @@
+#ifndef CONTROL_PLANE_CONNECTOR_H
+#define CONTROL_PLANE_CONNECTOR_H
+
+void InitControlPlaneConnector();
+
+#endif
diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c
index 217c1974a0..b45d7cfc32 100644
--- a/pgxn/neon/neon.c
+++ b/pgxn/neon/neon.c
@@ -25,6 +25,7 @@
 #include "neon.h"
 #include "walproposer.h"
 #include "pagestore_client.h"
+#include "control_plane_connector.h"
 
 PG_MODULE_MAGIC;
 void		_PG_init(void);
@@ -34,7 +35,11 @@ _PG_init(void)
 {
 	pg_init_libpagestore();
 	pg_init_walproposer();
+	InitControlPlaneConnector();
 
+	// Important: this must run after the rest of the extension has
+	// registered its GUCs; otherwise any values set for those GUCs before
+	// the library was loaded would be removed.
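+	// (Background: before a real GUC is defined, neon.* settings from the
+	// config file exist only as placeholders; EmitWarningsOnPlaceholders
+	// then flags any leftover placeholders in the "neon" namespace.)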
EmitWarningsOnPlaceholders("neon"); } diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index a99be40955..64d980d2e4 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -254,20 +254,20 @@ nwp_register_gucs(void) DefineCustomIntVariable( "neon.safekeeper_reconnect_timeout", - "Timeout for reconnecting to offline wal acceptor.", + "Walproposer reconnects to offline safekeepers once in this interval.", NULL, &wal_acceptor_reconnect_timeout, - 1000, 0, INT_MAX, /* default, min, max */ + 5000, 0, INT_MAX, /* default, min, max */ PGC_SIGHUP, /* context */ GUC_UNIT_MS, /* flags */ NULL, NULL, NULL); DefineCustomIntVariable( "neon.safekeeper_connect_timeout", - "Timeout for connection establishement and it's maintenance against safekeeper", + "Connection or connection attempt to safekeeper is terminated if no message is received (or connection attempt doesn't finish) within this period.", NULL, &wal_acceptor_connection_timeout, - 5000, 0, INT_MAX, + 10000, 0, INT_MAX, PGC_SIGHUP, GUC_UNIT_MS, NULL, NULL, NULL); @@ -441,7 +441,7 @@ WalProposerPoll(void) if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now, wal_acceptor_connection_timeout)) { - elog(WARNING, "failed to connect to node '%s:%s' in '%s' state: exceeded connection timeout %dms", + elog(WARNING, "terminating connection to safekeeper '%s:%s' in '%s' state: no messages received during the last %dms or connection attempt took longer than that", sk->host, sk->port, FormatSafekeeperState(sk->state), wal_acceptor_connection_timeout); ShutdownConnection(sk); } @@ -1035,9 +1035,16 @@ RecvAcceptorGreeting(Safekeeper *sk) if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) & sk->greetResponse)) return; + elog(LOG, "received AcceptorGreeting from safekeeper %s:%s", sk->host, sk->port); + /* Protocol is all good, move to voting. */ sk->state = SS_VOTING; + /* + * Note: it would be better to track the counter on per safekeeper basis, + * but at worst walproposer would restart with 'term rejected', so leave as + * is for now. + */ ++n_connected; if (n_connected <= quorum) { diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index bba2d51caf..a5f50cc7c1 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -17,7 +17,7 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tokio_util::sync::CancellationToken; use utils::{project_git_version, sentry_init::init_sentry}; -use tracing::{error, info, warn}; +use tracing::{error, info, warn, Instrument}; project_git_version!(GIT_VERSION); @@ -141,7 +141,6 @@ async fn task_main( tokio::select! { accept_result = listener.accept() => { let (socket, peer_addr) = accept_result?; - info!("accepted postgres client connection from {peer_addr}"); let session_id = uuid::Uuid::new_v4(); let tls_config = Arc::clone(&tls_config); @@ -149,18 +148,18 @@ async fn task_main( connections.spawn( async move { - info!("spawned a task for {peer_addr}"); - socket .set_nodelay(true) .context("failed to set socket option")?; - handle_client(dest_suffix, tls_config, session_id, socket).await + info!(%peer_addr, "serving"); + handle_client(dest_suffix, tls_config, socket).await } .unwrap_or_else(|e| { // Acknowledge that the task has finished with an error. 
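                    // (Editorial note: the .instrument() call below attaches
                    // the handle_client span to the whole task future, so
                    // this error line is also emitted inside that span.)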
error!("per-client task finished with an error: {e:#}"); - }), + }) + .instrument(tracing::info_span!("handle_client", ?session_id)) ); } _ = cancellation_token.cancelled() => { @@ -192,7 +191,6 @@ async fn ssl_handshake( let mut stream = PqStream::new(Stream::from_raw(raw_stream)); let msg = stream.read_startup_packet().await?; - info!("received {msg:?}"); use pq_proto::FeStartupPacket::*; match msg { @@ -215,15 +213,19 @@ async fn ssl_handshake( } Ok(raw.upgrade(tls_config).await?) } - _ => stream.throw_error_str(ERR_INSECURE_CONNECTION).await?, + unexpected => { + info!( + ?unexpected, + "unexpected startup packet, rejecting connection" + ); + stream.throw_error_str(ERR_INSECURE_CONNECTION).await? + } } } -#[tracing::instrument(fields(session_id = ?session_id), skip_all)] async fn handle_client( dest_suffix: Arc, tls_config: Arc, - session_id: uuid::Uuid, stream: impl AsyncRead + AsyncWrite + Unpin, ) -> anyhow::Result<()> { let tls_stream = ssl_handshake(stream, tls_config).await?; diff --git a/proxy/src/console.rs b/proxy/src/console.rs index 1f3ef99555..0e5eaaf845 100644 --- a/proxy/src/console.rs +++ b/proxy/src/console.rs @@ -1,5 +1,5 @@ -///! Various stuff for dealing with the Neon Console. -///! Later we might move some API wrappers here. +//! Various stuff for dealing with the Neon Console. +//! Later we might move some API wrappers here. /// Payloads used in the console's APIs. pub mod messages; diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs index 60acb588dc..3373c49676 100644 --- a/proxy/src/proxy/tests.rs +++ b/proxy/src/proxy/tests.rs @@ -1,4 +1,4 @@ -///! A group of high-level tests for connection establishing logic and auth. +//! A group of high-level tests for connection establishing logic and auth. use super::*; use crate::{auth, sasl, scram}; use async_trait::async_trait; diff --git a/safekeeper/src/broker.rs b/safekeeper/src/broker.rs index 5e25d22ec1..48c56ee58f 100644 --- a/safekeeper/src/broker.rs +++ b/safekeeper/src/broker.rs @@ -19,8 +19,10 @@ use tokio::task::JoinHandle; use tokio::{runtime, time::sleep}; use tracing::*; +use crate::metrics::BROKER_ITERATION_TIMELINES; use crate::metrics::BROKER_PULLED_UPDATES; use crate::metrics::BROKER_PUSHED_UPDATES; +use crate::metrics::BROKER_PUSH_ALL_UPDATES_SECONDS; use crate::GlobalTimelines; use crate::SafeKeeperConf; @@ -61,8 +63,14 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> { BROKER_PUSHED_UPDATES.inc(); } let elapsed = now.elapsed(); - // Log duration every second. Should be about 10MB of logs per day. 
- info!("pushed {} timeline updates to broker in {:?}", active_tlis.len(), elapsed); + + BROKER_PUSH_ALL_UPDATES_SECONDS.observe(elapsed.as_secs_f64()); + BROKER_ITERATION_TIMELINES.observe(active_tlis.len() as f64); + + if elapsed > push_interval / 2 { + info!("broker push is too long, pushed {} timeline updates to broker in {:?}", active_tlis.len(), elapsed); + } + sleep(push_interval).await; } }; diff --git a/safekeeper/src/metrics.rs b/safekeeper/src/metrics.rs index 189af2b044..235a88501d 100644 --- a/safekeeper/src/metrics.rs +++ b/safekeeper/src/metrics.rs @@ -125,6 +125,25 @@ pub static BACKUP_ERRORS: Lazy = Lazy::new(|| { ) .expect("Failed to register safekeeper_backup_errors_total counter") }); +pub static BROKER_PUSH_ALL_UPDATES_SECONDS: Lazy = Lazy::new(|| { + register_histogram!( + "safekeeper_broker_push_update_seconds", + "Seconds to push all timeline updates to the broker", + DISK_WRITE_SECONDS_BUCKETS.to_vec() + ) + .expect("Failed to register safekeeper_broker_push_update_seconds histogram vec") +}); +pub const TIMELINES_COUNT_BUCKETS: &[f64] = &[ + 1.0, 10.0, 50.0, 100.0, 200.0, 500.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0, +]; +pub static BROKER_ITERATION_TIMELINES: Lazy = Lazy::new(|| { + register_histogram!( + "safekeeper_broker_iteration_timelines", + "Count of timelines pushed to the broker in a single iteration", + TIMELINES_COUNT_BUCKETS.to_vec() + ) + .expect("Failed to register safekeeper_broker_iteration_timelines histogram vec") +}); pub const LABEL_UNKNOWN: &str = "unknown"; diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 33da0c8e5a..eb434136d4 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -634,7 +634,8 @@ where } // system_id will be updated on mismatch - if self.state.server.system_id != msg.system_id { + // sync-safekeepers doesn't know sysid and sends 0, ignore it + if self.state.server.system_id != msg.system_id && msg.system_id != 0 { if self.state.server.system_id != 0 { warn!( "unexpected system ID arrived, got {}, expected {}", diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index 1b82bd754e..644c956fc1 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -379,6 +379,12 @@ impl Storage for PhysicalStorage { ); } + // Quick exit if nothing to do to avoid writing up to 16 MiB of zeros on + // disk (this happens on each connect). + if end_pos == self.write_lsn { + return Ok(()); + } + // Close previously opened file, if any if let Some(mut unflushed_file) = self.file.take() { self.fdatasync_file(&mut unflushed_file)?; diff --git a/scripts/pr-comment-test-report.js b/scripts/comment-test-report.js similarity index 85% rename from scripts/pr-comment-test-report.js rename to scripts/comment-test-report.js index 3a7bba0daa..a7fd5b0bef 100644 --- a/scripts/pr-comment-test-report.js +++ b/scripts/comment-test-report.js @@ -1,5 +1,5 @@ // -// The script parses Allure reports and posts a comment with a summary of the test results to the PR. +// The script parses Allure reports and posts a comment with a summary of the test results to the PR or to the latest commit in the branch. // // The comment is updated on each run with the latest results. 
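 // When the run is not triggered by a pull_request event, the summary is
 // posted as a commit comment on the latest commit of the branch instead
 // (see `isPullRequest` below).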
// @@ -7,7 +7,7 @@ // - uses: actions/github-script@v6 // with: // script: | -// const script = require("./scripts/pr-comment-test-report.js") +// const script = require("./scripts/comment-test-report.js") // await script({ // github, // context, @@ -35,8 +35,12 @@ class DefaultMap extends Map { module.exports = async ({ github, context, fetch, report }) => { // Marker to find the comment in the subsequent runs const startMarker = `` + // If we run the script in the PR or in the branch (main/release/...) + const isPullRequest = !!context.payload.pull_request + // Latest commit in PR or in the branch + const commitSha = isPullRequest ? context.payload.pull_request.head.sha : context.sha // Let users know that the comment is updated automatically - const autoupdateNotice = `
The comment gets automatically updated with the latest test results<br>${context.payload.pull_request.head.sha} at ${new Date().toISOString()} :recycle:`
+  const autoupdateNotice = `The comment gets automatically updated with the latest test results<br>${commitSha} at ${new Date().toISOString()} :recycle:`
 // GitHub bot id taken from (https://api.github.com/users/github-actions[bot])
 const githubActionsBotId = 41898282
 // Comment body itself
@@ -166,22 +170,39 @@ module.exports = async ({ github, context, fetch, report }) => {
 
   commentBody += autoupdateNotice
 
-  const { data: comments } = await github.rest.issues.listComments({
-    issue_number: context.payload.number,
+  let createCommentFn, listCommentsFn, updateCommentFn, issueNumberOrSha
+  if (isPullRequest) {
+    createCommentFn = github.rest.issues.createComment
+    listCommentsFn = github.rest.issues.listComments
+    updateCommentFn = github.rest.issues.updateComment
+    issueNumberOrSha = {
+      issue_number: context.payload.number,
+    }
+  } else {
+    updateCommentFn = github.rest.repos.updateCommitComment
+    listCommentsFn = github.rest.repos.listCommentsForCommit
+    createCommentFn = github.rest.repos.createCommitComment
+    issueNumberOrSha = {
+      commit_sha: commitSha,
+    }
+  }
+
+  const { data: comments } = await listCommentsFn({
+    ...issueNumberOrSha,
     ...ownerRepoParams,
   })
 
   const comment = comments.find(comment => comment.user.id === githubActionsBotId && comment.body.startsWith(startMarker))
   if (comment) {
-    await github.rest.issues.updateComment({
+    await updateCommentFn({
      comment_id: comment.id,
      body: commentBody,
      ...ownerRepoParams,
    })
  } else {
-    await github.rest.issues.createComment({
-      issue_number: context.payload.number,
+    await createCommentFn({
      body: commentBody,
+      ...issueNumberOrSha,
      ...ownerRepoParams,
    })
  }
diff --git a/scripts/coverage b/scripts/coverage
index 1dc92e57cc..52a69c93b9 100755
--- a/scripts/coverage
+++ b/scripts/coverage
@@ -156,7 +156,9 @@ class LLVM:
              profdata: Path,
              objects: List[str],
              sources: List[str],
-             demangler: Optional[Path] = None) -> None:
+             demangler: Optional[Path] = None,
+             output_file: Optional[Path] = None,
+             ) -> None:
 
         cwd = self.cargo.cwd
         objects = list(intersperse('-object', objects))
@@ -180,14 +182,18 @@ class LLVM:
             *objects,
             *sources,
         ]
-        subprocess.check_call(cmd, cwd=cwd)
+        if output_file is not None:
+            with output_file.open('w') as outfile:
+                subprocess.check_call(cmd, cwd=cwd, stdout=outfile)
+        else:
+            subprocess.check_call(cmd, cwd=cwd)
 
     def cov_report(self, **kwargs) -> None:
         self._cov(subcommand='report', **kwargs)
 
-    def cov_export(self, *, kind: str, **kwargs) -> None:
+    def cov_export(self, *, kind: str, output_file: Optional[Path], **kwargs) -> None:
         extras = (f'-format={kind}', )
-        self._cov(subcommand='export', *extras, **kwargs)
+        self._cov(subcommand='export', *extras, output_file=output_file, **kwargs)
 
     def cov_show(self, *, kind: str, output_dir: Optional[Path] = None, **kwargs) -> None:
         extras = [f'-format={kind}']
@@ -283,9 +289,12 @@ class TextReport(Report):
         self.llvm.cov_show(kind='text', **self._common_kwargs())
 
 
+@dataclass
 class LcovReport(Report):
+    output_file: Path
+
     def generate(self) -> None:
-        self.llvm.cov_export(kind='lcov', **self._common_kwargs())
+        self.llvm.cov_export(kind='lcov', output_file=self.output_file, **self._common_kwargs())
 
 
 @dataclass
@@ -475,7 +484,7 @@ class State:
             'text':
             lambda: TextReport(**params),
             'lcov':
-            lambda: LcovReport(**params),
+            lambda: LcovReport(**params, output_file=self.report_dir / 'lcov.info'),
             'summary':
             lambda: SummaryReport(**params),
             'github':
diff --git a/scripts/export_import_between_pageservers.py b/scripts/export_import_between_pageservers.py
index 4b599ce9b6..d95878b341 100755
--- a/scripts/export_import_between_pageservers.py
+++ b/scripts/export_import_between_pageservers.py
@@ -535,8 +535,8 @@ def export_timeline(
 
 def
main(args: argparse.Namespace): - # any psql version will do here. use current DEFAULT_PG_VERSION = 14 - psql_path = str(Path(args.pg_distrib_dir) / "v14" / "bin" / "psql") + # any psql version will do here. use current DEFAULT_PG_VERSION = 15 + psql_path = str(Path(args.pg_distrib_dir) / "v15" / "bin" / "psql") old_pageserver_host = args.old_pageserver_host new_pageserver_host = args.new_pageserver_host diff --git a/storage_broker/src/lib.rs b/storage_broker/src/lib.rs index 8441aaf625..4bc561449d 100644 --- a/storage_broker/src/lib.rs +++ b/storage_broker/src/lib.rs @@ -40,6 +40,9 @@ pub type BrokerClientChannel = BrokerServiceClient; // Create connection object configured to run TLS if schema starts with https:// // and plain text otherwise. Connection is lazy, only endpoint sanity is // validated here. +// +// NB: this function is not async, but still must be run on a tokio runtime thread +// because that's a requirement of tonic_endpoint.connect_lazy()'s Channel::new call. pub fn connect(endpoint: U, keepalive_interval: Duration) -> anyhow::Result where U: std::convert::TryInto, diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 0e958ddd06..b4c237cfa6 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -65,12 +65,19 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_getpage_reconstruct_seconds_bucket", "pageserver_getpage_reconstruct_seconds_count", "pageserver_getpage_reconstruct_seconds_sum", + "pageserver_getpage_get_reconstruct_data_seconds_bucket", + "pageserver_getpage_get_reconstruct_data_seconds_count", + "pageserver_getpage_get_reconstruct_data_seconds_sum", "pageserver_io_operations_bytes_total", "pageserver_io_operations_seconds_bucket", "pageserver_io_operations_seconds_count", "pageserver_io_operations_seconds_sum", "pageserver_last_record_lsn", "pageserver_materialized_cache_hits_total", + "pageserver_materialized_cache_hits_direct_total", + "pageserver_read_num_fs_layers_bucket", + "pageserver_read_num_fs_layers_count", + "pageserver_read_num_fs_layers_sum", "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count", "pageserver_smgr_query_seconds_sum", diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 59afc104e6..551faa116e 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -629,7 +629,7 @@ class NeonEnvBuilder: assert self.env is not None, "environment is not already initialized, call init() first" self.env.start() - def init_start(self) -> NeonEnv: + def init_start(self, initial_tenant_conf: Optional[Dict[str, str]] = None) -> NeonEnv: env = self.init_configs() self.start() @@ -638,7 +638,9 @@ class NeonEnvBuilder: log.info( f"Services started, creating initial tenant {env.initial_tenant} and its initial timeline" ) - initial_tenant, initial_timeline = env.neon_cli.create_tenant(tenant_id=env.initial_tenant) + initial_tenant, initial_timeline = env.neon_cli.create_tenant( + tenant_id=env.initial_tenant, conf=initial_tenant_conf + ) env.initial_timeline = initial_timeline log.info(f"Initial timeline {initial_tenant}/{initial_timeline} created successfully") @@ -1444,11 +1446,12 @@ class NeonCli(AbstractNeonCli): def endpoint_create( self, branch_name: str, + pg_port: int, + http_port: int, endpoint_id: Optional[str] = None, tenant_id: Optional[TenantId] = None, hot_standby: bool = False, lsn: Optional[Lsn] = None, - port: Optional[int] = None, ) -> 
"subprocess.CompletedProcess[str]": args = [ "endpoint", @@ -1462,8 +1465,10 @@ class NeonCli(AbstractNeonCli): ] if lsn is not None: args.extend(["--lsn", str(lsn)]) - if port is not None: - args.extend(["--port", str(port)]) + if pg_port is not None: + args.extend(["--pg-port", str(pg_port)]) + if http_port is not None: + args.extend(["--http-port", str(http_port)]) if endpoint_id is not None: args.append(endpoint_id) if hot_standby: @@ -1476,9 +1481,11 @@ class NeonCli(AbstractNeonCli): def endpoint_start( self, endpoint_id: str, + pg_port: int, + http_port: int, + safekeepers: Optional[List[int]] = None, tenant_id: Optional[TenantId] = None, lsn: Optional[Lsn] = None, - port: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ "endpoint", @@ -1490,8 +1497,10 @@ class NeonCli(AbstractNeonCli): ] if lsn is not None: args.append(f"--lsn={lsn}") - if port is not None: - args.append(f"--port={port}") + args.extend(["--pg-port", str(pg_port)]) + args.extend(["--http-port", str(http_port)]) + if safekeepers is not None: + args.extend(["--safekeepers", (",".join(map(str, safekeepers)))]) if endpoint_id is not None: args.append(endpoint_id) @@ -1583,13 +1592,11 @@ class NeonPageserver(PgProtocol): ".*serving compute connection task.*exited with error: Postgres connection error.*", ".*serving compute connection task.*exited with error: Connection reset by peer.*", ".*serving compute connection task.*exited with error: Postgres query error.*", - ".*Connection aborted: connection error: error communicating with the server: Broken pipe.*", - ".*Connection aborted: connection error: error communicating with the server: Transport endpoint is not connected.*", - ".*Connection aborted: connection error: error communicating with the server: Connection reset by peer.*", + ".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*", # FIXME: replication patch for tokio_postgres regards any but CopyDone/CopyData message in CopyBoth stream as unexpected - ".*Connection aborted: connection error: unexpected message from server*", + ".*Connection aborted: unexpected message from server*", ".*kill_and_wait_impl.*: wait successful.*", - ".*Replication stream finished: db error:.*ending streaming to Some*", + ".*: db error:.*ending streaming to Some.*", ".*query handler for 'pagestream.*failed: Broken pipe.*", # pageserver notices compute shut down ".*query handler for 'pagestream.*failed: Connection reset by peer.*", # pageserver notices compute shut down # safekeeper connection can fail with this, in the window between timeline creation @@ -1603,24 +1610,25 @@ class NeonPageserver(PgProtocol): # https://github.com/neondatabase/neon/issues/2442 ".*could not remove ephemeral file.*No such file or directory.*", # FIXME: These need investigation - ".*gc_loop.*Failed to get a tenant .* Tenant .* not found.*", - ".*compaction_loop.*Failed to get a tenant .* Tenant .* not found.*", ".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*", ".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*", ".*Removing intermediate uninit mark file.*", - # FIXME: known race condition in TaskHandle: https://github.com/neondatabase/neon/issues/2885 - ".*sender is dropped while join handle is still alive.*", # Tenant::delete_timeline() can cause any of the four following errors. 
# FIXME: we shouldn't be considering it an error: https://github.com/neondatabase/neon/issues/2946 ".*could not flush frozen layer.*queue is in state Stopped", # when schedule layer upload fails because queued got closed before compaction got killed ".*wait for layer upload ops to complete.*", # .*Caused by:.*wait_completion aborted because upload queue was stopped ".*gc_loop.*Gc failed, retrying in.*timeline is Stopping", # When gc checks timeline state after acquiring layer_removal_cs + ".*gc_loop.*Gc failed, retrying in.*: Cannot run GC iteration on inactive tenant", # Tenant::gc precondition ".*compaction_loop.*Compaction failed, retrying in.*timeline is Stopping", # When compaction checks timeline state after acquiring layer_removal_cs ".*query handler for 'pagestream.*failed: Timeline .* was not found", # postgres reconnects while timeline_delete doesn't hold the tenant's timelines.lock() ".*query handler for 'pagestream.*failed: Timeline .* is not active", # timeline delete in progress ".*task iteration took longer than the configured period.*", # this is until #3501 ".*Compaction failed, retrying in [^:]+: Cannot run compaction iteration on inactive tenant", + # these can happen anytime we do compactions from background task and shutdown pageserver + r".*ERROR.*ancestor timeline \S+ is being stopped", + # this is expected given our collaborative shutdown approach for the UploadQueue + ".*Compaction failed, retrying in .*: queue is in state Stopped.*", ] def start( @@ -1688,6 +1696,9 @@ class NeonPageserver(PgProtocol): else: errors.append(line) + for error in errors: + log.info(f"not allowed error: {error.strip()}") + assert not errors def log_contains(self, pattern: str) -> Optional[str]: @@ -2280,17 +2291,24 @@ class Endpoint(PgProtocol): """An object representing a Postgres compute endpoint managed by the control plane.""" def __init__( - self, env: NeonEnv, tenant_id: TenantId, port: int, check_stop_result: bool = True + self, + env: NeonEnv, + tenant_id: TenantId, + pg_port: int, + http_port: int, + check_stop_result: bool = True, ): - super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres") + super().__init__(host="localhost", port=pg_port, user="cloud_admin", dbname="postgres") self.env = env self.running = False self.branch_name: Optional[str] = None # dubious self.endpoint_id: Optional[str] = None # dubious, see asserts below self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA self.tenant_id = tenant_id - self.port = port + self.pg_port = pg_port + self.http_port = http_port self.check_stop_result = check_stop_result + self.active_safekeepers: List[int] = list(map(lambda sk: sk.id, env.safekeepers)) # path to conf is /endpoints//pgdata/postgresql.conf def create( @@ -2320,7 +2338,8 @@ class Endpoint(PgProtocol): tenant_id=self.tenant_id, lsn=lsn, hot_standby=hot_standby, - port=self.port, + pg_port=self.pg_port, + http_port=self.http_port, ) path = Path("endpoints") / self.endpoint_id / "pgdata" self.pgdata_dir = os.path.join(self.env.repo_dir, path) @@ -2345,7 +2364,13 @@ class Endpoint(PgProtocol): log.info(f"Starting postgres endpoint {self.endpoint_id}") - self.env.neon_cli.endpoint_start(self.endpoint_id, tenant_id=self.tenant_id, port=self.port) + self.env.neon_cli.endpoint_start( + self.endpoint_id, + pg_port=self.pg_port, + http_port=self.http_port, + tenant_id=self.tenant_id, + safekeepers=self.active_safekeepers, + ) self.running = True return self @@ -2369,32 +2394,8 @@ class Endpoint(PgProtocol): return 
os.path.join(self.pg_data_dir_path(), "pg_twophase") def config_file_path(self) -> str: - """Path to postgresql.conf""" - return os.path.join(self.pg_data_dir_path(), "postgresql.conf") - - def adjust_for_safekeepers(self, safekeepers: str) -> "Endpoint": - """ - Adjust instance config for working with wal acceptors instead of - pageserver (pre-configured by CLI) directly. - """ - - # TODO: reuse config() - with open(self.config_file_path(), "r") as f: - cfg_lines = f.readlines() - with open(self.config_file_path(), "w") as f: - for cfg_line in cfg_lines: - # walproposer uses different application_name - if ( - "synchronous_standby_names" in cfg_line - or - # don't repeat safekeepers/wal_acceptors multiple times - "neon.safekeepers" in cfg_line - ): - continue - f.write(cfg_line) - f.write("synchronous_standby_names = 'walproposer'\n") - f.write("neon.safekeepers = '{}'\n".format(safekeepers)) - return self + """Path to the postgresql.conf in the endpoint directory (not the one in pgdata)""" + return os.path.join(self.endpoint_path(), "postgresql.conf") def config(self, lines: List[str]) -> "Endpoint": """ @@ -2499,7 +2500,8 @@ class EndpointFactory: ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, - port=self.env.port_distributor.get_port(), + pg_port=self.env.port_distributor.get_port(), + http_port=self.env.port_distributor.get_port(), ) self.num_instances += 1 self.endpoints.append(ep) @@ -2524,7 +2526,8 @@ class EndpointFactory: ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, - port=self.env.port_distributor.get_port(), + pg_port=self.env.port_distributor.get_port(), + http_port=self.env.port_distributor.get_port(), ) if endpoint_id is None: @@ -2907,6 +2910,7 @@ SKIP_FILES = frozenset( "pg_internal.init", "pg.log", "zenith.signal", + "pg_hba.conf", "postgresql.conf", "postmaster.opts", "postmaster.pid", diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 1272047881..f258a3a24d 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -155,14 +155,14 @@ class PageserverHttpClient(requests.Session): return res_json def tenant_create( - self, new_tenant_id: Optional[TenantId] = None, conf: Optional[Dict[str, Any]] = None + self, new_tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None ) -> TenantId: if conf is not None: assert "new_tenant_id" not in conf.keys() res = self.post( f"http://localhost:{self.port}/v1/tenant", json={ - "new_tenant_id": str(new_tenant_id) if new_tenant_id else None, + "new_tenant_id": str(new_tenant_id), **(conf or {}), }, ) @@ -293,13 +293,13 @@ class PageserverHttpClient(requests.Session): self, pg_version: PgVersion, tenant_id: TenantId, - new_timeline_id: Optional[TimelineId] = None, + new_timeline_id: TimelineId, ancestor_timeline_id: Optional[TimelineId] = None, ancestor_start_lsn: Optional[Lsn] = None, **kwargs, ) -> Dict[Any, Any]: body: Dict[str, Any] = { - "new_timeline_id": str(new_timeline_id) if new_timeline_id else None, + "new_timeline_id": str(new_timeline_id), "ancestor_start_lsn": str(ancestor_start_lsn) if ancestor_start_lsn else None, "ancestor_timeline_id": str(ancestor_timeline_id) if ancestor_timeline_id else None, } diff --git a/test_runner/fixtures/pg_version.py b/test_runner/fixtures/pg_version.py index d67f088365..14ae88cc2c 100644 --- a/test_runner/fixtures/pg_version.py +++ b/test_runner/fixtures/pg_version.py @@ -27,6 +27,10 @@ class PgVersion(str, enum.Enum): def __repr__(self) -> str: 
return f"'{self.value}'" + # Make this explicit for Python 3.11 compatibility, which changes the behavior of enums + def __str__(self) -> str: + return self.value + # In GitHub workflows we use Postgres version with v-prefix (e.g. v14 instead of just 14), # sometime we need to do so in tests. @property @@ -78,11 +82,11 @@ def pytest_addoption(parser: Parser): @pytest.fixture(scope="session") def pg_version(request: FixtureRequest) -> Iterator[PgVersion]: if v := request.config.getoption("--pg-version"): - version, source = v, "from --pg-version commad-line argument" + version, source = v, "from --pg-version command-line argument" elif v := os.environ.get("DEFAULT_PG_VERSION"): version, source = PgVersion(v), "from DEFAULT_PG_VERSION environment variable" else: - version, source = DEFAULT_VERSION, "default verson" + version, source = DEFAULT_VERSION, "default version" log.info(f"pg_version is {version} ({source})") yield version diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index eb2ba3e9ed..4df5ae18d6 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -20,6 +20,11 @@ def positive_env(neon_env_builder: NeonEnvBuilder) -> NeonEnv: test_name="test_attach_tenant_config", ) env = neon_env_builder.init_start() + + # eviction might be the first one after an attach to access the layers + env.pageserver.allowed_errors.append( + ".*unexpectedly on-demand downloading remote layer remote.* for task kind Eviction" + ) assert isinstance(env.remote_storage, LocalFsStorage) return env @@ -158,6 +163,7 @@ def test_fully_custom_config(positive_env: NeonEnv): "threshold": "23h", }, "evictions_low_residence_duration_metric_threshold": "2days", + "gc_feedback": True, "gc_horizon": 23 * (1024 * 1024), "gc_period": "2h 13m", "image_creation_threshold": 7, diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py index 3e4a0bfbbb..fb79748832 100644 --- a/test_runner/regress/test_auth.py +++ b/test_runner/regress/test_auth.py @@ -3,7 +3,7 @@ from contextlib import closing import pytest from fixtures.neon_fixtures import NeonEnvBuilder, PgProtocol from fixtures.pageserver.http import PageserverApiException -from fixtures.types import TenantId +from fixtures.types import TenantId, TimelineId def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): @@ -25,21 +25,19 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): ps.safe_psql("set FOO", password=tenant_token) ps.safe_psql("set FOO", password=pageserver_token) - new_timeline_id = env.neon_cli.create_branch( - "test_pageserver_auth", tenant_id=env.initial_tenant - ) - # tenant can create branches tenant_http_client.timeline_create( pg_version=env.pg_version, tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id, + new_timeline_id=TimelineId.generate(), + ancestor_timeline_id=env.initial_timeline, ) # console can create branches for tenant pageserver_http_client.timeline_create( pg_version=env.pg_version, tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id, + new_timeline_id=TimelineId.generate(), + ancestor_timeline_id=env.initial_timeline, ) # fail to create branch using token with different tenant_id @@ -49,18 +47,19 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): invalid_tenant_http_client.timeline_create( pg_version=env.pg_version, tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id, + new_timeline_id=TimelineId.generate(), + 
ancestor_timeline_id=env.initial_timeline, ) # create tenant using management token - pageserver_http_client.tenant_create() + pageserver_http_client.tenant_create(TenantId.generate()) # fail to create tenant using tenant token with pytest.raises( PageserverApiException, match="Forbidden: Attempt to access management api with tenant scope. Permission denied", ): - tenant_http_client.tenant_create() + tenant_http_client.tenant_create(TenantId.generate()) def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder): diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index fb592bfbc3..0fb3b4f262 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -20,7 +20,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): ".*is not active. Current state: Broken.*", ".*will not become active. Current state: Broken.*", ".*failed to load metadata.*", - ".*could not load tenant.*load local timeline.*", + ".*load failed.*load local timeline.*", ] ) diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index fe8dc293c1..2635dbd93c 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -383,6 +383,9 @@ def check_neon_works( cli_target = NeonCli(config_target) # And the current binaries to launch computes + snapshot_config["neon_distrib_dir"] = str(neon_current_binpath) + with (snapshot_config_toml).open("w") as f: + toml.dump(snapshot_config, f) config_current = copy.copy(config) config_current.neon_binpath = neon_current_binpath cli_current = NeonCli(config_current) @@ -391,7 +394,8 @@ def check_neon_works( request.addfinalizer(lambda: cli_target.raw_cli(["stop"])) pg_port = port_distributor.get_port() - cli_current.endpoint_start("main", port=pg_port) + http_port = port_distributor.get_port() + cli_current.endpoint_start("main", pg_port=pg_port, http_port=http_port) request.addfinalizer(lambda: cli_current.endpoint_stop("main")) connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres" diff --git a/test_runner/regress/test_compute_ctl.py b/test_runner/regress/test_compute_ctl.py deleted file mode 100644 index d72ffe078d..0000000000 --- a/test_runner/regress/test_compute_ctl.py +++ /dev/null @@ -1,253 +0,0 @@ -import os -from pathlib import Path -from subprocess import TimeoutExpired - -from fixtures.log_helper import log -from fixtures.neon_fixtures import ComputeCtl, NeonEnvBuilder, PgBin - - -# Test that compute_ctl works and prints "--sync-safekeepers" logs. -def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): - neon_env_builder.num_safekeepers = 3 - env = neon_env_builder.init_start() - ctl = ComputeCtl(env) - - env.neon_cli.create_branch("test_compute_ctl", "main") - endpoint = env.endpoints.create_start("test_compute_ctl") - endpoint.safe_psql("CREATE TABLE t(key int primary key, value text)") - - with open(endpoint.config_file_path(), "r") as f: - cfg_lines = f.readlines() - cfg_map = {} - for line in cfg_lines: - if "=" in line: - k, v = line.split("=") - cfg_map[k] = v.strip("\n '\"") - log.info(f"postgres config: {cfg_map}") - pgdata = endpoint.pg_data_dir_path() - pg_bin_path = os.path.join(pg_bin.pg_bin_path, "postgres") - - endpoint.stop_and_destroy() - - # stop_and_destroy removes the whole endpoint directory. Recreate it. 
- Path(pgdata).mkdir(parents=True) - - spec = ( - """ -{ - "format_version": 1.0, - - "timestamp": "2021-05-23T18:25:43.511Z", - "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b", - - "cluster": { - "cluster_id": "test-cluster-42", - "name": "Neon Test", - "state": "restarted", - "roles": [ - ], - "databases": [ - ], - "settings": [ - { - "name": "fsync", - "value": "off", - "vartype": "bool" - }, - { - "name": "wal_level", - "value": "replica", - "vartype": "enum" - }, - { - "name": "neon.safekeepers", - "value": """ - + f'"{cfg_map["neon.safekeepers"]}"' - + """, - "vartype": "string" - }, - { - "name": "wal_log_hints", - "value": "on", - "vartype": "bool" - }, - { - "name": "log_connections", - "value": "on", - "vartype": "bool" - }, - { - "name": "shared_buffers", - "value": "32768", - "vartype": "integer" - }, - { - "name": "port", - "value": """ - + f'"{cfg_map["port"]}"' - + """, - "vartype": "integer" - }, - { - "name": "max_connections", - "value": "100", - "vartype": "integer" - }, - { - "name": "max_wal_senders", - "value": "10", - "vartype": "integer" - }, - { - "name": "listen_addresses", - "value": "0.0.0.0", - "vartype": "string" - }, - { - "name": "wal_sender_timeout", - "value": "0", - "vartype": "integer" - }, - { - "name": "password_encryption", - "value": "md5", - "vartype": "enum" - }, - { - "name": "maintenance_work_mem", - "value": "65536", - "vartype": "integer" - }, - { - "name": "max_parallel_workers", - "value": "8", - "vartype": "integer" - }, - { - "name": "max_worker_processes", - "value": "8", - "vartype": "integer" - }, - { - "name": "neon.tenant_id", - "value": """ - + f'"{cfg_map["neon.tenant_id"]}"' - + """, - "vartype": "string" - }, - { - "name": "max_replication_slots", - "value": "10", - "vartype": "integer" - }, - { - "name": "neon.timeline_id", - "value": """ - + f'"{cfg_map["neon.timeline_id"]}"' - + """, - "vartype": "string" - }, - { - "name": "shared_preload_libraries", - "value": "neon", - "vartype": "string" - }, - { - "name": "synchronous_standby_names", - "value": "walproposer", - "vartype": "string" - }, - { - "name": "neon.pageserver_connstring", - "value": """ - + f'"{cfg_map["neon.pageserver_connstring"]}"' - + """, - "vartype": "string" - } - ] - }, - "delta_operations": [ - ] -} -""" - ) - - ps_connstr = cfg_map["neon.pageserver_connstring"] - log.info(f"ps_connstr: {ps_connstr}, pgdata: {pgdata}") - - # run compute_ctl and wait for 10s - try: - ctl.raw_cli( - [ - "--connstr", - "postgres://invalid/", - "--pgdata", - pgdata, - "--spec", - spec, - "--pgbin", - pg_bin_path, - ], - timeout=10, - ) - except TimeoutExpired as exc: - ctl_logs = (exc.stderr or b"").decode("utf-8") - log.info(f"compute_ctl stderr:\n{ctl_logs}") - - with ExternalProcessManager(Path(pgdata) / "postmaster.pid"): - start = "starting safekeepers syncing" - end = "safekeepers synced at LSN" - start_pos = ctl_logs.index(start) - assert start_pos != -1 - end_pos = ctl_logs.index(end, start_pos) - assert end_pos != -1 - sync_safekeepers_logs = ctl_logs[start_pos : end_pos + len(end)] - log.info("sync_safekeepers_logs:\n" + sync_safekeepers_logs) - - # assert that --sync-safekeepers logs are present in the output - assert "connecting with node" in sync_safekeepers_logs - assert "connected with node" in sync_safekeepers_logs - assert "proposer connected to quorum (2)" in sync_safekeepers_logs - assert "got votes from majority (2)" in sync_safekeepers_logs - assert "sending elected msg to node" in sync_safekeepers_logs - - -class ExternalProcessManager: - """ - 
Context manager that kills a process with a pid file on exit. - """ - - def __init__(self, pid_file: Path): - self.path = pid_file - self.pid_file = open(pid_file, "r") - self.pid = int(self.pid_file.readline().strip()) - - def __enter__(self): - return self - - def leave_alive(self): - self.pid_file.close() - - def __exit__(self, _type, _value, _traceback): - import signal - import time - - if self.pid_file.closed: - return - - with self.pid_file: - try: - os.kill(self.pid, signal.SIGTERM) - except OSError as e: - if not self.path.is_file(): - return - log.info(f"Failed to kill {self.pid}, but the pidfile remains: {e}") - return - - for _ in range(20): - if not self.path.is_file(): - return - time.sleep(0.2) - - log.info("Process failed to stop after SIGTERM: {self.pid}") - os.kill(self.pid, signal.SIGKILL) diff --git a/test_runner/regress/test_ddl_forwarding.py b/test_runner/regress/test_ddl_forwarding.py new file mode 100644 index 0000000000..6bfa8fdbe7 --- /dev/null +++ b/test_runner/regress/test_ddl_forwarding.py @@ -0,0 +1,210 @@ +from types import TracebackType +from typing import Any, Dict, List, Optional, Tuple, Type + +import psycopg2 +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import VanillaPostgres +from pytest_httpserver import HTTPServer +from werkzeug.wrappers.request import Request +from werkzeug.wrappers.response import Response + + +def handle_db(dbs, roles, operation): + if operation["op"] == "set": + if "old_name" in operation and operation["old_name"] in dbs: + dbs[operation["name"]] = dbs[operation["old_name"]] + dbs.pop(operation["old_name"]) + if "owner" in operation: + dbs[operation["name"]] = operation["owner"] + elif operation["op"] == "del": + dbs.pop(operation["name"]) + else: + raise ValueError("Invalid op") + + +def handle_role(dbs, roles, operation): + if operation["op"] == "set": + if "old_name" in operation and operation["old_name"] in roles: + roles[operation["name"]] = roles[operation["old_name"]] + roles.pop(operation["old_name"]) + for db, owner in dbs.items(): + if owner == operation["old_name"]: + dbs[db] = operation["name"] + if "password" in operation: + roles[operation["name"]] = operation["password"] + elif operation["op"] == "del": + if "old_name" in operation: + roles.pop(operation["old_name"]) + roles.pop(operation["name"]) + else: + raise ValueError("Invalid op") + + +fail = False + + +def ddl_forward_handler(request: Request, dbs: Dict[str, str], roles: Dict[str, str]) -> Response: + log.info(f"Received request with data {request.get_data(as_text=True)}") + if fail: + log.info("FAILING") + return Response(status=500, response="Failed just cuz") + if request.json is None: + log.info("Received invalid JSON") + return Response(status=400) + json = request.json + # Handle roles first + if "roles" in json: + for operation in json["roles"]: + handle_role(dbs, roles, operation) + if "dbs" in json: + for operation in json["dbs"]: + handle_db(dbs, roles, operation) + return Response(status=200) + + +class DdlForwardingContext: + def __init__(self, httpserver: HTTPServer, vanilla_pg: VanillaPostgres, host: str, port: int): + self.server = httpserver + self.pg = vanilla_pg + self.host = host + self.port = port + self.dbs: Dict[str, str] = {} + self.roles: Dict[str, str] = {} + endpoint = "/management/api/v2/roles_and_databases" + ddl_url = f"http://{host}:{port}{endpoint}" + self.pg.configure( + [ + f"neon.console_url={ddl_url}", + "shared_preload_libraries = 'neon'", + ] + ) + log.info(f"Listening on {ddl_url}") + 
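+        # Each PATCH request sent by the extension is replayed into
+        # self.dbs / self.roles by ddl_forward_handler above, so tests can
+        # assert on this mirrored state after wait().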
self.server.expect_request(endpoint, method="PATCH").respond_with_handler( + lambda request: ddl_forward_handler(request, self.dbs, self.roles) + ) + + def __enter__(self): + self.pg.start() + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + tb: Optional[TracebackType], + ): + self.pg.stop() + + def send(self, query: str) -> List[Tuple[Any, ...]]: + return self.pg.safe_psql(query) + + def wait(self, timeout=3): + self.server.wait(timeout=timeout) + + def send_and_wait(self, query: str, timeout=3) -> List[Tuple[Any, ...]]: + res = self.send(query) + self.wait(timeout=timeout) + return res + + +@pytest.fixture(scope="function") +def ddl( + httpserver: HTTPServer, vanilla_pg: VanillaPostgres, httpserver_listen_address: tuple[str, int] +): + (host, port) = httpserver_listen_address + with DdlForwardingContext(httpserver, vanilla_pg, host, port) as ddl: + yield ddl + + +def test_ddl_forwarding(ddl: DdlForwardingContext): + curr_user = ddl.send("SELECT current_user")[0][0] + log.info(f"Current user is {curr_user}") + ddl.send_and_wait("CREATE DATABASE bork") + assert ddl.dbs == {"bork": curr_user} + ddl.send_and_wait("CREATE ROLE volk WITH PASSWORD 'nu_zayats'") + ddl.send_and_wait("ALTER DATABASE bork RENAME TO nu_pogodi") + assert ddl.dbs == {"nu_pogodi": curr_user} + ddl.send_and_wait("ALTER DATABASE nu_pogodi OWNER TO volk") + assert ddl.dbs == {"nu_pogodi": "volk"} + ddl.send_and_wait("DROP DATABASE nu_pogodi") + assert ddl.dbs == {} + ddl.send_and_wait("DROP ROLE volk") + assert ddl.roles == {} + + ddl.send_and_wait("CREATE ROLE tarzan WITH PASSWORD 'of_the_apes'") + assert ddl.roles == {"tarzan": "of_the_apes"} + ddl.send_and_wait("DROP ROLE tarzan") + assert ddl.roles == {} + ddl.send_and_wait("CREATE ROLE tarzan WITH PASSWORD 'of_the_apes'") + assert ddl.roles == {"tarzan": "of_the_apes"} + ddl.send_and_wait("ALTER ROLE tarzan WITH PASSWORD 'jungle_man'") + assert ddl.roles == {"tarzan": "jungle_man"} + ddl.send_and_wait("ALTER ROLE tarzan RENAME TO mowgli") + assert ddl.roles == {"mowgli": "jungle_man"} + ddl.send_and_wait("DROP ROLE mowgli") + assert ddl.roles == {} + + conn = ddl.pg.connect() + cur = conn.cursor() + + cur.execute("BEGIN") + cur.execute("CREATE ROLE bork WITH PASSWORD 'cork'") + cur.execute("COMMIT") + ddl.wait() + assert ddl.roles == {"bork": "cork"} + cur.execute("BEGIN") + cur.execute("CREATE ROLE stork WITH PASSWORD 'pork'") + cur.execute("ABORT") + ddl.wait() + assert ("stork", "pork") not in ddl.roles.items() + cur.execute("BEGIN") + cur.execute("ALTER ROLE bork WITH PASSWORD 'pork'") + cur.execute("ALTER ROLE bork RENAME TO stork") + cur.execute("COMMIT") + ddl.wait() + assert ddl.roles == {"stork": "pork"} + cur.execute("BEGIN") + cur.execute("CREATE ROLE dork WITH PASSWORD 'york'") + cur.execute("SAVEPOINT point") + cur.execute("ALTER ROLE dork WITH PASSWORD 'zork'") + cur.execute("ALTER ROLE dork RENAME TO fork") + cur.execute("ROLLBACK TO SAVEPOINT point") + cur.execute("ALTER ROLE dork WITH PASSWORD 'fork'") + cur.execute("ALTER ROLE dork RENAME TO zork") + cur.execute("RELEASE SAVEPOINT point") + cur.execute("COMMIT") + ddl.wait() + assert ddl.roles == {"stork": "pork", "zork": "fork"} + + cur.execute("DROP ROLE stork") + cur.execute("DROP ROLE zork") + ddl.wait() + assert ddl.roles == {} + + cur.execute("CREATE ROLE bork WITH PASSWORD 'dork'") + cur.execute("CREATE ROLE stork WITH PASSWORD 'cork'") + cur.execute("BEGIN") + cur.execute("DROP ROLE bork") + cur.execute("ALTER ROLE stork 
RENAME TO bork") + cur.execute("COMMIT") + ddl.wait() + assert ddl.roles == {"bork": "cork"} + + cur.execute("DROP ROLE bork") + ddl.wait() + assert ddl.roles == {} + + cur.execute("CREATE ROLE bork WITH PASSWORD 'dork'") + cur.execute("CREATE DATABASE stork WITH OWNER=bork") + cur.execute("ALTER ROLE bork RENAME TO cork") + ddl.wait() + assert ddl.dbs == {"stork": "cork"} + + with pytest.raises(psycopg2.InternalError): + global fail + fail = True + cur.execute("CREATE DATABASE failure WITH OWNER=cork") + ddl.wait() + + conn.close() diff --git a/test_runner/regress/test_disk_usage_eviction.py b/test_runner/regress/test_disk_usage_eviction.py index ab67518092..0ec023b9e1 100644 --- a/test_runner/regress/test_disk_usage_eviction.py +++ b/test_runner/regress/test_disk_usage_eviction.py @@ -110,6 +110,12 @@ class EvictionEnv: overrides=( "--pageserver-config-override=disk_usage_based_eviction=" + enc.dump_inline_table(disk_usage_config).replace("\n", " "), + # Disk usage based eviction runs as a background task. + # But pageserver startup delays launch of background tasks for some time, to prioritize initial logical size calculations during startup. + # But, initial logical size calculation may not be triggered if safekeepers don't publish new broker messages. + # But, we only have a 10-second-timeout in this test. + # So, disable the delay for this test. + "--pageserver-config-override=background_task_maximum_delay='0s'", ), ) diff --git a/test_runner/regress/test_metric_collection.py b/test_runner/regress/test_metric_collection.py index 00ea77f2e7..12e695bcbd 100644 --- a/test_runner/regress/test_metric_collection.py +++ b/test_runner/regress/test_metric_collection.py @@ -228,7 +228,6 @@ def proxy_with_metric_collector( @pytest.mark.asyncio async def test_proxy_metric_collection( httpserver: HTTPServer, - httpserver_listen_address, proxy_with_metric_collector: NeonProxy, vanilla_pg: VanillaPostgres, ): diff --git a/test_runner/regress/test_neon_local_cli.py b/test_runner/regress/test_neon_local_cli.py index f6629c54f9..3314e7fbf6 100644 --- a/test_runner/regress/test_neon_local_cli.py +++ b/test_runner/regress/test_neon_local_cli.py @@ -9,11 +9,18 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por try: env.neon_cli.start() env.neon_cli.create_tenant(tenant_id=env.initial_tenant, set_default=True) - env.neon_cli.endpoint_start(endpoint_id="ep-main", port=port_distributor.get_port()) + + pg_port = port_distributor.get_port() + http_port = port_distributor.get_port() + env.neon_cli.endpoint_start( + endpoint_id="ep-basic-main", pg_port=pg_port, http_port=http_port + ) env.neon_cli.create_branch(new_branch_name="migration_check") + pg_port = port_distributor.get_port() + http_port = port_distributor.get_port() env.neon_cli.endpoint_start( - endpoint_id="ep-migration_check", port=port_distributor.get_port() + endpoint_id="ep-migration_check", pg_port=pg_port, http_port=http_port ) finally: env.neon_cli.stop() diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py index 1414b4ed8e..c26ec76172 100644 --- a/test_runner/regress/test_ondemand_download.py +++ b/test_runner/regress/test_ondemand_download.py @@ -58,11 +58,8 @@ def test_ondemand_download_large_rel( ) ##### First start, insert secret data and upload it to the remote storage - env = neon_env_builder.init_start() - - # Override defaults, to create more layers - tenant, _ = env.neon_cli.create_tenant( - conf={ + env = neon_env_builder.init_start( + 
initial_tenant_conf={ # disable background GC "gc_period": "0s", "gc_horizon": f"{10 * 1024 ** 3}", # 10 GB @@ -75,7 +72,6 @@ def test_ondemand_download_large_rel( "compaction_period": "0s", } ) - env.initial_tenant = tenant endpoint = env.endpoints.create_start("main") diff --git a/test_runner/regress/test_pageserver_restarts_under_workload.py b/test_runner/regress/test_pageserver_restarts_under_workload.py index bc3f3f2be4..fc93dcffbb 100644 --- a/test_runner/regress/test_pageserver_restarts_under_workload.py +++ b/test_runner/regress/test_pageserver_restarts_under_workload.py @@ -17,12 +17,6 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB n_restarts = 10 scale = 10 - # the background task may complete the init task delay after finding an - # active tenant, but shutdown starts right before Tenant::gc_iteration - env.pageserver.allowed_errors.append( - r".*Gc failed, retrying in \S+: Cannot run GC iteration on inactive tenant" - ) - def run_pgbench(connstr: str): log.info(f"Start a pgbench workload on pg {connstr}") pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 02f1aac99c..baef8ecacc 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -140,7 +140,7 @@ def test_remote_storage_backup_and_restore( # This is before the failures injected by test_remote_failures, so it's a permanent error. pageserver_http.configure_failpoints(("storage-sync-list-remote-timelines", "return")) env.pageserver.allowed_errors.append( - ".*error attaching tenant: storage-sync-list-remote-timelines", + ".*attach failed.*: storage-sync-list-remote-timelines", ) # Attach it. This HTTP request will succeed and launch a # background task to load the tenant. 
In that background task, @@ -693,15 +693,15 @@ def test_empty_branch_remote_storage_upload_on_restart( f".*POST.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" ) - # index upload is now hitting the failpoint, should not block the shutdown - env.pageserver.stop() + # index upload is now hitting the failpoint, it should block the shutdown + env.pageserver.stop(immediate=True) timeline_path = ( Path("tenants") / str(env.initial_tenant) / "timelines" / str(new_branch_timeline_id) ) local_metadata = env.repo_dir / timeline_path / "metadata" - assert local_metadata.is_file(), "timeout cancelled timeline branching, not the upload" + assert local_metadata.is_file() assert isinstance(env.remote_storage, LocalFsStorage) new_branch_on_remote_storage = env.remote_storage.root / timeline_path diff --git a/test_runner/regress/test_sni_router.py b/test_runner/regress/test_sni_router.py index f3aa429c49..9b78e8287e 100644 --- a/test_runner/regress/test_sni_router.py +++ b/test_runner/regress/test_sni_router.py @@ -37,6 +37,7 @@ class PgSniRouter(PgProtocol): destination: str, tls_cert: Path, tls_key: Path, + test_output_dir: Path, ): # Must use a hostname rather than IP here, for SNI to work host = "localhost" @@ -49,6 +50,7 @@ class PgSniRouter(PgProtocol): self.tls_cert = tls_cert self.tls_key = tls_key self._popen: Optional[subprocess.Popen[bytes]] = None + self.test_output_dir = test_output_dir def start(self) -> "PgSniRouter": assert self._popen is None @@ -60,8 +62,12 @@ class PgSniRouter(PgProtocol): *["--destination", self.destination], ] - self._popen = subprocess.Popen(args) + router_log_path = self.test_output_dir / "pg_sni_router.log" + router_log = open(router_log_path, "w") + + self._popen = subprocess.Popen(args, stderr=router_log) self._wait_until_ready() + log.info(f"pg_sni_router started, log file: {router_log_path}") return self @backoff.on_exception(backoff.expo, OSError, max_time=10) @@ -121,6 +127,7 @@ def test_pg_sni_router( destination="localtest.me", tls_cert=test_output_dir / "router.crt", tls_key=test_output_dir / "router.key", + test_output_dir=test_output_dir, ) as router: router.start() diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index 82664cff94..9d0fdcfaf8 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -59,6 +59,13 @@ def test_tenant_reattach( # create new nenant tenant_id, timeline_id = env.neon_cli.create_tenant() + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: with endpoint.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") @@ -223,13 +230,6 @@ def test_tenant_reattach_while_busy( ) env = neon_env_builder.init_start() - # Attempts to connect from compute to pageserver while the tenant is - # temporarily detached produces these errors in the pageserver log. - env.pageserver.allowed_errors.append(".*Tenant .* not found.*") - env.pageserver.allowed_errors.append( - ".*Tenant .* will not become active\\. 
Current state: Stopping.*" - ) - pageserver_http = env.pageserver.http_client() # create new nenant @@ -238,6 +238,13 @@ def test_tenant_reattach_while_busy( conf={"checkpoint_distance": "100000"} ) + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) cur = endpoint.connect().cursor() @@ -275,6 +282,13 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # create new nenant tenant_id, timeline_id = env.neon_cli.create_tenant() + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() @@ -336,6 +350,13 @@ def test_tenant_detach_ignored_tenant(neon_simple_env: NeonEnv): # create a new tenant tenant_id, _ = env.neon_cli.create_tenant() + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() @@ -385,6 +406,13 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv): # create a new tenant tenant_id, _ = env.neon_cli.create_tenant() + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + # assert tenant exists on disk assert (env.repo_dir / "tenants" / str(tenant_id)).exists() @@ -399,6 +427,7 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv): log.info("detaching regular tenant with detach ignored flag") client.tenant_detach(tenant_id, True) + log.info("regular tenant detached without error") # check that nothing is left on disk for deleted tenant @@ -432,6 +461,13 @@ def test_detach_while_attaching( tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + # Create table, and insert some rows. 
Make it big enough that it doesn't fit in # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point @@ -577,6 +613,13 @@ def test_ignored_tenant_download_missing_layers( tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + data_id = 1 data_secret = "very secret secret" insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) @@ -636,6 +679,13 @@ def test_ignored_tenant_stays_broken_without_metadata( tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Broken.*" + ) + # ignore the tenant and remove its metadata pageserver_http.tenant_ignore(tenant_id) tenant_timeline_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id) @@ -647,7 +697,9 @@ def test_ignored_tenant_stays_broken_without_metadata( metadata_removed = True assert metadata_removed, f"Failed to find metadata file in {tenant_timeline_dir}" - env.pageserver.allowed_errors.append(".*could not load tenant .*?: failed to load metadata.*") + env.pageserver.allowed_errors.append( + f".*{tenant_id}.*: load failed.*: failed to load metadata.*" + ) # now, load it from the local files and expect it to be broken due to inability to load tenant files into memory pageserver_http.tenant_load(tenant_id=tenant_id) @@ -670,6 +722,13 @@ def test_load_attach_negatives( tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*" + ) + env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*") with pytest.raises( expected_exception=PageserverApiException, @@ -712,6 +771,13 @@ def test_ignore_while_attaching( tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0]) timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0]) + # Attempts to connect from compute to pageserver while the tenant is + # temporarily detached produces these errors in the pageserver log. + env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*") + env.pageserver.allowed_errors.append( + f".*Tenant {tenant_id} will not become active\\. 
Current state: Stopping.*" + ) + data_id = 1 data_secret = "very secret secret" insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint) diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 60ab268882..e9dcd1e5cd 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -318,7 +318,7 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa def test_single_branch_get_tenant_size_grows( - neon_env_builder: NeonEnvBuilder, test_output_dir: Path + neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion ): """ Operate on single branch reading the tenants size after each transaction. @@ -333,6 +333,13 @@ def test_single_branch_get_tenant_size_grows( # that there next_gc_cutoff could be smaller than initdb_lsn, which will # obviously lead to issues when calculating the size. gc_horizon = 0x38000 + + # it's a bit of a hack, but different versions of postgres have different + # amount of WAL generated for the same amount of data. so we need to + # adjust the gc_horizon accordingly. + if pg_version == PgVersion.V14: + gc_horizon = 0x40000 + neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}" env = neon_env_builder.init_start() diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 5642449ce6..aef2df4932 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -22,6 +22,7 @@ from fixtures.neon_fixtures import ( available_remote_storages, ) from fixtures.types import Lsn, TenantId, TimelineId +from fixtures.utils import wait_until from prometheus_client.samples import Sample @@ -266,6 +267,7 @@ def test_pageserver_metrics_removed_after_detach( cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") cur.execute("SELECT sum(key) FROM t") assert cur.fetchone() == (5000050000,) + endpoint.stop() def get_ps_metric_samples_for_tenant(tenant_id: TenantId) -> List[Sample]: ps_metrics = env.pageserver.http_client().get_metrics() @@ -308,27 +310,26 @@ def test_pageserver_with_empty_tenants( env.pageserver.allowed_errors.append( ".*marking .* as locally complete, while it doesnt exist in remote index.*" ) - env.pageserver.allowed_errors.append( - ".*could not load tenant.*Failed to list timelines directory.*" - ) + env.pageserver.allowed_errors.append(".*load failed.*list timelines directory.*") client = env.pageserver.http_client() - tenant_with_empty_timelines_dir = client.tenant_create() - temp_timelines = client.timeline_list(tenant_with_empty_timelines_dir) + tenant_with_empty_timelines = TenantId.generate() + client.tenant_create(tenant_with_empty_timelines) + temp_timelines = client.timeline_list(tenant_with_empty_timelines) for temp_timeline in temp_timelines: client.timeline_delete( - tenant_with_empty_timelines_dir, TimelineId(temp_timeline["timeline_id"]) + tenant_with_empty_timelines, TimelineId(temp_timeline["timeline_id"]) ) files_in_timelines_dir = sum( 1 for _p in Path.iterdir( - Path(env.repo_dir) / "tenants" / str(tenant_with_empty_timelines_dir) / "timelines" + Path(env.repo_dir) / "tenants" / str(tenant_with_empty_timelines) / "timelines" ) ) assert ( files_in_timelines_dir == 0 - ), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory" + ), f"Tenant {tenant_with_empty_timelines} should have an empty 
timelines/ directory" # Trigger timeline re-initialization after pageserver restart env.endpoints.stop_all() @@ -340,9 +341,15 @@ def test_pageserver_with_empty_tenants( env.pageserver.start() client = env.pageserver.http_client() - tenants = client.tenant_list() - assert len(tenants) == 2 + def not_loading(): + tenants = client.tenant_list() + assert len(tenants) == 2 + assert all(t["state"]["slug"] != "Loading" for t in tenants) + + wait_until(10, 0.2, not_loading) + + tenants = client.tenant_list() [broken_tenant] = [t for t in tenants if t["id"] == str(tenant_without_timelines_dir)] assert ( @@ -354,17 +361,17 @@ def test_pageserver_with_empty_tenants( broken_tenant_status["state"]["slug"] == "Broken" ), f"Tenant {tenant_without_timelines_dir} without timelines dir should be broken" - assert env.pageserver.log_contains(".*Setting tenant as Broken state, reason:.*") + assert env.pageserver.log_contains(".*load failed, setting tenant state to Broken:.*") - [loaded_tenant] = [t for t in tenants if t["id"] == str(tenant_with_empty_timelines_dir)] + [loaded_tenant] = [t for t in tenants if t["id"] == str(tenant_with_empty_timelines)] assert ( loaded_tenant["state"]["slug"] == "Active" - ), "Tenant {tenant_with_empty_timelines_dir} with empty timelines dir should be active and ready for timeline creation" + ), "Tenant {tenant_with_empty_timelines} with empty timelines dir should be active and ready for timeline creation" - loaded_tenant_status = client.tenant_status(tenant_with_empty_timelines_dir) + loaded_tenant_status = client.tenant_status(tenant_with_empty_timelines) assert ( loaded_tenant_status["state"]["slug"] == "Active" - ), f"Tenant {tenant_with_empty_timelines_dir} without timelines dir should be active" + ), f"Tenant {tenant_with_empty_timelines} without timelines dir should be active" time.sleep(1) # to allow metrics propagation @@ -374,7 +381,7 @@ def test_pageserver_with_empty_tenants( "state": "Broken", } active_tenants_metric_filter = { - "tenant_id": str(tenant_with_empty_timelines_dir), + "tenant_id": str(tenant_with_empty_timelines), "state": "Active", } @@ -386,7 +393,7 @@ def test_pageserver_with_empty_tenants( assert ( tenant_active_count == 1 - ), f"Tenant {tenant_with_empty_timelines_dir} should have metric as active" + ), f"Tenant {tenant_with_empty_timelines} should have metric as active" tenant_broken_count = int( ps_metrics.query_one( diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py index 7135b621cb..be79538843 100644 --- a/test_runner/regress/test_timeline_delete.py +++ b/test_runner/regress/test_timeline_delete.py @@ -271,8 +271,9 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild env.pageserver.allowed_errors.append( ".*Ignoring new state, equal to the existing one: Stopping" ) + # this happens, because the stuck timeline is visible to shutdown env.pageserver.allowed_errors.append( - ".*during shutdown: cannot flush frozen layers when flush_loop is not running, state is Exited" + ".*freeze_and_flush_on_shutdown.+: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited" ) ps_http = env.pageserver.http_client() @@ -371,7 +372,7 @@ def test_concurrent_timeline_delete_if_first_stuck_at_index_upload( # make the second call and assert behavior log.info("second call start") - error_msg_re = "another task is already setting the deleted_flag, started at" + error_msg_re = "timeline deletion is already in progress" with 
pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err: ps_http.timeline_delete(env.initial_tenant, child_timeline_id) assert second_call_err.value.status_code == 500 @@ -437,12 +438,22 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder): wait_until(50, 0.1, got_hangup_log_message) - # ok, retry without failpoint, it should succeed + # check that the timeline is still present + ps_http.timeline_detail(env.initial_tenant, child_timeline_id) + + # ok, disable the failpoint to let the deletion finish ps_http.configure_failpoints((failpoint_name, "off")) - # this should succeed - ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2) - # the second call will try to transition the timeline into Stopping state, but it's already in that state - env.pageserver.allowed_errors.append( - f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping" - ) + def first_request_finished(): + message = f".*DELETE.*{child_timeline_id}.*Cancelled request finished" + assert env.pageserver.log_contains(message) + + wait_until(50, 0.1, first_request_finished) + + # check that the timeline is gone + notfound_message = f"Timeline {env.initial_tenant}/{child_timeline_id} was not found" + env.pageserver.allowed_errors.append(".*" + notfound_message) + with pytest.raises(PageserverApiException, match=notfound_message) as exc: + ps_http.timeline_detail(env.initial_tenant, child_timeline_id) + + assert exc.value.status_code == 404 diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 2a4141ed30..8b595596cb 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1001,9 +1001,6 @@ def test_safekeeper_without_pageserver( def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): - def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str: - return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names]) - def execute_payload(endpoint: Endpoint): with closing(endpoint.connect()) as conn: with conn.cursor() as cur: @@ -1032,9 +1029,8 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): log.info("Use only first 3 safekeepers") env.safekeepers[3].stop() - active_safekeepers = [1, 2, 3] endpoint = env.endpoints.create("test_replace_safekeeper") - endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.active_safekeepers = [1, 2, 3] endpoint.start() # learn neon timeline from compute @@ -1072,9 +1068,8 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): log.info("Recreate postgres to replace failed sk1 with new sk4") endpoint.stop_and_destroy().create("test_replace_safekeeper") - active_safekeepers = [2, 3, 4] env.safekeepers[3].start() - endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.active_safekeepers = [2, 3, 4] endpoint.start() execute_payload(endpoint) @@ -1293,9 +1288,8 @@ def test_pull_timeline(neon_env_builder: NeonEnvBuilder): log.info("Use only first 3 safekeepers") env.safekeepers[3].stop() - active_safekeepers = [1, 2, 3] endpoint = env.endpoints.create("test_pull_timeline") - endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.active_safekeepers = [1, 2, 3] endpoint.start() # learn neon timeline from compute @@ -1332,10 +1326,8 @@ def test_pull_timeline(neon_env_builder: NeonEnvBuilder): show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Restarting compute with 
new config to verify that it works") - active_safekeepers = [1, 3, 4] - endpoint.stop_and_destroy().create("test_pull_timeline") - endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) + endpoint.active_safekeepers = [1, 3, 4] endpoint.start() execute_payload(endpoint) diff --git a/test_runner/regress/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py index 7debeed140..ce33975a0e 100644 --- a/test_runner/regress/test_wal_acceptor_async.py +++ b/test_runner/regress/test_wal_acceptor_async.py @@ -2,9 +2,11 @@ import asyncio import random import time from dataclasses import dataclass +from pathlib import Path from typing import List, Optional import asyncpg +import toml from fixtures.log_helper import getLogger from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper from fixtures.types import Lsn, TenantId, TimelineId @@ -251,7 +253,8 @@ def endpoint_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): endpoint = Endpoint( env, tenant_id=env.initial_tenant, - port=env.port_distributor.get_port(), + pg_port=env.port_distributor.get_port(), + http_port=env.port_distributor.get_port(), # In these tests compute has high probability of terminating on its own # before our stop() due to lost consensus leadership. check_stop_result=False, @@ -536,15 +539,20 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): # Check that pageserver can select safekeeper with largest commit_lsn # and switch if LSN is not updated for some time (NoWalTimeout). -async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint): - def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str: - # use ports 10, 11 and 12 to simulate unavailable safekeepers - return ",".join( - [ - f"localhost:{sk.port.pg if active else 10 + i}" - for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk)) - ] - ) +async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint, test_output_dir: Path): + def adjust_safekeepers(env: NeonEnv, active_sk: List[bool]): + # Change the pg ports of the inactive safekeepers in the config file to be + # invalid, to make them unavailable to the endpoint. We use + # ports 10, 11 and 12 to simulate unavailable safekeepers. 
+ config = toml.load(test_output_dir / "repo" / "config") + for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk)): + if active: + config["safekeepers"][i]["pg_port"] = env.safekeepers[i].port.pg + else: + config["safekeepers"][i]["pg_port"] = 10 + i + + with open(test_output_dir / "repo" / "config", "w") as f: + toml.dump(config, f) conn = await endpoint.connect_async() await conn.execute("CREATE TABLE t(key int primary key, value text)") @@ -565,7 +573,7 @@ async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint): it -= 1 continue - endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) + adjust_safekeepers(env, active_sk) log.info(f"Iteration {it}: {active_sk}") endpoint.start() @@ -579,7 +587,7 @@ async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint): await conn.close() endpoint.stop() - endpoint.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers))) + adjust_safekeepers(env, [True] * len(env.safekeepers)) endpoint.start() conn = await endpoint.connect_async() @@ -590,11 +598,11 @@ async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint): # do inserts while restarting postgres and messing with safekeeper addresses -def test_wal_lagging(neon_env_builder: NeonEnvBuilder): +def test_wal_lagging(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_lagging") endpoint = env.endpoints.create_start("test_wal_lagging") - asyncio.run(run_wal_lagging(env, endpoint)) + asyncio.run(run_wal_lagging(env, endpoint, test_output_dir)) diff --git a/test_runner/regress/test_wal_receiver.py b/test_runner/regress/test_wal_receiver.py index 8e4e154be1..515d47c079 100644 --- a/test_runner/regress/test_wal_receiver.py +++ b/test_runner/regress/test_wal_receiver.py @@ -77,7 +77,8 @@ def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuil try: trigger_wait_lsn_timeout(env, tenant_id) except Exception as e: - exception_string = str(e) + # Strip out the part before stdout, as it contains full command with the list of all safekeepers + exception_string = str(e).split("stdout", 1)[-1] assert expected_timeout_error in exception_string, "Should time out during waiting for WAL" for safekeeper in env.safekeepers: diff --git a/test_runner/regress/test_walredo_not_left_behind_on_detach.py b/test_runner/regress/test_walredo_not_left_behind_on_detach.py index 7d944bebb3..4a47898935 100644 --- a/test_runner/regress/test_walredo_not_left_behind_on_detach.py +++ b/test_runner/regress/test_walredo_not_left_behind_on_detach.py @@ -83,6 +83,9 @@ def test_walredo_not_left_behind_on_detach(neon_env_builder: NeonEnvBuilder): # XXX this is quite brittle as the lifecycle of the WAL redo process is an implementation detail assert_child_processes(pagserver_pid, wal_redo_present=True, defunct_present=False) + # Stop the compute before detaching, to avoid errors in the log. 
+ endpoint.stop() + last_error = None for i in range(3): try: diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 3d40f5dede..677b59f453 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -27,7 +27,6 @@ futures-core = { version = "0.3" } futures-executor = { version = "0.3" } futures-sink = { version = "0.3" } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } -hashbrown = { version = "0.12", features = ["raw"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -39,7 +38,7 @@ num-traits = { version = "0.2", features = ["i128"] } prost = { version = "0.11" } rand = { version = "0.8", features = ["small_rng"] } regex = { version = "1" } -regex-syntax = { version = "0.6" } +regex-syntax = { version = "0.7" } reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "multipart", "rustls-tls"] } ring = { version = "0.16", features = ["std"] } rustls = { version = "0.20", features = ["dangerous_configuration"] } @@ -62,7 +61,6 @@ url = { version = "2", features = ["serde"] } anyhow = { version = "1", features = ["backtrace"] } bytes = { version = "1", features = ["serde"] } either = { version = "1" } -hashbrown = { version = "0.12", features = ["raw"] } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -70,7 +68,7 @@ memchr = { version = "2" } nom = { version = "7" } prost = { version = "0.11" } regex = { version = "1" } -regex-syntax = { version = "0.6" } +regex-syntax = { version = "0.7" } serde = { version = "1", features = ["alloc", "derive"] } syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }