proxy: add benchmark for custom json logging vs official fmt logger

2026-01-25 22:30:38 +00:00 · 2025-07-15 19:44:41 +02:00
190 changed files with 2884 additions and 20549 deletions
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -181,8 +181,6 @@ runs:
          # Ref https://github.com/neondatabase/neon/issues/4540
          # cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
          cov_prefix=()
-          # Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files
-          export LLVM_PROFILE_FILE=/dev/null
        else
          cov_prefix=()
        fi
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -87,27 +87,22 @@ jobs:
    uses: ./.github/workflows/build-build-tools-image.yml
    secrets: inherit

-  lint-yamls:
-    needs: [ meta, check-permissions, build-build-tools-image ]
+  lint-openapi-spec:
+    runs-on: ubuntu-22.04
+    needs: [ meta, check-permissions ]
    # We do need to run this in `.*-rc-pr` because of hotfixes.
    if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
-    runs-on: [ self-hosted, small ]
-    container:
-      image: ${{ needs.build-build-tools-image.outputs.image }}
-      credentials:
-        username: ${{ github.actor }}
-        password: ${{ secrets.GITHUB_TOKEN }}
-      options: --init
-
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit
-
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - run: make -C compute manifest-schema-validation
+      - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
      - run: make lint-openapi-spec

  check-codestyle-python:
@@ -222,6 +217,28 @@ jobs:
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
    secrets: inherit

+  validate-compute-manifest:
+    runs-on: ubuntu-22.04
+    needs: [ meta, check-permissions ]
+    # We do need to run this in `.*-rc-pr` because of hotfixes.
+    if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set up Node.js
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        with:
+          node-version: '24'
+
+      - name: Validate manifest against schema
+        run: |
+          make -C compute manifest-schema-validation
+
  build-and-test-locally:
    needs: [ meta, build-build-tools-image ]
    # We do need to run this in `.*-rc-pr` because of hotfixes.
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,6 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
-pgxn/neon/communicator/communicator_bindings.h
 docker-compose/docker-compose-parallel.yml

 # Coverage
@@ -30,6 +29,3 @@ docker-compose/docker-compose-parallel.yml

 # pgindent typedef lists
 *.list
-
-# Node
-**/node_modules/
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -253,17 +253,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"

-[[package]]
-name = "atomic_enum"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.100",
-]
-
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -698,40 +687,13 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum"
-version = "0.7.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
-dependencies = [
- "async-trait",
- "axum-core 0.4.5",
- "bytes",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "itoa",
- "matchit 0.7.3",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper 1.0.1",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum"
 version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
 dependencies = [
- "axum-core 0.5.0",
+ "axum-core",
 "base64 0.22.1",
 "bytes",
 "form_urlencoded",
@@ -739,10 +701,10 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "itoa",
- "matchit 0.8.4",
+ "matchit",
 "memchr",
 "mime",
 "percent-encoding",
@@ -762,26 +724,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum-core"
-version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
-dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "rustversion",
- "sync_wrapper 1.0.1",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum-core"
 version = "0.5.0"
@@ -808,8 +750,8 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
 dependencies = [
- "axum 0.8.1",
- "axum-core 0.5.0",
+ "axum",
+ "axum-core",
 "bytes",
 "form_urlencoded",
 "futures-util",
@@ -1346,31 +1288,10 @@ dependencies = [

 [[package]]
 name = "communicator"
-version = "0.0.0"
+version = "0.1.0"
 dependencies = [
- "atomic_enum",
- "axum 0.8.1",
- "bytes",
 "cbindgen",
- "clashmap",
- "http 1.1.0",
- "libc",
- "metrics",
 "neon-shmem",
- "nix 0.30.1",
- "pageserver_api",
- "pageserver_client_grpc",
- "pageserver_page_api",
- "prometheus",
- "prost 0.13.5",
- "thiserror 1.0.69",
- "tokio",
- "tokio-pipe",
- "tonic 0.12.3",
- "tracing",
- "tracing-subscriber",
- "uring-common",
- "utils",
 "workspace_hack",
 ]

@@ -1400,7 +1321,7 @@ dependencies = [
 "aws-sdk-kms",
 "aws-sdk-s3",
 "aws-smithy-types",
- "axum 0.8.1",
+ "axum",
 "axum-extra",
 "base64 0.22.1",
 "bytes",
@@ -1705,9 +1626,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.21"
+version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"

 [[package]]
 name = "crossterm"
@@ -1951,7 +1872,6 @@ dependencies = [
 "diesel_derives",
 "itoa",
 "serde_json",
- "uuid",
 ]

 [[package]]
@@ -2162,7 +2082,7 @@ name = "endpoint_storage"
 version = "0.0.1"
 dependencies = [
 "anyhow",
- "axum 0.8.1",
+ "axum",
 "axum-extra",
 "camino",
 "camino-tempfile",
@@ -2443,7 +2363,7 @@ dependencies = [
 "futures-core",
 "futures-sink",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "pin-project",
 "rand 0.8.5",
@@ -2613,18 +2533,6 @@ dependencies = [
 "wasm-bindgen",
 ]

-[[package]]
-name = "getrandom"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
-dependencies = [
- "cfg-if",
- "libc",
- "r-efi",
- "wasi 0.14.2+wasi-0.2.4",
-]
-
 [[package]]
 name = "gettid"
 version = "0.1.3"
@@ -3014,9 +2922,9 @@ dependencies = [

 [[package]]
 name = "httparse"
-version = "1.10.1"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"

 [[package]]
 name = "httpdate"
@@ -3066,9 +2974,9 @@ dependencies = [

 [[package]]
 name = "hyper"
-version = "1.6.0"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
+checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
 dependencies = [
 "bytes",
 "futures-channel",
@@ -3108,7 +3016,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
 dependencies = [
 "futures-util",
 "http 1.1.0",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "rustls 0.22.4",
 "rustls-pki-types",
@@ -3123,7 +3031,7 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
 dependencies = [
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "pin-project-lite",
 "tokio",
@@ -3132,21 +3040,20 @@ dependencies = [

 [[package]]
 name = "hyper-util"
-version = "0.1.14"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb"
+checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
 dependencies = [
 "bytes",
 "futures-channel",
- "futures-core",
 "futures-util",
 "http 1.1.0",
 "http-body 1.0.0",
- "hyper 1.6.0",
- "libc",
+ "hyper 1.4.1",
 "pin-project-lite",
 "socket2",
 "tokio",
+ "tower 0.4.13",
 "tower-service",
 "tracing",
 ]
@@ -3699,9 +3606,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"

 [[package]]
 name = "lock_api"
-version = "0.4.13"
+version = "0.4.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
+checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
 dependencies = [
 "autocfg",
 "scopeguard",
@@ -3744,12 +3651,6 @@ dependencies = [
 "regex-automata 0.1.10",
 ]

-[[package]]
-name = "matchit"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
-
 [[package]]
 name = "matchit"
 version = "0.8.4"
@@ -3857,7 +3758,7 @@ dependencies = [
 "procfs",
 "prometheus",
 "rand 0.8.5",
- "rand_distr 0.4.3",
+ "rand_distr",
 "twox-hash",
 ]

@@ -3945,28 +3846,12 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
 name = "neon-shmem"
 version = "0.1.0"
 dependencies = [
- "libc",
- "lock_api",
 "nix 0.30.1",
- "rand 0.9.1",
- "rand_distr 0.5.1",
- "rustc-hash 2.1.1",
 "tempfile",
 "thiserror 1.0.69",
 "workspace_hack",
 ]

-[[package]]
-name = "neonart"
-version = "0.1.0"
-dependencies = [
- "crossbeam-utils",
- "rand 0.9.1",
- "rand_distr 0.5.1",
- "spin",
- "tracing",
-]
-
 [[package]]
 name = "never-say-never"
 version = "6.6.666"
@@ -4400,16 +4285,13 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-trait",
- "axum 0.8.1",
 "bytes",
 "camino",
 "clap",
 "futures",
 "hdrhistogram",
- "http 1.1.0",
 "humantime",
 "humantime-serde",
- "metrics",
 "pageserver_api",
 "pageserver_client",
 "pageserver_client_grpc",
@@ -4499,7 +4381,6 @@ dependencies = [
 "pageserver_client",
 "pageserver_compaction",
 "pageserver_page_api",
- "peekable",
 "pem",
 "pin-project-lite",
 "postgres-protocol",
@@ -4513,7 +4394,6 @@ dependencies = [
 "pprof",
 "pq_proto",
 "procfs",
- "prost 0.13.5",
 "rand 0.8.5",
 "range-set-blaze",
 "regex",
@@ -4808,15 +4688,6 @@ dependencies = [
 "sha2",
 ]

-[[package]]
-name = "peekable"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
-dependencies = [
- "smallvec",
-]
-
 [[package]]
 name = "pem"
 version = "3.0.3"
@@ -5432,6 +5303,7 @@ dependencies = [
 "clashmap",
 "compute_api",
 "consumption_metrics",
+ "criterion",
 "ecdsa 0.16.9",
 "ed25519-dalek",
 "env_logger",
@@ -5451,7 +5323,7 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "hyper 0.14.30",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "indexmap 2.9.0",
 "ipnet",
@@ -5476,7 +5348,7 @@ dependencies = [
 "postgres_backend",
 "pq_proto",
 "rand 0.8.5",
- "rand_distr 0.4.3",
+ "rand_distr",
 "rcgen",
 "redis",
 "regex",
@@ -5487,7 +5359,7 @@ dependencies = [
 "reqwest-tracing",
 "rsa",
 "rstest",
- "rustc-hash 2.1.1",
+ "rustc-hash 1.1.0",
 "rustls 0.23.27",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
@@ -5580,12 +5452,6 @@ dependencies = [
 "proc-macro2",
 ]

-[[package]]
-name = "r-efi"
-version = "5.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
-
 [[package]]
 name = "rand"
 version = "0.7.3"
@@ -5610,16 +5476,6 @@ dependencies = [
 "rand_core 0.6.4",
 ]

-[[package]]
-name = "rand"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
-dependencies = [
- "rand_chacha 0.9.0",
- "rand_core 0.9.3",
-]
-
 [[package]]
 name = "rand_chacha"
 version = "0.2.2"
@@ -5640,16 +5496,6 @@ dependencies = [
 "rand_core 0.6.4",
 ]

-[[package]]
-name = "rand_chacha"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.9.3",
-]
-
 [[package]]
 name = "rand_core"
 version = "0.5.1"
@@ -5668,15 +5514,6 @@ dependencies = [
 "getrandom 0.2.11",
 ]

-[[package]]
-name = "rand_core"
-version = "0.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
-dependencies = [
- "getrandom 0.3.3",
-]
-
 [[package]]
 name = "rand_distr"
 version = "0.4.3"
@@ -5687,16 +5524,6 @@ dependencies = [
 "rand 0.8.5",
 ]

-[[package]]
-name = "rand_distr"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
-dependencies = [
- "num-traits",
- "rand 0.9.1",
-]
-
 [[package]]
 name = "rand_hc"
 version = "0.2.0"
@@ -5895,7 +5722,7 @@ dependencies = [
 "http-body-util",
 "http-types",
 "humantime-serde",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "itertools 0.10.5",
 "metrics",
 "once_cell",
@@ -5935,7 +5762,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-rustls 0.26.0",
 "hyper-util",
 "ipnet",
@@ -5992,7 +5819,7 @@ dependencies = [
 "futures",
 "getrandom 0.2.11",
 "http 1.1.0",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "parking_lot 0.11.2",
 "reqwest",
 "reqwest-middleware",
@@ -6013,7 +5840,7 @@ dependencies = [
 "async-trait",
 "getrandom 0.2.11",
 "http 1.1.0",
- "matchit 0.8.4",
+ "matchit",
 "opentelemetry",
 "reqwest",
 "reqwest-middleware",
@@ -6378,7 +6205,6 @@ dependencies = [
 "itertools 0.10.5",
 "jsonwebtoken",
 "metrics",
- "nix 0.30.1",
 "once_cell",
 "pageserver_api",
 "parking_lot 0.12.1",
@@ -6963,12 +6789,12 @@ dependencies = [

 [[package]]
 name = "socket2"
-version = "0.5.10"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
 dependencies = [
 "libc",
- "windows-sys 0.52.0",
+ "windows-sys 0.48.0",
 ]

 [[package]]
@@ -6976,9 +6802,6 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]

 [[package]]
 name = "spinning_top"
@@ -7037,7 +6860,7 @@ dependencies = [
 "http-body-util",
 "http-utils",
 "humantime",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "metrics",
 "once_cell",
@@ -7110,7 +6933,6 @@ dependencies = [
 "tokio-util",
 "tracing",
 "utils",
- "uuid",
 "workspace_hack",
 ]

@@ -7648,16 +7470,6 @@ dependencies = [
 "syn 2.0.100",
 ]

-[[package]]
-name = "tokio-pipe"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
-dependencies = [
- "libc",
- "tokio",
-]
-
 [[package]]
 name = "tokio-postgres"
 version = "0.7.10"
@@ -7853,25 +7665,16 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
- "async-stream",
 "async-trait",
- "axum 0.7.9",
 "base64 0.22.1",
 "bytes",
- "h2 0.4.4",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
- "hyper-timeout",
- "hyper-util",
 "percent-encoding",
 "pin-project",
 "prost 0.13.5",
- "socket2",
- "tokio",
 "tokio-stream",
- "tower 0.4.13",
 "tower-layer",
 "tower-service",
 "tracing",
@@ -7884,7 +7687,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
 dependencies = [
 "async-trait",
- "axum 0.8.1",
+ "axum",
 "base64 0.22.1",
 "bytes",
 "flate2",
@@ -7892,7 +7695,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-timeout",
 "hyper-util",
 "percent-encoding",
@@ -7945,16 +7748,11 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
 dependencies = [
 "futures-core",
 "futures-util",
- "indexmap 1.9.3",
 "pin-project",
 "pin-project-lite",
- "rand 0.8.5",
- "slab",
 "tokio",
- "tokio-util",
 "tower-layer",
 "tower-service",
- "tracing",
 ]

 [[package]]
@@ -8408,7 +8206,6 @@ dependencies = [
 "tracing-error",
 "tracing-subscriber",
 "tracing-utils",
- "uuid",
 "walkdir",
 ]

@@ -8439,7 +8236,7 @@ name = "vm_monitor"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "axum 0.8.1",
+ "axum",
 "cgroups-rs",
 "clap",
 "futures",
@@ -8551,15 +8348,6 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

-[[package]]
-name = "wasi"
-version = "0.14.2+wasi-0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
-dependencies = [
- "wit-bindgen-rt",
-]
-
 [[package]]
 name = "wasite"
 version = "0.1.0"
@@ -8917,15 +8705,6 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

-[[package]]
-name = "wit-bindgen-rt"
-version = "0.39.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
-dependencies = [
- "bitflags 2.8.0",
-]
-
 [[package]]
 name = "workspace_hack"
 version = "0.1.0"
@@ -8933,8 +8712,8 @@ dependencies = [
 "ahash",
 "anstream",
 "anyhow",
- "axum 0.8.1",
- "axum-core 0.5.0",
+ "axum",
+ "axum-core",
 "base64 0.21.7",
 "base64ct",
 "bytes",
@@ -8968,7 +8747,7 @@ dependencies = [
 "hex",
 "hmac",
 "hyper 0.14.30",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "indexmap 2.9.0",
 "itertools 0.12.1",
@@ -9028,6 +8807,7 @@ dependencies = [
 "tracing-log",
 "tracing-subscriber",
 "url",
+ "uuid",
 "zeroize",
 "zstd",
 "zstd-safe",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,7 +35,6 @@ members = [
    "libs/pq_proto",
    "libs/tenant_size_model",
    "libs/metrics",
-    "libs/neonart",
    "libs/postgres_connection",
    "libs/remote_storage",
    "libs/tracing-utils",
@@ -93,7 +92,6 @@ clap = { version = "4.0", features = ["derive", "env"] }
 clashmap = { version = "1.0", features = ["raw-api"] }
 comfy-table = "7.1"
 const_format = "0.2"
-crossbeam-utils = "0.8.21"
 crc32c = "0.6"
 diatomic-waker = { version = "0.2.3" }
 either = "1.8"
@@ -132,7 +130,6 @@ jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] }
 jsonwebtoken = "9"
 lasso = "0.7"
 libc = "0.2"
-lock_api = "0.4.13"
 md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
@@ -153,7 +150,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pem = "3.0.3"
-peekable = "0.3.0"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
@@ -169,7 +165,7 @@ reqwest-middleware = "0.4"
 reqwest-retry = "0.7"
 routerify = "3"
 rpds = "0.13"
-rustc-hash = "2.1.1"
+rustc-hash = "1.1.0"
 rustls = { version = "0.23.16", default-features = false }
 rustls-pemfile = "2"
 rustls-pki-types = "1.11"
@@ -190,7 +186,6 @@ smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
 spki = "0.7.3"
-spin = "0.9.8"
 strum = "0.26"
 strum_macros = "0.26"
 "subtle"  = "2.5.0"
@@ -202,6 +197,7 @@ thiserror = "1.0"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
+tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
@@ -243,9 +239,6 @@ x509-cert = { version = "0.2.5" }
 env_logger = "0.11"
 log = "0.4"

-tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
--- a/14
+++ b/14
@@ -2,7 +2,7 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

 # Where to install Postgres, default is ./pg_install, maybe useful for package
 # managers.
-POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install
+POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/

 # Supported PostgreSQL versions
 POSTGRES_VERSIONS = v17 v16 v15 v14
@@ -14,7 +14,7 @@ POSTGRES_VERSIONS = v17 v16 v15 v14
 # it is derived from BUILD_TYPE.

 # All intermediate build artifacts are stored here.
-BUILD_DIR := $(ROOT_PROJECT_DIR)/build
+BUILD_DIR := build

 ICU_PREFIX_DIR := /usr/local/icu

@@ -212,7 +212,7 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
 		INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
 		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-		-C $(BUILD_DIR)/pgxn-v17/neon \
+		-C $(BUILD_DIR)/neon-v17 \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent


@@ -220,15 +220,11 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 setup-pre-commit-hook:
 	ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit

-build-tools/node_modules: build-tools/package.json
-	cd build-tools && $(if $(CI),npm ci,npm install)
-	touch build-tools/node_modules
-
 .PHONY: lint-openapi-spec
-lint-openapi-spec: build-tools/node_modules
+lint-openapi-spec:
 	# operation-2xx-response: pageserver timeline delete returns 404 on success
 	find . -iname "openapi_spec.y*ml" -exec\
-		npx --prefix=build-tools/ redocly\
+		docker run --rm -v ${PWD}:/spec ghcr.io/redocly/cli:1.34.4\
 			--skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\
 			--skip-rule=no-server-example.com --skip-rule=operation-2xx-response\
 			lint {} \+
--- a/build-tools/Dockerfile
+++ b/build-tools/Dockerfile
@@ -188,12 +188,6 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
    && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

-# Install node
-ENV NODE_VERSION=24
-RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
-    && apt install -y nodejs \
-    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
 # Install docker
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
@@ -317,14 +311,14 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools rustfmt clippy && \
-    cargo install rustfilt      --locked --version ${RUSTFILT_VERSION} && \
-    cargo install cargo-hakari  --locked --version ${CARGO_HAKARI_VERSION} && \
-    cargo install cargo-deny    --locked --version ${CARGO_DENY_VERSION} && \
-    cargo install cargo-hack    --locked --version ${CARGO_HACK_VERSION} && \
-    cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
-    cargo install cargo-chef    --locked --version ${CARGO_CHEF_VERSION} && \
-    cargo install diesel_cli    --locked --version ${CARGO_DIESEL_CLI_VERSION} \
-                                --features postgres-bundled --no-default-features && \
+    cargo install rustfilt            --version ${RUSTFILT_VERSION} --locked && \
+    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} --locked && \
+    cargo install cargo-deny          --version ${CARGO_DENY_VERSION} --locked && \
+    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} --locked && \
+    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} --locked && \
+    cargo install cargo-chef          --version ${CARGO_CHEF_VERSION} --locked && \
+    cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} --locked \
+                                      --features postgres-bundled --no-default-features && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git

--- a/build-tools/package-lock.json
+++ b/build-tools/package-lock.json
--- a/build-tools/package.json
+++ b/build-tools/package.json
@@ -1,8 +0,0 @@
-{
-  "name": "build-tools",
-  "private": true,
-  "devDependencies": {
-    "@redocly/cli": "1.34.4",
-    "@sourcemeta/jsonschema": "10.0.0"
-  }
-}
--- a/compute/Makefile
+++ b/compute/Makefile
@@ -50,9 +50,9 @@ jsonnetfmt-format:
 	jsonnetfmt --in-place $(jsonnet_files)

 .PHONY: manifest-schema-validation
-manifest-schema-validation: ../build-tools/node_modules
-	npx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
+manifest-schema-validation: node_modules
+	node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml

-../build-tools/node_modules: ../build-tools/package.json
-	cd ../build-tools && $(if $(CI),npm ci,npm install)
-	touch ../build-tools/node_modules
+node_modules: package.json
+	npm install
+	touch node_modules
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -170,29 +170,7 @@ RUN case $DEBIAN_VERSION in \
 FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION:?} postgres
-COPY compute/patches/postgres_fdw.patch .
-COPY compute/patches/pg_stat_statements_pg14-16.patch .
-COPY compute/patches/pg_stat_statements_pg17.patch .
 RUN cd postgres && \
-    # Apply patches to some contrib extensions
-    # For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.
-    # In vanilla Postgres this function is limited to Postgres role superuser.
-    # In Neon we have {privileged_role_name} role that is not a superuser but replaces superuser in some cases.
-    # We could add the additional grant statements to the Postgres repository but it would be hard to maintain,
-    # whenever we need to pick up a new Postgres version and we want to limit the changes in our Postgres fork,
-    # so we do it here.
-    case "${PG_VERSION}" in \
-    "v14" | "v15" | "v16") \
-    patch -p1 < /pg_stat_statements_pg14-16.patch; \
-    ;; \
-    "v17") \
-    patch -p1 < /pg_stat_statements_pg17.patch; \
-    ;; \
-    *) \
-    # To do not forget to migrate patches to the next major version
-    echo "No contrib patches for this PostgreSQL version" && exit 1;; \
-    esac && \
-    patch -p1 < /postgres_fdw.patch && \
    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
    --with-icu --with-libxml --with-libxslt --with-lz4" && \
    if [ "${PG_VERSION:?}" != "v14" ]; then \
@@ -206,6 +184,8 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
+    file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
+    echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
@@ -215,7 +195,34 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
+    # We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
+    # In vanilla postgres this function is limited to Postgres role superuser.
+    # In neon we have neon_superuser role that is not a superuser but replaces superuser in some cases.
+    # We could add the additional grant statements to the postgres repository but it would be hard to maintain,
+    # whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
+    # so we do it here.
+    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
+        filename=$(basename "$file"); \
+        # Note that there are no downgrade scripts for pg_stat_statements, so we \
+        # don't have to modify any downgrade paths or (much) older versions: we only \
+        # have to make sure every creation of the pg_stat_statements_reset function \
+        # also adds execute permissions to the neon_superuser.
+        case $filename in \
+          pg_stat_statements--1.4.sql) \
+            # pg_stat_statements_reset is first created with 1.4
+            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
+            ;; \
+          pg_stat_statements--1.6--1.7.sql) \
+            # Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
+            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
+            ;; \
+          pg_stat_statements--1.10--1.11.sql) \
+            # Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
+            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
+            ;; \
+        esac; \
+    done;

 # Set PATH for all the subsequent build steps
 ENV PATH="/usr/local/pgsql/bin:$PATH"
@@ -1517,7 +1524,7 @@ WORKDIR /ext-src
 COPY compute/patches/pg_duckdb_v031.patch .
 COPY compute/patches/duckdb_v120.patch .
 # pg_duckdb build requires source dir to be a git repo to get submodules
-# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:
+# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
 # - extension management function duckdb.install_extension()
 # - access to duckdb.extensions table and its sequence
 RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
--- a/compute/package.json
+++ b/compute/package.json
@@ -0,0 +1,7 @@
+{
+  "name": "neon-compute",
+  "private": true,
+  "dependencies": {
+    "@sourcemeta/jsonschema": "9.3.4"
+  }
+}
--- a/compute/patches/anon_v2.patch
+++ b/compute/patches/anon_v2.patch
@@ -1,26 +1,22 @@
 diff --git a/sql/anon.sql b/sql/anon.sql
-index 0cdc769..5eab1d6 100644
+index 0cdc769..b450327 100644
 --- a/sql/anon.sql
 +++ b/sql/anon.sql
-@@ -1141,3 +1141,19 @@ $$
+@@ -1141,3 +1141,15 @@ $$
 -- TODO : https://en.wikipedia.org/wiki/L-diversity
 
 -- TODO : https://en.wikipedia.org/wiki/T-closeness
 +
 +-- NEON Patches
 +
+GRANT ALL ON SCHEMA anon to neon_superuser;
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
+
 +DO $$
-+DECLARE
-+  privileged_role_name text;
 +BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);
-+  EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);
-+
-+  IF current_setting('server_version_num')::int >= 150000 THEN
-+    EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);
-+  END IF;
+    IF current_setting('server_version_num')::int >= 150000 THEN
+        GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
+    END IF;
 +END $$;
 diff --git a/sql/init.sql b/sql/init.sql
 index 7da6553..9b6164b 100644
--- a/compute/patches/pg_duckdb_v031.patch
+++ b/compute/patches/pg_duckdb_v031.patch
@@ -21,21 +21,13 @@ index 3235cc8..6b892bc 100644
 include Makefile.global
 
 diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
-index d777d76..3b54396 100644
+index d777d76..af60106 100644
 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql
 +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
-@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
+@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
 GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);
-+  EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);
-+  EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);
-+END $$;
+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
--- a/compute/patches/pg_stat_statements_pg14-16.patch
+++ b/compute/patches/pg_stat_statements_pg14-16.patch
@@ -1,34 +0,0 @@
-diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
-index 58cdf600fce..8be57a996f6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
-+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
-@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
- 
- -- Don't want this to be available to non-superusers.
- REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
-+END $$;
-diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
-index 6fc3fed4c93..256345a8f79 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
-+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
-@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
- 
- -- Don't want this to be available to non-superusers.
- REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
-+END $$;
--- a/compute/patches/pg_stat_statements_pg17.patch
+++ b/compute/patches/pg_stat_statements_pg17.patch
@@ -1,52 +0,0 @@
-diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
-index 0bb2c397711..32764db1d8b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
-+++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
-@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
- 
- -- Don't want this to be available to non-superusers.
- REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);
-+END $$;
-\ No newline at end of file
-diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
-index 58cdf600fce..8be57a996f6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
-+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
-@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
- 
- -- Don't want this to be available to non-superusers.
- REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
-+END $$;
-diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
-index 6fc3fed4c93..256345a8f79 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
-+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
-@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
- 
- -- Don't want this to be available to non-superusers.
- REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
-+END $$;
--- a/compute/patches/postgres_fdw.patch
+++ b/compute/patches/postgres_fdw.patch
@@ -1,17 +0,0 @@
-diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql
-index a0f0fc1bf45..ee077f2eea6 100644
--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql
-+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql
-@@ -16,3 +16,12 @@ LANGUAGE C STRICT;
- CREATE FOREIGN DATA WRAPPER postgres_fdw
-   HANDLER postgres_fdw_handler
-   VALIDATOR postgres_fdw_validator;
-+
-+DO $$
-+DECLARE
-+  privileged_role_name text;
-+BEGIN
-+  privileged_role_name := current_setting('neon.privileged_role_name');
-+
-+  EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);
-+END $$;
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -87,14 +87,6 @@ struct Cli {
    #[arg(short = 'C', long, value_name = "DATABASE_URL")]
    pub connstr: String,

-    #[arg(
-        long,
-        default_value = "neon_superuser",
-        value_name = "PRIVILEGED_ROLE_NAME",
-        value_parser = Self::parse_privileged_role_name
-    )]
-    pub privileged_role_name: String,
-
    #[cfg(target_os = "linux")]
    #[arg(long, default_value = "neon-postgres")]
    pub cgroup: String,
@@ -157,21 +149,6 @@ impl Cli {

        Ok(url)
    }
-
-    /// For simplicity, we do not escape `privileged_role_name` anywhere in the code.
-    /// Since it's a system role, which we fully control, that's fine. Still, let's
-    /// validate it to avoid any surprises.
-    fn parse_privileged_role_name(value: &str) -> Result<String> {
-        use regex::Regex;
-
-        let pattern = Regex::new(r"^[a-z_]+$").unwrap();
-
-        if !pattern.is_match(value) {
-            bail!("--privileged-role-name can only contain lowercase letters and underscores")
-        }
-
-        Ok(value.to_string())
-    }
 }

 fn main() -> Result<()> {
@@ -201,7 +178,6 @@ fn main() -> Result<()> {
        ComputeNodeParams {
            compute_id: cli.compute_id,
            connstr,
-            privileged_role_name: cli.privileged_role_name.clone(),
            pgdata: cli.pgdata.clone(),
            pgbin: cli.pgbin.clone(),
            pgversion: get_pg_version_string(&cli.pgbin),
@@ -351,49 +327,4 @@ mod test {
        ])
        .expect_err("URL parameters are not allowed");
    }
-
-    #[test]
-    fn verify_privileged_role_name() {
-        // Valid name
-        let cli = Cli::parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--privileged-role-name",
-            "my_superuser",
-        ]);
-        assert_eq!(cli.privileged_role_name, "my_superuser");
-
-        // Invalid names
-        Cli::try_parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--privileged-role-name",
-            "NeonSuperuser",
-        ])
-        .expect_err("uppercase letters are not allowed");
-
-        Cli::try_parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--privileged-role-name",
-            "$'neon_superuser",
-        ])
-        .expect_err("special characters are not allowed");
-
-        Cli::try_parse_from([
-            "compute_ctl",
-            "--pgdata=test",
-            "--connstr=test",
-            "--compute-id=test",
-            "--privileged-role-name",
-            "",
-        ])
-        .expect_err("empty name is not allowed");
-    }
 }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -6,8 +6,7 @@ use compute_api::responses::{
    LfcPrewarmState, PromoteState, TlsConfig,
 };
 use compute_api::spec::{
-    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverConnectionInfo,
-    PageserverProtocol, PageserverShardConnectionInfo, PageserverShardInfo, PgIdent,
+    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
 };
 use futures::StreamExt;
 use futures::future::join_all;
@@ -75,20 +74,12 @@ const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;

 /// Static configuration params that don't change after startup. These mostly
 /// come from the CLI args, or are derived from them.
-#[derive(Clone, Debug)]
 pub struct ComputeNodeParams {
    /// The ID of the compute
    pub compute_id: String,
-
-    /// Url type maintains proper escaping
+    // Url type maintains proper escaping
    pub connstr: url::Url,

-    /// The name of the 'weak' superuser role, which we give to the users.
-    /// It follows the allow list approach, i.e., we take a standard role
-    /// and grant it extra permissions with explicit GRANTs here and there,
-    /// and core patches.
-    pub privileged_role_name: String,
-
    pub resize_swap_on_bind: bool,
    pub set_disk_quota_for_fs: Option<String>,

@@ -234,7 +225,7 @@ pub struct ParsedSpec {
    pub spec: ComputeSpec,
    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
-    pub pageserver_conninfo: PageserverConnectionInfo,
+    pub pageserver_connstr: String,
    pub safekeeper_connstrings: Vec<String>,
    pub storage_auth_token: Option<String>,
    /// k8s dns name and port
@@ -281,114 +272,26 @@ impl ParsedSpec {
    }
 }

-/// Extract PageserverConnectionInfo from a comma-separated list of libpq connection strings.
-///
-/// This is used for backwards-compatilibity, to parse the legacye `pageserver_connstr`
-/// field in the compute spec, or the 'neon.pageserver_connstring' GUC. Nowadays, the
-/// 'pageserver_connection_info' field should be used instead.
-fn extract_pageserver_conninfo_from_connstr(
-    connstr: &str,
-    stripe_size: Option<u32>,
-) -> Result<PageserverConnectionInfo, anyhow::Error> {
-    let shard_infos: Vec<_> = connstr
-        .split(',')
-        .map(|connstr| PageserverShardInfo {
-            pageservers: vec![PageserverShardConnectionInfo {
-                id: None,
-                libpq_url: Some(connstr.to_string()),
-                grpc_url: None,
-            }],
-        })
-        .collect();
-
-    match shard_infos.len() {
-        0 => anyhow::bail!("empty connection string"),
-        1 => {
-            // We assume that if there's only connection string, it means "unsharded",
-            // rather than a sharded system with just a single shard. The latter is
-            // possible in principle, but we never do it.
-            let shard_count = ShardCount::unsharded();
-            let only_shard = shard_infos.first().unwrap().clone();
-            let shards = vec![(ShardIndex::unsharded(), only_shard)];
-            Ok(PageserverConnectionInfo {
-                shard_count,
-                stripe_size: None,
-                shards: shards.into_iter().collect(),
-                prefer_protocol: PageserverProtocol::Libpq,
-            })
-        }
-        n => {
-            if stripe_size.is_none() {
-                anyhow::bail!("{n} shards but no stripe_size");
-            }
-            let shard_count = ShardCount(n.try_into()?);
-            let shards = shard_infos
-                .into_iter()
-                .enumerate()
-                .map(|(idx, shard_info)| {
-                    (
-                        ShardIndex {
-                            shard_count,
-                            shard_number: ShardNumber(
-                                idx.try_into().expect("shard number fits in u8"),
-                            ),
-                        },
-                        shard_info,
-                    )
-                })
-                .collect();
-            Ok(PageserverConnectionInfo {
-                shard_count,
-                stripe_size,
-                shards,
-                prefer_protocol: PageserverProtocol::Libpq,
-            })
-        }
-    }
-}
-
 impl TryFrom<ComputeSpec> for ParsedSpec {
-    type Error = anyhow::Error;
-    fn try_from(spec: ComputeSpec) -> Result<Self, anyhow::Error> {
+    type Error = String;
+    fn try_from(spec: ComputeSpec) -> Result<Self, String> {
        // Extract the options from the spec file that are needed to connect to
        // the storage system.
        //
-        // In compute specs generated by old control plane versions, the spec file might
-        // be missing the `pageserver_connection_info` field. In that case, we need to dig
-        // the pageserver connection info from the `pageserver_connstr` field instead, or
-        // if that's missing too, from the GUC in the cluster.settings field.
-        let mut pageserver_conninfo = spec.pageserver_connection_info.clone();
-        if pageserver_conninfo.is_none() {
-            if let Some(pageserver_connstr_field) = &spec.pageserver_connstring {
-                pageserver_conninfo = Some(extract_pageserver_conninfo_from_connstr(
-                    pageserver_connstr_field,
-                    spec.shard_stripe_size,
-                )?);
-            }
-        }
-        if pageserver_conninfo.is_none() {
-            if let Some(guc) = spec.cluster.settings.find("neon.pageserver_connstring") {
-                let stripe_size = if let Some(guc) = spec.cluster.settings.find("neon.stripe_size")
-                {
-                    Some(u32::from_str(&guc)?)
-                } else {
-                    None
-                };
-                pageserver_conninfo =
-                    Some(extract_pageserver_conninfo_from_connstr(&guc, stripe_size)?);
-            }
-        }
-        let pageserver_conninfo = pageserver_conninfo.ok_or(anyhow::anyhow!(
-            "pageserver connection information should be provided"
-        ))?;
-
-        // Similarly for safekeeper connection strings
+        // For backwards-compatibility, the top-level fields in the spec file
+        // may be empty. In that case, we need to dig them from the GUCs in the
+        // cluster.settings field.
+        let pageserver_connstr = spec
+            .pageserver_connstring
+            .clone()
+            .or_else(|| spec.cluster.settings.find("neon.pageserver_connstring"))
+            .ok_or("pageserver connstr should be provided")?;
        let safekeeper_connstrings = if spec.safekeeper_connstrings.is_empty() {
            if matches!(spec.mode, ComputeMode::Primary) {
                spec.cluster
                    .settings
                    .find("neon.safekeepers")
-                    .ok_or(anyhow::anyhow!("safekeeper connstrings should be provided"))?
+                    .ok_or("safekeeper connstrings should be provided")?
                    .split(',')
                    .map(|str| str.to_string())
                    .collect()
@@ -403,22 +306,22 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
        let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {
            tenant_id
        } else {
-            let guc = spec
-                .cluster
+            spec.cluster
                .settings
                .find("neon.tenant_id")
-                .ok_or(anyhow::anyhow!("tenant id should be provided"))?;
-            TenantId::from_str(&guc).context("invalid tenant id")?
+                .ok_or("tenant id should be provided")
+                .map(|s| TenantId::from_str(&s))?
+                .or(Err("invalid tenant id"))?
        };
        let timeline_id: TimelineId = if let Some(timeline_id) = spec.timeline_id {
            timeline_id
        } else {
-            let guc = spec
-                .cluster
+            spec.cluster
                .settings
                .find("neon.timeline_id")
-                .ok_or(anyhow::anyhow!("timeline id should be provided"))?;
-            TimelineId::from_str(&guc).context(anyhow::anyhow!("invalid timeline id"))?
+                .ok_or("timeline id should be provided")
+                .map(|s| TimelineId::from_str(&s))?
+                .or(Err("invalid timeline id"))?
        };

        let endpoint_storage_addr: Option<String> = spec
@@ -432,7 +335,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {

        let res = ParsedSpec {
            spec,
-            pageserver_conninfo,
+            pageserver_connstr,
            safekeeper_connstrings,
            storage_auth_token,
            tenant_id,
@@ -442,7 +345,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
        };

        // Now check validity of the parsed specification
-        res.validate().map_err(anyhow::Error::msg)?;
+        res.validate()?;
        Ok(res)
    }
 }
@@ -1129,10 +1032,12 @@ impl ComputeNode {
    fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");

+        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
        let started = Instant::now();
-        let (connected, size) = match spec.pageserver_conninfo.prefer_protocol {
-            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
+
+        let (connected, size) = match PageserverProtocol::from_connstring(shard0_connstr)? {
            PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
+            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
        };

        self.fix_zenith_signal_neon_signal()?;
@@ -1170,32 +1075,23 @@ impl ComputeNode {
    /// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
    /// the connection was established, and the (compressed) size of the basebackup.
    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
-        let shard0_index = ShardIndex {
-            shard_number: ShardNumber(0),
-            shard_count: spec.pageserver_conninfo.shard_count,
+        let shard0_connstr = spec
+            .pageserver_connstr
+            .split(',')
+            .next()
+            .unwrap()
+            .to_string();
+        let shard_index = match spec.pageserver_connstr.split(',').count() as u8 {
+            0 | 1 => ShardIndex::unsharded(),
+            count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
        };
-        let shard0 = spec
-            .pageserver_conninfo
-            .shards
-            .get(&shard0_index)
-            .ok_or_else(|| {
-                anyhow::anyhow!("shard connection info missing for shard {}", shard0_index)
-            })?;
-        let pageserver = shard0
-            .pageservers
-            .first()
-            .expect("must have at least one pageserver");
-        let shard0_url = pageserver
-            .grpc_url
-            .clone()
-            .expect("no grpc_url for shard 0");

        let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
            let mut client = page_api::Client::connect(
-                shard0_url,
+                shard0_connstr,
                spec.tenant_id,
                spec.timeline_id,
-                shard0_index,
+                shard_index,
                spec.storage_auth_token.clone(),
                None, // NB: base backups use payload compression
            )
@@ -1227,26 +1123,8 @@ impl ComputeNode {
    /// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
    /// when the connection was established, and the (compressed) size of the basebackup.
    fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
-        let shard0_index = ShardIndex {
-            shard_number: ShardNumber(0),
-            shard_count: spec.pageserver_conninfo.shard_count,
-        };
-        let shard0 = spec
-            .pageserver_conninfo
-            .shards
-            .get(&shard0_index)
-            .ok_or_else(|| {
-                anyhow::anyhow!("shard connection info missing for shard {}", shard0_index)
-            })?;
-        let pageserver = shard0
-            .pageservers
-            .first()
-            .expect("must have at least one pageserver");
-        let shard0_connstr = pageserver
-            .libpq_url
-            .clone()
-            .expect("no libpq_url for shard 0");
-        let mut config = postgres::Config::from_str(&shard0_connstr)?;
+        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
+        let mut config = postgres::Config::from_str(shard0_connstr)?;

        // Use the storage auth token from the config file, if given.
        // Note: this overrides any password set in the connection string.
@@ -1332,7 +1210,10 @@ impl ComputeNode {
                    return result;
                }
                Err(ref e) if attempts < max_attempts => {
-                    warn!("Failed to get basebackup: {e:?} (attempt {attempts}/{max_attempts})");
+                    warn!(
+                        "Failed to get basebackup: {} (attempt {}/{})",
+                        e, attempts, max_attempts
+                    );
                    std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));
                    retry_period_ms *= 1.5;
                }
@@ -1508,7 +1389,6 @@ impl ComputeNode {
        self.create_pgdata()?;
        config::write_postgres_conf(
            pgdata_path,
-            &self.params,
            &pspec.spec,
            self.params.internal_http_port,
            tls_config,
@@ -1540,8 +1420,16 @@ impl ComputeNode {
            }
        };

-        self.get_basebackup(compute_state, lsn)
-            .with_context(|| format!("failed to get basebackup@{lsn}"))?;
+        info!(
+            "getting basebackup@{} from pageserver {}",
+            lsn, &pspec.pageserver_connstr
+        );
+        self.get_basebackup(compute_state, lsn).with_context(|| {
+            format!(
+                "failed to get basebackup@{} from pageserver {}",
+                lsn, &pspec.pageserver_connstr
+            )
+        })?;

        // Update pg_hba.conf received with basebackup.
        update_pg_hba(pgdata_path)?;
@@ -1849,7 +1737,6 @@ impl ComputeNode {
        }

        // Run migrations separately to not hold up cold starts
-        let params = self.params.clone();
        tokio::spawn(async move {
            let mut conf = conf.as_ref().clone();
            conf.application_name("compute_ctl:migrations");
@@ -1861,7 +1748,7 @@ impl ComputeNode {
                            eprintln!("connection error: {e}");
                        }
                    });
-                    if let Err(e) = handle_migrations(params, &mut client).await {
+                    if let Err(e) = handle_migrations(&mut client).await {
                        error!("Failed to run migrations: {}", e);
                    }
                }
@@ -1940,7 +1827,6 @@ impl ComputeNode {
        let pgdata_path = Path::new(&self.params.pgdata);
        config::write_postgres_conf(
            pgdata_path,
-            &self.params,
            &spec,
            self.params.internal_http_port,
            tls_config,
@@ -2484,22 +2370,22 @@ LIMIT 100",
    /// The operation will time out after a specified duration.
    pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
        let state = self.state.lock().unwrap();
-        let old_pageserver_conninfo = state
+        let old_pageserver_connstr = state
            .pspec
            .as_ref()
            .expect("spec must be set")
-            .pageserver_conninfo
+            .pageserver_connstr
            .clone();
        let mut unchanged = true;
        let _ = self
            .state_changed
            .wait_timeout_while(state, duration, |s| {
-                let pageserver_conninfo = &s
+                let pageserver_connstr = &s
                    .pspec
                    .as_ref()
                    .expect("spec must be set")
-                    .pageserver_conninfo;
-                unchanged = pageserver_conninfo == &old_pageserver_conninfo;
+                    .pageserver_connstr;
+                unchanged = pageserver_connstr == &old_pageserver_connstr;
                unchanged
            })
            .unwrap();
@@ -2553,31 +2439,14 @@ LIMIT 100",
    pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
        self.terminate_lfc_offload_task();
        let secs = interval.as_secs();
+        info!("spawning lfc offload worker with {secs}s interval");
        let this = self.clone();
-
-        info!("spawning LFC offload worker with {secs}s interval");
        let handle = spawn(async move {
            let mut interval = time::interval(interval);
            interval.tick().await; // returns immediately
            loop {
                interval.tick().await;
-
-                let prewarm_state = this.state.lock().unwrap().lfc_prewarm_state.clone();
-                // Do not offload LFC state if we are currently prewarming or any issue occurred.
-                // If we'd do that, we might override the LFC state in endpoint storage with some
-                // incomplete state. Imagine a situation:
-                // 1. Endpoint started with `autoprewarm: true`
-                // 2. While prewarming is not completed, we upload the new incomplete state
-                // 3. Compute gets interrupted and restarts
-                // 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
-                if matches!(
-                    prewarm_state,
-                    LfcPrewarmState::Completed
-                        | LfcPrewarmState::NotPrewarmed
-                        | LfcPrewarmState::Skipped
-                ) {
-                    this.offload_lfc_async().await;
-                }
+                this.offload_lfc_async().await;
            }
        });
        *self.lfc_offload_task.lock().unwrap() = Some(handle);
@@ -2729,10 +2598,7 @@ mod tests {

        match ParsedSpec::try_from(spec.clone()) {
            Ok(_p) => panic!("Failed to detect duplicate entry"),
-            Err(e) => assert!(
-                e.to_string()
-                    .starts_with("duplicate entry in safekeeper_connstrings:")
-            ),
+            Err(e) => assert!(e.starts_with("duplicate entry in safekeeper_connstrings:")),
        };
    }
 }
--- a/compute_tools/src/compute_prewarm.rs
+++ b/compute_tools/src/compute_prewarm.rs
@@ -89,7 +89,7 @@ impl ComputeNode {
        self.state.lock().unwrap().lfc_offload_state.clone()
    }

-    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.
+    /// Returns `false` if a prewarm request is already ongoing, `true` otherwise.
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
        {
            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
@@ -101,25 +101,15 @@ impl ComputeNode {

        let cloned = self.clone();
        spawn(async move {
-            let state = match cloned.prewarm_impl(from_endpoint).await {
-                Ok(true) => LfcPrewarmState::Completed,
-                Ok(false) => {
-                    info!(
-                        "skipping LFC prewarm because LFC state is not found in endpoint storage"
-                    );
-                    LfcPrewarmState::Skipped
-                }
-                Err(err) => {
-                    crate::metrics::LFC_PREWARM_ERRORS.inc();
-                    error!(%err, "could not prewarm LFC");
-
-                    LfcPrewarmState::Failed {
-                        error: err.to_string(),
-                    }
-                }
+            let Err(err) = cloned.prewarm_impl(from_endpoint).await else {
+                cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
+                return;
+            };
+            crate::metrics::LFC_PREWARM_ERRORS.inc();
+            error!(%err, "prewarming lfc");
+            cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
+                error: err.to_string(),
            };
-
-            cloned.state.lock().unwrap().lfc_prewarm_state = state;
        });
        true
    }
@@ -130,21 +120,15 @@ impl ComputeNode {
        EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)
    }

-    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.
-    /// Returns a result with `false` if the LFC state is not found in endpoint storage.
-    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
+    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<()> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;
-
        info!(%url, "requesting LFC state from endpoint storage");
+
        let request = Client::new().get(&url).bearer_auth(token);
        let res = request.send().await.context("querying endpoint storage")?;
        let status = res.status();
-        match status {
-            StatusCode::OK => (),
-            StatusCode::NOT_FOUND => {
-                return Ok(false);
-            }
-            _ => bail!("{status} querying endpoint storage"),
+        if status != StatusCode::OK {
+            bail!("{status} querying endpoint storage")
        }

        let mut uncompressed = Vec::new();
@@ -157,8 +141,7 @@ impl ComputeNode {
            .await
            .context("decoding LFC state")?;
        let uncompressed_len = uncompressed.len();
-
-        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");
+        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into postgres");

        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
@@ -166,9 +149,7 @@ impl ComputeNode {
            .query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
            .await
            .context("loading LFC state into postgres")
-            .map(|_| ())?;
-
-        Ok(true)
+            .map(|_| ())
    }

    /// If offload request is ongoing, return false, true otherwise
@@ -196,14 +177,12 @@ impl ComputeNode {

    async fn offload_lfc_with_state_update(&self) {
        crate::metrics::LFC_OFFLOADS.inc();
-
        let Err(err) = self.offload_lfc_impl().await else {
            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
            return;
        };
-
        crate::metrics::LFC_OFFLOAD_ERRORS.inc();
-        error!(%err, "could not offload LFC state to endpoint storage");
+        error!(%err, "offloading lfc");
        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
            error: err.to_string(),
        };
@@ -211,7 +190,7 @@ impl ComputeNode {

    async fn offload_lfc_impl(&self) -> Result<()> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
-        info!(%url, "requesting LFC state from Postgres");
+        info!(%url, "requesting LFC state from postgres");

        let mut compressed = Vec::new();
        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
@@ -226,17 +205,13 @@ impl ComputeNode {
            .read_to_end(&mut compressed)
            .await
            .context("compressing LFC state")?;
-
        let compressed_len = compressed.len();
        info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");

        let request = Client::new().put(url).bearer_auth(token).body(compressed);
        match request.send().await {
            Ok(res) if res.status() == StatusCode::OK => Ok(()),
-            Ok(res) => bail!(
-                "Request to endpoint storage failed with status: {}",
-                res.status()
-            ),
+            Ok(res) => bail!("Error writing to endpoint storage: {}", res.status()),
            Err(err) => Err(err).context("writing to endpoint storage"),
        }
    }
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -9,14 +9,11 @@ use std::path::Path;
 use compute_api::responses::TlsConfig;
 use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};

-use crate::compute::ComputeNodeParams;
 use crate::pg_helpers::{
    GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
 };
 use crate::tls::{self, SERVER_CRT, SERVER_KEY};

-use utils::shard::{ShardIndex, ShardNumber};
-
 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
 pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
@@ -44,7 +41,6 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 /// Create or completely rewrite configuration file specified by `path`
 pub fn write_postgres_conf(
    pgdata_path: &Path,
-    params: &ComputeNodeParams,
    spec: &ComputeSpec,
    extension_server_port: u16,
    tls_config: &Option<TlsConfig>,
@@ -60,99 +56,12 @@ pub fn write_postgres_conf(

    // Add options for connecting to storage
    writeln!(file, "# Neon storage settings")?;
-    writeln!(file)?;
-    if let Some(conninfo) = &spec.pageserver_connection_info {
-        // Stripe size GUC should be defined prior to connection string
-        if let Some(stripe_size) = conninfo.stripe_size {
-            writeln!(
-                file,
-                "# from compute spec's pageserver_conninfo.stripe_size field"
-            )?;
-            writeln!(file, "neon.stripe_size={stripe_size}")?;
-        }
-
-        let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
-        let mut grpc_urls: Option<Vec<String>> = Some(Vec::new());
-        let num_shards = if conninfo.shard_count.0 == 0 {
-            1 // unsharded, treat it as a single shard
-        } else {
-            conninfo.shard_count.0
-        };
-
-        for shard_number in 0..num_shards {
-            let shard_index = ShardIndex {
-                shard_number: ShardNumber(shard_number),
-                shard_count: conninfo.shard_count,
-            };
-            let info = conninfo.shards.get(&shard_index).ok_or_else(|| {
-                anyhow::anyhow!(
-                    "shard {shard_index} missing from pageserver_connection_info shard map"
-                )
-            })?;
-
-            let first_pageserver = info
-                .pageservers
-                .first()
-                .expect("must have at least one pageserver");
-
-            // Add the libpq URL to the array, or if the URL is missing, reset the array
-            // forgetting any previous entries. All servers must have a libpq URL, or none
-            // at all.
-            if let Some(url) = &first_pageserver.libpq_url {
-                if let Some(ref mut urls) = libpq_urls {
-                    urls.push(url.clone());
-                }
-            } else {
-                libpq_urls = None
-            }
-            // Similarly for gRPC URLs
-            if let Some(url) = &first_pageserver.grpc_url {
-                if let Some(ref mut urls) = grpc_urls {
-                    urls.push(url.clone());
-                }
-            } else {
-                grpc_urls = None
-            }
-        }
-        if let Some(libpq_urls) = libpq_urls {
-            writeln!(
-                file,
-                "# derived from compute spec's pageserver_conninfo field"
-            )?;
-            writeln!(
-                file,
-                "neon.pageserver_connstring={}",
-                escape_conf_value(&libpq_urls.join(","))
-            )?;
-        } else {
-            writeln!(file, "# no neon.pageserver_connstring")?;
-        }
-        if let Some(grpc_urls) = grpc_urls {
-            writeln!(
-                file,
-                "# derived from compute spec's pageserver_conninfo field"
-            )?;
-            writeln!(
-                file,
-                "neon.pageserver_grpc_urls={}",
-                escape_conf_value(&grpc_urls.join(","))
-            )?;
-        } else {
-            writeln!(file, "# no neon.pageserver_grpc_urls")?;
-        }
-    } else {
-        // Stripe size GUC should be defined prior to connection string
-        if let Some(stripe_size) = spec.shard_stripe_size {
-            writeln!(file, "# from compute spec's shard_stripe_size field")?;
-            writeln!(file, "neon.stripe_size={stripe_size}")?;
-        }
-
-        if let Some(s) = &spec.pageserver_connstring {
-            writeln!(file, "# from compute spec's pageserver_connstring field")?;
-            writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
-        }
+    if let Some(s) = &spec.pageserver_connstring {
+        writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
+    }
+    if let Some(stripe_size) = spec.shard_stripe_size {
+        writeln!(file, "neon.stripe_size={stripe_size}")?;
    }
-
    if !spec.safekeeper_connstrings.is_empty() {
        let mut neon_safekeepers_value = String::new();
        tracing::info!(
@@ -252,12 +161,6 @@ pub fn write_postgres_conf(
        }
    }

-    writeln!(
-        file,
-        "neon.privileged_role_name={}",
-        escape_conf_value(params.privileged_role_name.as_str())
-    )?;
-
    // If there are any extra options in the 'settings' field, append those
    if spec.cluster.settings.is_some() {
        writeln!(file, "# Managed by compute_ctl: begin")?;
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -613,11 +613,11 @@ components:
        - skipped
      properties:
        status:
-          description: LFC prewarm status
-          enum: [not_prewarmed, prewarming, completed, failed, skipped]
+          description: Lfc prewarm status
+          enum: [not_prewarmed, prewarming, completed, failed]
          type: string
        error:
-          description: LFC prewarm error, if any
+          description: Lfc prewarm error, if any
          type: string
        total:
          description: Total pages processed
@@ -635,11 +635,11 @@ components:
        - status
      properties:
        status:
-          description: LFC offload status
+          description: Lfc offload status
          enum: [not_offloaded, offloading, completed, failed]
          type: string
        error:
-          description: LFC offload error, if any
+          description: Lfc offload error, if any
          type: string

    PromoteState:
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -4,13 +4,14 @@ use std::thread;
 use std::time::{Duration, SystemTime};

 use anyhow::{Result, bail};
-use compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverProtocol};
+use compute_api::spec::{ComputeMode, PageserverProtocol};
+use itertools::Itertools as _;
 use pageserver_page_api as page_api;
 use postgres::{NoTls, SimpleQueryMessage};
 use tracing::{info, warn};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
-use utils::shard::TenantShardId;
+use utils::shard::{ShardCount, ShardNumber, TenantShardId};

 use crate::compute::ComputeNode;

@@ -77,16 +78,17 @@ fn acquire_lsn_lease_with_retry(

    loop {
        // Note: List of pageservers is dynamic, need to re-read configs before each attempt.
-        let (conninfo, auth) = {
+        let (connstrings, auth) = {
            let state = compute.state.lock().unwrap();
            let spec = state.pspec.as_ref().expect("spec must be set");
            (
-                spec.pageserver_conninfo.clone(),
+                spec.pageserver_connstr.clone(),
                spec.storage_auth_token.clone(),
            )
        };

-        let result = try_acquire_lsn_lease(conninfo, auth.as_deref(), tenant_id, timeline_id, lsn);
+        let result =
+            try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn);
        match result {
            Ok(Some(res)) => {
                return Ok(res);
@@ -110,44 +112,35 @@ fn acquire_lsn_lease_with_retry(

 /// Tries to acquire LSN leases on all Pageserver shards.
 fn try_acquire_lsn_lease(
-    conninfo: PageserverConnectionInfo,
+    connstrings: &str,
    auth: Option<&str>,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    lsn: Lsn,
 ) -> Result<Option<SystemTime>> {
+    let connstrings = connstrings.split(',').collect_vec();
+    let shard_count = connstrings.len();
    let mut leases = Vec::new();

-    for (shard_index, shard) in conninfo.shards.into_iter() {
-        let tenant_shard_id = TenantShardId {
-            tenant_id,
-            shard_number: shard_index.shard_number,
-            shard_count: shard_index.shard_count,
+    for (shard_number, &connstring) in connstrings.iter().enumerate() {
+        let tenant_shard_id = match shard_count {
+            0 | 1 => TenantShardId::unsharded(tenant_id),
+            shard_count => TenantShardId {
+                tenant_id,
+                shard_number: ShardNumber(shard_number as u8),
+                shard_count: ShardCount::new(shard_count as u8),
+            },
        };

-        // XXX: If there are more than pageserver for the one shard, do we need to get a
-        // leas on all of them? Currently, that's what we assume, but this is hypothetical
-        // as of this writing, as we never pass the info for more than one pageserver per
-        // shard.
-        for pageserver in shard.pageservers {
-            let lease = match conninfo.prefer_protocol {
-                PageserverProtocol::Grpc => acquire_lsn_lease_grpc(
-                    &pageserver.grpc_url.unwrap(),
-                    auth,
-                    tenant_shard_id,
-                    timeline_id,
-                    lsn,
-                )?,
-                PageserverProtocol::Libpq => acquire_lsn_lease_libpq(
-                    &pageserver.libpq_url.unwrap(),
-                    auth,
-                    tenant_shard_id,
-                    timeline_id,
-                    lsn,
-                )?,
-            };
-            leases.push(lease);
-        }
+        let lease = match PageserverProtocol::from_connstring(connstring)? {
+            PageserverProtocol::Libpq => {
+                acquire_lsn_lease_libpq(connstring, auth, tenant_shard_id, timeline_id, lsn)?
+            }
+            PageserverProtocol::Grpc => {
+                acquire_lsn_lease_grpc(connstring, auth, tenant_shard_id, timeline_id, lsn)?
+            }
+        };
+        leases.push(lease);
    }

    Ok(leases.into_iter().min().flatten())
--- a/compute_tools/src/migrations/0001-add_bypass_rls_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0001-add_bypass_rls_to_privileged_role.sql
@@ -1 +0,0 @@
-ALTER ROLE {privileged_role_name} BYPASSRLS;
--- a/compute_tools/src/migrations/0001-neon_superuser_bypass_rls.sql
+++ b/compute_tools/src/migrations/0001-neon_superuser_bypass_rls.sql
@@ -0,0 +1 @@
+ALTER ROLE neon_superuser BYPASSRLS;
--- a/compute_tools/src/migrations/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/0002-alter_roles.sql
@@ -15,7 +15,7 @@ DO $$
 DECLARE
    role_name text;
 BEGIN
-    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
+    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
@@ -23,7 +23,7 @@ BEGIN

    FOR role_name IN SELECT rolname FROM pg_roles
        WHERE
-            NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
+            NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
--- a/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql
@@ -1,6 +1,6 @@
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
-        EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
+        EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/0004-grant_pg_monitor_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0004-grant_pg_monitor_to_neon_superuser.sql
@@ -0,0 +1 @@
+GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/0004-grant_pg_monitor_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0004-grant_pg_monitor_to_privileged_role.sql
@@ -1 +0,0 @@
-GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/0005-grant_all_on_tables_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0005-grant_all_on_tables_to_privileged_role.sql
@@ -1,4 +1,4 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
--       interacted with by {privileged_role_name} without permission issues.
+--       interacted with by neon_superuser without permission issues.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
--- a/compute_tools/src/migrations/0006-grant_all_on_sequences_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0006-grant_all_on_sequences_to_privileged_role.sql
@@ -1,4 +1,4 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
--       interacted with by {privileged_role_name} without permission issues.
+--       interacted with by neon_superuser without permission issues.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
--- a/compute_tools/src/migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
@@ -1,3 +1,3 @@
 -- SKIP: Moved inline to the handle_grants() functions.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
--- a/compute_tools/src/migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
@@ -1,3 +1,3 @@
 -- SKIP: Moved inline to the handle_grants() functions.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
--- a/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
@@ -1,7 +1,7 @@
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
-       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
-       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
+       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
+       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -0,0 +1 @@
+GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
--- a/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
@@ -1 +0,0 @@
-GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};
--- a/compute_tools/src/migrations/0012-grant_pg_signal_backend_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0012-grant_pg_signal_backend_to_neon_superuser.sql
@@ -0,0 +1 @@
+GRANT pg_signal_backend TO neon_superuser WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/0012-grant_pg_signal_backend_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0012-grant_pg_signal_backend_to_privileged_role.sql
@@ -1 +0,0 @@
-GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -9,7 +9,6 @@ use reqwest::StatusCode;
 use tokio_postgres::Client;
 use tracing::{error, info, instrument};

-use crate::compute::ComputeNodeParams;
 use crate::config;
 use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
 use crate::migration::MigrationRunner;
@@ -170,7 +169,7 @@ pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
 }

 #[instrument(skip_all)]
-pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -> Result<()> {
+pub async fn handle_migrations(client: &mut Client) -> Result<()> {
    info!("handle migrations");

    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -179,59 +178,26 @@ pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -

    // Add new migrations in numerical order.
    let migrations = [
-        &format!(
-            include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"),
-            privileged_role_name = params.privileged_role_name
+        include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
+        include_str!("./migrations/0002-alter_roles.sql"),
+        include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
+        include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
+        include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
+        include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
+        include_str!(
+            "./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
        ),
-        &format!(
-            include_str!("./migrations/0002-alter_roles.sql"),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!("./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql"),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!("./migrations/0004-grant_pg_monitor_to_privileged_role.sql"),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!("./migrations/0005-grant_all_on_tables_to_privileged_role.sql"),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!("./migrations/0006-grant_all_on_sequences_to_privileged_role.sql"),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!(
-                "./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql"
-            ),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!(
-                "./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql"
-            ),
-            privileged_role_name = params.privileged_role_name
+        include_str!(
+            "./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
        ),
        include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
-        &format!(
-            include_str!(
-                "./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql"
-            ),
-            privileged_role_name = params.privileged_role_name
+        include_str!(
+            "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
        ),
-        &format!(
-            include_str!(
-                "./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql"
-            ),
-            privileged_role_name = params.privileged_role_name
-        ),
-        &format!(
-            include_str!("./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql"),
-            privileged_role_name = params.privileged_role_name
+        include_str!(
+            "./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
        ),
+        include_str!("./migrations/0012-grant_pg_signal_backend_to_neon_superuser.sql"),
    ];

    MigrationRunner::new(client, &migrations)
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -13,14 +13,14 @@ use tokio_postgres::Client;
 use tokio_postgres::error::SqlState;
 use tracing::{Instrument, debug, error, info, info_span, instrument, warn};

-use crate::compute::{ComputeNode, ComputeNodeParams, ComputeState};
+use crate::compute::{ComputeNode, ComputeState};
 use crate::pg_helpers::{
    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
    get_existing_roles_async,
 };
 use crate::spec_apply::ApplySpecPhase::{
-    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension,
-    CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,
+    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
+    CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
    DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
    RunInEachDatabase,
@@ -49,7 +49,6 @@ impl ComputeNode {
            // Proceed with post-startup configuration. Note, that order of operations is important.
            let client = Self::get_maintenance_client(&conf).await?;
            let spec = spec.clone();
-            let params = Arc::new(self.params.clone());

            let databases = get_existing_dbs_async(&client).await?;
            let roles = get_existing_roles_async(&client)
@@ -158,7 +157,6 @@ impl ComputeNode {

                    let conf = Arc::new(conf);
                    let fut = Self::apply_spec_sql_db(
-                        params.clone(),
                        spec.clone(),
                        conf,
                        ctx.clone(),
@@ -187,7 +185,7 @@ impl ComputeNode {
            }

            for phase in [
-                CreatePrivilegedRole,
+                CreateNeonSuperuser,
                DropInvalidDatabases,
                RenameRoles,
                CreateAndAlterRoles,
@@ -197,7 +195,6 @@ impl ComputeNode {
            ] {
                info!("Applying phase {:?}", &phase);
                apply_operations(
-                    params.clone(),
                    spec.clone(),
                    ctx.clone(),
                    jwks_roles.clone(),
@@ -246,7 +243,6 @@ impl ComputeNode {
                    }

                    let fut = Self::apply_spec_sql_db(
-                        params.clone(),
                        spec.clone(),
                        conf,
                        ctx.clone(),
@@ -297,7 +293,6 @@ impl ComputeNode {
            for phase in phases {
                debug!("Applying phase {:?}", &phase);
                apply_operations(
-                    params.clone(),
                    spec.clone(),
                    ctx.clone(),
                    jwks_roles.clone(),
@@ -318,9 +313,7 @@ impl ComputeNode {
    /// May opt to not connect to databases that don't have any scheduled
    /// operations.  The function is concurrency-controlled with the provided
    /// semaphore.  The caller has to make sure the semaphore isn't exhausted.
-    #[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring
    async fn apply_spec_sql_db(
-        params: Arc<ComputeNodeParams>,
        spec: Arc<ComputeSpec>,
        conf: Arc<tokio_postgres::Config>,
        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
@@ -335,7 +328,6 @@ impl ComputeNode {

        for subphase in subphases {
            apply_operations(
-                params.clone(),
                spec.clone(),
                ctx.clone(),
                jwks_roles.clone(),
@@ -475,7 +467,7 @@ pub enum PerDatabasePhase {

 #[derive(Clone, Debug)]
 pub enum ApplySpecPhase {
-    CreatePrivilegedRole,
+    CreateNeonSuperuser,
    DropInvalidDatabases,
    RenameRoles,
    CreateAndAlterRoles,
@@ -518,7 +510,6 @@ pub struct MutableApplyContext {
 /// - No timeouts have (yet) been implemented.
 /// - The caller is responsible for limiting and/or applying concurrency.
 pub async fn apply_operations<'a, Fut, F>(
-    params: Arc<ComputeNodeParams>,
    spec: Arc<ComputeSpec>,
    ctx: Arc<RwLock<MutableApplyContext>>,
    jwks_roles: Arc<HashSet<String>>,
@@ -536,7 +527,7 @@ where
        debug!("Processing phase {:?}", &apply_spec_phase);
        let ctx = ctx;

-        let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)
+        let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
            .await?
            .peekable();

@@ -597,18 +588,14 @@ where
 /// sort/merge/batch execution, but for now this is a nice way to improve
 /// batching behavior of the commands.
 async fn get_operations<'a>(
-    params: &'a ComputeNodeParams,
    spec: &'a ComputeSpec,
    ctx: &'a RwLock<MutableApplyContext>,
    jwks_roles: &'a HashSet<String>,
    apply_spec_phase: &'a ApplySpecPhase,
 ) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
    match apply_spec_phase {
-        ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {
-            query: format!(
-                include_str!("sql/create_privileged_role.sql"),
-                privileged_role_name = params.privileged_role_name
-            ),
+        ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
+            query: include_str!("sql/create_neon_superuser.sql").to_string(),
            comment: None,
        }))),
        ApplySpecPhase::DropInvalidDatabases => {
@@ -710,9 +697,8 @@ async fn get_operations<'a>(
                        None => {
                            let query = if !jwks_roles.contains(role.name.as_str()) {
                                format!(
-                                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}",
+                                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
                                    role.name.pg_quote(),
-                                    params.privileged_role_name,
                                    role.to_pg_options(),
                                )
                            } else {
@@ -863,9 +849,8 @@ async fn get_operations<'a>(
                                // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
                                // (see https://www.postgresql.org/docs/current/ddl-priv.html)
                                query: format!(
-                                    "GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
-                                    db.name.pg_quote(),
-                                    params.privileged_role_name
+                                    "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
+                                    db.name.pg_quote()
                                ),
                                comment: None,
                            },
--- a/compute_tools/src/sql/create_neon_superuser.sql
+++ b/compute_tools/src/sql/create_neon_superuser.sql
@@ -0,0 +1,8 @@
+DO $$
+    BEGIN
+        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
+        THEN
+            CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
+        END IF;
+    END
+$$;
--- a/compute_tools/src/sql/create_privileged_role.sql
+++ b/compute_tools/src/sql/create_privileged_role.sql
@@ -1,8 +0,0 @@
-DO $$
-    BEGIN
-        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
-        THEN
-            CREATE ROLE {privileged_role_name} CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
-        END IF;
-    END
-$$;
--- a/control_plane/README.md
+++ b/control_plane/README.md
@@ -8,10 +8,10 @@ code changes locally, but not suitable for running production systems.

 ## Example: Start with Postgres 16

-To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 2 of the start-up commands.
+To create and start a local development environment with Postgres 16, you will need to provide the `--pg-version` flag to 3 of the start-up commands.

 ```shell
-cargo neon init
+cargo neon init --pg-version 16
 cargo neon start
 cargo neon tenant create --set-default --pg-version 16
 cargo neon endpoint create main --pg-version 16
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -16,14 +16,9 @@ use std::time::Duration;
 use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::requests::ComputeClaimsScope;
-use compute_api::spec::{
-    ComputeMode, PageserverConnectionInfo, PageserverProtocol, PageserverShardInfo,
-};
+use compute_api::spec::{ComputeMode, PageserverProtocol};
 use control_plane::broker::StorageBroker;
 use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
-use control_plane::endpoint::{
-    pageserver_conf_to_shard_conn_info, tenant_locate_response_to_conn_info,
-};
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
 use control_plane::local_env;
 use control_plane::local_env::{
@@ -49,6 +44,7 @@ use pageserver_api::models::{
 };
 use pageserver_api::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};
 use postgres_backend::AuthType;
+use postgres_connection::parse_host_port;
 use safekeeper_api::membership::{SafekeeperGeneration, SafekeeperId};
 use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
@@ -56,11 +52,11 @@ use safekeeper_api::{
 };
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
 use tokio::task::JoinSet;
+use url::Host;
 use utils::auth::{Claims, Scope};
 use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
 use utils::lsn::Lsn;
 use utils::project_git_version;
-use utils::shard::ShardIndex;

 // Default id of a safekeeper node, if not specified on the command line.
 const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
@@ -635,10 +631,6 @@ struct EndpointCreateCmdArgs {
        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
    )]
    allow_multiple: bool,
-
-    /// Only allow changing it on creation
-    #[clap(long, help = "Name of the privileged role for the endpoint")]
-    privileged_role_name: Option<String>,
 }

 #[derive(clap::Args)]
@@ -1488,7 +1480,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                args.grpc,
                !args.update_catalog,
                false,
-                args.privileged_role_name.clone(),
            )?;
        }
        EndpointCmd::Start(args) => {
@@ -1525,56 +1516,62 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                )?;
            }

-            let prefer_protocol = if endpoint.grpc {
-                PageserverProtocol::Grpc
-            } else {
-                PageserverProtocol::Libpq
-            };
-
-            let mut pageserver_conninfo = if let Some(ps_id) = pageserver_id {
-                let conf = env.get_pageserver_conf(ps_id).unwrap();
-                let ps_conninfo = pageserver_conf_to_shard_conn_info(conf)?;
-
-                let shard_info = PageserverShardInfo {
-                    pageservers: vec![ps_conninfo],
+            let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
+                let conf = env.get_pageserver_conf(pageserver_id).unwrap();
+                // Use gRPC if requested.
+                let pageserver = if endpoint.grpc {
+                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
+                    let (host, port) = parse_host_port(grpc_addr)?;
+                    let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
+                    (PageserverProtocol::Grpc, host, port)
+                } else {
+                    let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
+                    let port = port.unwrap_or(5432);
+                    (PageserverProtocol::Libpq, host, port)
                };
                // If caller is telling us what pageserver to use, this is not a tenant which is
                // fully managed by storage controller, therefore not sharded.
-                let shards: HashMap<_, _> = vec![(ShardIndex::unsharded(), shard_info)]
-                    .into_iter()
-                    .collect();
-                PageserverConnectionInfo {
-                    shard_count: ShardCount(0),
-                    stripe_size: None,
-                    shards,
-                    prefer_protocol,
-                }
+                (vec![pageserver], DEFAULT_STRIPE_SIZE)
            } else {
                // Look up the currently attached location of the tenant, and its striping metadata,
                // to pass these on to postgres.
                let storage_controller = StorageController::from_env(env);
                let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
-                assert!(!locate_result.shards.is_empty());
-
-                // Initialize LSN leases for static computes.
-                if let ComputeMode::Static(lsn) = endpoint.mode {
-                    futures::future::try_join_all(locate_result.shards.iter().map(
-                        |shard| async move {
+                let pageservers = futures::future::try_join_all(
+                    locate_result.shards.into_iter().map(|shard| async move {
+                        if let ComputeMode::Static(lsn) = endpoint.mode {
+                            // Initialize LSN leases for static computes.
                            let conf = env.get_pageserver_conf(shard.node_id).unwrap();
                            let pageserver = PageServerNode::from_env(env, conf);

                            pageserver
                                .http_client
                                .timeline_init_lsn_lease(shard.shard_id, endpoint.timeline_id, lsn)
-                                .await
-                        },
-                    ))
-                    .await?;
-                }
+                                .await?;
+                        }

-                tenant_locate_response_to_conn_info(&locate_result)?
+                        let pageserver = if endpoint.grpc {
+                            (
+                                PageserverProtocol::Grpc,
+                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
+                                shard.listen_grpc_port.expect("no gRPC port"),
+                            )
+                        } else {
+                            (
+                                PageserverProtocol::Libpq,
+                                Host::parse(&shard.listen_pg_addr)?,
+                                shard.listen_pg_port,
+                            )
+                        };
+                        anyhow::Ok(pageserver)
+                    }),
+                )
+                .await?;
+                let stripe_size = locate_result.shard_params.stripe_size;
+
+                (pageservers, stripe_size)
            };
-            pageserver_conninfo.prefer_protocol = prefer_protocol;
+            assert!(!pageservers.is_empty());

            let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;
            let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
@@ -1604,8 +1601,9 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                endpoint_storage_addr,
                safekeepers_generation,
                safekeepers,
-                pageserver_conninfo,
+                pageservers,
                remote_ext_base_url: remote_ext_base_url.clone(),
+                shard_stripe_size: stripe_size.0 as usize,
                create_test_user: args.create_test_user,
                start_timeout: args.start_timeout,
                autoprewarm: args.autoprewarm,
@@ -1622,45 +1620,51 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                .endpoints
                .get(endpoint_id.as_str())
                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-
-            let prefer_protocol = if endpoint.grpc {
-                PageserverProtocol::Grpc
-            } else {
-                PageserverProtocol::Libpq
-            };
-            let mut pageserver_conninfo = if let Some(ps_id) = args.endpoint_pageserver_id {
+            let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
                let conf = env.get_pageserver_conf(ps_id)?;
-                let ps_conninfo = pageserver_conf_to_shard_conn_info(conf)?;
-                let shard_info = PageserverShardInfo {
-                    pageservers: vec![ps_conninfo],
+                // Use gRPC if requested.
+                let pageserver = if endpoint.grpc {
+                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
+                    let (host, port) = parse_host_port(grpc_addr)?;
+                    let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
+                    (PageserverProtocol::Grpc, host, port)
+                } else {
+                    let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
+                    let port = port.unwrap_or(5432);
+                    (PageserverProtocol::Libpq, host, port)
                };
-
-                // If caller is telling us what pageserver to use, this is not a tenant which is
-                // fully managed by storage controller, therefore not sharded.
-                let shards: HashMap<_, _> = vec![(ShardIndex::unsharded(), shard_info)]
-                    .into_iter()
-                    .collect();
-                PageserverConnectionInfo {
-                    shard_count: ShardCount::unsharded(),
-                    stripe_size: None,
-                    shards,
-                    prefer_protocol,
-                }
+                vec![pageserver]
            } else {
-                // Look up the currently attached location of the tenant, and its striping metadata,
-                // to pass these on to postgres.
                let storage_controller = StorageController::from_env(env);
-                let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
-
-                tenant_locate_response_to_conn_info(&locate_result)?
+                storage_controller
+                    .tenant_locate(endpoint.tenant_id)
+                    .await?
+                    .shards
+                    .into_iter()
+                    .map(|shard| {
+                        // Use gRPC if requested. Bad hostnames are returned as errors.
+                        let pageserver = if endpoint.grpc {
+                            (
+                                PageserverProtocol::Grpc,
+                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
+                                shard.listen_grpc_port.expect("no gRPC port"),
+                            )
+                        } else {
+                            (
+                                PageserverProtocol::Libpq,
+                                Host::parse(&shard.listen_pg_addr)?,
+                                shard.listen_pg_port,
+                            )
+                        };
+                        anyhow::Ok(pageserver)
+                    })
+                    .collect::<Result<Vec<_>>>()?
            };
-            pageserver_conninfo.prefer_protocol = prefer_protocol;
-
            // If --safekeepers argument is given, use only the listed
            // safekeeper nodes; otherwise all from the env.
            let safekeepers = parse_safekeepers(&args.safekeepers)?;
            endpoint
-                .reconfigure(Some(&pageserver_conninfo), safekeepers, None)
+                .reconfigure(Some(pageservers), None, safekeepers, None)
                .await?;
        }
        EndpointCmd::Stop(args) => {
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -37,7 +37,7 @@
 //!         <other PostgreSQL files>
 //! ```
 //!
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
 use std::fmt::Display;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
 use std::path::PathBuf;
@@ -58,17 +58,14 @@ use compute_api::responses::{
 };
 use compute_api::spec::{
    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
-    PageserverShardInfo, PgIdent, RemoteExtSpec, Role,
+    PgIdent, RemoteExtSpec, Role,
 };
-
-// re-export these, because they're used in the reconfigure() function
-pub use compute_api::spec::{PageserverConnectionInfo, PageserverShardConnectionInfo};
-
 use jsonwebtoken::jwk::{
    AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
    OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
 };
 use nix::sys::signal::{Signal, kill};
+use pageserver_api::shard::ShardStripeSize;
 use pem::Pem;
 use reqwest::header::CONTENT_TYPE;
 use safekeeper_api::PgMajorVersion;
@@ -78,11 +75,8 @@ use sha2::{Digest, Sha256};
 use spki::der::Decode;
 use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
 use tracing::debug;
+use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};
-use utils::shard::{ShardIndex, ShardNumber};
-
-use pageserver_api::config::DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT;
-use postgres_connection::parse_host_port;

 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
@@ -105,7 +99,6 @@ pub struct EndpointConf {
    features: Vec<ComputeFeature>,
    cluster: Option<Cluster>,
    compute_ctl_config: ComputeCtlConfig,
-    privileged_role_name: Option<String>,
 }

 //
@@ -206,7 +199,6 @@ impl ComputeControlPlane {
        grpc: bool,
        skip_pg_catalog_updates: bool,
        drop_subscriptions_before_start: bool,
-        privileged_role_name: Option<String>,
    ) -> Result<Arc<Endpoint>> {
        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
        let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -244,7 +236,6 @@ impl ComputeControlPlane {
            features: vec![],
            cluster: None,
            compute_ctl_config: compute_ctl_config.clone(),
-            privileged_role_name: privileged_role_name.clone(),
        });

        ep.create_endpoint_dir()?;
@@ -266,7 +257,6 @@ impl ComputeControlPlane {
                features: vec![],
                cluster: None,
                compute_ctl_config,
-                privileged_role_name,
            })?,
        )?;
        std::fs::write(
@@ -342,9 +332,6 @@ pub struct Endpoint {

    /// The compute_ctl config for the endpoint's compute.
    compute_ctl_config: ComputeCtlConfig,
-
-    /// The name of the privileged role for the endpoint.
-    privileged_role_name: Option<String>,
 }

 #[derive(PartialEq, Eq)]
@@ -393,8 +380,9 @@ pub struct EndpointStartArgs {
    pub endpoint_storage_addr: String,
    pub safekeepers_generation: Option<SafekeeperGeneration>,
    pub safekeepers: Vec<NodeId>,
-    pub pageserver_conninfo: PageserverConnectionInfo,
+    pub pageservers: Vec<(PageserverProtocol, Host, u16)>,
    pub remote_ext_base_url: Option<String>,
+    pub shard_stripe_size: usize,
    pub create_test_user: bool,
    pub start_timeout: Duration,
    pub autoprewarm: bool,
@@ -444,7 +432,6 @@ impl Endpoint {
            features: conf.features,
            cluster: conf.cluster,
            compute_ctl_config: conf.compute_ctl_config,
-            privileged_role_name: conf.privileged_role_name,
        })
    }

@@ -667,6 +654,14 @@ impl Endpoint {
        }
    }

+    /// Joins the pageservers into a comma-separated list of `scheme://no_user@host:port` URLs.
+    fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
+        let to_url = |(proto, host, port): &(PageserverProtocol, Host, u16)| {
+            format!("{proto}://no_user@{host}:{port}")
+        };
+        pageservers.iter().map(to_url).collect::<Vec<_>>().join(",")
+    }
+
    /// Map safekeepers ids to the actual connection strings.
    fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
        let mut safekeeper_connstrings = Vec::new();
@@ -712,6 +707,9 @@ impl Endpoint {
            std::fs::remove_dir_all(self.pgdata())?;
        }

+        let pageserver_connstring = Self::build_pageserver_connstr(&args.pageservers);
+        assert!(!pageserver_connstring.is_empty());
+
        let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;

        // check for file remote_extensions_spec.json
@@ -726,46 +724,6 @@ impl Endpoint {
            remote_extensions = None;
        };

-        // For the sake of backwards-compatibility, also fill in 'pageserver_connstring'
-        //
-        // XXX: I believe this is not really needed, except to make
-        // test_forward_compatibility happy.
-        //
-        // Use a closure so that we can conviniently return None in the middle of the
-        // loop.
-        let pageserver_connstring = (|| {
-            let num_shards = if args.pageserver_conninfo.shard_count.is_unsharded() {
-                1
-            } else {
-                args.pageserver_conninfo.shard_count.0
-            };
-            let mut connstrings = Vec::new();
-            for shard_no in 0..num_shards {
-                let shard_index = ShardIndex {
-                    shard_count: args.pageserver_conninfo.shard_count,
-                    shard_number: ShardNumber(shard_no),
-                };
-                let shard = args
-                    .pageserver_conninfo
-                    .shards
-                    .get(&shard_index)
-                    .expect(&format!(
-                        "shard {} not found in pageserver_connection_info",
-                        shard_index
-                    ));
-                let pageserver = shard
-                    .pageservers
-                    .first()
-                    .expect("must have at least one pageserver");
-                if let Some(libpq_url) = &pageserver.libpq_url {
-                    connstrings.push(libpq_url.clone());
-                } else {
-                    return None;
-                }
-            }
-            Some(connstrings.join(","))
-        })();
-
        // Create config file
        let config = {
            let mut spec = ComputeSpec {
@@ -810,14 +768,13 @@ impl Endpoint {
                branch_id: None,
                endpoint_id: Some(self.endpoint_id.clone()),
                mode: self.mode,
-                pageserver_connection_info: Some(args.pageserver_conninfo.clone()),
-                pageserver_connstring,
+                pageserver_connstring: Some(pageserver_connstring),
                safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
                safekeeper_connstrings,
                storage_auth_token: args.auth_token.clone(),
                remote_extensions,
                pgbouncer_settings: None,
-                shard_stripe_size: args.pageserver_conninfo.stripe_size, // redundant with pageserver_connection_info.stripe_size
+                shard_stripe_size: Some(args.shard_stripe_size),
                local_proxy_config: None,
                reconfigure_concurrency: self.reconfigure_concurrency,
                drop_subscriptions_before_start: self.drop_subscriptions_before_start,
@@ -913,10 +870,6 @@ impl Endpoint {
            cmd.arg("--dev");
        }

-        if let Some(privileged_role_name) = self.privileged_role_name.clone() {
-            cmd.args(["--privileged-role-name", &privileged_role_name]);
-        }
-
        let child = cmd.spawn()?;
        // set up a scopeguard to kill & wait for the child in case we panic or bail below
        let child = scopeguard::guard(child, |mut child| {
@@ -1029,7 +982,8 @@ impl Endpoint {

    pub async fn reconfigure(
        &self,
-        pageserver_conninfo: Option<&PageserverConnectionInfo>,
+        pageservers: Option<Vec<(PageserverProtocol, Host, u16)>>,
+        stripe_size: Option<ShardStripeSize>,
        safekeepers: Option<Vec<NodeId>>,
        safekeeper_generation: Option<SafekeeperGeneration>,
    ) -> Result<()> {
@@ -1044,15 +998,15 @@ impl Endpoint {
        let postgresql_conf = self.read_postgresql_conf()?;
        spec.cluster.postgresql_conf = Some(postgresql_conf);

-        if let Some(pageserver_conninfo) = pageserver_conninfo {
-            // If pageservers are provided, we need to ensure that they are not empty.
-            // This is a requirement for the compute_ctl configuration.
-            anyhow::ensure!(
-                !pageserver_conninfo.shards.is_empty(),
-                "no pageservers provided"
-            );
-            spec.pageserver_connection_info = Some(pageserver_conninfo.clone());
-            spec.shard_stripe_size = pageserver_conninfo.stripe_size;
+        // If pageservers are not specified, don't change them.
+        if let Some(pageservers) = pageservers {
+            anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
+
+            spec.pageserver_connstring = Some(Self::build_pageserver_connstr(&pageservers));
+            // Only overwrite the stripe size when the caller supplied a new one.
+            if let Some(stripe_size) = stripe_size {
+                spec.shard_stripe_size = Some(stripe_size.0 as usize);
+            }
         }

        // If safekeepers are not specified, don't change them.
@@ -1101,9 +1055,11 @@ impl Endpoint {

    pub async fn reconfigure_pageservers(
        &self,
-        pageservers: &PageserverConnectionInfo,
+        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        stripe_size: Option<ShardStripeSize>,
    ) -> Result<()> {
-        self.reconfigure(Some(pageservers), None, None).await
+        self.reconfigure(Some(pageservers), stripe_size, None, None)
+            .await
    }

    pub async fn reconfigure_safekeepers(
@@ -1111,7 +1067,7 @@ impl Endpoint {
        safekeepers: Vec<NodeId>,
        generation: SafekeeperGeneration,
    ) -> Result<()> {
-        self.reconfigure(None, Some(safekeepers), Some(generation))
+        self.reconfigure(None, None, Some(safekeepers), Some(generation))
            .await
    }

@@ -1167,68 +1123,3 @@ impl Endpoint {
        )
    }
 }
-
-pub fn pageserver_conf_to_shard_conn_info(
-    conf: &crate::local_env::PageServerConf,
-) -> Result<PageserverShardConnectionInfo> {
-    let libpq_url = {
-        let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
-        let port = port.unwrap_or(5432);
-        Some(format!("postgres://no_user@{host}:{port}"))
-    };
-    let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
-        let (host, port) = parse_host_port(grpc_addr)?;
-        let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
-        Some(format!("grpc://no_user@{host}:{port}"))
-    } else {
-        None
-    };
-    Ok(PageserverShardConnectionInfo {
-        id: Some(conf.id.to_string()),
-        libpq_url,
-        grpc_url,
-    })
-}
-
-pub fn tenant_locate_response_to_conn_info(
-    response: &pageserver_api::controller_api::TenantLocateResponse,
-) -> Result<PageserverConnectionInfo> {
-    let mut shards = HashMap::new();
-    for shard in response.shards.iter() {
-        tracing::info!("parsing {}", shard.listen_pg_addr);
-        let libpq_url = {
-            let host = &shard.listen_pg_addr;
-            let port = shard.listen_pg_port;
-            Some(format!("postgres://no_user@{host}:{port}"))
-        };
-        let grpc_url = if let Some(grpc_addr) = &shard.listen_grpc_addr {
-            let host = grpc_addr;
-            let port = shard.listen_grpc_port.expect("no gRPC port");
-            Some(format!("grpc://no_user@{host}:{port}"))
-        } else {
-            None
-        };
-
-        let shard_info = PageserverShardInfo {
-            pageservers: vec![PageserverShardConnectionInfo {
-                id: Some(shard.node_id.to_string()),
-                libpq_url,
-                grpc_url,
-            }],
-        };
-
-        shards.insert(shard.shard_id.to_index(), shard_info);
-    }
-
-    let stripe_size = if response.shard_params.count.is_unsharded() {
-        None
-    } else {
-        Some(response.shard_params.stripe_size.0)
-    };
-    Ok(PageserverConnectionInfo {
-        shard_count: response.shard_params.count,
-        stripe_size,
-        shards,
-        prefer_protocol: PageserverProtocol::default(),
-    })
-}
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -76,12 +76,6 @@ enum Command {
    NodeStartDelete {
        #[arg(long)]
        node_id: NodeId,
-        /// When `force` is true, skip waiting for shards to prewarm during migration.
-        /// This can significantly speed up node deletion since prewarming all shards
-        /// can take considerable time, but may result in slower initial access to
-        /// migrated shards until they warm up naturally.
-        #[arg(long)]
-        force: bool,
    },
    /// Cancel deletion of the specified pageserver and wait for `timeout`
    /// for the operation to be canceled. May be retried.
@@ -958,14 +952,13 @@ async fn main() -> anyhow::Result<()> {
                .dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
                .await?;
        }
-        Command::NodeStartDelete { node_id, force } => {
-            let query = if force {
-                format!("control/v1/node/{node_id}/delete?force=true")
-            } else {
-                format!("control/v1/node/{node_id}/delete")
-            };
+        Command::NodeStartDelete { node_id } => {
            storcon_client
-                .dispatch::<(), ()>(Method::PUT, query, None)
+                .dispatch::<(), ()>(
+                    Method::PUT,
+                    format!("control/v1/node/{node_id}/delete"),
+                    None,
+                )
                .await?;
            println!("Delete started for {node_id}");
        }
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -46,33 +46,16 @@ pub struct ExtensionInstallResponse {
    pub version: ExtVersion,
 }

-/// Status of the LFC prewarm process. The same state machine is reused for
-/// both autoprewarm (prewarm after compute/Postgres start using the previously
-/// stored LFC state) and explicit prewarming via API.
 #[derive(Serialize, Default, Debug, Clone, PartialEq)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcPrewarmState {
-    /// Default value when compute boots up.
    #[default]
    NotPrewarmed,
-    /// Prewarming thread is active and loading pages into LFC.
    Prewarming,
-    /// We found requested LFC state in the endpoint storage and
-    /// completed prewarming successfully.
    Completed,
-    /// Unexpected error happened during prewarming. Note, `Not Found 404`
-    /// response from the endpoint storage is explicitly excluded here
-    /// because it can normally happen on the first compute start,
-    /// since LFC state is not available yet.
-    Failed { error: String },
-    /// We tried to fetch the corresponding LFC state from the endpoint storage,
-    /// but received `Not Found 404`. This should normally happen only during the
-    /// first endpoint start after creation with `autoprewarm: true`.
-    ///
-    /// During the orchestrated prewarm via API, when a caller explicitly
-    /// provides the LFC state key to prewarm from, it's the caller responsibility
-    /// to handle this status as an error state in this case.
-    Skipped,
+    Failed {
+        error: String,
+    },
 }

 impl Display for LfcPrewarmState {
@@ -81,7 +64,6 @@ impl Display for LfcPrewarmState {
            LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
            LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
            LfcPrewarmState::Completed => f.write_str("Completed"),
-            LfcPrewarmState::Skipped => f.write_str("Skipped"),
            LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
        }
    }
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -14,7 +14,6 @@ use serde::{Deserialize, Serialize};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
-use utils::shard::{ShardCount, ShardIndex};

 use crate::responses::TlsConfig;

@@ -106,17 +105,6 @@ pub struct ComputeSpec {
    // updated to fill these fields, we can make these non optional.
    pub tenant_id: Option<TenantId>,
    pub timeline_id: Option<TimelineId>,
-
-    /// Pageserver information can be passed in three different ways:
-    /// 1. Here in `pageserver_connection_info`
-    /// 2. In the `pageserver_connstring` field.
-    /// 3. in `cluster.settings`.
-    ///
-    /// The goal is to use method 1. everywhere. But for backwards-compatibility with old
-    /// versions of the control plane, `compute_ctl` will check 2. and 3. if the
-    /// `pageserver_connection_info` field is missing.
-    pub pageserver_connection_info: Option<PageserverConnectionInfo>,
-
    pub pageserver_connstring: Option<String>,

    // More neon ids that we expose to the compute_ctl
@@ -153,7 +141,7 @@ pub struct ComputeSpec {

    // Stripe size for pageserver sharding, in pages
    #[serde(default)]
-    pub shard_stripe_size: Option<u32>,
+    pub shard_stripe_size: Option<usize>,

    /// Local Proxy configuration used for JWT authentication
    #[serde(default)]
@@ -226,32 +214,6 @@ pub enum ComputeFeature {
    UnknownFeature,
 }

-#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
-pub struct PageserverConnectionInfo {
-    /// NB: 0 for unsharded tenants, 1 for sharded tenants with 1 shard, following storage
-    pub shard_count: ShardCount,
-
-    /// INVARIANT: null if shard_count is 0, otherwise non-null and immutable
-    pub stripe_size: Option<u32>,
-
-    pub shards: HashMap<ShardIndex, PageserverShardInfo>,
-
-    #[serde(default)]
-    pub prefer_protocol: PageserverProtocol,
-}
-
-#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
-pub struct PageserverShardInfo {
-    pub pageservers: Vec<PageserverShardConnectionInfo>,
-}
-
-#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
-pub struct PageserverShardConnectionInfo {
-    pub id: Option<String>,
-    pub libpq_url: Option<String>,
-    pub grpc_url: Option<String>,
-}
-
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct RemoteExtSpec {
    pub public_extensions: Option<Vec<String>>,
@@ -369,12 +331,6 @@ impl ComputeMode {
    }
 }

-impl Display for ComputeMode {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.to_type_str())
-    }
-}
-
 /// Log level for audit logging
 #[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
 pub enum ComputeAudit {
@@ -485,15 +441,13 @@ pub struct JwksSettings {
    pub jwt_audience: Option<String>,
 }

-/// Protocol used to connect to a Pageserver.
-#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+/// Protocol used to connect to a Pageserver. Parsed from the connstring scheme.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
 pub enum PageserverProtocol {
    /// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.
    #[default]
-    #[serde(rename = "libpq")]
    Libpq,
    /// A newer, gRPC-based protocol. Uses grpc:// scheme.
-    #[serde(rename = "grpc")]
    Grpc,
 }

--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -4,14 +4,12 @@
 //! a default registry.
 #![deny(clippy::undocumented_unsafe_blocks)]

-use std::sync::RwLock;
-
 use measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels};
 use measured::metric::counter::CounterState;
 use measured::metric::gauge::GaugeState;
 use measured::metric::group::Encoding;
 use measured::metric::name::{MetricName, MetricNameEncoder};
-use measured::metric::{MetricEncoding, MetricFamilyEncoding, MetricType};
+use measured::metric::{MetricEncoding, MetricFamilyEncoding};
 use measured::{FixedCardinalityLabel, LabelGroup, MetricGroup};
 use once_cell::sync::Lazy;
 use prometheus::Registry;
@@ -118,52 +116,12 @@ pub fn pow2_buckets(start: usize, end: usize) -> Vec<f64> {
        .collect()
 }

-pub struct InfoMetric<L: LabelGroup, M: MetricType = GaugeState> {
-    label: RwLock<L>,
-    metric: M,
-}
-
-impl<L: LabelGroup> InfoMetric<L> {
-    pub fn new(label: L) -> Self {
-        Self::with_metric(label, GaugeState::new(1))
-    }
-}
-
-impl<L: LabelGroup, M: MetricType<Metadata = ()>> InfoMetric<L, M> {
-    pub fn with_metric(label: L, metric: M) -> Self {
-        Self {
-            label: RwLock::new(label),
-            metric,
-        }
-    }
-
-    pub fn set_label(&self, label: L) {
-        *self.label.write().unwrap() = label;
-    }
-}
-
-impl<L, M, E> MetricFamilyEncoding<E> for InfoMetric<L, M>
-where
-    L: LabelGroup,
-    M: MetricEncoding<E, Metadata = ()>,
-    E: Encoding,
-{
-    fn collect_family_into(
-        &self,
-        name: impl measured::metric::name::MetricNameEncoder,
-        enc: &mut E,
-    ) -> Result<(), E::Err> {
-        M::write_type(&name, enc)?;
-        self.metric
-            .collect_into(&(), &*self.label.read().unwrap(), name, enc)
-    }
-}
-
 pub struct BuildInfo {
    pub revision: &'static str,
    pub build_tag: &'static str,
 }

+// todo: allow label group without the set
 impl LabelGroup for BuildInfo {
    fn visit_values(&self, v: &mut impl LabelGroupVisitor) {
        const REVISION: &LabelName = LabelName::from_str("revision");
@@ -173,6 +131,24 @@ impl LabelGroup for BuildInfo {
    }
 }

+impl<T: Encoding> MetricFamilyEncoding<T> for BuildInfo
+where
+    GaugeState: MetricEncoding<T>,
+{
+    fn collect_family_into(
+        &self,
+        name: impl measured::metric::name::MetricNameEncoder,
+        enc: &mut T,
+    ) -> Result<(), T::Err> {
+        enc.write_help(&name, "Build/version information")?;
+        GaugeState::write_type(&name, enc)?;
+        GaugeState {
+            count: std::sync::atomic::AtomicI64::new(1),
+        }
+        .collect_into(&(), self, name, enc)
+    }
+}
+
 #[derive(MetricGroup)]
 #[metric(new(build_info: BuildInfo))]
 pub struct NeonMetrics {
@@ -189,8 +165,8 @@ pub struct NeonMetrics {
 #[derive(MetricGroup)]
 #[metric(new(build_info: BuildInfo))]
 pub struct LibMetrics {
-    #[metric(init = InfoMetric::new(build_info))]
-    build_info: InfoMetric<BuildInfo>,
+    #[metric(init = build_info)]
+    build_info: BuildInfo,

    #[metric(flatten)]
    rusage: Rusage,
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -8,13 +8,6 @@ license.workspace = true
 thiserror.workspace = true
 nix.workspace=true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
-libc.workspace = true
-lock_api.workspace = true
-rustc-hash.workspace = true

 [target.'cfg(target_os = "macos")'.dependencies]
 tempfile = "3.14.0"
-
-[dev-dependencies]
-rand = "0.9"
-rand_distr = "0.5.1"
--- a/libs/neon-shmem/benches/hmap_resize.rs
+++ b/libs/neon-shmem/benches/hmap_resize.rs
@@ -1,330 +0,0 @@
-use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
-use neon_shmem::hash::HashMapAccess;
-use neon_shmem::hash::HashMapInit;
-use neon_shmem::hash::entry::Entry;
-use rand::distr::{Distribution, StandardUniform};
-use rand::prelude::*;
-use std::default::Default;
-use std::hash::BuildHasher;
-
-// Taken from bindings to C code
-
-#[derive(Clone, Debug, Hash, Eq, PartialEq)]
-#[repr(C)]
-pub struct FileCacheKey {
-    pub _spc_id: u32,
-    pub _db_id: u32,
-    pub _rel_number: u32,
-    pub _fork_num: u32,
-    pub _block_num: u32,
-}
-
-impl Distribution<FileCacheKey> for StandardUniform {
-    // questionable, but doesn't need to be good randomness
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
-        FileCacheKey {
-            _spc_id: rng.random(),
-            _db_id: rng.random(),
-            _rel_number: rng.random(),
-            _fork_num: rng.random(),
-            _block_num: rng.random(),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-#[repr(C)]
-pub struct FileCacheEntry {
-    pub _offset: u32,
-    pub _access_count: u32,
-    pub _prev: *mut FileCacheEntry,
-    pub _next: *mut FileCacheEntry,
-    pub _state: [u32; 8],
-}
-
-impl FileCacheEntry {
-    fn dummy() -> Self {
-        Self {
-            _offset: 0,
-            _access_count: 0,
-            _prev: std::ptr::null_mut(),
-            _next: std::ptr::null_mut(),
-            _state: [0; 8],
-        }
-    }
-}
-
-// Utilities for applying operations.
-
-#[derive(Clone, Debug)]
-struct TestOp<K, V>(K, Option<V>);
-
-fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
-    op: TestOp<K, V>,
-    map: &mut HashMapAccess<K, V, S>,
-) {
-    let entry = map.entry(op.0);
-
-    match op.1 {
-        Some(new) => match entry {
-            Entry::Occupied(mut e) => Some(e.insert(new)),
-            Entry::Vacant(e) => {
-                _ = e.insert(new).unwrap();
-                None
-            }
-        },
-        None => match entry {
-            Entry::Occupied(e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
-        },
-    };
-}
-
-// Hash utilities
-
-struct SeaRandomState {
-    k1: u64,
-    k2: u64,
-    k3: u64,
-    k4: u64,
-}
-
-impl std::hash::BuildHasher for SeaRandomState {
-    type Hasher = seahash::SeaHasher;
-
-    fn build_hasher(&self) -> Self::Hasher {
-        seahash::SeaHasher::with_seeds(self.k1, self.k2, self.k3, self.k4)
-    }
-}
-
-impl SeaRandomState {
-    fn new() -> Self {
-        let mut rng = rand::rng();
-        Self {
-            k1: rng.random(),
-            k2: rng.random(),
-            k3: rng.random(),
-            k4: rng.random(),
-        }
-    }
-}
-
-fn small_benchs(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Small maps");
-    group.sample_size(10);
-
-    group.bench_function("small_rehash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2).attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_xxhash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(twox_hash::xxhash64::RandomState::default())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_ahash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(ahash::RandomState::default())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_seahash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(SeaRandomState::new())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.finish();
-}
-
-fn real_benchs(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Realistic workloads");
-    group.sample_size(10);
-    group.bench_function("real_bulk_insert", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut rng = rand::rng();
-        b.iter_batched(
-            || HashMapInit::new_resizeable(size, size * 2).attach_writer(),
-            |writer| {
-                for _ in 0..ideal_filled {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    let entry = writer.entry(key);
-                    std::hint::black_box(match entry {
-                        Entry::Occupied(mut e) => {
-                            e.insert(val);
-                        }
-                        Entry::Vacant(e) => {
-                            _ = e.insert(val).unwrap();
-                        }
-                    })
-                }
-            },
-            BatchSize::SmallInput,
-        )
-    });
-
-    group.bench_function("real_rehash", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("real_rehash_hashbrown", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut writer = hashbrown::raw::RawTable::new();
-        let mut rng = rand::rng();
-        let hasher = rustc_hash::FxBuildHasher::default();
-        unsafe {
-            writer
-                .resize(
-                    size,
-                    |(k, _)| hasher.hash_one(&k),
-                    hashbrown::raw::Fallibility::Infallible,
-                )
-                .unwrap();
-        }
-        while writer.len() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
-                hasher.hash_one(&k)
-            });
-        }
-        b.iter(|| unsafe {
-            writer.table.rehash_in_place(
-                &|table, index| {
-                    hasher.hash_one(
-                        &table
-                            .bucket::<(FileCacheKey, FileCacheEntry)>(index)
-                            .as_ref()
-                            .0,
-                    )
-                },
-                std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
-                if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
-                    Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
-                } else {
-                    None
-                },
-            )
-        });
-    });
-
-    for elems in [2, 4, 8, 16, 32, 64, 96, 112] {
-        group.bench_with_input(
-            BenchmarkId::new("real_rehash_varied", elems),
-            &elems,
-            |b, &size| {
-                let ideal_filled = size * 1_000_000;
-                let size = 125_000_000;
-                let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
-                let mut rng = rand::rng();
-                while writer.get_num_buckets_in_use() < ideal_filled as usize {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    apply_op(TestOp(key, Some(val)), &mut writer);
-                }
-                b.iter(|| writer.shuffle());
-            },
-        );
-        group.bench_with_input(
-            BenchmarkId::new("real_rehash_varied_hashbrown", elems),
-            &elems,
-            |b, &size| {
-                let ideal_filled = size * 1_000_000;
-                let size = 125_000_000;
-                let mut writer = hashbrown::raw::RawTable::new();
-                let mut rng = rand::rng();
-                let hasher = rustc_hash::FxBuildHasher::default();
-                unsafe {
-                    writer
-                        .resize(
-                            size,
-                            |(k, _)| hasher.hash_one(&k),
-                            hashbrown::raw::Fallibility::Infallible,
-                        )
-                        .unwrap();
-                }
-                while writer.len() < ideal_filled as usize {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
-                        hasher.hash_one(&k)
-                    });
-                }
-                b.iter(|| unsafe {
-                    writer.table.rehash_in_place(
-                        &|table, index| {
-                            hasher.hash_one(
-                                &table
-                                    .bucket::<(FileCacheKey, FileCacheEntry)>(index)
-                                    .as_ref()
-                                    .0,
-                            )
-                        },
-                        std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
-                        if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
-                            Some(|ptr| {
-                                std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry))
-                            })
-                        } else {
-                            None
-                        },
-                    )
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-criterion_group!(benches, small_benchs, real_benchs);
-criterion_main!(benches);
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -1,614 +0,0 @@
-//! Resizable hash table implementation on top of byte-level storage (either a [`ShmemHandle`] or a fixed byte array).
-//!
-//! This hash table has two major components: the bucket array and the dictionary. Each bucket within the
-//! bucket array contains a `Option<(K, V)>` and an index of another bucket. In this way there is both an
-//! implicit freelist within the bucket array (`None` buckets point to other `None` entries) and various hash
-//! chains within the bucket array (a Some bucket will point to other Some buckets that had the same hash).
-//!
-//! Buckets are never moved unless they are within a region that is being shrunk, and so the actual hash-
-//! dependent component is done with the dictionary. When a new key is inserted into the map, a position
-//! within the dictionary is decided based on its hash, the data is inserted into an empty bucket based
-//! off of the freelist, and then the index of said bucket is placed in the dictionary.
-//!
-//! This map is resizable (if initialized on top of a [`ShmemHandle`]). Both growing and shrinking happen
-//! in-place and are at a high level achieved by expanding/reducing the bucket array and rebuilding the
-//! dictionary by rehashing all keys.
-//!
-//! Concurrency is managed very simply: the entire map is guarded by one shared-memory RwLock.
-
-use std::fmt::Debug;
-use std::hash::{BuildHasher, Hash};
-use std::mem::MaybeUninit;
-
-use crate::shmem::ShmemHandle;
-use crate::{shmem, sync::*};
-
-mod core;
-pub mod entry;
-
-#[cfg(test)]
-mod tests;
-
-use core::{Bucket, CoreHashMap, INVALID_POS};
-use entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};
-
-use thiserror::Error;
-
-/// Error type for a hashmap shrink operation.
-#[derive(Error, Debug)]
-pub enum HashMapShrinkError {
-    /// There was an error encountered while resizing the memory area.
-    #[error("shmem resize failed: {0}")]
-    ResizeError(shmem::Error),
-    /// Occupied entries in to-be-shrunk space were encountered beginning at the given index.
-    #[error("occupied entry in deallocated space found at {0}")]
-    RemainingEntries(usize),
-}
-
-/// This represents a hash table that (possibly) lives in shared memory.
-/// If a new process is launched with fork(), the child process inherits
-/// this struct.
-#[must_use]
-pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
-    shmem_handle: Option<ShmemHandle>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-    shared_size: usize,
-    hasher: S,
-    num_buckets: u32,
-}
-
-impl<'a, K, V, S> Debug for HashMapInit<'a, K, V, S>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("HashMapInit")
-            .field("shmem_handle", &self.shmem_handle)
-            .field("shared_ptr", &self.shared_ptr)
-            .field("shared_size", &self.shared_size)
-            // .field("hasher", &self.hasher)
-            .field("num_buckets", &self.num_buckets)
-            .finish()
-    }
-}
-
-/// This is a per-process handle to a hash table that (possibly) lives in shared memory.
-/// If a child process is launched with fork(), the child process should
-/// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().
-///
-/// XXX: We're not making use of it at the moment, but this struct could
-/// hold process-local information in the future.
-pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
-    shmem_handle: Option<ShmemHandle>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-    hasher: S,
-}
-
-unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
-unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}
-
-impl<'a, K, V, S> Debug for HashMapAccess<'a, K, V, S>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("HashMapAccess")
-            .field("shmem_handle", &self.shmem_handle)
-            .field("shared_ptr", &self.shared_ptr)
-            // .field("hasher", &self.hasher)
-            .finish()
-    }
-}
-
-impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
-    /// Change the 'hasher' used by the hash table.
-    ///
-    /// NOTE: This must be called right after creating the hash table,
-    /// before inserting any entries and before calling attach_writer/reader.
-    /// Otherwise different accessors could be using different hash function,
-    /// with confusing results.
-    pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
-        HashMapInit {
-            hasher,
-            shmem_handle: self.shmem_handle,
-            shared_ptr: self.shared_ptr,
-            shared_size: self.shared_size,
-            num_buckets: self.num_buckets,
-        }
-    }
-
-    /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
-    pub fn estimate_size(num_buckets: u32) -> usize {
-        // add some margin to cover alignment etc.
-        CoreHashMap::<K, V>::estimate_size(num_buckets) + size_of::<HashMapShared<K, V>>() + 1000
-    }
-
-    fn new(
-        num_buckets: u32,
-        shmem_handle: Option<ShmemHandle>,
-        area_ptr: *mut u8,
-        area_size: usize,
-        hasher: S,
-    ) -> Self {
-        let mut ptr: *mut u8 = area_ptr;
-        let end_ptr: *mut u8 = unsafe { ptr.add(area_size) };
-
-        // carve out area for the One Big Lock (TM) and the HashMapShared.
-        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<libc::pthread_rwlock_t>())) };
-        let raw_lock_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<libc::pthread_rwlock_t>()) };
-        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
-        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
-        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };
-
-        // carve out the buckets
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
-        let buckets_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * num_buckets as usize) };
-
-        // use remaining space for the dictionary
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
-        assert!(ptr.addr() < end_ptr.addr());
-        let dictionary_ptr = ptr;
-        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
-        assert!(dictionary_size > 0);
-
-        let buckets =
-            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) };
-        let dictionary = unsafe {
-            std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
-        };
-
-        let hashmap = CoreHashMap::new(buckets, dictionary);
-        unsafe {
-            let lock = RwLock::from_raw(PthreadRwLock::new(raw_lock_ptr.cast()), hashmap);
-            std::ptr::write(shared_ptr, lock);
-        }
-
-        Self {
-            num_buckets,
-            shmem_handle,
-            shared_ptr,
-            shared_size: area_size,
-            hasher,
-        }
-    }
-
-    /// Attach to a hash table for writing.
-    pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
-        HashMapAccess {
-            shmem_handle: self.shmem_handle,
-            shared_ptr: self.shared_ptr,
-            hasher: self.hasher,
-        }
-    }
-
-    /// Initialize a table for reading. Currently identical to [`HashMapInit::attach_writer`].
-    ///
-    /// This is a holdover from a previous implementation and is being kept around for
-    /// backwards compatibility reasons.
-    pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> {
-        self.attach_writer()
-    }
-}
-
-/// Hash table data that is actually stored in the shared memory area.
-///
-/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
-/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
-/// area as follows:
-///
-/// [`libc::pthread_rwlock_t`]
-/// [`HashMapShared`]
-/// buckets
-/// dictionary
-///
-/// In between the above parts, there can be padding bytes to align the parts correctly.
-type HashMapShared<'a, K, V> = RwLock<CoreHashMap<'a, K, V>>;
-
-impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
-where
-    K: Clone + Hash + Eq,
-{
-    /// Place the hash table within a user-supplied fixed memory area.
-    pub fn with_fixed(num_buckets: u32, area: &'a mut [MaybeUninit<u8>]) -> Self {
-        Self::new(
-            num_buckets,
-            None,
-            area.as_mut_ptr().cast(),
-            area.len(),
-            rustc_hash::FxBuildHasher,
-        )
-    }
-
-    /// Place a new hash map in the given shared memory area
-    ///
-    /// # Panics
-    /// Will panic on failure to resize area to expected map size.
-    pub fn with_shmem(num_buckets: u32, shmem: ShmemHandle) -> Self {
-        let size = Self::estimate_size(num_buckets);
-        shmem
-            .set_size(size)
-            .expect("could not resize shared memory area");
-        let ptr = shmem.data_ptr.as_ptr().cast();
-        Self::new(
-            num_buckets,
-            Some(shmem),
-            ptr,
-            size,
-            rustc_hash::FxBuildHasher,
-        )
-    }
-
-    /// Make a resizable hash map within a new shared memory area with the given name.
-    pub fn new_resizeable_named(num_buckets: u32, max_buckets: u32, name: &str) -> Self {
-        let size = Self::estimate_size(num_buckets);
-        let max_size = Self::estimate_size(max_buckets);
-        let shmem =
-            ShmemHandle::new(name, size, max_size).expect("failed to make shared memory area");
-        let ptr = shmem.data_ptr.as_ptr().cast();
-
-        Self::new(
-            num_buckets,
-            Some(shmem),
-            ptr,
-            size,
-            rustc_hash::FxBuildHasher,
-        )
-    }
-
-    /// Make a resizable hash map within a new anonymous shared memory area.
-    pub fn new_resizeable(num_buckets: u32, max_buckets: u32) -> Self {
-        use std::sync::atomic::{AtomicUsize, Ordering};
-        static COUNTER: AtomicUsize = AtomicUsize::new(0);
-        let val = COUNTER.fetch_add(1, Ordering::Relaxed);
-        let name = format!("neon_shmem_hmap{val}");
-        Self::new_resizeable_named(num_buckets, max_buckets, &name)
-    }
-}
-
-impl<'a, K, V, S: BuildHasher> HashMapAccess<'a, K, V, S>
-where
-    K: Clone + Hash + Eq,
-{
-    /// Hash a key using the map's hasher.
-    #[inline]
-    fn get_hash_value(&self, key: &K) -> u64 {
-        self.hasher.hash_one(key)
-    }
-
-    fn entry_with_hash(&self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
-        let mut map = unsafe { self.shared_ptr.as_ref() }.unwrap().write();
-        let dict_pos = hash as usize % map.dictionary.len();
-        let first = map.dictionary[dict_pos];
-        if first == INVALID_POS {
-            // no existing entry
-            return Entry::Vacant(VacantEntry {
-                map,
-                key,
-                dict_pos: dict_pos as u32,
-            });
-        }
-
-        let mut prev_pos = PrevPos::First(dict_pos as u32);
-        let mut next = first;
-        loop {
-            let bucket = &mut map.buckets[next as usize];
-            let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
-            if *bucket_key == key {
-                // found existing entry
-                return Entry::Occupied(OccupiedEntry {
-                    map,
-                    _key: key,
-                    prev_pos,
-                    bucket_pos: next,
-                });
-            }
-
-            if bucket.next == INVALID_POS {
-                // No existing entry
-                return Entry::Vacant(VacantEntry {
-                    map,
-                    key,
-                    dict_pos: dict_pos as u32,
-                });
-            }
-            prev_pos = PrevPos::Chained(next);
-            next = bucket.next;
-        }
-    }
-
-    /// Get a reference to the corresponding value for a key.
-    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {
-        let hash = self.get_hash_value(key);
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        RwLockReadGuard::try_map(map, |m| m.get_with_hash(key, hash)).ok()
-    }
-
-    /// Get a reference to the entry containing a key.
-    ///
-    /// NB: This takes a write lock as there's no way to distinguish whether the intention
-    /// is to use the entry for reading or for writing in advance.
-    pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
-        let hash = self.get_hash_value(&key);
-        self.entry_with_hash(key, hash)
-    }
-
-    /// Remove a key given its hash. Returns the associated value if it existed.
-    pub fn remove(&self, key: &K) -> Option<V> {
-        let hash = self.get_hash_value(key);
-        match self.entry_with_hash(key.clone(), hash) {
-            Entry::Occupied(e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
-        }
-    }
-
-    /// Insert/update a key. Returns the previous associated value if it existed.
-    ///
-    /// # Errors
-    /// Will return [`core::FullError`] if there is no more space left in the map.
-    pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {
-        let hash = self.get_hash_value(&key);
-        match self.entry_with_hash(key.clone(), hash) {
-            Entry::Occupied(mut e) => Ok(Some(e.insert(value))),
-            Entry::Vacant(e) => {
-                _ = e.insert(value)?;
-                Ok(None)
-            }
-        }
-    }
-
-    /// Optionally return the entry for a bucket at a given index if it exists.
-    ///
-    /// Has more overhead than one would intuitively expect: performs both a clone of the key
-    /// due to the [`OccupiedEntry`] type owning the key and also a hash of the key in order
-    /// to enable repairing the hash chain if the entry is removed.
-    pub fn entry_at_bucket(&self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        if pos >= map.buckets.len() {
-            return None;
-        }
-
-        let entry = map.buckets[pos].inner.as_ref();
-        match entry {
-            Some((key, _)) => Some(OccupiedEntry {
-                _key: key.clone(),
-                bucket_pos: pos as u32,
-                prev_pos: entry::PrevPos::Unknown(self.get_hash_value(key)),
-                map,
-            }),
-            _ => None,
-        }
-    }
-
-    /// Returns the number of buckets in the table.
-    pub fn get_num_buckets(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        map.get_num_buckets()
-    }
-
-    /// Return the key and value stored in bucket with given index. This can be used to
-    /// iterate through the hash map.
-    // TODO: An Iterator might be nicer. The communicator's clock algorithm needs to
-    // _slowly_ iterate through all buckets with its clock hand,  without holding a lock.
-    // If we switch to an Iterator, it must not hold the lock.
-    pub fn get_at_bucket(&self, pos: usize) -> Option<ValueReadGuard<(K, V)>> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        if pos >= map.buckets.len() {
-            return None;
-        }
-        RwLockReadGuard::try_map(map, |m| m.buckets[pos].inner.as_ref()).ok()
-    }
-
-    /// Returns the index of the bucket a given value corresponds to.
-    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-
-        let origin = map.buckets.as_ptr();
-        let idx = (val_ptr as usize - origin as usize) / size_of::<Bucket<K, V>>();
-        assert!(idx < map.buckets.len());
-
-        idx
-    }
-
-    /// Returns the number of occupied buckets in the table.
-    pub fn get_num_buckets_in_use(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap().read();
-        map.buckets_in_use as usize
-    }
-
-    /// Clears all entries in a table. Does not reset any shrinking operations.
-    pub fn clear(&self) {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        map.clear();
-    }
-
-    /// Perform an in-place rehash of some region (0..`rehash_buckets`) of the table and reset
-    /// the `buckets` and `dictionary` slices to be as long as `num_buckets`. Resets the freelist
-    /// in the process.
-    fn rehash_dict(
-        &self,
-        inner: &mut CoreHashMap<'a, K, V>,
-        buckets_ptr: *mut core::Bucket<K, V>,
-        end_ptr: *mut u8,
-        num_buckets: u32,
-        rehash_buckets: u32,
-    ) {
-        inner.free_head = INVALID_POS;
-
-        let buckets;
-        let dictionary;
-        unsafe {
-            let buckets_end_ptr = buckets_ptr.add(num_buckets as usize);
-            let dictionary_ptr: *mut u32 = buckets_end_ptr
-                .byte_add(buckets_end_ptr.align_offset(align_of::<u32>()))
-                .cast();
-            let dictionary_size: usize =
-                end_ptr.byte_offset_from(buckets_end_ptr) as usize / size_of::<u32>();
-
-            buckets = std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize);
-            dictionary = std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size);
-        }
-        for e in dictionary.iter_mut() {
-            *e = INVALID_POS;
-        }
-
-        for (i, bucket) in buckets.iter_mut().enumerate().take(rehash_buckets as usize) {
-            if bucket.inner.is_none() {
-                bucket.next = inner.free_head;
-                inner.free_head = i as u32;
-                continue;
-            }
-
-            let hash = self.hasher.hash_one(&bucket.inner.as_ref().unwrap().0);
-            let pos: usize = (hash % dictionary.len() as u64) as usize;
-            bucket.next = dictionary[pos];
-            dictionary[pos] = i as u32;
-        }
-
-        inner.dictionary = dictionary;
-        inner.buckets = buckets;
-    }
-
-    /// Rehash the map without growing or shrinking.
-    pub fn shuffle(&self) {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        let num_buckets = map.get_num_buckets() as u32;
-        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
-        let end_ptr: *mut u8 = unsafe { self.shared_ptr.byte_add(size_bytes).cast() };
-        let buckets_ptr = map.buckets.as_mut_ptr();
-        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
-    }
-
-    /// Grow the number of buckets within the table.
-    ///
-    /// 1. Grows the underlying shared memory area
-    /// 2. Initializes new buckets and overwrites the current dictionary
-    /// 3. Rehashes the dictionary
-    ///
-    /// # Panics
-    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`].
-    ///
-    /// # Errors
-    /// Returns an [`shmem::Error`] if any errors occur resizing the memory region.
-    pub fn grow(&self, num_buckets: u32) -> Result<(), shmem::Error> {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        let old_num_buckets = map.buckets.len() as u32;
-
-        assert!(
-            num_buckets >= old_num_buckets,
-            "grow called with a smaller number of buckets"
-        );
-        if num_buckets == old_num_buckets {
-            return Ok(());
-        }
-        let shmem_handle = self
-            .shmem_handle
-            .as_ref()
-            .expect("grow called on a fixed-size hash table");
-
-        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
-        shmem_handle.set_size(size_bytes)?;
-        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
-
-        // Initialize new buckets. The new buckets are linked to the free list.
-        // NB: This overwrites the dictionary!
-        let buckets_ptr = map.buckets.as_mut_ptr();
-        unsafe {
-            for i in old_num_buckets..num_buckets {
-                let bucket = buckets_ptr.add(i as usize);
-                bucket.write(core::Bucket {
-                    next: if i < num_buckets - 1 {
-                        i + 1
-                    } else {
-                        map.free_head
-                    },
-                    inner: None,
-                });
-            }
-        }
-
-        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, old_num_buckets);
-        map.free_head = old_num_buckets;
-
-        Ok(())
-    }
-
-    /// Begin a shrink, limiting all new allocations to be in buckets with index below `num_buckets`.
-    ///
-    /// # Panics
-    /// Panics if called on a map initialized with [`HashMapInit::with_fixed`] or if `num_buckets` is
-    /// greater than the number of buckets in the map.
-    pub fn begin_shrink(&self, num_buckets: u32) {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        assert!(
-            num_buckets <= map.get_num_buckets() as u32,
-            "shrink called with a larger number of buckets"
-        );
-        _ = self
-            .shmem_handle
-            .as_ref()
-            .expect("shrink called on a fixed-size hash table");
-        map.alloc_limit = num_buckets;
-    }
-
-    /// If a shrink operation is underway, returns the target size of the map. Otherwise, returns None.
-    pub fn shrink_goal(&self) -> Option<usize> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap().read();
-        let goal = map.alloc_limit;
-        if goal == INVALID_POS {
-            None
-        } else {
-            Some(goal as usize)
-        }
-    }
-
-    /// Complete a shrink after caller has evicted entries, removing the unused buckets and rehashing.
-    ///
-    /// # Panics
-    /// The following cases result in a panic:
-    /// - Calling this function on a map initialized with [`HashMapInit::with_fixed`].
-    /// - Calling this function on a map when no shrink operation is in progress.
-    pub fn finish_shrink(&self) -> Result<(), HashMapShrinkError> {
-        let mut map = unsafe { self.shared_ptr.as_mut() }.unwrap().write();
-        assert!(
-            map.alloc_limit != INVALID_POS,
-            "called finish_shrink when no shrink is in progress"
-        );
-
-        let num_buckets = map.alloc_limit;
-
-        if map.get_num_buckets() == num_buckets as usize {
-            return Ok(());
-        }
-
-        assert!(
-            map.buckets_in_use <= num_buckets,
-            "called finish_shrink before enough entries were removed"
-        );
-
-        for i in (num_buckets as usize)..map.buckets.len() {
-            if map.buckets[i].inner.is_some() {
-                return Err(HashMapShrinkError::RemainingEntries(i));
-            }
-        }
-
-        let shmem_handle = self
-            .shmem_handle
-            .as_ref()
-            .expect("shrink called on a fixed-size hash table");
-
-        let size_bytes = HashMapInit::<K, V, S>::estimate_size(num_buckets);
-        if let Err(e) = shmem_handle.set_size(size_bytes) {
-            return Err(HashMapShrinkError::ResizeError(e));
-        }
-        let end_ptr: *mut u8 = unsafe { shmem_handle.data_ptr.as_ptr().add(size_bytes) };
-        let buckets_ptr = map.buckets.as_mut_ptr();
-        self.rehash_dict(&mut map, buckets_ptr, end_ptr, num_buckets, num_buckets);
-        map.alloc_limit = INVALID_POS;
-
-        Ok(())
-    }
-}
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -1,204 +0,0 @@
-//! Simple hash table with chaining.
-
-use std::fmt::Debug;
-use std::hash::Hash;
-use std::mem::MaybeUninit;
-
-use crate::hash::entry::*;
-
-/// Invalid position within the map (either within the dictionary or bucket array).
-pub(crate) const INVALID_POS: u32 = u32::MAX;
-
-/// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
-/// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
-pub(crate) struct Bucket<K, V> {
-    /// Index of next bucket in the chain.
-    pub(crate) next: u32,
-    /// Key-value pair contained within bucket.
-    pub(crate) inner: Option<(K, V)>,
-}
-
-impl<K, V> Debug for Bucket<K, V>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("Bucket")
-            .field("next", &self.next)
-            .field("inner", &self.inner)
-            .finish()
-    }
-}
-
-/// Core hash table implementation.
-pub(crate) struct CoreHashMap<'a, K, V> {
-    /// Dictionary used to map hashes to bucket indices.
-    pub(crate) dictionary: &'a mut [u32],
-    /// Buckets containing key-value pairs.
-    pub(crate) buckets: &'a mut [Bucket<K, V>],
-    /// Head of the freelist.
-    pub(crate) free_head: u32,
-    /// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
-    pub(crate) alloc_limit: u32,
-    /// The number of currently occupied buckets.
-    pub(crate) buckets_in_use: u32,
-}
-
-impl<'a, K, V> Debug for CoreHashMap<'a, K, V>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("CoreHashMap")
-            .field("dictionary", &self.dictionary)
-            .field("buckets", &self.buckets)
-            .field("free_head", &self.free_head)
-            .field("alloc_limit", &self.alloc_limit)
-            .field("buckets_in_use", &self.buckets_in_use)
-            .finish()
-    }
-}
-
-/// Error for when there are no empty buckets left but one is needed.
-#[derive(Debug, PartialEq)]
-pub struct FullError;
-
-impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
-    const FILL_FACTOR: f32 = 0.60;
-
-    /// Estimate the size of data contained within the the hash map.
-    pub fn estimate_size(num_buckets: u32) -> usize {
-        let mut size = 0;
-
-        // buckets
-        size += size_of::<Bucket<K, V>>() * num_buckets as usize;
-
-        // dictionary
-        size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
-            as usize;
-
-        size
-    }
-
-    pub fn new(
-        buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
-        dictionary: &'a mut [MaybeUninit<u32>],
-    ) -> Self {
-        // Initialize the buckets
-        for i in 0..buckets.len() {
-            buckets[i].write(Bucket {
-                next: if i < buckets.len() - 1 {
-                    i as u32 + 1
-                } else {
-                    INVALID_POS
-                },
-                inner: None,
-            });
-        }
-
-        // Initialize the dictionary
-        for e in dictionary.iter_mut() {
-            e.write(INVALID_POS);
-        }
-
-        // TODO: use std::slice::assume_init_mut() once it stabilizes
-        let buckets =
-            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
-        let dictionary = unsafe {
-            std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
-        };
-
-        Self {
-            dictionary,
-            buckets,
-            free_head: 0,
-            buckets_in_use: 0,
-            alloc_limit: INVALID_POS,
-        }
-    }
-
-    /// Get the value associated with a key (if it exists) given its hash.
-    pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
-        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
-        loop {
-            if next == INVALID_POS {
-                return None;
-            }
-
-            let bucket = &self.buckets[next as usize];
-            let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
-            if bucket_key == key {
-                return Some(bucket_value);
-            }
-            next = bucket.next;
-        }
-    }
-
-    /// Get number of buckets in map.
-    pub fn get_num_buckets(&self) -> usize {
-        self.buckets.len()
-    }
-
-    /// Clears all entries from the hashmap.
-    ///
-    /// Does not reset any allocation limits, but does clear any entries beyond them.
-    pub fn clear(&mut self) {
-        for i in 0..self.buckets.len() {
-            self.buckets[i] = Bucket {
-                next: if i < self.buckets.len() - 1 {
-                    i as u32 + 1
-                } else {
-                    INVALID_POS
-                },
-                inner: None,
-            }
-        }
-        for i in 0..self.dictionary.len() {
-            self.dictionary[i] = INVALID_POS;
-        }
-
-        self.free_head = 0;
-        self.buckets_in_use = 0;
-    }
-
-    /// Find the position of an unused bucket via the freelist and initialize it.
-    pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
-        let mut pos = self.free_head;
-
-        // Find the first bucket we're *allowed* to use.
-        let mut prev = PrevPos::First(self.free_head);
-        while pos != INVALID_POS && pos >= self.alloc_limit {
-            let bucket = &mut self.buckets[pos as usize];
-            prev = PrevPos::Chained(pos);
-            pos = bucket.next;
-        }
-        if pos == INVALID_POS {
-            return Err(FullError);
-        }
-
-        // Repair the freelist.
-        match prev {
-            PrevPos::First(_) => {
-                let next_pos = self.buckets[pos as usize].next;
-                self.free_head = next_pos;
-            }
-            PrevPos::Chained(p) => {
-                if p != INVALID_POS {
-                    let next_pos = self.buckets[pos as usize].next;
-                    self.buckets[p as usize].next = next_pos;
-                }
-            }
-            _ => unreachable!(),
-        }
-
-        // Initialize the bucket.
-        let bucket = &mut self.buckets[pos as usize];
-        self.buckets_in_use += 1;
-        bucket.next = INVALID_POS;
-        bucket.inner = Some((key, value));
-
-        Ok(pos)
-    }
-}
--- a/libs/neon-shmem/src/hash/entry.rs
+++ b/libs/neon-shmem/src/hash/entry.rs
@@ -1,130 +0,0 @@
-//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.
-
-use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
-use crate::sync::{RwLockWriteGuard, ValueWriteGuard};
-
-use std::hash::Hash;
-use std::mem;
-
-pub enum Entry<'a, 'b, K, V> {
-    Occupied(OccupiedEntry<'a, 'b, K, V>),
-    Vacant(VacantEntry<'a, 'b, K, V>),
-}
-
-/// Enum representing the previous position within a chain.
-#[derive(Clone, Copy)]
-pub(crate) enum PrevPos {
-    /// Starting index within the dictionary.  
-    First(u32),
-    /// Regular index within the buckets.
-    Chained(u32),
-    /// Unknown - e.g. the associated entry was retrieved by index instead of chain.
-    Unknown(u64),
-}
-
-pub struct OccupiedEntry<'a, 'b, K, V> {
-    /// Mutable reference to the map containing this entry.
-    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
-    /// The key of the occupied entry
-    pub(crate) _key: K,
-    /// The index of the previous entry in the chain.
-    pub(crate) prev_pos: PrevPos,
-    /// The position of the bucket in the [`CoreHashMap`] bucket array.
-    pub(crate) bucket_pos: u32,
-}
-
-impl<K, V> OccupiedEntry<'_, '_, K, V> {
-    pub fn get(&self) -> &V {
-        &self.map.buckets[self.bucket_pos as usize]
-            .inner
-            .as_ref()
-            .unwrap()
-            .1
-    }
-
-    pub fn get_mut(&mut self) -> &mut V {
-        &mut self.map.buckets[self.bucket_pos as usize]
-            .inner
-            .as_mut()
-            .unwrap()
-            .1
-    }
-
-    /// Inserts a value into the entry, replacing (and returning) the existing value.
-    pub fn insert(&mut self, value: V) -> V {
-        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
-        // This assumes inner is Some, which it must be for an OccupiedEntry
-        mem::replace(&mut bucket.inner.as_mut().unwrap().1, value)
-    }
-
-    /// Removes the entry from the hash map, returning the value originally stored within it.
-    ///
-    /// This may result in multiple bucket accesses if the entry was obtained by index as the
-    /// previous chain entry needs to be discovered in this case.
-    pub fn remove(mut self) -> V {
-        // If this bucket was queried by index, go ahead and follow its chain from the start.
-        let prev = if let PrevPos::Unknown(hash) = self.prev_pos {
-            let dict_idx = hash as usize % self.map.dictionary.len();
-            let mut prev = PrevPos::First(dict_idx as u32);
-            let mut curr = self.map.dictionary[dict_idx];
-            while curr != self.bucket_pos {
-                assert!(curr != INVALID_POS);
-                prev = PrevPos::Chained(curr);
-                curr = self.map.buckets[curr as usize].next;
-            }
-            prev
-        } else {
-            self.prev_pos
-        };
-
-        // CoreHashMap::remove returns Option<(K, V)>. We know it's Some for an OccupiedEntry.
-        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
-
-        // unlink it from the chain
-        match prev {
-            PrevPos::First(dict_pos) => {
-                self.map.dictionary[dict_pos as usize] = bucket.next;
-            }
-            PrevPos::Chained(bucket_pos) => {
-                self.map.buckets[bucket_pos as usize].next = bucket.next;
-            }
-            _ => unreachable!(),
-        }
-
-        // and add it to the freelist
-        let free = self.map.free_head;
-        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
-        let old_value = bucket.inner.take();
-        bucket.next = free;
-        self.map.free_head = self.bucket_pos;
-        self.map.buckets_in_use -= 1;
-
-        old_value.unwrap().1
-    }
-}
-
-/// An abstract view into a vacant entry within the map.
-pub struct VacantEntry<'a, 'b, K, V> {
-    /// Mutable reference to the map containing this entry.
-    pub(crate) map: RwLockWriteGuard<'b, CoreHashMap<'a, K, V>>,
-    /// The key to be inserted into this entry.
-    pub(crate) key: K,
-    /// The position within the dictionary corresponding to the key's hash.
-    pub(crate) dict_pos: u32,
-}
-
-impl<'b, K: Clone + Hash + Eq, V> VacantEntry<'_, 'b, K, V> {
-    /// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
-    ///
-    /// # Errors
-    /// Will return [`FullError`] if there are no unoccupied buckets in the map.
-    pub fn insert(mut self, value: V) -> Result<ValueWriteGuard<'b, V>, FullError> {
-        let pos = self.map.alloc_bucket(self.key, value)?;
-        self.map.buckets[pos as usize].next = self.map.dictionary[self.dict_pos as usize];
-        self.map.dictionary[self.dict_pos as usize] = pos;
-
-        Ok(RwLockWriteGuard::map(self.map, |m| {
-            &mut m.buckets[pos as usize].inner.as_mut().unwrap().1
-        }))
-    }
-}
--- a/libs/neon-shmem/src/hash/tests.rs
+++ b/libs/neon-shmem/src/hash/tests.rs
@@ -1,428 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::Debug;
-use std::mem::MaybeUninit;
-
-use crate::hash::Entry;
-use crate::hash::HashMapAccess;
-use crate::hash::HashMapInit;
-use crate::hash::core::FullError;
-
-use rand::seq::SliceRandom;
-use rand::{Rng, RngCore};
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_inserts")
-        .attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let res = w.entry((*k).into());
-        match res {
-            Entry::Occupied(mut e) => {
-                e.insert(idx);
-            }
-            Entry::Vacant(e) => {
-                let res = e.insert(idx);
-                assert!(res.is_ok());
-            }
-        };
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let x = w.get(&(*k).into());
-        let value = x.as_deref().copied();
-        assert_eq!(value, Some(idx));
-    }
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.contains(&key) {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op(
-    op: &TestOp,
-    map: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    let entry = map.entry(op.0);
-    let hash_existing = match op.1 {
-        Some(new) => match entry {
-            Entry::Occupied(mut e) => Some(e.insert(new)),
-            Entry::Vacant(e) => {
-                _ = e.insert(new).unwrap();
-                None
-            }
-        },
-        None => match entry {
-            Entry::Occupied(e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
-        },
-    };
-
-    assert_eq!(shadow_existing, hash_existing);
-}
-
-fn do_random_ops(
-    num_ops: usize,
-    size: u32,
-    del_prob: f64,
-    writer: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-    rng: &mut rand::rngs::ThreadRng,
-) {
-    for i in 0..num_ops {
-        let key: TestKey = ((rng.next_u32() % size) as u128).into();
-        let op = TestOp(
-            key,
-            if rng.random_bool(del_prob) {
-                Some(i)
-            } else {
-                None
-            },
-        );
-        apply_op(&op, writer, shadow);
-    }
-}
-
-fn do_deletes(
-    num_ops: usize,
-    writer: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    for _ in 0..num_ops {
-        let (k, _) = shadow.pop_first().unwrap();
-        writer.remove(&k);
-    }
-}
-
-fn do_shrink(
-    writer: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-    from: u32,
-    to: u32,
-) {
-    assert!(writer.shrink_goal().is_none());
-    writer.begin_shrink(to);
-    assert_eq!(writer.shrink_goal(), Some(to as usize));
-    for i in to..from {
-        if let Some(entry) = writer.entry_at_bucket(i as usize) {
-            shadow.remove(&entry._key);
-            entry.remove();
-        }
-    }
-    let old_usage = writer.get_num_buckets_in_use();
-    writer.finish_shrink().unwrap();
-    assert!(writer.shrink_goal().is_none());
-    assert_eq!(writer.get_num_buckets_in_use(), old_usage);
-}
-
-#[test]
-fn random_ops() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, "test_random")
-            .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let key: TestKey = (rng.sample(distribution) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &mut writer, &mut shadow);
-    }
-}
-
-#[test]
-fn test_shuffle() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_shuf")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-
-    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-    writer.shuffle();
-    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-}
-
-#[test]
-fn test_grow() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, "test_grow")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-
-    do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-    let old_usage = writer.get_num_buckets_in_use();
-    writer.grow(1500).unwrap();
-    assert_eq!(writer.get_num_buckets_in_use(), old_usage);
-    assert_eq!(writer.get_num_buckets(), 1500);
-    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-}
-
-#[test]
-fn test_clear() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    writer.clear();
-    assert_eq!(writer.get_num_buckets_in_use(), 0);
-    assert_eq!(writer.get_num_buckets(), 1500);
-    while let Some((key, _)) = shadow.pop_first() {
-        assert!(writer.get(&key).is_none());
-    }
-    do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    for i in 0..(1500 - writer.get_num_buckets_in_use()) {
-        writer.insert((1500 + i as u128).into(), 0).unwrap();
-    }
-    assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
-    writer.clear();
-    assert!(writer.insert(5000.into(), 0).is_ok());
-}
-
-#[test]
-fn test_idx_remove() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-    for _ in 0..100 {
-        let idx = (rng.next_u32() % 1500) as usize;
-        if let Some(e) = writer.entry_at_bucket(idx) {
-            shadow.remove(&e._key);
-            e.remove();
-        }
-    }
-    while let Some((key, val)) = shadow.pop_first() {
-        assert_eq!(*writer.get(&key).unwrap(), val);
-    }
-}
-
-#[test]
-fn test_idx_get() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-    do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-    for _ in 0..100 {
-        let idx = (rng.next_u32() % 1500) as usize;
-        if let Some(pair) = writer.get_at_bucket(idx) {
-            {
-                let v: *const usize = &pair.1;
-                assert_eq!(writer.get_bucket_for_value(v), idx);
-            }
-            {
-                let v: *const usize = &pair.1;
-                assert_eq!(writer.get_bucket_for_value(v), idx);
-            }
-        }
-    }
-}
-
-#[test]
-fn test_shrink() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
-        .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-
-    do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    do_shrink(&mut writer, &mut shadow, 1500, 1000);
-    assert_eq!(writer.get_num_buckets(), 1000);
-    do_deletes(500, &mut writer, &mut shadow);
-    do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
-    assert!(writer.get_num_buckets_in_use() <= 1000);
-}
-
-#[test]
-fn test_shrink_grow_seq() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
-            .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    let mut rng = rand::rng();
-
-    do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Shrinking to 750");
-    do_shrink(&mut writer, &mut shadow, 1000, 750);
-    do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Growing to 1500");
-    writer.grow(1500).unwrap();
-    do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Shrinking to 200");
-    while shadow.len() > 100 {
-        do_deletes(1, &mut writer, &mut shadow);
-    }
-    do_shrink(&mut writer, &mut shadow, 1500, 200);
-    do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-    eprintln!("Growing to 10k");
-    writer.grow(10000).unwrap();
-    do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
-}
-
-#[test]
-fn test_bucket_ops() {
-    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, "test_bucket_ops")
-        .attach_writer();
-    match writer.entry(1.into()) {
-        Entry::Occupied(mut e) => {
-            e.insert(2);
-        }
-        Entry::Vacant(e) => {
-            _ = e.insert(2).unwrap();
-        }
-    }
-    assert_eq!(writer.get_num_buckets_in_use(), 1);
-    assert_eq!(writer.get_num_buckets(), 1000);
-    assert_eq!(*writer.get(&1.into()).unwrap(), 2);
-    let pos = match writer.entry(1.into()) {
-        Entry::Occupied(e) => {
-            assert_eq!(e._key, 1.into());
-            e.bucket_pos as usize
-        }
-        Entry::Vacant(_) => {
-            panic!("Insert didn't affect entry");
-        }
-    };
-    assert_eq!(writer.entry_at_bucket(pos).unwrap()._key, 1.into());
-    assert_eq!(*writer.get_at_bucket(pos).unwrap(), (1.into(), 2));
-    {
-        let ptr: *const usize = &*writer.get(&1.into()).unwrap();
-        assert_eq!(writer.get_bucket_for_value(ptr), pos);
-    }
-    writer.remove(&1.into());
-    assert!(writer.get(&1.into()).is_none());
-}
-
-#[test]
-fn test_shrink_zero() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
-            .attach_writer();
-    writer.begin_shrink(0);
-    for i in 0..1500 {
-        writer.entry_at_bucket(i).map(|x| x.remove());
-    }
-    writer.finish_shrink().unwrap();
-    assert_eq!(writer.get_num_buckets_in_use(), 0);
-    let entry = writer.entry(1.into());
-    if let Entry::Vacant(v) = entry {
-        assert!(v.insert(2).is_err());
-    } else {
-        panic!("Somehow got non-vacant entry in empty map.")
-    }
-    writer.grow(50).unwrap();
-    let entry = writer.entry(1.into());
-    if let Entry::Vacant(v) = entry {
-        assert!(v.insert(2).is_ok());
-    } else {
-        panic!("Somehow got non-vacant entry in empty map.")
-    }
-    assert_eq!(writer.get_num_buckets_in_use(), 1);
-}
-
-#[test]
-#[should_panic]
-fn test_grow_oom() {
-    let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
-        .attach_writer();
-    writer.grow(20000).unwrap();
-}
-
-#[test]
-#[should_panic]
-fn test_shrink_bigger() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
-            .attach_writer();
-    writer.begin_shrink(2000);
-}
-
-#[test]
-#[should_panic]
-fn test_shrink_early_finish() {
-    let writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
-            .attach_writer();
-    writer.finish_shrink().unwrap();
-}
-
-#[test]
-#[should_panic]
-fn test_shrink_fixed_size() {
-    let mut area = [MaybeUninit::uninit(); 10000];
-    let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
-    let mut writer = init_struct.attach_writer();
-    writer.begin_shrink(1);
-}
--- a/libs/neon-shmem/src/lib.rs
+++ b/libs/neon-shmem/src/lib.rs
@@ -1,3 +1,418 @@
-pub mod hash;
-pub mod shmem;
-pub mod sync;
+//! Shared memory utilities for neon communicator
+
+use std::num::NonZeroUsize;
+use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
+use std::ptr::NonNull;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use nix::errno::Errno;
+use nix::sys::mman::MapFlags;
+use nix::sys::mman::ProtFlags;
+use nix::sys::mman::mmap as nix_mmap;
+use nix::sys::mman::munmap as nix_munmap;
+use nix::unistd::ftruncate as nix_ftruncate;
+
+/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
+/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
+/// specified at creation.
+///
+/// The area is backed by an anonymous file created with memfd_create(). The full address space for
+/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
+/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
+/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
+/// future.
+pub struct ShmemHandle {
+    /// memfd file descriptor
+    fd: OwnedFd,
+
+    max_size: usize,
+
+    // Pointer to the beginning of the shared memory area. The header is stored there.
+    shared_ptr: NonNull<SharedStruct>,
+
+    // Pointer to the beginning of the user data
+    pub data_ptr: NonNull<u8>,
+}
+
+/// This is stored at the beginning in the shared memory area.
+struct SharedStruct {
+    max_size: usize,
+
+    /// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
+    current_size: AtomicUsize,
+}
+
+const RESIZE_IN_PROGRESS: usize = 1 << 63;
+
+const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
+
+/// Error type returned by the ShmemHandle functions.
+#[derive(thiserror::Error, Debug)]
+#[error("{msg}: {errno}")]
+pub struct Error {
+    pub msg: String,
+    pub errno: Errno,
+}
+
+impl Error {
+    fn new(msg: &str, errno: Errno) -> Error {
+        Error {
+            msg: msg.to_string(),
+            errno,
+        }
+    }
+}
+
+impl ShmemHandle {
+    /// Create a new shared memory area. To communicate between processes, the processes need to be
+    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
+    ///
+    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
+    /// processes can continue using it, however.
+    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
+        // create the backing anonymous file.
+        let fd = create_backing_file(name)?;
+
+        Self::new_with_fd(fd, initial_size, max_size)
+    }
+
+    fn new_with_fd(
+        fd: OwnedFd,
+        initial_size: usize,
+        max_size: usize,
+    ) -> Result<ShmemHandle, Error> {
+        // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size
+        // is a little larger than this because of the SharedStruct header. Make the upper limit
+        // somewhat smaller than that, because with anything close to that, you'll run out of
+        // memory anyway.
+        if max_size >= 1 << 48 {
+            panic!("max size {max_size} too large");
+        }
+        if initial_size > max_size {
+            panic!("initial size {initial_size} larger than max size {max_size}");
+        }
+
+        // The actual initial / max size is the one given by the caller, plus the size of
+        // 'SharedStruct'.
+        let initial_size = HEADER_SIZE + initial_size;
+        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
+
+        // Reserve address space for it with mmap
+        //
+        // TODO: Use MAP_HUGETLB if possible
+        let start_ptr = unsafe {
+            nix_mmap(
+                None,
+                max_size,
+                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
+                MapFlags::MAP_SHARED,
+                &fd,
+                0,
+            )
+        }
+        .map_err(|e| Error::new("mmap failed", e))?;
+
+        // Reserve space for the initial size
+        enlarge_file(fd.as_fd(), initial_size as u64)?;
+
+        // Initialize the header
+        let shared: NonNull<SharedStruct> = start_ptr.cast();
+        unsafe {
+            shared.write(SharedStruct {
+                max_size: max_size.into(),
+                current_size: AtomicUsize::new(initial_size),
+            })
+        };
+
+        // The user data begins after the header
+        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
+
+        Ok(ShmemHandle {
+            fd,
+            max_size: max_size.into(),
+            shared_ptr: shared,
+            data_ptr,
+        })
+    }
+
+    /// Returns a reference to the header stored at the beginning of the mapping.
+    fn shared(&self) -> &SharedStruct {
+        unsafe { self.shared_ptr.as_ref() }
+    }
+
+    /// Resize the shared memory area to 'new_size' bytes of user data. Panics if 'new_size' is
+    /// larger than the 'max_size' specified when creating the area.
+    ///
+    /// Only one process/thread may resize at a time. A concurrent call is detected via the
+    /// RESIZE_IN_PROGRESS flag and returns an Error.
+    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
+        let new_size = new_size + HEADER_SIZE;
+        let shared = self.shared();
+
+        if new_size > self.max_size {
+            panic!(
+                "new size ({}) is greater than max size ({})",
+                new_size, self.max_size
+            );
+        }
+        assert_eq!(self.max_size, shared.max_size);
+
+        // Lock the area by setting the RESIZE_IN_PROGRESS bit in 'current_size'.
+        //
+        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
+        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
+        // since this is not performance-critical, better safe than sorry.
+        let mut old_size = shared.current_size.load(Ordering::Acquire);
+        loop {
+            if (old_size & RESIZE_IN_PROGRESS) != 0 {
+                return Err(Error::new(
+                    "concurrent resize detected",
+                    Errno::UnknownErrno,
+                ));
+            }
+            match shared.current_size.compare_exchange(
+                old_size,
+                new_size | RESIZE_IN_PROGRESS,
+                Ordering::Acquire,
+                Ordering::Relaxed,
+            ) {
+                Ok(_) => break,
+                Err(x) => old_size = x,
+            }
+        }
+
+        // Ok, we got the lock.
+        //
+        // NB: If anything goes wrong, we *must* clear the bit!
+        let result = {
+            use std::cmp::Ordering::{Equal, Greater, Less};
+            match new_size.cmp(&old_size) {
+                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
+                    Error::new("could not shrink shmem segment, ftruncate failed", e)
+                }),
+                Equal => Ok(()),
+                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
+            }
+        };
+
+        // Unlock: publish the new size on success, or restore the old size on failure.
+        shared.current_size.store(
+            if result.is_ok() { new_size } else { old_size },
+            Ordering::Release,
+        );
+
+        result
+    }
+
+    /// Returns the current user-visible size of the shared memory segment.
+    ///
+    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
+    /// responsibility not to access the area beyond the current size.
+    pub fn current_size(&self) -> usize {
+        let total_current_size =
+            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
+        total_current_size - HEADER_SIZE
+    }
+}
+
+impl Drop for ShmemHandle {
+    fn drop(&mut self) {
+        // SAFETY: The pointer was obtained from mmap() with the given size.
+        // We unmap the entire region.
+        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
+        // The fd is dropped automatically by OwnedFd.
+    }
+}
+
+/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
+/// anonymous in-memory file. On macos, fall back to a regular file. That's good enough for
+/// development and testing, but in production we want the file to stay in memory.
+///
+/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
+#[allow(unused_variables)]
+fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
+    #[cfg(not(target_os = "macos"))]
+    {
+        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
+            .map_err(|e| Error::new("memfd_create failed", e))
+    }
+    #[cfg(target_os = "macos")]
+    {
+        let file = tempfile::tempfile().map_err(|e| {
+            Error::new(
+                "could not create temporary file to back shmem area",
+                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
+            )
+        })?;
+        Ok(OwnedFd::from(file))
+    }
+}
+
+fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
+    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
+    // we don't get a segfault later when trying to actually use it.
+    #[cfg(not(target_os = "macos"))]
+    {
+        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
+            Error::new(
+                "could not grow shmem segment, posix_fallocate failed",
+                e,
+            )
+        })
+    }
+    // As a fallback on macos, which doesn't have posix_fallocate, use plain ftruncate
+    #[cfg(target_os = "macos")]
+    {
+        nix::unistd::ftruncate(fd, size as i64)
+            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use nix::unistd::ForkResult;
+    use std::ops::Range;
+
+    /// check that all bytes in given range have the expected value.
+    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
+        for i in range {
+            let b = unsafe { *(ptr.add(i)) };
+            assert_eq!(expected, b, "unexpected byte at offset {i}");
+        }
+    }
+
+    /// Write 'b' to all bytes in the given range
+    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
+        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
+    }
+
+    // simple single-process test of growing and shrinking
+    #[test]
+    fn test_shmem_resize() -> Result<(), Error> {
+        let max_size = 1024 * 1024;
+        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
+
+        assert_eq!(init_struct.current_size(), 0);
+
+        // Initial grow
+        let size1 = 10000;
+        init_struct.set_size(size1).unwrap();
+        assert_eq!(init_struct.current_size(), size1);
+
+        // Write some data
+        let data_ptr = init_struct.data_ptr.as_ptr();
+        write_range(data_ptr, 0xAA, 0..size1);
+        assert_range(data_ptr, 0xAA, 0..size1);
+
+        // Shrink
+        let size2 = 5000;
+        init_struct.set_size(size2).unwrap();
+        assert_eq!(init_struct.current_size(), size2);
+
+        // Grow again
+        let size3 = 20000;
+        init_struct.set_size(size3).unwrap();
+        assert_eq!(init_struct.current_size(), size3);
+
+        // Try to read it. The area that was shrunk and grown again should read as all zeros now
+        assert_range(data_ptr, 0xAA, 0..5000);
+        assert_range(data_ptr, 0, 5000..size1);
+
+        // Try to grow beyond max_size
+        //let size4 = max_size + 1;
+        //assert!(init_struct.set_size(size4).is_err());
+
+        // Dropping init_struct should unmap the memory
+        drop(init_struct);
+
+        Ok(())
+    }
+
+    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
+    /// but is stored in the shared memory area and works across processes. It's implemented by
+    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
+    struct SimpleBarrier {
+        num_procs: usize,
+        count: AtomicUsize,
+    }
+
+    impl SimpleBarrier {
+        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
+            unsafe {
+                *ptr = SimpleBarrier {
+                    num_procs,
+                    count: AtomicUsize::new(0),
+                }
+            }
+        }
+
+        pub fn wait(&self) {
+            let old = self.count.fetch_add(1, Ordering::Relaxed);
+
+            let generation = old / self.num_procs;
+
+            let mut current = old + 1;
+            while current < (generation + 1) * self.num_procs {
+                std::thread::sleep(std::time::Duration::from_millis(10));
+                current = self.count.load(Ordering::Relaxed);
+            }
+        }
+    }
+
+    #[test]
+    fn test_multi_process() {
+        // Initialize
+        let max_size = 1_000_000_000_000;
+        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
+        let ptr = init_struct.data_ptr.as_ptr();
+
+        // Store the SimpleBarrier in the first 1k of the area.
+        init_struct.set_size(10000).unwrap();
+        let barrier_ptr: *mut SimpleBarrier = unsafe {
+            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
+                .cast()
+        };
+        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
+        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
+
+        // Fork another test process. The code after this runs in both processes concurrently.
+        let fork_result = unsafe { nix::unistd::fork().unwrap() };
+
+        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
+        if fork_result.is_parent() {
+            write_range(ptr, 0xAA, 1000..2000);
+        } else {
+            write_range(ptr, 0xBB, 2000..3000);
+        }
+        barrier.wait();
+        // Verify the contents. (in both processes)
+        assert_range(ptr, 0xAA, 1000..2000);
+        assert_range(ptr, 0xBB, 2000..3000);
+
+        // Grow, from the child this time
+        let size = 10_000_000;
+        if !fork_result.is_parent() {
+            init_struct.set_size(size).unwrap();
+        }
+        barrier.wait();
+
+        // make some writes at the end
+        if fork_result.is_parent() {
+            write_range(ptr, 0xAA, (size - 10)..size);
+        } else {
+            write_range(ptr, 0xBB, (size - 20)..(size - 10));
+        }
+        barrier.wait();
+
+        // Verify the contents. (This runs in both processes)
+        assert_range(ptr, 0, (size - 1000)..(size - 20));
+        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
+        assert_range(ptr, 0xAA, (size - 10)..size);
+
+        if let ForkResult::Parent { child } = fork_result {
+            nix::sys::wait::waitpid(child, None).unwrap();
+        }
+    }
+}
--- a/libs/neon-shmem/src/shmem.rs
+++ b/libs/neon-shmem/src/shmem.rs
@@ -1,411 +0,0 @@
-//! Dynamically resizable contiguous chunk of shared memory
-
-use std::num::NonZeroUsize;
-use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use nix::errno::Errno;
-use nix::sys::mman::MapFlags;
-use nix::sys::mman::ProtFlags;
-use nix::sys::mman::mmap as nix_mmap;
-use nix::sys::mman::munmap as nix_munmap;
-use nix::unistd::ftruncate as nix_ftruncate;
-
-/// `ShmemHandle` represents a shared memory area that can be shared by processes over `fork()`.
-/// Unlike shared memory allocated by Postgres, this area is resizable, up to `max_size` that's
-/// specified at creation.
-///
-/// The area is backed by an anonymous file created with `memfd_create()`. The full address space for
-/// `max_size` is reserved up-front with `mmap()`, but whenever you call [`ShmemHandle::set_size`],
-/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
-/// will cause the file to be expanded, but we might use `mprotect()` etc. to enforce that in the
-/// future.
-#[derive(Debug)]
-pub struct ShmemHandle {
-    /// memfd file descriptor
-    fd: OwnedFd,
-
-    max_size: usize,
-
-    // Pointer to the beginning of the shared memory area. The header is stored there.
-    shared_ptr: NonNull<SharedStruct>,
-
-    // Pointer to the beginning of the user data
-    pub data_ptr: NonNull<u8>,
-}
-
-/// This is stored at the beginning in the shared memory area.
-#[derive(Debug)]
-struct SharedStruct {
-    max_size: usize,
-
-    /// Current size of the backing file. The high-order bit is used for the [`RESIZE_IN_PROGRESS`] flag.
-    current_size: AtomicUsize,
-}
-
-const RESIZE_IN_PROGRESS: usize = 1 << 63;
-
-const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
-
-/// Error type returned by the [`ShmemHandle`] functions.
-#[derive(thiserror::Error, Debug)]
-#[error("{msg}: {errno}")]
-pub struct Error {
-    pub msg: String,
-    pub errno: Errno,
-}
-
-impl Error {
-    fn new(msg: &str, errno: Errno) -> Self {
-        Self {
-            msg: msg.to_string(),
-            errno,
-        }
-    }
-}
-
-impl ShmemHandle {
-    /// Create a new shared memory area. To communicate between processes, the processes need to be
-    /// `fork()`'d after calling this, so that the `ShmemHandle` is inherited by all processes.
-    ///
-    /// If the `ShmemHandle` is dropped, the memory is unmapped from the current process. Other
-    /// processes can continue using it, however.
-    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<Self, Error> {
-        // create the backing anonymous file.
-        let fd = create_backing_file(name)?;
-
-        Self::new_with_fd(fd, initial_size, max_size)
-    }
-
-    fn new_with_fd(fd: OwnedFd, initial_size: usize, max_size: usize) -> Result<Self, Error> {
-        // We reserve the high-order bit for the `RESIZE_IN_PROGRESS` flag, and the actual size
-        // is a little larger than this because of the SharedStruct header. Make the upper limit
-        // somewhat smaller than that, because with anything close to that, you'll run out of
-        // memory anyway.
-        assert!(max_size < 1 << 48, "max size {max_size} too large");
-
-        assert!(
-            initial_size <= max_size,
-            "initial size {initial_size} larger than max size {max_size}"
-        );
-
-        // The actual initial / max size is the one given by the caller, plus the size of
-        // 'SharedStruct'.
-        let initial_size = HEADER_SIZE + initial_size;
-        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
-
-        // Reserve address space for it with mmap
-        //
-        // TODO: Use MAP_HUGETLB if possible
-        let start_ptr = unsafe {
-            nix_mmap(
-                None,
-                max_size,
-                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
-                MapFlags::MAP_SHARED,
-                &fd,
-                0,
-            )
-        }
-        .map_err(|e| Error::new("mmap failed", e))?;
-
-        // Reserve space for the initial size
-        enlarge_file(fd.as_fd(), initial_size as u64)?;
-
-        // Initialize the header
-        let shared: NonNull<SharedStruct> = start_ptr.cast();
-        unsafe {
-            shared.write(SharedStruct {
-                max_size: max_size.into(),
-                current_size: AtomicUsize::new(initial_size),
-            });
-        }
-
-        // The user data begins after the header
-        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
-
-        Ok(Self {
-            fd,
-            max_size: max_size.into(),
-            shared_ptr: shared,
-            data_ptr,
-        })
-    }
-
-    // return reference to the header
-    fn shared(&self) -> &SharedStruct {
-        unsafe { self.shared_ptr.as_ref() }
-    }
-
-    /// Resize the shared memory area. `new_size` must not be larger than the `max_size` specified
-    /// when creating the area.
-    ///
-    /// This may only be called from one process/thread concurrently. We detect that case
-    /// and return an [`shmem::Error`](Error).
-    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
-        let new_size = new_size + HEADER_SIZE;
-        let shared = self.shared();
-
-        assert!(
-            new_size <= self.max_size,
-            "new size ({new_size}) is greater than max size ({})",
-            self.max_size
-        );
-
-        assert_eq!(self.max_size, shared.max_size);
-
-        // Lock the area by setting the bit in `current_size`
-        //
-        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
-        // and the `posix_fallocate`/`ftruncate` call is surely a synchronization point anyway. But
-        // since this is not performance-critical, better safe than sorry.
-        let mut old_size = shared.current_size.load(Ordering::Acquire);
-        loop {
-            if (old_size & RESIZE_IN_PROGRESS) != 0 {
-                return Err(Error::new(
-                    "concurrent resize detected",
-                    Errno::UnknownErrno,
-                ));
-            }
-            match shared.current_size.compare_exchange(
-                old_size,
-                new_size,
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => break,
-                Err(x) => old_size = x,
-            }
-        }
-
-        // Ok, we got the lock.
-        //
-        // NB: If anything goes wrong, we *must* clear the bit!
-        let result = {
-            use std::cmp::Ordering::{Equal, Greater, Less};
-            match new_size.cmp(&old_size) {
-                Less => nix_ftruncate(&self.fd, new_size as i64)
-                    .map_err(|e| Error::new("could not shrink shmem segment, ftruncate failed", e)),
-                Equal => Ok(()),
-                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
-            }
-        };
-
-        // Unlock
-        shared.current_size.store(
-            if result.is_ok() { new_size } else { old_size },
-            Ordering::Release,
-        );
-
-        result
-    }
-
-    /// Returns the current user-visible size of the shared memory segment.
-    ///
-    /// NOTE: a concurrent [`ShmemHandle::set_size()`] call can change the size at any time.
-    /// It is the caller's responsibility not to access the area beyond the current size.
-    pub fn current_size(&self) -> usize {
-        let total_current_size =
-            self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
-        total_current_size - HEADER_SIZE
-    }
-}
-
-impl Drop for ShmemHandle {
-    fn drop(&mut self) {
-        // SAFETY: The pointer was obtained from mmap() with the given size.
-        // We unmap the entire region.
-        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
-        // The fd is dropped automatically by OwnedFd.
-    }
-}
-
-/// Create a "backing file" for the shared memory area. On Linux, use `memfd_create()`, to create an
-/// anonymous in-memory file. One macos, fall back to a regular file. That's good enough for
-/// development and testing, but in production we want the file to stay in memory.
-///
-/// Disable unused variables warnings because `name` is unused in the macos path.
-#[allow(unused_variables)]
-fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
-            .map_err(|e| Error::new("memfd_create failed", e))
-    }
-    #[cfg(target_os = "macos")]
-    {
-        let file = tempfile::tempfile().map_err(|e| {
-            Error::new(
-                "could not create temporary file to back shmem area",
-                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
-            )
-        })?;
-        Ok(OwnedFd::from(file))
-    }
-}
-
-fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
-    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
-    // we don't get a segfault later when trying to actually use it.
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::fcntl::posix_fallocate(fd, 0, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, posix_fallocate failed", e))
-    }
-    // As a fallback on macos, which doesn't have posix_fallocate, use plain 'fallocate'
-    #[cfg(target_os = "macos")]
-    {
-        nix::unistd::ftruncate(fd, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use nix::unistd::ForkResult;
-    use std::ops::Range;
-
-    /// check that all bytes in given range have the expected value.
-    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
-        for i in range {
-            let b = unsafe { *(ptr.add(i)) };
-            assert_eq!(expected, b, "unexpected byte at offset {i}");
-        }
-    }
-
-    /// Write 'b' to all bytes in the given range
-    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
-        unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
-    }
-
-    // simple single-process test of growing and shrinking
-    #[test]
-    fn test_shmem_resize() -> Result<(), Error> {
-        let max_size = 1024 * 1024;
-        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
-
-        assert_eq!(init_struct.current_size(), 0);
-
-        // Initial grow
-        let size1 = 10000;
-        init_struct.set_size(size1).unwrap();
-        assert_eq!(init_struct.current_size(), size1);
-
-        // Write some data
-        let data_ptr = init_struct.data_ptr.as_ptr();
-        write_range(data_ptr, 0xAA, 0..size1);
-        assert_range(data_ptr, 0xAA, 0..size1);
-
-        // Shrink
-        let size2 = 5000;
-        init_struct.set_size(size2).unwrap();
-        assert_eq!(init_struct.current_size(), size2);
-
-        // Grow again
-        let size3 = 20000;
-        init_struct.set_size(size3).unwrap();
-        assert_eq!(init_struct.current_size(), size3);
-
-        // Try to read it. The area that was shrunk and grown again should read as all zeros now
-        assert_range(data_ptr, 0xAA, 0..5000);
-        assert_range(data_ptr, 0, 5000..size1);
-
-        // Try to grow beyond max_size
-        //let size4 = max_size + 1;
-        //assert!(init_struct.set_size(size4).is_err());
-
-        // Dropping init_struct should unmap the memory
-        drop(init_struct);
-
-        Ok(())
-    }
-
-    /// This is used in tests to coordinate between test processes. It's like `std::sync::Barrier`,
-    /// but is stored in the shared memory area and works across processes. It's implemented by
-    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
-    struct SimpleBarrier {
-        num_procs: usize,
-        count: AtomicUsize,
-    }
-
-    impl SimpleBarrier {
-        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
-            unsafe {
-                *ptr = SimpleBarrier {
-                    num_procs,
-                    count: AtomicUsize::new(0),
-                }
-            }
-        }
-
-        pub fn wait(&self) {
-            let old = self.count.fetch_add(1, Ordering::Relaxed);
-
-            let generation = old / self.num_procs;
-
-            let mut current = old + 1;
-            while current < (generation + 1) * self.num_procs {
-                std::thread::sleep(std::time::Duration::from_millis(10));
-                current = self.count.load(Ordering::Relaxed);
-            }
-        }
-    }
-
-    #[test]
-    fn test_multi_process() {
-        // Initialize
-        let max_size = 1_000_000_000_000;
-        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
-        let ptr = init_struct.data_ptr.as_ptr();
-
-        // Store the SimpleBarrier in the first 1k of the area.
-        init_struct.set_size(10000).unwrap();
-        let barrier_ptr: *mut SimpleBarrier = unsafe {
-            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
-                .cast()
-        };
-        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
-        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
-
-        // Fork another test process. The code after this runs in both processes concurrently.
-        let fork_result = unsafe { nix::unistd::fork().unwrap() };
-
-        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, 1000..2000);
-        } else {
-            write_range(ptr, 0xBB, 2000..3000);
-        }
-        barrier.wait();
-        // Verify the contents. (in both processes)
-        assert_range(ptr, 0xAA, 1000..2000);
-        assert_range(ptr, 0xBB, 2000..3000);
-
-        // Grow, from the child this time
-        let size = 10_000_000;
-        if !fork_result.is_parent() {
-            init_struct.set_size(size).unwrap();
-        }
-        barrier.wait();
-
-        // make some writes at the end
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, (size - 10)..size);
-        } else {
-            write_range(ptr, 0xBB, (size - 20)..(size - 10));
-        }
-        barrier.wait();
-
-        // Verify the contents. (This runs in both processes)
-        assert_range(ptr, 0, (size - 1000)..(size - 20));
-        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
-        assert_range(ptr, 0xAA, (size - 10)..size);
-
-        if let ForkResult::Parent { child } = fork_result {
-            nix::sys::wait::waitpid(child, None).unwrap();
-        }
-    }
-}
--- a/libs/neon-shmem/src/sync.rs
+++ b/libs/neon-shmem/src/sync.rs
@@ -1,111 +0,0 @@
-//! Simple utilities akin to what's in [`std::sync`] but designed to work with shared memory.
-
-use std::mem::MaybeUninit;
-use std::ptr::NonNull;
-
-use nix::errno::Errno;
-
-pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
-pub type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
-pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
-pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
-pub type ValueWriteGuard<'a, T> = lock_api::MappedRwLockWriteGuard<'a, PthreadRwLock, T>;
-
-/// Shared memory read-write lock.
-pub struct PthreadRwLock(Option<NonNull<libc::pthread_rwlock_t>>);
-
-/// Simple macro that calls a function in the libc namespace and panics if return value is nonzero.
-macro_rules! libc_checked {
-    ($fn_name:ident ( $($arg:expr),* )) => {{
-        let res = libc::$fn_name($($arg),*);
-        if res != 0 {
-            panic!("{} failed with {}", stringify!($fn_name), Errno::from_raw(res));
-        }
-    }};
-}
-
-impl PthreadRwLock {
-    /// Creates a new `PthreadRwLock` on top of a pointer to a pthread rwlock.
-    ///
-    /// # Safety
-    /// `lock` must be non-null. Every unsafe operation will panic in the event of an error.
-    pub unsafe fn new(lock: *mut libc::pthread_rwlock_t) -> Self {
-        unsafe {
-            let mut attrs = MaybeUninit::uninit();
-            libc_checked!(pthread_rwlockattr_init(attrs.as_mut_ptr()));
-            libc_checked!(pthread_rwlockattr_setpshared(
-                attrs.as_mut_ptr(),
-                libc::PTHREAD_PROCESS_SHARED
-            ));
-            libc_checked!(pthread_rwlock_init(lock, attrs.as_mut_ptr()));
-            // Safety: POSIX specifies that "any function affecting the attributes
-            // object (including destruction) shall not affect any previously
-            // initialized read-write locks".
-            libc_checked!(pthread_rwlockattr_destroy(attrs.as_mut_ptr()));
-            Self(Some(NonNull::new_unchecked(lock)))
-        }
-    }
-
-    fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
-        match self.0 {
-            None => {
-                panic!("PthreadRwLock constructed badly - something likely used RawRwLock::INIT")
-            }
-            Some(x) => x,
-        }
-    }
-}
-
-unsafe impl lock_api::RawRwLock for PthreadRwLock {
-    type GuardMarker = lock_api::GuardSend;
-    const INIT: Self = Self(None);
-
-    fn try_lock_shared(&self) -> bool {
-        unsafe {
-            let res = libc::pthread_rwlock_tryrdlock(self.inner().as_ptr());
-            match res {
-                0 => true,
-                libc::EAGAIN => false,
-                _ => panic!(
-                    "pthread_rwlock_tryrdlock failed with {}",
-                    Errno::from_raw(res)
-                ),
-            }
-        }
-    }
-
-    fn try_lock_exclusive(&self) -> bool {
-        unsafe {
-            let res = libc::pthread_rwlock_trywrlock(self.inner().as_ptr());
-            match res {
-                0 => true,
-                libc::EAGAIN => false,
-                _ => panic!("try_wrlock failed with {}", Errno::from_raw(res)),
-            }
-        }
-    }
-
-    fn lock_shared(&self) {
-        unsafe {
-            libc_checked!(pthread_rwlock_rdlock(self.inner().as_ptr()));
-        }
-    }
-
-    fn lock_exclusive(&self) {
-        unsafe {
-            libc_checked!(pthread_rwlock_wrlock(self.inner().as_ptr()));
-        }
-    }
-
-    unsafe fn unlock_exclusive(&self) {
-        unsafe {
-            libc_checked!(pthread_rwlock_unlock(self.inner().as_ptr()));
-        }
-    }
-
-    unsafe fn unlock_shared(&self) {
-        unsafe {
-            libc_checked!(pthread_rwlock_unlock(self.inner().as_ptr()));
-        }
-    }
-}
--- a/libs/neonart/Cargo.toml
+++ b/libs/neonart/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "neonart"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-crossbeam-utils.workspace = true
-spin.workspace = true
-tracing.workspace = true
-
-[dev-dependencies]
-rand = "0.9.1"
-rand_distr = "0.5.1"
--- a/libs/neonart/src/algorithm.rs
+++ b/libs/neonart/src/algorithm.rs
@@ -1,599 +0,0 @@
-mod lock_and_version;
-pub(crate) mod node_ptr;
-mod node_ref;
-
-use std::vec::Vec;
-
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::algorithm::node_ptr::MAX_PREFIX_LEN;
-use crate::algorithm::node_ref::{NewNodeRef, NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
-use crate::allocator::OutOfMemoryError;
-
-use crate::TreeWriteGuard;
-use crate::UpdateAction;
-use crate::allocator::ArtAllocator;
-use crate::epoch::EpochPin;
-use crate::{Key, Value};
-
-pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
-
-#[derive(Debug)]
-pub enum ArtError {
-    ConcurrentUpdate, // need to retry
-    OutOfMemory,
-}
-
-impl From<ConcurrentUpdateError> for ArtError {
-    fn from(_: ConcurrentUpdateError) -> ArtError {
-        ArtError::ConcurrentUpdate
-    }
-}
-
-impl From<OutOfMemoryError> for ArtError {
-    fn from(_: OutOfMemoryError) -> ArtError {
-        ArtError::OutOfMemory
-    }
-}
-
-pub fn new_root<V: Value>(
-    allocator: &impl ArtAllocator<V>,
-) -> Result<RootPtr<V>, OutOfMemoryError> {
-    node_ptr::new_root(allocator)
-}
-
-pub(crate) fn search<'e, K: Key, V: Value>(
-    key: &K,
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<&'e V> {
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        if let Ok(result) = lookup_recurse(key.as_bytes(), root_ref, None, epoch_pin) {
-            break result;
-        }
-        // retry
-    }
-}
-
-pub(crate) fn iter_next<'e, V: Value>(
-    key: &[u8],
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<(Vec<u8>, &'e V)> {
-    loop {
-        let mut path = Vec::new();
-        let root_ref = NodeRef::from_root_ptr(root);
-
-        match next_recurse(key, &mut path, root_ref, epoch_pin) {
-            Ok(Some(v)) => {
-                assert_eq!(path.len(), key.len());
-                break Some((path, v));
-            }
-            Ok(None) => break None,
-            Err(ConcurrentUpdateError()) => {
-                // retry
-                continue;
-            }
-        }
-    }
-}
-
-pub(crate) fn update_fn<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &K,
-    value_fn: F,
-    root: RootPtr<V>,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), OutOfMemoryError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let value_fn_cell = std::cell::Cell::new(Some(value_fn));
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg);
-        let key_bytes = key.as_bytes();
-
-        match update_recurse(
-            key_bytes,
-            this_value_fn,
-            root_ref,
-            None,
-            None,
-            guard,
-            0,
-            key_bytes,
-        ) {
-            Ok(()) => break Ok(()),
-            Err(ArtError::ConcurrentUpdate) => {
-                continue; // retry
-            }
-            Err(ArtError::OutOfMemory) => break Err(OutOfMemoryError()),
-        }
-    }
-}
-
-// Error means you must retry.
-//
-// This corresponds to the 'lookupOpt' function in the paper
-#[allow(clippy::only_used_in_recursion)]
-fn lookup_recurse<'e, V: Value>(
-    key: &[u8],
-    node: NodeRef<'e, V>,
-    parent: Option<ReadLockedNodeRef<V>>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    if let Some(parent) = parent {
-        parent.read_unlock_or_restart()?;
-    }
-
-    // check if the prefix matches, may increment level
-    let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) {
-        prefix_len
-    } else {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    };
-
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), prefix_len);
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let key = &key[prefix_len..];
-
-    // find child (or leaf value)
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    match next_node {
-        None => Ok(None), // key not found
-        Some(child) => lookup_recurse(&key[1..], child, Some(rnode), epoch_pin),
-    }
-}
-
-#[allow(clippy::only_used_in_recursion)]
-fn next_recurse<'e, V: Value>(
-    min_key: &[u8],
-    path: &mut Vec<u8>,
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    let prefix = rnode.get_prefix();
-    if !prefix.is_empty() {
-        path.extend_from_slice(prefix);
-    }
-
-    use std::cmp::Ordering;
-    let comparison = path.as_slice().cmp(&min_key[0..path.len()]);
-    if comparison == Ordering::Less {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    }
-
-    if rnode.is_leaf() {
-        assert_eq!(path.len(), min_key.len());
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let mut min_key_byte = match comparison {
-        Ordering::Less => unreachable!(), // checked this above already
-        Ordering::Equal => min_key[path.len()],
-        Ordering::Greater => 0,
-    };
-
-    loop {
-        match rnode.find_next_child_or_restart(min_key_byte)? {
-            None => {
-                return Ok(None);
-            }
-            Some((key_byte, child_ref)) => {
-                let path_len = path.len();
-                path.push(key_byte);
-                let result = next_recurse(min_key, path, child_ref, epoch_pin)?;
-                if result.is_some() {
-                    return Ok(result);
-                }
-                if key_byte == u8::MAX {
-                    return Ok(None);
-                }
-                path.truncate(path_len);
-                min_key_byte = key_byte + 1;
-            }
-        }
-    }
-}
-
-// This corresponds to the 'insertOpt' function in the paper
-#[allow(clippy::only_used_in_recursion)]
-#[allow(clippy::too_many_arguments)]
-pub(crate) fn update_recurse<'e, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &[u8],
-    value_fn: F,
-    node: NodeRef<'e, V>,
-    rparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    rgrandparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    guard: &'_ mut TreeWriteGuard<'e, K, V, A>,
-    level: usize,
-    orig_key: &[u8],
-) -> Result<(), ArtError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let rnode = node.read_lock_or_restart()?;
-
-    let prefix_match_len = rnode.prefix_matches(key);
-    if prefix_match_len.is_none() {
-        let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        match value_fn(None) {
-            UpdateAction::Nothing => {}
-            UpdateAction::Insert(new_value) => {
-                insert_split_prefix(key, new_value, &mut wnode, &mut wparent, parent_key, guard)?;
-            }
-            UpdateAction::Remove => {
-                panic!("unexpected Remove action on insertion");
-            }
-        }
-        wnode.write_unlock();
-        wparent.write_unlock();
-        return Ok(());
-    }
-    let prefix_match_len = prefix_match_len.unwrap();
-    let key = &key[prefix_match_len..];
-    let level = level + prefix_match_len;
-
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), 0);
-        let (rparent, parent_key) = rparent.expect("root cannot be leaf");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        // safety: Now that we have acquired the write lock, we have exclusive access to the
-        // value. XXX: There might be concurrent reads though?
-        let value_mut = wnode.get_leaf_value_mut();
-
-        match value_fn(Some(value_mut)) {
-            UpdateAction::Nothing => {
-                wparent.write_unlock();
-                wnode.write_unlock();
-            }
-            UpdateAction::Insert(_) => panic!("cannot insert over existing value"),
-            UpdateAction::Remove => {
-                guard.remember_obsolete_node(wnode.as_ptr());
-                wparent.delete_child(parent_key);
-                wnode.write_unlock_obsolete();
-
-                if let Some(rgrandparent) = rgrandparent {
-                    // FIXME: Ignore concurrency error. It doesn't lead to
-                    // corruption, but it means we might leak something. Until
-                    // another update cleans it up.
-                    let _ = cleanup_parent(wparent, rgrandparent, guard);
-                }
-            }
-        }
-
-        return Ok(());
-    }
-
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    if next_node.is_none() {
-        if rnode.is_full() {
-            let (rparent, parent_key) = rparent.expect("root node cannot become full");
-            let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-            let wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-            match value_fn(None) {
-                UpdateAction::Nothing => {
-                    wnode.write_unlock();
-                    wparent.write_unlock();
-                }
-                UpdateAction::Insert(new_value) => {
-                    insert_and_grow(key, new_value, wnode, &mut wparent, parent_key, guard)?;
-                    wparent.write_unlock();
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-        } else {
-            let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-            if let Some((rparent, _)) = rparent {
-                rparent.read_unlock_or_restart()?;
-            }
-            match value_fn(None) {
-                UpdateAction::Nothing => {}
-                UpdateAction::Insert(new_value) => {
-                    insert_to_node(&mut wnode, key, new_value, guard)?;
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-            wnode.write_unlock();
-        }
-        Ok(())
-    } else {
-        let next_child = next_node.unwrap(); // checked above it's not None
-        if let Some((ref rparent, _)) = rparent {
-            rparent.check_or_restart()?;
-        }
-
-        // recurse to next level
-        update_recurse(
-            &key[1..],
-            value_fn,
-            next_child,
-            Some((rnode, key[0])),
-            rparent,
-            guard,
-            level + 1,
-            orig_key,
-        )
-    }
-}
-
-#[derive(Clone)]
-enum PathElement {
-    Prefix(Vec<u8>),
-    KeyByte(u8),
-}
-
-impl std::fmt::Debug for PathElement {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        match self {
-            PathElement::Prefix(prefix) => write!(fmt, "{prefix:?}"),
-            PathElement::KeyByte(key_byte) => write!(fmt, "{key_byte}"),
-        }
-    }
-}
-
-pub(crate) fn dump_tree<V: Value + std::fmt::Debug>(
-    root: RootPtr<V>,
-    epoch_pin: &'_ EpochPin,
-    dst: &mut dyn std::io::Write,
-) {
-    let root_ref = NodeRef::from_root_ptr(root);
-
-    let _ = dump_recurse(&[], root_ref, epoch_pin, 0, dst);
-}
-
-// TODO: return an Err if writeln!() returns error, instead of unwrapping
-#[allow(clippy::only_used_in_recursion)]
-fn dump_recurse<'e, V: Value + std::fmt::Debug>(
-    path: &[PathElement],
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-    level: usize,
-    dst: &mut dyn std::io::Write,
-) -> Result<(), ConcurrentUpdateError> {
-    let indent = str::repeat(" ", level);
-
-    let rnode = node.read_lock_or_restart()?;
-    let mut path = Vec::from(path);
-    let prefix = rnode.get_prefix();
-    if !prefix.is_empty() {
-        path.push(PathElement::Prefix(Vec::from(prefix)));
-    }
-
-    if rnode.is_leaf() {
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let val = unsafe { vptr.as_ref().unwrap() };
-        writeln!(dst, "{indent} {path:?}: {val:?}").unwrap();
-        return Ok(());
-    }
-
-    for key_byte in 0..=u8::MAX {
-        match rnode.find_child_or_restart(key_byte)? {
-            None => continue,
-            Some(child_ref) => {
-                let rchild = child_ref.read_lock_or_restart()?;
-                writeln!(
-                    dst,
-                    "{} {:?}, {}: prefix {:?}",
-                    indent,
-                    &path,
-                    key_byte,
-                    rchild.get_prefix()
-                )
-                .unwrap();
-
-                let mut child_path = path.clone();
-                child_path.push(PathElement::KeyByte(key_byte));
-
-                dump_recurse(&child_path, child_ref, epoch_pin, level + 1, dst)?;
-            }
-        }
-    }
-
-    Ok(())
-}
-
-///```text
-///        [fooba]r -> value
-///
-/// [foo]b -> [a]r  -> value
-///      e -> [ls]e -> value
-///```
-fn insert_split_prefix<K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    node: &mut WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key: u8,
-    guard: &'_ TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let old_node = node;
-    let old_prefix = old_node.get_prefix();
-    let common_prefix_len = common_prefix(key, old_prefix);
-
-    // Allocate a node for the new value.
-    let new_value_node = allocate_node_for_value(
-        &key[common_prefix_len + 1..],
-        value,
-        guard.tree_writer.allocator,
-    )?;
-
-    // Allocate a new internal node with the common prefix
-    // FIXME: deallocate 'new_value_node' on OOM
-    let mut prefix_node =
-        node_ref::new_internal(&key[..common_prefix_len], guard.tree_writer.allocator)?;
-
-    // Add the old node and the new nodes to the new internal node
-    prefix_node.insert_old_child(old_prefix[common_prefix_len], old_node);
-    prefix_node.insert_new_child(key[common_prefix_len], new_value_node);
-
-    // Modify the prefix of the old child in place
-    old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1);
-
-    // replace the pointer in the parent
-    parent.replace_child(parent_key, prefix_node.into_ptr());
-
-    Ok(())
-}
-
-fn insert_to_node<K: Key, V: Value, A: ArtAllocator<V>>(
-    wnode: &mut WriteLockedNodeRef<V>,
-    key: &[u8],
-    value: V,
-    guard: &'_ TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    wnode.insert_child(key[0], value_child.into_ptr());
-    Ok(())
-}
-
-// On entry: 'parent' and 'node' are locked
-fn insert_and_grow<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    wnode: WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key_byte: u8,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let mut bigger_node = wnode.grow(guard.tree_writer.allocator)?;
-
-    // FIXME: deallocate 'bigger_node' on OOM
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    bigger_node.insert_new_child(key[0], value_child);
-
-    // Replace the pointer in the parent
-    parent.replace_child(parent_key_byte, bigger_node.into_ptr());
-
-    guard.remember_obsolete_node(wnode.as_ptr());
-    wnode.write_unlock_obsolete();
-
-    Ok(())
-}
-
-fn cleanup_parent<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    wparent: WriteLockedNodeRef<V>,
-    rgrandparent: (ReadLockedNodeRef<V>, u8),
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let (rgrandparent, grandparent_key_byte) = rgrandparent;
-
-    // If the parent becomes completely empty after the deletion, remove the parent from the
-    // grandparent. (This case is possible because we reserve only 8 bytes for the prefix.)
-    // TODO: not implemented.
-
-    // If the parent has only one child, replace the parent with the remaining child. (This is not
-    // possible if the child's prefix field cannot absorb the parent's)
-    if wparent.num_children() == 1 {
-        // Try to lock the remaining child. This can fail if the child is updated
-        // concurrently.
-        let (key_byte, remaining_child) = wparent.find_remaining_child();
-
-        let mut wremaining_child = remaining_child.write_lock_or_restart()?;
-
-        if 1 + wremaining_child.get_prefix().len() + wparent.get_prefix().len() <= MAX_PREFIX_LEN {
-            let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-
-            // Ok, we have locked the leaf, the parent, the grandparent, and the parent's only
-            // remaining leaf. Proceed with the updates.
-
-            // Update the prefix on the remaining leaf
-            wremaining_child.prepend_prefix(wparent.get_prefix(), key_byte);
-
-            // Replace the pointer in the grandparent to point directly to the remaining leaf
-            wgrandparent.replace_child(grandparent_key_byte, wremaining_child.as_ptr());
-
-            // Mark the parent as deleted.
-            guard.remember_obsolete_node(wparent.as_ptr());
-            wparent.write_unlock_obsolete();
-            return Ok(());
-        }
-    }
-
-    // If the parent's children would fit on a smaller node type after the deletion, replace it with
-    // a smaller node.
-    if wparent.can_shrink() {
-        let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-        let smaller_node = wparent.shrink(guard.tree_writer.allocator)?;
-
-        // Replace the pointer in the grandparent
-        wgrandparent.replace_child(grandparent_key_byte, smaller_node.into_ptr());
-
-        guard.remember_obsolete_node(wparent.as_ptr());
-        wparent.write_unlock_obsolete();
-        return Ok(());
-    }
-
-    // nothing to do
-    wparent.write_unlock();
-    Ok(())
-}
-
-// Allocate a new leaf node to hold 'value'. If the key is long, we
-// may need to allocate new internal nodes to hold it too
-fn allocate_node_for_value<'a, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError> {
-    let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN);
-
-    let leaf_node = node_ref::new_leaf(&key[prefix_off..key.len()], value, allocator)?;
-
-    let mut node = leaf_node;
-    while prefix_off > 0 {
-        // Need another internal node
-        let remain_prefix = &key[0..prefix_off];
-
-        prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1);
-        let mut internal_node = node_ref::new_internal(
-            &remain_prefix[prefix_off..remain_prefix.len() - 1],
-            allocator,
-        )?;
-        internal_node.insert_new_child(*remain_prefix.last().unwrap(), node);
-        node = internal_node;
-    }
-
-    Ok(node)
-}
-
-fn common_prefix(a: &[u8], b: &[u8]) -> usize {
-    for i in 0..MAX_PREFIX_LEN {
-        if a[i] != b[i] {
-            return i;
-        }
-    }
-    panic!("prefixes are equal");
-}
--- a/libs/neonart/src/algorithm/lock_and_version.rs
+++ b/libs/neonart/src/algorithm/lock_and_version.rs
@@ -1,117 +0,0 @@
-//! Each node in the tree has contains one atomic word that stores three things:
-//!
-//! Bit 0: set if the node is "obsolete". An obsolete node has been removed from the tree,
-//!        but might still be accessed by concurrent readers until the epoch expires.
-//! Bit 1: set if the node is currently write-locked. Used as a spinlock.
-//! Bits 2-63: Version number, incremented every time the node is modified.
-//!
-//! AtomicLockAndVersion represents that.
-
-use std::sync::atomic::{AtomicU64, Ordering};
-
-pub(crate) struct ConcurrentUpdateError();
-
-pub(crate) struct AtomicLockAndVersion {
-    inner: AtomicU64,
-}
-
-impl AtomicLockAndVersion {
-    pub(crate) fn new() -> AtomicLockAndVersion {
-        AtomicLockAndVersion {
-            inner: AtomicU64::new(0),
-        }
-    }
-}
-
-impl AtomicLockAndVersion {
-    pub(crate) fn read_lock_or_restart(&self) -> Result<u64, ConcurrentUpdateError> {
-        let version = self.await_node_unlocked();
-        if is_obsolete(version) {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(version)
-    }
-
-    pub(crate) fn check_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        self.read_unlock_or_restart(version)
-    }
-
-    pub(crate) fn read_unlock_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        if self.inner.load(Ordering::Acquire) != version {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        &self,
-        version: u64,
-    ) -> Result<(), ConcurrentUpdateError> {
-        if self
-            .inner
-            .compare_exchange(
-                version,
-                set_locked_bit(version),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .is_err()
-        {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn write_lock_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        let old = self.inner.load(Ordering::Relaxed);
-        if is_obsolete(old) || is_locked(old) {
-            return Err(ConcurrentUpdateError());
-        }
-        if self
-            .inner
-            .compare_exchange(
-                old,
-                set_locked_bit(old),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .is_err()
-        {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn write_unlock(&self) {
-        // reset locked bit and overflow into version
-        self.inner.fetch_add(2, Ordering::Release);
-    }
-
-    pub(crate) fn write_unlock_obsolete(&self) {
-        // set obsolete, reset locked, overflow into version
-        self.inner.fetch_add(3, Ordering::Release);
-    }
-
-    // Helper functions
-    fn await_node_unlocked(&self) -> u64 {
-        let mut version = self.inner.load(Ordering::Acquire);
-        while is_locked(version) {
-            // spinlock
-            std::thread::yield_now();
-            version = self.inner.load(Ordering::Acquire)
-        }
-        version
-    }
-}
-
-fn set_locked_bit(version: u64) -> u64 {
-    version + 2
-}
-
-fn is_obsolete(version: u64) -> bool {
-    (version & 1) == 1
-}
-
-fn is_locked(version: u64) -> bool {
-    (version & 2) == 2
-}
--- a/libs/neonart/src/algorithm/node_ptr.rs
+++ b/libs/neonart/src/algorithm/node_ptr.rs
--- a/libs/neonart/src/algorithm/node_ref.rs
+++ b/libs/neonart/src/algorithm/node_ref.rs
@@ -1,349 +0,0 @@
-use std::fmt::Debug;
-use std::marker::PhantomData;
-
-use super::node_ptr;
-use super::node_ptr::NodePtr;
-use crate::EpochPin;
-use crate::Value;
-use crate::algorithm::lock_and_version::AtomicLockAndVersion;
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::allocator::ArtAllocator;
-use crate::allocator::OutOfMemoryError;
-
-pub struct NodeRef<'e, V> {
-    ptr: NodePtr<V>,
-
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V> Debug for NodeRef<'e, V> {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.ptr)
-    }
-}
-
-impl<'e, V: Value> NodeRef<'e, V> {
-    pub(crate) fn from_root_ptr(root_ptr: NodePtr<V>) -> NodeRef<'e, V> {
-        NodeRef {
-            ptr: root_ptr,
-            phantom: PhantomData,
-        }
-    }
-
-    pub(crate) fn read_lock_or_restart(
-        &self,
-    ) -> Result<ReadLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        let version = self.lockword().read_lock_or_restart()?;
-        Ok(ReadLockedNodeRef {
-            ptr: self.ptr,
-            version,
-            phantom: self.phantom,
-        })
-    }
-
-    pub(crate) fn write_lock_or_restart(
-        &self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.lockword().write_lock_or_restart()?;
-        Ok(WriteLockedNodeRef {
-            ptr: self.ptr,
-            phantom: self.phantom,
-        })
-    }
-
-    fn lockword(&self) -> &AtomicLockAndVersion {
-        self.ptr.lockword()
-    }
-}
-
-/// A reference to a node that has been optimistically read-locked. The functions re-check
-/// the version after each read.
-pub struct ReadLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>,
-    version: u64,
-
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
-    pub(crate) fn is_leaf(&self) -> bool {
-        self.ptr.is_leaf()
-    }
-
-    pub(crate) fn is_full(&self) -> bool {
-        self.ptr.is_full()
-    }
-
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    /// Note: because we're only holding a read lock, the prefix can change concurrently.
-    /// You must be prepared to restart, if read_unlock() returns error later.
-    ///
-    /// Returns the length of the prefix, or None if it's not a match
-    pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
-        self.ptr.prefix_matches(key)
-    }
-
-    pub(crate) fn find_child_or_restart(
-        &self,
-        key_byte: u8,
-    ) -> Result<Option<NodeRef<'e, V>>, ConcurrentUpdateError> {
-        let child_or_value = self.ptr.find_child(key_byte);
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        match child_or_value {
-            None => Ok(None),
-            Some(child_ptr) => Ok(Some(NodeRef {
-                ptr: child_ptr,
-                phantom: self.phantom,
-            })),
-        }
-    }
-
-    pub(crate) fn find_next_child_or_restart(
-        &self,
-        min_key_byte: u8,
-    ) -> Result<Option<(u8, NodeRef<'e, V>)>, ConcurrentUpdateError> {
-        let child_or_value = self.ptr.find_next_child(min_key_byte);
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        match child_or_value {
-            None => Ok(None),
-            Some((k, child_ptr)) => Ok(Some((
-                k,
-                NodeRef {
-                    ptr: child_ptr,
-                    phantom: self.phantom,
-                },
-            ))),
-        }
-    }
-
-    pub(crate) fn get_leaf_value_ptr(&self) -> Result<*const V, ConcurrentUpdateError> {
-        let result = self.ptr.get_leaf_value();
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        // Extend the lifetime.
-        let result = std::ptr::from_ref(result);
-
-        Ok(result)
-    }
-
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.ptr
-            .lockword()
-            .upgrade_to_write_lock_or_restart(self.version)?;
-
-        Ok(WriteLockedNodeRef {
-            ptr: self.ptr,
-            phantom: self.phantom,
-        })
-    }
-
-    pub(crate) fn read_unlock_or_restart(self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)?;
-        Ok(())
-    }
-
-    pub(crate) fn check_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)?;
-        Ok(())
-    }
-}
-
-/// A reference to a node that has been optimistically read-locked. The functions re-check
-/// the version after each read.
-pub struct WriteLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>,
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
-    pub(crate) fn can_shrink(&self) -> bool {
-        self.ptr.can_shrink()
-    }
-
-    pub(crate) fn num_children(&self) -> usize {
-        self.ptr.num_children()
-    }
-
-    pub(crate) fn write_unlock(mut self) {
-        self.ptr.lockword().write_unlock();
-        self.ptr = NodePtr::null();
-    }
-
-    pub(crate) fn write_unlock_obsolete(mut self) {
-        self.ptr.lockword().write_unlock_obsolete();
-        self.ptr = NodePtr::null();
-    }
-
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
-        self.ptr.truncate_prefix(new_prefix_len)
-    }
-
-    pub(crate) fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) {
-        self.ptr.prepend_prefix(prefix, prefix_byte)
-    }
-
-    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
-        self.ptr.insert_child(key_byte, child)
-    }
-
-    pub(crate) fn get_leaf_value_mut(&mut self) -> &mut V {
-        self.ptr.get_leaf_value_mut()
-    }
-
-    pub(crate) fn grow<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        let new_node = self.ptr.grow(allocator)?;
-        Ok(NewNodeRef {
-            ptr: new_node,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    pub(crate) fn shrink<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        let new_node = self.ptr.shrink(allocator)?;
-        Ok(NewNodeRef {
-            ptr: new_node,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    pub(crate) fn as_ptr(&self) -> NodePtr<V> {
-        self.ptr
-    }
-
-    pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
-        self.ptr.replace_child(key_byte, replacement);
-    }
-
-    pub(crate) fn delete_child(&mut self, key_byte: u8) {
-        self.ptr.delete_child(key_byte);
-    }
-
-    pub(crate) fn find_remaining_child(&self) -> (u8, NodeRef<'e, V>) {
-        assert_eq!(self.num_children(), 1);
-        let child_or_value = self.ptr.find_next_child(0);
-
-        match child_or_value {
-            None => panic!("could not find only child in node"),
-            Some((k, child_ptr)) => (
-                k,
-                NodeRef {
-                    ptr: child_ptr,
-                    phantom: self.phantom,
-                },
-            ),
-        }
-    }
-}
-
-impl<'e, V> Drop for WriteLockedNodeRef<'e, V> {
-    fn drop(&mut self) {
-        if !self.ptr.is_null() {
-            self.ptr.lockword().write_unlock();
-        }
-    }
-}
-
-pub(crate) struct NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    ptr: NodePtr<V>,
-    allocator: &'a A,
-
-    extra_nodes: Vec<NodePtr<V>>,
-}
-
-impl<'a, V, A> NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    pub(crate) fn insert_old_child(&mut self, key_byte: u8, child: &WriteLockedNodeRef<V>) {
-        self.ptr.insert_child(key_byte, child.as_ptr())
-    }
-
-    pub(crate) fn into_ptr(mut self) -> NodePtr<V> {
-        let ptr = self.ptr;
-        self.ptr = NodePtr::null();
-        ptr
-    }
-
-    pub(crate) fn insert_new_child(&mut self, key_byte: u8, child: NewNodeRef<'a, V, A>) {
-        let child_ptr = child.into_ptr();
-        self.ptr.insert_child(key_byte, child_ptr);
-        self.extra_nodes.push(child_ptr);
-    }
-}
-
-impl<'a, V, A> Drop for NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    /// This drop implementation deallocates the newly allocated node, if into_ptr() was not called.
-    fn drop(&mut self) {
-        if !self.ptr.is_null() {
-            self.ptr.deallocate(self.allocator);
-            for p in self.extra_nodes.iter() {
-                p.deallocate(self.allocator);
-            }
-        }
-    }
-}
-
-pub(crate) fn new_internal<'a, V, A>(
-    prefix: &[u8],
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    Ok(NewNodeRef {
-        ptr: node_ptr::new_internal(prefix, allocator)?,
-        allocator,
-        extra_nodes: Vec::new(),
-    })
-}
-
-pub(crate) fn new_leaf<'a, V, A>(
-    prefix: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    Ok(NewNodeRef {
-        ptr: node_ptr::new_leaf(prefix, value, allocator)?,
-        allocator,
-        extra_nodes: Vec::new(),
-    })
-}
--- a/libs/neonart/src/allocator.rs
+++ b/libs/neonart/src/allocator.rs
@@ -1,156 +0,0 @@
-pub mod block;
-mod multislab;
-mod slab;
-pub mod r#static;
-
-use std::alloc::Layout;
-use std::marker::PhantomData;
-use std::mem::MaybeUninit;
-use std::sync::atomic::Ordering;
-
-use crate::allocator::multislab::MultiSlabAllocator;
-use crate::allocator::r#static::alloc_from_slice;
-
-use spin;
-
-use crate::Tree;
-pub use crate::algorithm::node_ptr::{
-    NodeInternal4, NodeInternal16, NodeInternal48, NodeInternal256, NodeLeaf,
-};
-
-#[derive(Debug)]
-pub struct OutOfMemoryError();
-
-pub trait ArtAllocator<V: crate::Value> {
-    fn alloc_tree(&self) -> *mut Tree<V>;
-
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V>;
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V>;
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V>;
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V>;
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V>;
-
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>);
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>);
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>);
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>);
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>);
-}
-
-pub struct ArtMultiSlabAllocator<'t, V>
-where
-    V: crate::Value,
-{
-    tree_area: spin::Mutex<Option<&'t mut MaybeUninit<Tree<V>>>>,
-
-    pub(crate) inner: MultiSlabAllocator<'t, 5>,
-
-    phantom_val: PhantomData<V>,
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    const LAYOUTS: [Layout; 5] = [
-        Layout::new::<NodeInternal4<V>>(),
-        Layout::new::<NodeInternal16<V>>(),
-        Layout::new::<NodeInternal48<V>>(),
-        Layout::new::<NodeInternal256<V>>(),
-        Layout::new::<NodeLeaf<V>>(),
-    ];
-
-    pub fn new(area: &'t mut [MaybeUninit<u8>]) -> &'t mut ArtMultiSlabAllocator<'t, V> {
-        let (allocator_area, remain) = alloc_from_slice::<ArtMultiSlabAllocator<V>>(area);
-        let (tree_area, remain) = alloc_from_slice::<Tree<V>>(remain);
-
-        allocator_area.write(ArtMultiSlabAllocator {
-            tree_area: spin::Mutex::new(Some(tree_area)),
-            inner: MultiSlabAllocator::new(remain, &Self::LAYOUTS),
-            phantom_val: PhantomData,
-        })
-    }
-}
-
-impl<'t, V: crate::Value> ArtAllocator<V> for ArtMultiSlabAllocator<'t, V> {
-    fn alloc_tree(&self) -> *mut Tree<V> {
-        let mut t = self.tree_area.lock();
-        if let Some(tree_area) = t.take() {
-            return tree_area.as_mut_ptr().cast();
-        }
-        panic!("cannot allocate more than one tree");
-    }
-
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V> {
-        self.inner.alloc_slab(0).cast()
-    }
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V> {
-        self.inner.alloc_slab(1).cast()
-    }
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V> {
-        self.inner.alloc_slab(2).cast()
-    }
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V> {
-        self.inner.alloc_slab(3).cast()
-    }
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V> {
-        self.inner.alloc_slab(4).cast()
-    }
-
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>) {
-        self.inner.dealloc_slab(0, ptr.cast())
-    }
-
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>) {
-        self.inner.dealloc_slab(1, ptr.cast())
-    }
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>) {
-        self.inner.dealloc_slab(2, ptr.cast())
-    }
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>) {
-        self.inner.dealloc_slab(3, ptr.cast())
-    }
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>) {
-        self.inner.dealloc_slab(4, ptr.cast())
-    }
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    pub(crate) fn get_statistics(&self) -> ArtMultiSlabStats {
-        ArtMultiSlabStats {
-            num_internal4: self.inner.slab_descs[0]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_internal16: self.inner.slab_descs[1]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_internal48: self.inner.slab_descs[2]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_internal256: self.inner.slab_descs[3]
-                .num_allocated
-                .load(Ordering::Relaxed),
-            num_leaf: self.inner.slab_descs[4]
-                .num_allocated
-                .load(Ordering::Relaxed),
-
-            num_blocks_internal4: self.inner.slab_descs[0].num_blocks.load(Ordering::Relaxed),
-            num_blocks_internal16: self.inner.slab_descs[1].num_blocks.load(Ordering::Relaxed),
-            num_blocks_internal48: self.inner.slab_descs[2].num_blocks.load(Ordering::Relaxed),
-            num_blocks_internal256: self.inner.slab_descs[3].num_blocks.load(Ordering::Relaxed),
-            num_blocks_leaf: self.inner.slab_descs[4].num_blocks.load(Ordering::Relaxed),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtMultiSlabStats {
-    pub num_internal4: u64,
-    pub num_internal16: u64,
-    pub num_internal48: u64,
-    pub num_internal256: u64,
-    pub num_leaf: u64,
-
-    pub num_blocks_internal4: u64,
-    pub num_blocks_internal16: u64,
-    pub num_blocks_internal48: u64,
-    pub num_blocks_internal256: u64,
-    pub num_blocks_leaf: u64,
-}
--- a/libs/neonart/src/allocator/block.rs
+++ b/libs/neonart/src/allocator/block.rs
@@ -1,191 +0,0 @@
-//! Simple allocator of fixed-size blocks
-
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use spin;
-
-pub const BLOCK_SIZE: usize = 16 * 1024;
-
-const INVALID_BLOCK: u64 = u64::MAX;
-
-pub(crate) struct BlockAllocator<'t> {
-    blocks_ptr: &'t [MaybeUninit<u8>],
-    num_blocks: u64,
-    num_initialized: AtomicU64,
-
-    freelist_head: spin::Mutex<u64>,
-}
-
-struct FreeListBlock {
-    inner: spin::Mutex<FreeListBlockInner>,
-}
-
-struct FreeListBlockInner {
-    next: u64,
-
-    num_free_blocks: u64,
-    free_blocks: [u64; 100], // FIXME: fill the rest of the block
-}
-
-impl<'t> BlockAllocator<'t> {
-    pub(crate) fn new(area: &'t mut [MaybeUninit<u8>]) -> Self {
-        // Use all the space for the blocks
-        let padding = area.as_ptr().align_offset(BLOCK_SIZE);
-        let remain = &mut area[padding..];
-
-        let num_blocks = (remain.len() / BLOCK_SIZE) as u64;
-
-        BlockAllocator {
-            blocks_ptr: remain,
-            num_blocks,
-            num_initialized: AtomicU64::new(0),
-            freelist_head: spin::Mutex::new(INVALID_BLOCK),
-        }
-    }
-
-    /// safety: you must hold a lock on the pointer to this block, otherwise it might get
-    /// reused for another kind of block
-    fn read_freelist_block(&self, blkno: u64) -> &FreeListBlock {
-        let ptr: *const FreeListBlock = self.get_block_ptr(blkno).cast();
-        unsafe { ptr.as_ref().unwrap() }
-    }
-
-    fn get_block_ptr(&self, blkno: u64) -> *mut u8 {
-        assert!(blkno < self.num_blocks);
-        unsafe {
-            self.blocks_ptr
-                .as_ptr()
-                .byte_offset(blkno as isize * BLOCK_SIZE as isize)
-        }
-        .cast_mut()
-        .cast()
-    }
-
-    #[allow(clippy::mut_from_ref)]
-    pub(crate) fn alloc_block(&self) -> &mut [MaybeUninit<u8>] {
-        // FIXME: handle OOM
-        let blkno = self.alloc_block_internal();
-        if blkno == INVALID_BLOCK {
-            panic!("out of memory");
-        }
-
-        let ptr: *mut MaybeUninit<u8> = self.get_block_ptr(blkno).cast();
-        unsafe { std::slice::from_raw_parts_mut(ptr, BLOCK_SIZE) }
-    }
-
-    fn alloc_block_internal(&self) -> u64 {
-        //  check the free list.
-        {
-            let mut freelist_head = self.freelist_head.lock();
-            if *freelist_head != INVALID_BLOCK {
-                let freelist_block = self.read_freelist_block(*freelist_head);
-
-                // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-                let mut g = freelist_block.inner.lock();
-
-                if g.num_free_blocks > 0 {
-                    g.num_free_blocks -= 1;
-                    let result = g.free_blocks[g.num_free_blocks as usize];
-                    return result;
-                } else {
-                    // consume the freelist block itself
-                    let result = *freelist_head;
-                    *freelist_head = g.next;
-                    // This freelist block is now unlinked and can be repurposed
-                    drop(g);
-                    return result;
-                }
-            }
-        }
-
-        // If there are some blocks left that we've never used, pick next such block
-        let mut next_uninitialized = self.num_initialized.load(Ordering::Relaxed);
-        while next_uninitialized < self.num_blocks {
-            match self.num_initialized.compare_exchange(
-                next_uninitialized,
-                next_uninitialized + 1,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => {
-                    return next_uninitialized;
-                }
-                Err(old) => {
-                    next_uninitialized = old;
-                    continue;
-                }
-            }
-        }
-
-        // out of blocks
-        INVALID_BLOCK
-    }
-
-    // TODO: this is currently unused. The slab allocator never releases blocks
-    #[allow(dead_code)]
-    pub(crate) fn release_block(&self, block_ptr: *mut u8) {
-        let blockno = unsafe { block_ptr.byte_offset_from(self.blocks_ptr) / BLOCK_SIZE as isize };
-        self.release_block_internal(blockno as u64);
-    }
-
-    fn release_block_internal(&self, blockno: u64) {
-        let mut freelist_head = self.freelist_head.lock();
-        if *freelist_head != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(*freelist_head);
-
-            // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-            let mut g = freelist_block.inner.lock();
-
-            let num_free_blocks = g.num_free_blocks;
-            if num_free_blocks < g.free_blocks.len() as u64 {
-                g.free_blocks[num_free_blocks as usize] = blockno;
-                g.num_free_blocks += 1;
-                return;
-            }
-        }
-
-        // Convert the block into a new freelist block
-        let block_ptr: *mut FreeListBlock = self.get_block_ptr(blockno).cast();
-        let init = FreeListBlock {
-            inner: spin::Mutex::new(FreeListBlockInner {
-                next: *freelist_head,
-                num_free_blocks: 0,
-                free_blocks: [INVALID_BLOCK; 100],
-            }),
-        };
-        unsafe { (*block_ptr) = init };
-        *freelist_head = blockno;
-    }
-
-    // for debugging
-    pub(crate) fn get_statistics(&self) -> BlockAllocatorStats {
-        let mut num_free_blocks = 0;
-
-        let mut _prev_lock = None;
-        let head_lock = self.freelist_head.lock();
-        let mut next_blk = *head_lock;
-        let mut _head_lock = Some(head_lock);
-        while next_blk != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(next_blk);
-            let lock = freelist_block.inner.lock();
-            num_free_blocks += lock.num_free_blocks;
-            next_blk = lock.next;
-            _prev_lock = Some(lock); // hold the lock until we've read the next block
-            _head_lock = None;
-        }
-
-        BlockAllocatorStats {
-            num_blocks: self.num_blocks,
-            num_initialized: self.num_initialized.load(Ordering::Relaxed),
-            num_free_blocks,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct BlockAllocatorStats {
-    pub num_blocks: u64,
-    pub num_initialized: u64,
-    pub num_free_blocks: u64,
-}
--- a/libs/neonart/src/allocator/multislab.rs
+++ b/libs/neonart/src/allocator/multislab.rs
@@ -1,33 +0,0 @@
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-
-use crate::allocator::block::BlockAllocator;
-use crate::allocator::slab::SlabDesc;
-
-pub struct MultiSlabAllocator<'t, const N: usize> {
-    pub(crate) block_allocator: BlockAllocator<'t>,
-
-    pub(crate) slab_descs: [SlabDesc; N],
-}
-
-impl<'t, const N: usize> MultiSlabAllocator<'t, N> {
-    pub(crate) fn new(
-        area: &'t mut [MaybeUninit<u8>],
-        layouts: &[Layout; N],
-    ) -> MultiSlabAllocator<'t, N> {
-        let block_allocator = BlockAllocator::new(area);
-        MultiSlabAllocator {
-            block_allocator,
-
-            slab_descs: std::array::from_fn(|i| SlabDesc::new(&layouts[i])),
-        }
-    }
-
-    pub(crate) fn alloc_slab(&self, slab_idx: usize) -> *mut u8 {
-        self.slab_descs[slab_idx].alloc_chunk(&self.block_allocator)
-    }
-
-    pub(crate) fn dealloc_slab(&self, slab_idx: usize, ptr: *mut u8) {
-        self.slab_descs[slab_idx].dealloc_chunk(ptr, &self.block_allocator)
-    }
-}
--- a/libs/neonart/src/allocator/slab.rs
+++ b/libs/neonart/src/allocator/slab.rs
@@ -1,433 +0,0 @@
-//! A slab allocator that carves out fixed-size chunks from larger blocks.
-//!
-//!
-
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-use std::ops::Deref;
-use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
-
-use spin;
-
-use super::alloc_from_slice;
-use super::block::BlockAllocator;
-
-use crate::allocator::block::BLOCK_SIZE;
-
-pub(crate) struct SlabDesc {
-    pub(crate) layout: Layout,
-
-    block_lists: spin::RwLock<BlockLists>,
-
-    pub(crate) num_blocks: AtomicU64,
-    pub(crate) num_allocated: AtomicU64,
-}
-
-// FIXME: Not sure if SlabDesc is really Sync or Send. It probably is when it's empty, but
-// 'block_lists' contains pointers when it's not empty. In the current use as part of the
-// the art tree, SlabDescs are only moved during initialization.
-unsafe impl Sync for SlabDesc {}
-unsafe impl Send for SlabDesc {}
-
-#[derive(Default, Debug)]
-struct BlockLists {
-    full_blocks: BlockList,
-    nonfull_blocks: BlockList,
-}
-
-impl BlockLists {
-    // Unlink a node. It must be in either one of the two lists.
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        let list = unsafe {
-            if (*elem).next.is_null() {
-                if self.full_blocks.tail == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else if (*elem).prev.is_null() {
-                if self.full_blocks.head == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else {
-                None
-            }
-        };
-        unsafe { unlink_slab_block(list, elem) };
-    }
-}
-
-unsafe fn unlink_slab_block(mut list: Option<&mut BlockList>, elem: *mut SlabBlockHeader) {
-    unsafe {
-        if (*elem).next.is_null() {
-            assert_eq!(list.as_ref().unwrap().tail, elem);
-            list.as_mut().unwrap().tail = (*elem).prev;
-        } else {
-            assert_eq!((*(*elem).next).prev, elem);
-            (*(*elem).next).prev = (*elem).prev;
-        }
-        if (*elem).prev.is_null() {
-            assert_eq!(list.as_ref().unwrap().head, elem);
-            list.as_mut().unwrap().head = (*elem).next;
-        } else {
-            assert_eq!((*(*elem).prev).next, elem);
-            (*(*elem).prev).next = (*elem).next;
-        }
-    }
-}
-
-#[derive(Debug)]
-struct BlockList {
-    head: *mut SlabBlockHeader,
-    tail: *mut SlabBlockHeader,
-}
-
-impl Default for BlockList {
-    fn default() -> Self {
-        BlockList {
-            head: std::ptr::null_mut(),
-            tail: std::ptr::null_mut(),
-        }
-    }
-}
-
-impl BlockList {
-    unsafe fn push_head(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe {
-            if self.is_empty() {
-                self.tail = elem;
-                (*elem).next = std::ptr::null_mut();
-            } else {
-                (*elem).next = self.head;
-                (*self.head).prev = elem;
-            }
-            (*elem).prev = std::ptr::null_mut();
-            self.head = elem;
-        }
-    }
-
-    fn is_empty(&self) -> bool {
-        self.head.is_null()
-    }
-
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe { unlink_slab_block(Some(self), elem) }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        let mut next = self.head;
-
-        while !next.is_null() {
-            let n = unsafe { next.as_ref() }.unwrap();
-            eprintln!(
-                "  blk {:?} (free {}/{})",
-                next,
-                n.num_free_chunks.load(Ordering::Relaxed),
-                n.num_chunks
-            );
-            next = n.next;
-        }
-    }
-}
-
-impl SlabDesc {
-    pub(crate) fn new(layout: &Layout) -> SlabDesc {
-        SlabDesc {
-            layout: *layout,
-            block_lists: spin::RwLock::new(BlockLists::default()),
-            num_allocated: AtomicU64::new(0),
-            num_blocks: AtomicU64::new(0),
-        }
-    }
-}
-
-#[derive(Debug)]
-struct SlabBlockHeader {
-    free_chunks_head: spin::Mutex<*mut FreeChunk>,
-    num_free_chunks: AtomicU32,
-    num_chunks: u32, // this is really a constant for a given Layout
-
-    // these fields are protected by the lock on the BlockLists
-    prev: *mut SlabBlockHeader,
-    next: *mut SlabBlockHeader,
-}
-
-struct FreeChunk {
-    next: *mut FreeChunk,
-}
-
-enum ReadOrWriteGuard<'a, T> {
-    Read(spin::RwLockReadGuard<'a, T>),
-    Write(spin::RwLockWriteGuard<'a, T>),
-}
-
-impl<'a, T> Deref for ReadOrWriteGuard<'a, T> {
-    type Target = T;
-
-    fn deref(&self) -> &<Self as Deref>::Target {
-        match self {
-            ReadOrWriteGuard::Read(g) => g.deref(),
-            ReadOrWriteGuard::Write(g) => g.deref(),
-        }
-    }
-}
-
-impl SlabDesc {
-    pub fn alloc_chunk(&self, block_allocator: &BlockAllocator) -> *mut u8 {
-        // Are there any free chunks?
-        let mut acquire_write = false;
-        'outer: loop {
-            let mut block_lists_guard = if acquire_write {
-                ReadOrWriteGuard::Write(self.block_lists.write())
-            } else {
-                ReadOrWriteGuard::Read(self.block_lists.read())
-            };
-            'inner: loop {
-                let block_ptr = block_lists_guard.nonfull_blocks.head;
-                if block_ptr.is_null() {
-                    break 'outer;
-                }
-                unsafe {
-                    let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-                    if !(*free_chunks_head).is_null() {
-                        let result = *free_chunks_head;
-                        (*free_chunks_head) = (*result).next;
-                        let _old = (*block_ptr).num_free_chunks.fetch_sub(1, Ordering::Relaxed);
-
-                        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-                        return result.cast();
-                    }
-                }
-
-                // The block at the head of the list was full. Grab write lock and retry
-                match block_lists_guard {
-                    ReadOrWriteGuard::Read(_) => {
-                        acquire_write = true;
-                        continue 'outer;
-                    }
-                    ReadOrWriteGuard::Write(ref mut g) => {
-                        // move the node to the list of full blocks
-                        unsafe {
-                            g.nonfull_blocks.unlink(block_ptr);
-                            g.full_blocks.push_head(block_ptr);
-                        };
-                        continue 'inner;
-                    }
-                }
-            }
-        }
-
-        // no free chunks. Allocate a new block (and the chunk from that)
-        let (new_block, new_chunk) = self.alloc_block_and_chunk(block_allocator);
-        self.num_blocks.fetch_add(1, Ordering::Relaxed);
-
-        // Add the block to the list in the SlabDesc
-        unsafe {
-            let mut block_lists_guard = self.block_lists.write();
-            block_lists_guard.nonfull_blocks.push_head(new_block);
-        }
-        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-        new_chunk
-    }
-
-    pub fn dealloc_chunk(&self, chunk_ptr: *mut u8, _block_allocator: &BlockAllocator) {
-        // Find the block it belongs to. You can find the block from the address. (And knowing the
-        // layout, you could calculate the chunk number too.)
-        let block_ptr: *mut SlabBlockHeader = {
-            let block_addr = (chunk_ptr.addr() / BLOCK_SIZE) * BLOCK_SIZE;
-            chunk_ptr.with_addr(block_addr).cast()
-        };
-        let chunk_ptr: *mut FreeChunk = chunk_ptr.cast();
-
-        // Mark the chunk as free in 'freechunks' list
-        let num_chunks;
-        let num_free_chunks;
-        unsafe {
-            let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-            (*chunk_ptr).next = *free_chunks_head;
-            *free_chunks_head = chunk_ptr;
-
-            num_free_chunks = (*block_ptr).num_free_chunks.fetch_add(1, Ordering::Relaxed) + 1;
-            num_chunks = (*block_ptr).num_chunks;
-        }
-
-        if num_free_chunks == 1 {
-            // If the block was full previously, add it to the nonfull blocks list. Note that
-            // we're not holding the lock anymore, so it can immediately become full again.
-            // That's harmless, it will be moved back to the full list again when a call
-            // to alloc_chunk() sees it.
-            let mut block_lists = self.block_lists.write();
-            unsafe {
-                block_lists.unlink(block_ptr);
-                block_lists.nonfull_blocks.push_head(block_ptr);
-            };
-        } else if num_free_chunks == num_chunks {
-            // If the block became completely empty, move it to the free list
-            // TODO
-            // FIXME: we're still holding the spinlock. It's not exactly safe to return it to
-            // the free blocks list, is it? Defer it as garbage to wait out concurrent updates?
-            //block_allocator.release_block()
-        }
-
-        // update stats
-        self.num_allocated.fetch_sub(1, Ordering::Relaxed);
-    }
-
-    fn alloc_block_and_chunk(
-        &self,
-        block_allocator: &BlockAllocator,
-    ) -> (*mut SlabBlockHeader, *mut u8) {
-        // fixme: handle OOM
-        let block_slice: &mut [MaybeUninit<u8>] = block_allocator.alloc_block();
-        let (block_header, remain) = alloc_from_slice::<SlabBlockHeader>(block_slice);
-
-        let padding = remain.as_ptr().align_offset(self.layout.align());
-
-        let num_chunks = (remain.len() - padding) / self.layout.size();
-
-        let first_chunk_ptr: *mut FreeChunk = remain[padding..].as_mut_ptr().cast();
-
-        unsafe {
-            let mut chunk_ptr = first_chunk_ptr;
-            for _ in 0..num_chunks - 1 {
-                let next_chunk_ptr = chunk_ptr.byte_add(self.layout.size());
-                (*chunk_ptr).next = next_chunk_ptr;
-                chunk_ptr = next_chunk_ptr;
-            }
-            (*chunk_ptr).next = std::ptr::null_mut();
-
-            let result_chunk = first_chunk_ptr;
-
-            let block_header = block_header.write(SlabBlockHeader {
-                free_chunks_head: spin::Mutex::new((*first_chunk_ptr).next),
-                prev: std::ptr::null_mut(),
-                next: std::ptr::null_mut(),
-                num_chunks: num_chunks as u32,
-                num_free_chunks: AtomicU32::new(num_chunks as u32 - 1),
-            });
-
-            (block_header, result_chunk.cast())
-        }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        eprintln!(
-            "slab dump ({} blocks, {} allocated chunks)",
-            self.num_blocks.load(Ordering::Relaxed),
-            self.num_allocated.load(Ordering::Relaxed)
-        );
-        let lists = self.block_lists.read();
-
-        eprintln!("nonfull blocks:");
-        lists.nonfull_blocks.dump();
-        eprintln!("full blocks:");
-        lists.full_blocks.dump();
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use rand::Rng;
-    use rand_distr::Zipf;
-
-    struct TestObject {
-        val: usize,
-        _dummy: [u8; BLOCK_SIZE / 4],
-    }
-
-    struct TestObjectSlab<'a>(SlabDesc, BlockAllocator<'a>);
-    impl<'a> TestObjectSlab<'a> {
-        fn new(block_allocator: BlockAllocator) -> TestObjectSlab {
-            TestObjectSlab(SlabDesc::new(&Layout::new::<TestObject>()), block_allocator)
-        }
-
-        fn alloc(&self, val: usize) -> *mut TestObject {
-            let obj: *mut TestObject = self.0.alloc_chunk(&self.1).cast();
-            unsafe { (*obj).val = val };
-            obj
-        }
-
-        fn dealloc(&self, obj: *mut TestObject) {
-            self.0.dealloc_chunk(obj.cast(), &self.1)
-        }
-    }
-
-    #[test]
-    fn test_slab_alloc() {
-        const MEM_SIZE: usize = 100000000;
-        let mut area = Box::new_uninit_slice(MEM_SIZE);
-        let block_allocator = BlockAllocator::new(&mut area);
-
-        let slab = TestObjectSlab::new(block_allocator);
-
-        let mut all: Vec<*mut TestObject> = Vec::new();
-        for i in 0..11 {
-            all.push(slab.alloc(i));
-        }
-        #[allow(clippy::needless_range_loop)]
-        for i in 0..11 {
-            assert!(unsafe { (*all[i]).val == i });
-        }
-
-        let distribution = Zipf::new(10.0, 1.1).unwrap();
-        let mut rng = rand::rng();
-        for _ in 0..100000 {
-            slab.0.dump();
-            let idx = rng.sample(distribution) as usize;
-            let ptr: *mut TestObject = all[idx];
-            if !ptr.is_null() {
-                assert_eq!(unsafe { (*ptr).val }, idx);
-                slab.dealloc(ptr);
-                all[idx] = std::ptr::null_mut();
-            } else {
-                all[idx] = slab.alloc(idx);
-            }
-        }
-    }
-
-    fn new_test_blk(i: u32) -> *mut SlabBlockHeader {
-        Box::into_raw(Box::new(SlabBlockHeader {
-            free_chunks_head: spin::Mutex::new(std::ptr::null_mut()),
-            num_free_chunks: AtomicU32::new(0),
-            num_chunks: i,
-            prev: std::ptr::null_mut(),
-            next: std::ptr::null_mut(),
-        }))
-    }
-
-    #[test]
-    fn test_block_linked_list() {
-        // note: these are leaked, but that's OK for tests
-        let a = new_test_blk(0);
-        let b = new_test_blk(1);
-
-        let mut list = BlockList::default();
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(a);
-            assert!(!list.is_empty());
-            list.unlink(a);
-        }
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(b);
-            list.push_head(a);
-            assert_eq!(list.head, a);
-            assert_eq!((*a).next, b);
-            assert_eq!((*b).prev, a);
-            assert_eq!(list.tail, b);
-
-            list.unlink(a);
-            list.unlink(b);
-            assert!(list.is_empty());
-        }
-    }
-}
--- a/libs/neonart/src/allocator/static.rs
+++ b/libs/neonart/src/allocator/static.rs
@@ -1,44 +0,0 @@
-use std::mem::MaybeUninit;
-
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size());
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { result_ptr.as_mut().unwrap() };
-
-    (result, remain)
-}
-
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() * len > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size() * len);
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) };
-
-    (result, remain)
-}
--- a/libs/neonart/src/epoch.rs
+++ b/libs/neonart/src/epoch.rs
@@ -1,142 +0,0 @@
-//! This is similar to crossbeam_epoch crate, but works in shared memory
-
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use crossbeam_utils::CachePadded;
-
-const NUM_SLOTS: usize = 1000;
-
-/// This is the struct that is stored in shmem
-///
-/// bit 0: is it pinned or not?
-/// rest of the bits are the epoch counter.
-pub struct EpochShared {
-    global_epoch: AtomicU64,
-    participants: [CachePadded<AtomicU64>; NUM_SLOTS],
-
-    broadcast_lock: spin::Mutex<()>,
-}
-
-impl EpochShared {
-    pub fn new() -> EpochShared {
-        EpochShared {
-            global_epoch: AtomicU64::new(2),
-            participants: [const { CachePadded::new(AtomicU64::new(2)) }; NUM_SLOTS],
-            broadcast_lock: spin::Mutex::new(()),
-        }
-    }
-
-    pub fn register(&self) -> LocalHandle {
-        LocalHandle {
-            global: self,
-            last_slot: AtomicUsize::new(0), // todo: choose more intelligently
-        }
-    }
-
-    fn release_pin(&self, slot: usize, _epoch: u64) {
-        let global_epoch = self.global_epoch.load(Ordering::Relaxed);
-        self.participants[slot].store(global_epoch, Ordering::Relaxed);
-    }
-
-    fn pin_internal(&self, slot_hint: usize) -> (usize, u64) {
-        // pick a slot
-        let mut slot = slot_hint;
-        let epoch = loop {
-            let old = self.participants[slot].fetch_or(1, Ordering::Relaxed);
-            if old & 1 == 0 {
-                // Got this slot
-                break old;
-            }
-
-            // the slot was busy by another thread / process. try a different slot
-            slot += 1;
-            if slot == NUM_SLOTS {
-                slot = 0;
-            }
-            continue;
-        };
-        (slot, epoch)
-    }
-
-    pub(crate) fn advance(&self) -> u64 {
-        // Advance the global epoch
-        let old_epoch = self.global_epoch.fetch_add(2, Ordering::Relaxed);
-        // Anyone that release their pin after this will update their slot.
-        old_epoch + 2
-    }
-
-    pub(crate) fn broadcast(&self) {
-        let Some(_guard) = self.broadcast_lock.try_lock() else {
-            return;
-        };
-
-        let epoch = self.global_epoch.load(Ordering::Relaxed);
-        let old_epoch = epoch.wrapping_sub(2);
-
-        // Update all free slots.
-        for i in 0..NUM_SLOTS {
-            // TODO: check result, as a sanity check. It should either be the old epoch, or pinned
-            let _ = self.participants[i].compare_exchange(
-                old_epoch,
-                epoch,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            );
-        }
-
-        // FIXME: memory fence here, since we used Relaxed?
-    }
-
-    pub(crate) fn get_oldest(&self) -> u64 {
-        // Read all slots.
-        let now = self.global_epoch.load(Ordering::Relaxed);
-        let mut oldest = now;
-        for i in 0..NUM_SLOTS {
-            let this_epoch = self.participants[i].load(Ordering::Relaxed);
-            let delta = now.wrapping_sub(this_epoch);
-            if delta > u64::MAX / 2 {
-                // this is very recent
-            } else if delta > now.wrapping_sub(oldest) {
-                oldest = this_epoch;
-            }
-        }
-        oldest
-    }
-
-    pub(crate) fn get_current(&self) -> u64 {
-        self.global_epoch.load(Ordering::Relaxed)
-    }
-}
-
-pub(crate) struct EpochPin<'e> {
-    slot: usize,
-    pub(crate) epoch: u64,
-
-    handle: &'e LocalHandle<'e>,
-}
-
-impl<'e> Drop for EpochPin<'e> {
-    fn drop(&mut self) {
-        self.handle.global.release_pin(self.slot, self.epoch);
-    }
-}
-
-pub struct LocalHandle<'g> {
-    global: &'g EpochShared,
-
-    last_slot: AtomicUsize,
-}
-
-impl<'g> LocalHandle<'g> {
-    pub fn pin(&self) -> EpochPin {
-        let (slot, epoch) = self
-            .global
-            .pin_internal(self.last_slot.load(Ordering::Relaxed));
-        self.last_slot.store(slot, Ordering::Relaxed);
-        EpochPin {
-            handle: self,
-            epoch,
-            slot,
-        }
-    }
-}
--- a/libs/neonart/src/lib.rs
+++ b/libs/neonart/src/lib.rs
@@ -1,583 +0,0 @@
-//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling.
-//!
-//! The data structure is described in these two papers:
-//!
-//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013).
-//!     The adaptive radix tree: ARTful indexing for main-memory databases.
-//!     Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812.
-//!     https://db.in.tum.de/~leis/papers/ART.pdf
-//!
-//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016).
-//!     The ART of practical synchronization.
-//!     1-8. 10.1145/2933349.2933352.
-//!     https://db.in.tum.de/~leis/papers/artsync.pdf
-//!
-//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we
-//! use.
-//!
-//! The papers mention a few different variants. We have made the following choices in this
-//! implementation:
-//!
-//! - All keys have the same length
-//!
-//! - Single-value leaves.
-//!
-//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a
-//!   variable length "prefix", which stores the keys of all the one-way nodes which have been
-//!   removed. However, similar to the "hybrid" approach described in the paper, each node only has
-//!   space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we
-//!   create create one-way nodes to store them. (There was no particular reason for this choice,
-//!   the "hybrid" approach described in the paper might be better.)
-//!
-//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method,
-//!   ROWEX, which generally performs better when there is contention, but that is not important
-//!   for use and Optimisic Lock Coupling is simpler to implement.
-//!
-//! ## Requirements
-//!
-//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache
-//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique
-//! requirements, which is why we had to write our own. Namely:
-//!
-//! - The data structure has to live in fixed-sized shared memory segment. That rules out any
-//!   built-in Rust collections and most crates. (Except possibly with the 'allocator_api' rust
-//!   feature, which still nightly-only experimental as of this writing).
-//!
-//! - The data structure is accessed from multiple processes. Only one process updates the data
-//!   structure, but other processes perform reads. That rules out using built-in Rust locking
-//!   primitives like Mutex and RwLock, and most crates too.
-//!
-//! - Within the one process with write-access, multiple threads can perform updates concurrently.
-//!   That rules out using PostgreSQL LWLocks for the locking.
-//!
-//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been
-//! written with that usage and the above constraints in mind. Some noteworthy assumptions:
-//!
-//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level
-//!   locking in the PostgreSQL buffer manager, which ensures that two backends should not try to
-//!   read / write the same page at the same time. (Prefetching can conflict with actual reads,
-//!   however.)
-//!
-//!  - The keys in the integrated cache are 17 bytes long.
-//!
-//! ## Usage
-//!
-//! Because this is designed to be used as a Postgres shared memory data structure, initialization
-//! happens in three stages:
-//!
-//! 0. A fixed area of shared memory is allocated at postmaster startup.
-//!
-//! 1. TreeInitStruct::new() is called to initialize it, still in Postmaster process, before any
-//!    other process or thread is running. It returns a TreeInitStruct, which is inherited by all
-//!    the processes through fork().
-//!
-//! 2. One process may have write-access to the struct, by calling
-//!    [TreeInitStruct::attach_writer]. (That process is the communicator process.)
-//!
-//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader]
-//!
-//! "Write access" means that you can insert / update / delete values in the tree.
-//!
-//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new
-//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data
-//! structure stays consistent, but if the Value has interior mutability, like atomic fields,
-//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a
-//! problem, the version check could be passed up to the caller, so that the caller could detect the
-//! lost updates and retry the operation.
-//!
-//! ## Implementation
-//!
-//! node_ptr: Provides low-level implementations of the four different node types (eight actually,
-//! since there is an Internal and Leaf variant of each)
-//!
-//! lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each
-//! node.
-//!
-//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe
-//!   abstractions on top.
-//!
-//! algorithm.rs: Contains the functions to implement lookups and updates in the tree
-//!
-//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our
-//!   own abstraction for that because we need the data structure to live in a pre-allocated shared
-//!   memory segment).
-//!
-//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not
-//!   immediately deallocated, but stays around for as long as concurrent readers might still have
-//!   pointers to them. This is enforced by an epoch system. This is similar to
-//!   e.g. crossbeam_epoch, but we couldn't use that either because it has to work across processes
-//!   communicating over the shared memory segment.
-//!
-//! ## See also
-//!
-//! There are some existing Rust ART implementations out there, but none of them filled all
-//! the requirements:
-//!
-//! - https://github.com/XiangpengHao/congee
-//! - https://github.com/declanvk/blart
-//!
-//! ## TODO
-//!
-//! - Removing values has not been implemented
-
-mod algorithm;
-pub mod allocator;
-mod epoch;
-
-use algorithm::RootPtr;
-use algorithm::node_ptr::NodePtr;
-
-use std::collections::VecDeque;
-use std::fmt::Debug;
-use std::marker::PhantomData;
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicBool, Ordering};
-
-use crate::epoch::EpochPin;
-
-#[cfg(test)]
-mod tests;
-
-use allocator::ArtAllocator;
-pub use allocator::ArtMultiSlabAllocator;
-pub use allocator::OutOfMemoryError;
-
-/// Fixed-length key type.
-///
-pub trait Key: Debug {
-    const KEY_LEN: usize;
-
-    fn as_bytes(&self) -> &[u8];
-}
-
-/// Values stored in the tree
-///
-/// Values need to be Cloneable, because when a node "grows", the value is copied to a new node and
-/// the old sticks around until all readers that might see the old value are gone.
-// fixme obsolete, no longer needs Clone
-pub trait Value {}
-
-const MAX_GARBAGE: usize = 1024;
-
-/// The root of the tree, plus other tree-wide data. This is stored in the shared memory.
-pub struct Tree<V: Value> {
-    /// For simplicity, so that we never need to grow or shrink the root, the root node is always an
-    /// Internal256 node. Also, it never has a prefix (that's actually a bit wasteful, incurring one
-    /// indirection to every lookup)
-    root: RootPtr<V>,
-
-    writer_attached: AtomicBool,
-
-    epoch: epoch::EpochShared,
-}
-
-unsafe impl<V: Value + Sync> Sync for Tree<V> {}
-unsafe impl<V: Value + Send> Send for Tree<V> {}
-
-struct GarbageQueue<V>(VecDeque<(NodePtr<V>, u64)>);
-
-unsafe impl<V: Value + Sync> Sync for GarbageQueue<V> {}
-unsafe impl<V: Value + Send> Send for GarbageQueue<V> {}
-
-impl<V> GarbageQueue<V> {
-    fn new() -> GarbageQueue<V> {
-        GarbageQueue(VecDeque::with_capacity(MAX_GARBAGE))
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>, epoch: u64) {
-        self.0.push_front((ptr, epoch));
-    }
-
-    fn next_obsolete(&mut self, cutoff_epoch: u64) -> Option<NodePtr<V>> {
-        if let Some(back) = self.0.back() {
-            if back.1 < cutoff_epoch {
-                return Some(self.0.pop_back().unwrap().0);
-            }
-        }
-        None
-    }
-}
-
-/// Struct created at postmaster startup
-pub struct TreeInitStruct<'t, K: Key, V: Value, A: ArtAllocator<V>> {
-    tree: &'t Tree<V>,
-
-    allocator: &'t A,
-
-    phantom_key: PhantomData<K>,
-}
-
-/// The worker process has a reference to this. The write operations are only safe
-/// from the worker process
-pub struct TreeWriteAccess<'t, K: Key, V: Value, A: ArtAllocator<V>>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    pub allocator: &'t A,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-
-    /// Obsolete nodes that cannot be recycled until their epoch expires.
-    garbage: spin::Mutex<GarbageQueue<V>>,
-}
-
-/// The backends have a reference to this. It cannot be used to modify the tree
-pub struct TreeReadAccess<'t, K: Key, V: Value>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeInitStruct<'t, K, V, A> {
-    pub fn new(allocator: &'t A) -> TreeInitStruct<'t, K, V, A> {
-        let tree_ptr = allocator.alloc_tree();
-        let tree_ptr = NonNull::new(tree_ptr).expect("out of memory");
-        let init = Tree {
-            root: algorithm::new_root(allocator).expect("out of memory"),
-            writer_attached: AtomicBool::new(false),
-            epoch: epoch::EpochShared::new(),
-        };
-        unsafe { tree_ptr.write(init) };
-
-        TreeInitStruct {
-            tree: unsafe { tree_ptr.as_ref() },
-            allocator,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V, A> {
-        let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
-        if previously_attached {
-            panic!("writer already attached");
-        }
-        TreeWriteAccess {
-            tree: self.tree,
-            allocator: self.allocator,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-            garbage: spin::Mutex::new(GarbageQueue::new()),
-        }
-    }
-
-    pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
-        TreeReadAccess {
-            tree: self.tree,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteAccess<'t, K, V, A> {
-    pub fn start_write<'g>(&'t self) -> TreeWriteGuard<'g, K, V, A>
-    where
-        't: 'g,
-    {
-        TreeWriteGuard {
-            tree_writer: self,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-            created_garbage: false,
-        }
-    }
-
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value> TreeReadAccess<'t, K, V> {
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-pub struct TreeReadGuard<'e, K, V>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'e Tree<V>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-}
-
-impl<'e, K: Key, V: Value> TreeReadGuard<'e, K, V> {
-    pub fn get(&'e self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree.root, &self.epoch_pin)
-    }
-}
-
-pub struct TreeWriteGuard<'e, K, V, A>
-where
-    K: Key,
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    tree_writer: &'e TreeWriteAccess<'e, K, V, A>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-
-    created_garbage: bool,
-}
-
-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
-}
-
-impl<'e, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    /// Get a value
-    pub fn get(&'e mut self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin)
-    }
-
-    /// Insert a value
-    pub fn insert(self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
-
-        self.update_with_fn(key, |existing| {
-            if existing.is_some() {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
-    }
-
-    /// Remove value. Returns true if it existed
-    pub fn remove(self, key: &K) -> bool {
-        let mut result = false;
-        // FIXME: It's not clear if OOM is expected while removing. It seems
-        // not nice, but shrinking a node can OOM. Then again, we could opt
-        // to not shrink a node if we cannot allocate, to live a little longer.
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
-            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
-    }
-
-    /// Try to remove value and return the old value.
-    pub fn remove_and_return(self, key: &K) -> Option<V>
-    where
-        V: Clone,
-    {
-        let mut old = None;
-        self.update_with_fn(key, |existing| {
-            old = existing.cloned();
-            UpdateAction::Remove
-        })
-        .expect("out of memory while removing");
-        old
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    ///
-    /// The function is passed a reference to the existing value, if any. If the function
-    /// returns None, the value is removed from the tree (or if there was no existing value,
-    /// does nothing). If the function returns Some, the existing value is replaced, of if there
-    /// was no existing value, it is inserted. FIXME: update comment
-    pub fn update_with_fn<F>(mut self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self)?;
-
-        if self.created_garbage {
-            let _ = self.collect_garbage();
-        }
-        Ok(())
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>) {
-        self.tree_writer
-            .garbage
-            .lock()
-            .remember_obsolete_node(ptr, self.epoch_pin.epoch);
-        self.created_garbage = true;
-    }
-
-    // returns number of nodes recycled
-    fn collect_garbage(&self) -> usize {
-        self.tree_writer.tree.epoch.advance();
-        self.tree_writer.tree.epoch.broadcast();
-
-        let cutoff_epoch = self.tree_writer.tree.epoch.get_oldest();
-
-        let mut result = 0;
-        let mut garbage_queue = self.tree_writer.garbage.lock();
-        while let Some(ptr) = garbage_queue.next_obsolete(cutoff_epoch) {
-            ptr.deallocate(self.tree_writer.allocator);
-            result += 1;
-        }
-        result
-    }
-}
-
-pub struct TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    done: bool,
-    pub next_key: Vec<u8>,
-    max_key: Option<Vec<u8>>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<K> TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    pub fn new_wrapping() -> TreeIterator<K> {
-        TreeIterator {
-            done: false,
-            next_key: vec![0; K::KEY_LEN],
-            max_key: None,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn new(range: &std::ops::Range<K>) -> TreeIterator<K> {
-        let result = TreeIterator {
-            done: false,
-            next_key: Vec::from(range.start.as_bytes()),
-            max_key: Some(Vec::from(range.end.as_bytes())),
-            phantom_key: PhantomData,
-        };
-        assert_eq!(result.next_key.len(), K::KEY_LEN);
-        assert_eq!(result.max_key.as_ref().unwrap().len(), K::KEY_LEN);
-
-        result
-    }
-
-    pub fn next<'g, V>(&mut self, read_guard: &'g TreeReadGuard<'g, K, V>) -> Option<(K, &'g V)>
-    where
-        V: Value,
-    {
-        if self.done {
-            return None;
-        }
-
-        let mut wrapped_around = false;
-        loop {
-            assert_eq!(self.next_key.len(), K::KEY_LEN);
-            if let Some((k, v)) =
-                algorithm::iter_next(&self.next_key, read_guard.tree.root, &read_guard.epoch_pin)
-            {
-                assert_eq!(k.len(), K::KEY_LEN);
-                assert_eq!(self.next_key.len(), K::KEY_LEN);
-
-                // Check if we reached the end of the range
-                if let Some(max_key) = &self.max_key {
-                    if k.as_slice() >= max_key.as_slice() {
-                        self.done = true;
-                        break None;
-                    }
-                }
-
-                // increment the key
-                self.next_key = k.clone();
-                increment_key(self.next_key.as_mut_slice());
-                let k = k.as_slice().into();
-
-                break Some((k, v));
-            } else {
-                if self.max_key.is_some() {
-                    self.done = true;
-                } else {
-                    // Start from beginning
-                    if !wrapped_around {
-                        for i in 0..K::KEY_LEN {
-                            self.next_key[i] = 0;
-                        }
-                        wrapped_around = true;
-                        continue;
-                    } else {
-                        // The tree is completely empty
-                        // FIXME: perhaps we should remember the starting point instead.
-                        // Currently this will scan some ranges twice.
-                        break None;
-                    }
-                }
-                break None;
-            }
-        }
-    }
-}
-
-fn increment_key(key: &mut [u8]) -> bool {
-    for i in (0..key.len()).rev() {
-        let (byte, overflow) = key[i].overflowing_add(1);
-        key[i] = byte;
-        if !overflow {
-            return false;
-        }
-    }
-    true
-}
-
-// Debugging functions
-impl<'e, K: Key, V: Value + Debug, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree_writer.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value + Debug> TreeReadGuard<'e, K, V> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value> TreeWriteAccess<'e, K, V, ArtMultiSlabAllocator<'e, V>> {
-    pub fn get_statistics(&self) -> ArtTreeStatistics {
-        self.allocator.get_statistics();
-        ArtTreeStatistics {
-            blocks: self.allocator.inner.block_allocator.get_statistics(),
-            slabs: self.allocator.get_statistics(),
-            epoch: self.tree.epoch.get_current(),
-            oldest_epoch: self.tree.epoch.get_oldest(),
-            num_garbage: self.garbage.lock().0.len() as u64,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtTreeStatistics {
-    pub blocks: allocator::block::BlockAllocatorStats,
-    pub slabs: allocator::ArtMultiSlabStats,
-
-    pub epoch: u64,
-    pub oldest_epoch: u64,
-    pub num_garbage: u64,
-}
--- a/libs/neonart/src/tests.rs
+++ b/libs/neonart/src/tests.rs
@@ -1,236 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use crate::ArtAllocator;
-use crate::ArtMultiSlabAllocator;
-use crate::TreeInitStruct;
-use crate::TreeIterator;
-use crate::TreeWriteAccess;
-use crate::UpdateAction;
-
-use crate::{Key, Value};
-
-use rand::Rng;
-use rand::seq::SliceRandom;
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl TestKey {
-    const MIN: TestKey = TestKey([0; TEST_KEY_LEN]);
-    const MAX: TestKey = TestKey([u8::MAX; TEST_KEY_LEN]);
-}
-
-impl Key for TestKey {
-    const KEY_LEN: usize = TEST_KEY_LEN;
-    fn as_bytes(&self) -> &[u8] {
-        &self.0
-    }
-}
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-impl Value for usize {}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, usize, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let w = tree_writer.start_write();
-        let res = w.insert(&(*k).into(), idx);
-        assert!(res.is_ok());
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let r = tree_writer.start_read();
-        let value = r.get(&(*k).into());
-        assert_eq!(value, Some(idx).as_ref());
-    }
-
-    eprintln!("stats: {:?}", tree_writer.get_statistics());
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.contains(&key) {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-struct TestValue(AtomicUsize);
-
-impl TestValue {
-    fn new(val: usize) -> TestValue {
-        TestValue(AtomicUsize::new(val))
-    }
-
-    fn load(&self) -> usize {
-        self.0.load(Ordering::Relaxed)
-    }
-}
-
-impl Value for TestValue {}
-
-impl Clone for TestValue {
-    fn clone(&self) -> TestValue {
-        TestValue::new(self.load())
-    }
-}
-
-impl Debug for TestValue {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.load())
-    }
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op<A: ArtAllocator<TestValue>>(
-    op: &TestOp,
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    eprintln!("applying op: {op:?}");
-
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    // apply to Art tree
-    let w = tree.start_write();
-    w.update_with_fn(&op.0, |existing| {
-        assert_eq!(existing.map(TestValue::load), shadow_existing);
-
-        match (existing, op.1) {
-            (None, None) => UpdateAction::Nothing,
-            (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
-            (Some(_old_val), None) => UpdateAction::Remove,
-            (Some(old_val), Some(new_val)) => {
-                old_val.0.store(new_val, Ordering::Relaxed);
-                UpdateAction::Nothing
-            }
-        }
-    })
-    .expect("out of memory");
-}
-
-fn test_iter<A: ArtAllocator<TestValue>>(
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &BTreeMap<TestKey, usize>,
-) {
-    let mut shadow_iter = shadow.iter();
-    let mut iter = TreeIterator::new(&(TestKey::MIN..TestKey::MAX));
-
-    loop {
-        let shadow_item = shadow_iter.next().map(|(k, v)| (*k, *v));
-        let r = tree.start_read();
-        let item = iter.next(&r);
-
-        if shadow_item != item.map(|(k, v)| (k, v.load())) {
-            eprintln!("FAIL: iterator returned {item:?}, expected {shadow_item:?}");
-            tree.start_read().dump(&mut std::io::stderr());
-
-            eprintln!("SHADOW:");
-            for si in shadow {
-                eprintln!("key: {:?}, val: {}", si.0, si.1);
-            }
-            panic!("FAIL: iterator returned {item:?}, expected {shadow_item:?}");
-        }
-        if item.is_none() {
-            break;
-        }
-    }
-}
-
-#[test]
-fn random_ops() {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, TestValue, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let mut key: TestKey = (rng.sample(distribution) as u128).into();
-
-        if rng.random_bool(0.10) {
-            key = TestKey::from(u128::from(&key) | 0xffffffff);
-        }
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &tree_writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            eprintln!("stats: {:?}", tree_writer.get_statistics());
-            test_iter(&tree_writer, &shadow);
-        }
-    }
-}
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -749,18 +749,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
                trace!("got query {query_string:?}");
                if let Err(e) = handler.process_query(self, query_string).await {
                    match e {
-                        err @ QueryError::Shutdown => {
-                            // Notify postgres of the connection shutdown at the libpq
-                            // protocol level. This avoids postgres having to tell apart
-                            // from an idle connection and a stale one, which is bug prone.
-                            let shutdown_error = short_error(&err);
-                            self.write_message_noflush(&BeMessage::ErrorResponse(
-                                &shutdown_error,
-                                Some(err.pg_error_code()),
-                            ))?;
-
-                            return Ok(ProcessMsgResult::Break);
-                        }
+                        QueryError::Shutdown => return Ok(ProcessMsgResult::Break),
                        QueryError::SimulatedConnectionError => {
                            return Err(QueryError::SimulatedConnectionError);
                        }
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -47,7 +47,6 @@ tracing-subscriber = { workspace = true, features = ["json", "registry"] }
 tracing-utils.workspace = true
 rand.workspace = true
 scopeguard.workspace = true
-uuid.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 walkdir.workspace = true
--- a/libs/utils/src/auth.rs
+++ b/libs/utils/src/auth.rs
@@ -12,8 +12,7 @@ use jsonwebtoken::{
    Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,
 };
 use pem::Pem;
-use serde::{Deserialize, Deserializer, Serialize, de::DeserializeOwned};
-use uuid::Uuid;
+use serde::{Deserialize, Serialize, de::DeserializeOwned};

 use crate::id::TenantId;

@@ -26,11 +25,6 @@ pub enum Scope {
    /// Provides access to all data for a specific tenant (specified in `struct Claims` below)
    // TODO: join these two?
    Tenant,
-    /// Provides access to all data for a specific tenant, but based on endpoint ID. This token scope
-    /// is only used by compute to fetch the spec for a specific endpoint. The spec contains a Tenant-scoped
-    /// token authorizing access to all data of a tenant, so the spec-fetch API requires a TenantEndpoint
-    /// scope token to ensure that untrusted compute nodes can't fetch spec for arbitrary endpoints.
-    TenantEndpoint,
    /// Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs.
    /// Should only be used e.g. for status check/tenant creation/list.
    PageServerApi,
@@ -57,43 +51,17 @@ pub enum Scope {
    ControllerPeer,
 }

-fn deserialize_empty_string_as_none_uuid<'de, D>(deserializer: D) -> Result<Option<Uuid>, D::Error>
-where
-    D: Deserializer<'de>,
-{
-    let opt = Option::<String>::deserialize(deserializer)?;
-    match opt.as_deref() {
-        Some("") => Ok(None),
-        Some(s) => Uuid::parse_str(s)
-            .map(Some)
-            .map_err(serde::de::Error::custom),
-        None => Ok(None),
-    }
-}
-
 /// JWT payload. See docs/authentication.md for the format
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct Claims {
    #[serde(default)]
    pub tenant_id: Option<TenantId>,
-    #[serde(
-        default,
-        skip_serializing_if = "Option::is_none",
-        // Neon control plane includes this field as empty in the claims.
-        // Consider it None in those cases.
-        deserialize_with = "deserialize_empty_string_as_none_uuid"
-    )]
-    pub endpoint_id: Option<Uuid>,
    pub scope: Scope,
 }

 impl Claims {
    pub fn new(tenant_id: Option<TenantId>, scope: Scope) -> Self {
-        Self {
-            tenant_id,
-            scope,
-            endpoint_id: None,
-        }
+        Self { tenant_id, scope }
    }
 }

@@ -244,7 +212,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
        let expected_claims = Claims {
            tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()),
            scope: Scope::Tenant,
-            endpoint_id: None,
        };

        // A test token containing the following payload, signed using TEST_PRIV_KEY_ED25519:
@@ -273,7 +240,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
        let claims = Claims {
            tenant_id: Some(TenantId::from_str("3d1f7595b468230304e0b73cecbcb081").unwrap()),
            scope: Scope::Tenant,
-            endpoint_id: None,
        };

        let pem = pem::parse(TEST_PRIV_KEY_ED25519).unwrap();
--- a/libs/utils/src/shard.rs
+++ b/libs/utils/src/shard.rs
@@ -53,10 +53,6 @@ impl ShardCount {
    pub const MAX: Self = Self(u8::MAX);
    pub const MIN: Self = Self(0);

-    pub fn unsharded() -> Self {
-        ShardCount(0)
-    }
-
    /// The internal value of a ShardCount may be zero, which means "1 shard, but use
    /// legacy format for TenantShardId that excludes the shard suffix", also known
    /// as [`TenantShardId::unsharded`].
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -431,7 +431,7 @@ pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
    let empty_wal_rate_limiter = crate::bindings::WalRateLimiter {
        should_limit: crate::bindings::pg_atomic_uint32 { value: 0 },
        sent_bytes: 0,
-        last_recorded_time_us: crate::bindings::pg_atomic_uint64 { value: 0 },
+        last_recorded_time_us: 0,
    };

    crate::bindings::WalproposerShmemState {
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -54,7 +54,6 @@ pageserver_api.workspace = true
 pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
 pageserver_page_api.workspace = true
-peekable.workspace = true
 pem.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
@@ -67,7 +66,6 @@ postgres-types.workspace = true
 posthog_client_lite.workspace = true
 pprof.workspace = true
 pq_proto.workspace = true
-prost.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -873,22 +873,6 @@ impl Client {
            .map_err(Error::ReceiveBody)
    }

-    pub async fn reset_alert_gauges(&self) -> Result<()> {
-        let uri = format!(
-            "{}/hadron-internal/reset_alert_gauges",
-            self.mgmt_api_endpoint
-        );
-        self.start_request(Method::POST, uri)
-            .send()
-            .await
-            .map_err(Error::SendRequest)?
-            .error_from_body()
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
    pub async fn wait_lsn(
        &self,
        tenant_shard_id: TenantShardId,
--- a/pageserver/client_grpc/src/lib.rs
+++ b/pageserver/client_grpc/src/lib.rs
@@ -4,4 +4,3 @@ mod retry;
 mod split;

 pub use client::{PageserverClient, ShardSpec};
-pub use pageserver_api::shard::ShardStripeSize; // used in ShardSpec
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -33,8 +33,6 @@ pub enum ProtocolError {
    Invalid(&'static str, String),
    #[error("required field '{0}' is missing")]
    Missing(&'static str),
-    #[error("invalid combination of not_modified_lsn '{0}' and request_lsn '{1}'")]
-    InvalidLsns(Lsn, Lsn),
 }

 impl ProtocolError {
@@ -87,9 +85,9 @@ impl TryFrom<proto::ReadLsn> for ReadLsn {
            return Err(ProtocolError::invalid("request_lsn", pb.request_lsn));
        }
        if pb.not_modified_since_lsn > pb.request_lsn {
-            return Err(ProtocolError::InvalidLsns(
-                Lsn(pb.not_modified_since_lsn),
-                Lsn(pb.request_lsn),
+            return Err(ProtocolError::invalid(
+                "not_modified_since_lsn",
+                pb.not_modified_since_lsn,
            ));
        }
        Ok(Self {
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -25,9 +25,6 @@ tracing.workspace = true
 tokio.workspace = true
 tokio-stream.workspace = true
 tokio-util.workspace = true
-axum.workspace = true
-http.workspace = true
-metrics.workspace = true
 tonic.workspace = true
 url.workspace = true

--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -34,10 +34,6 @@ use crate::util::{request_stats, tokio_thread_local_stats};
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
-    #[clap(long, default_value = "false")]
-    grpc: bool,
-    #[clap(long, default_value = "false")]
-    grpc_stream: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    /// Pageserver connection string. Supports postgresql:// and grpc:// protocols.
@@ -82,9 +78,6 @@ pub(crate) struct Args {
    #[clap(long)]
    set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,

-    #[clap(long)]
-    only_relnode: Option<u32>,
-
    /// Queue depth generated in each client.
    #[clap(long, default_value = "1")]
    queue_depth: NonZeroUsize,
@@ -99,31 +92,10 @@ pub(crate) struct Args {
    #[clap(long, default_value = "1")]
    batch_size: NonZeroUsize,

+    #[clap(long)]
+    only_relnode: Option<u32>,
+
    targets: Option<Vec<TenantTimelineId>>,
-
-    #[clap(long, default_value = "100")]
-    pool_max_consumers: NonZeroUsize,
-
-    #[clap(long, default_value = "5")]
-    pool_error_threshold: NonZeroUsize,
-
-    #[clap(long, default_value = "5000")]
-    pool_connect_timeout: NonZeroUsize,
-
-    #[clap(long, default_value = "1000")]
-    pool_connect_backoff: NonZeroUsize,
-
-    #[clap(long, default_value = "60000")]
-    pool_max_idle_duration: NonZeroUsize,
-
-    #[clap(long, default_value = "0")]
-    max_delay_ms: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_drops: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_hangs: usize,
 }

 /// State shared by all clients
@@ -180,6 +152,7 @@ pub(crate) fn main(args: Args) -> anyhow::Result<()> {
        main_impl(args, thread_local_stats)
    })
 }
+
 async fn main_impl(
    args: Args,
    all_thread_local_stats: AllThreadLocalStats<request_stats::Stats>,
@@ -344,7 +317,6 @@ async fn main_impl(
    let rps_period = args
        .per_client_rate
        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
-
    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
        let ss = shared_state.clone();
        let cancel = cancel.clone();
--- a/pageserver/src/auth.rs
+++ b/pageserver/src/auth.rs
@@ -20,8 +20,7 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
            | Scope::GenerationsApi
            | Scope::Infra
            | Scope::Scrubber
-            | Scope::ControllerPeer
-            | Scope::TenantEndpoint,
+            | Scope::ControllerPeer,
            _,
        ) => Err(AuthError(
            format!(
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -2357,7 +2357,6 @@ async fn timeline_compact_handler(
        flags,
        sub_compaction,
        sub_compaction_max_job_size_mb,
-        gc_compaction_do_metadata_compaction: false,
    };

    let scheduled = compact_request
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -3218,25 +3218,13 @@ where
 pub struct GrpcPageServiceHandler {
    tenant_manager: Arc<TenantManager>,
    ctx: RequestContext,
-
-    /// Cancelled to shut down the server. Tonic will shut down in response to this, but wait for
-    /// in-flight requests to complete. Any tasks we spawn ourselves must respect this token.
-    cancel: CancellationToken,
-
-    /// Any tasks we spawn ourselves should clone this gate guard, so that we can wait for them to
-    /// complete during shutdown. Request handlers implicitly hold this guard already.
    gate_guard: GateGuard,
-
-    /// `get_vectored` concurrency setting.
    get_vectored_concurrent_io: GetVectoredConcurrentIo,
 }

 impl GrpcPageServiceHandler {
    /// Spawns a gRPC server for the page service.
    ///
-    /// Returns a `CancellableTask` handle that can be used to shut down the server. It waits for
-    /// any in-flight requests and tasks to complete first.
-    ///
    /// TODO: this doesn't support TLS. We need TLS reloading via ReloadingCertificateResolver, so we
    /// need to reimplement the TCP+TLS accept loop ourselves.
    pub fn spawn(
@@ -3246,15 +3234,12 @@ impl GrpcPageServiceHandler {
        get_vectored_concurrent_io: GetVectoredConcurrentIo,
        listener: std::net::TcpListener,
    ) -> anyhow::Result<CancellableTask> {
-        // Set up a cancellation token for shutting down the server, and a gate to wait for all
-        // requests and spawned tasks to complete.
        let cancel = CancellationToken::new();
-        let gate = Gate::default();
-
        let ctx = RequestContextBuilder::new(TaskKind::PageRequestHandler)
            .download_behavior(DownloadBehavior::Download)
            .perf_span_dispatch(perf_trace_dispatch)
            .detached_child();
+        let gate = Gate::default();

        // Set up the TCP socket. We take a preconfigured TcpListener to bind the
        // port early during startup.
@@ -3285,7 +3270,6 @@ impl GrpcPageServiceHandler {
        let page_service_handler = GrpcPageServiceHandler {
            tenant_manager,
            ctx,
-            cancel: cancel.clone(),
            gate_guard: gate.enter().expect("gate was just created"),
            get_vectored_concurrent_io,
        };
@@ -3322,20 +3306,19 @@ impl GrpcPageServiceHandler {
            .build_v1()?;
        let server = server.add_service(reflection_service);

-        // Spawn server task. It runs until the cancellation token fires and in-flight requests and
-        // tasks complete. The `CancellableTask` will wait for the task's join handle, which
-        // implicitly waits for the gate to close.
+        // Spawn server task.
        let task_cancel = cancel.clone();
        let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
-            "grpc pageservice listener",
+            "grpc listener",
            async move {
-                server
+                let result = server
                    .serve_with_incoming_shutdown(incoming, task_cancel.cancelled())
-                    .await?;
-                // Server exited cleanly. All requests should have completed by now. Wait for any
-                // spawned tasks to complete as well (e.g. IoConcurrency sidecars) via the gate.
-                gate.close().await;
-                anyhow::Ok(())
+                    .await;
+                if result.is_ok() {
+                    // TODO: revisit shutdown logic once page service is implemented.
+                    gate.close().await;
+                }
+                result
            },
        ));

@@ -3525,10 +3508,7 @@ impl GrpcPageServiceHandler {

 /// Implements the gRPC page service.
 ///
-/// Tonic will drop the request handler futures if the client goes away (e.g. due to a timeout or
-/// cancellation), so the read path must be cancellation-safe. On shutdown, Tonic will wait for
-/// in-flight requests to complete.
-///
+/// TODO: cancellation.
 /// TODO: when the libpq impl is removed, remove the Pagestream types and inline the handler code.
 #[tonic::async_trait]
 impl proto::PageService for GrpcPageServiceHandler {
@@ -3613,14 +3593,8 @@ impl proto::PageService for GrpcPageServiceHandler {

        // Spawn a task to run the basebackup.
        let span = Span::current();
-        let gate_guard = self
-            .gate_guard
-            .try_clone()
-            .map_err(|_| tonic::Status::unavailable("shutting down"))?;
        let (mut simplex_read, mut simplex_write) = tokio::io::simplex(CHUNK_SIZE);
        let jh = tokio::spawn(async move {
-            let _gate_guard = gate_guard; // keep gate open until task completes
-
            let gzip_level = match req.compression {
                page_api::BaseBackupCompression::None => None,
                // NB: using fast compression because it's on the critical path for compute
@@ -3744,17 +3718,15 @@ impl proto::PageService for GrpcPageServiceHandler {
            .await?;

        // Spawn an IoConcurrency sidecar, if enabled.
-        let gate_guard = self
-            .gate_guard
-            .try_clone()
-            .map_err(|_| tonic::Status::unavailable("shutting down"))?;
+        let Ok(gate_guard) = self.gate_guard.try_clone() else {
+            return Err(tonic::Status::unavailable("shutting down"));
+        };
        let io_concurrency =
            IoConcurrency::spawn_from_conf(self.get_vectored_concurrent_io, gate_guard);

-        // Construct the GetPageRequest stream handler.
+        // Spawn a task to handle the GetPageRequest stream.
        let span = Span::current();
        let ctx = self.ctx.attached_child();
-        let cancel = self.cancel.clone();
        let mut reqs = req.into_inner();

        let resps = async_stream::try_stream! {
@@ -3762,17 +3734,7 @@ impl proto::PageService for GrpcPageServiceHandler {
                .get(ttid.tenant_id, ttid.timeline_id, shard_selector)
                .await?
                .downgrade();
-            loop {
-                // NB: Tonic considers the entire stream to be an in-flight request and will wait
-                // for it to complete before shutting down. React to cancellation between requests.
-                let req = tokio::select! {
-                    result = reqs.message() => match result {
-                        Ok(Some(req)) => Ok(req),
-                        Ok(None) => break, // client closed the stream
-                        Err(err) => Err(err),
-                    },
-                    _ = cancel.cancelled() => Err(tonic::Status::unavailable("shutting down")),
-                }?;
+            while let Some(req) = reqs.message().await? {
                let req_id = req.request_id.map(page_api::RequestID::from).unwrap_or_default();
                let result = Self::get_page(&ctx, &timeline, req, io_concurrency.clone())
                    .instrument(span.clone()) // propagate request span
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -286,10 +286,6 @@ impl Timeline {
    /// Like [`Self::get_rel_page_at_lsn`], but returns a batch of pages.
    ///
    /// The ordering of the returned vec corresponds to the ordering of `pages`.
-    ///
-    /// NB: the read path must be cancellation-safe. The Tonic gRPC service will drop the future
-    /// if the client goes away (e.g. due to timeout or cancellation).
-    /// TODO: verify that it actually is cancellation-safe.
    pub(crate) async fn get_rel_page_at_lsn_batched(
        &self,
        pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber, LsnRange, RequestContext)>,
@@ -817,7 +813,6 @@ impl Timeline {
        let gc_cutoff_lsn_guard = self.get_applied_gc_cutoff_lsn();
        let gc_cutoff_planned = {
            let gc_info = self.gc_info.read().unwrap();
-            info!(cutoffs=?gc_info.cutoffs, applied_cutoff=%*gc_cutoff_lsn_guard, "starting find_lsn_for_timestamp");
            gc_info.min_cutoff()
        };
        // Usually the planned cutoff is newer than the cutoff of the last gc run,
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -9216,11 +9216,7 @@ mod tests {

        let cancel = CancellationToken::new();
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -9303,11 +9299,7 @@ mod tests {
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -9844,11 +9836,7 @@ mod tests {

        let cancel = CancellationToken::new();
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -9883,11 +9871,7 @@ mod tests {
            guard.cutoffs.space = Lsn(0x40);
        }
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -10462,7 +10446,7 @@ mod tests {
                &cancel,
                CompactOptions {
                    flags: dryrun_flags,
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -10473,22 +10457,14 @@ mod tests {
        verify_result().await;

        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;

        // compact again
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -10507,22 +10483,14 @@ mod tests {
            guard.cutoffs.space = Lsn(0x38);
        }
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result

        // not increasing the GC horizon and compact again
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -10727,7 +10695,7 @@ mod tests {
                &cancel,
                CompactOptions {
                    flags: dryrun_flags,
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -10738,22 +10706,14 @@ mod tests {
        verify_result().await;

        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;

        // compact again
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -10953,11 +10913,7 @@ mod tests {

        let cancel = CancellationToken::new();
        branch_tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();

@@ -10970,7 +10926,7 @@ mod tests {
                &cancel,
                CompactOptions {
                    compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -11638,7 +11594,7 @@ mod tests {
                CompactOptions {
                    flags: EnumSet::new(),
                    compact_key_range: Some((get_key(0)..get_key(2)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -11685,7 +11641,7 @@ mod tests {
                CompactOptions {
                    flags: EnumSet::new(),
                    compact_key_range: Some((get_key(2)..get_key(4)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -11737,7 +11693,7 @@ mod tests {
                CompactOptions {
                    flags: EnumSet::new(),
                    compact_key_range: Some((get_key(4)..get_key(9)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -11788,7 +11744,7 @@ mod tests {
                CompactOptions {
                    flags: EnumSet::new(),
                    compact_key_range: Some((get_key(9)..get_key(10)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -11844,7 +11800,7 @@ mod tests {
                CompactOptions {
                    flags: EnumSet::new(),
                    compact_key_range: Some((get_key(0)..get_key(10)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -12115,7 +12071,7 @@ mod tests {
                &cancel,
                CompactOptions {
                    compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -12150,11 +12106,7 @@ mod tests {

        // compact again
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -12373,7 +12325,7 @@ mod tests {
                CompactOptions {
                    compact_key_range: Some((get_key(0)..get_key(2)).into()),
                    compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -12419,7 +12371,7 @@ mod tests {
                CompactOptions {
                    compact_key_range: Some((get_key(3)..get_key(8)).into()),
                    compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -12467,7 +12419,7 @@ mod tests {
                CompactOptions {
                    compact_key_range: Some((get_key(0)..get_key(5)).into()),
                    compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()),
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
@@ -12502,11 +12454,7 @@ mod tests {

        // final full compaction
        tline
-            .compact_with_gc(
-                &cancel,
-                CompactOptions::default_for_gc_compaction_unit_tests(),
-                &ctx,
-            )
+            .compact_with_gc(&cancel, CompactOptions::default(), &ctx)
            .await
            .unwrap();
        verify_result().await;
@@ -12616,7 +12564,7 @@ mod tests {
                CompactOptions {
                    compact_key_range: None,
                    compact_lsn_range: None,
-                    ..CompactOptions::default_for_gc_compaction_unit_tests()
+                    ..Default::default()
                },
                &ctx,
            )
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -939,20 +939,6 @@ pub(crate) struct CompactOptions {
    /// Set job size for the GC compaction.
    /// This option is only used by GC compaction.
    pub sub_compaction_max_job_size_mb: Option<u64>,
-    /// Only for GC compaction.
-    /// If set, the compaction will compact the metadata layers. Should be only set to true in unit tests
-    /// because metadata compaction is not fully supported yet.
-    pub gc_compaction_do_metadata_compaction: bool,
-}
-
-impl CompactOptions {
-    #[cfg(test)]
-    pub fn default_for_gc_compaction_unit_tests() -> Self {
-        Self {
-            gc_compaction_do_metadata_compaction: true,
-            ..Default::default()
-        }
-    }
 }

 impl std::fmt::Debug for Timeline {
@@ -1324,9 +1310,6 @@ impl Timeline {
    ///
    /// This naive implementation will be replaced with a more efficient one
    /// which actually vectorizes the read path.
-    ///
-    /// NB: the read path must be cancellation-safe. The Tonic gRPC service will drop the future
-    /// if the client goes away (e.g. due to timeout or cancellation).
    pub(crate) async fn get_vectored(
        &self,
        query: VersionedKeySpaceQuery,
@@ -2202,7 +2185,6 @@ impl Timeline {
                    compact_lsn_range: None,
                    sub_compaction: false,
                    sub_compaction_max_job_size_mb: None,
-                    gc_compaction_do_metadata_compaction: false,
                },
                ctx,
            )
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -396,7 +396,6 @@ impl GcCompactionQueue {
                    }),
                    compact_lsn_range: None,
                    sub_compaction_max_job_size_mb: None,
-                    gc_compaction_do_metadata_compaction: false,
                },
                permit,
            );
@@ -513,7 +512,6 @@ impl GcCompactionQueue {
                    compact_key_range: Some(job.compact_key_range.into()),
                    compact_lsn_range: Some(job.compact_lsn_range.into()),
                    sub_compaction_max_job_size_mb: None,
-                    gc_compaction_do_metadata_compaction: false,
                };
                pending_tasks.push(GcCompactionQueueItem::SubCompactionJob {
                    options,
@@ -787,8 +785,6 @@ pub(crate) struct GcCompactJob {
    /// as specified here. The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`].
    /// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here.
    pub compact_lsn_range: Range<Lsn>,
-    /// See [`CompactOptions::gc_compaction_do_metadata_compaction`].
-    pub do_metadata_compaction: bool,
 }

 impl GcCompactJob {
@@ -803,7 +799,6 @@ impl GcCompactJob {
                .compact_lsn_range
                .map(|x| x.into())
                .unwrap_or(Lsn::INVALID..Lsn::MAX),
-            do_metadata_compaction: options.gc_compaction_do_metadata_compaction,
        }
    }
 }
@@ -3179,7 +3174,6 @@ impl Timeline {
                        dry_run: job.dry_run,
                        compact_key_range: start..end,
                        compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,
-                        do_metadata_compaction: false,
                    });
                    current_start = Some(end);
                }
@@ -3242,7 +3236,7 @@ impl Timeline {
    async fn compact_with_gc_inner(
        self: &Arc<Self>,
        cancel: &CancellationToken,
-        mut job: GcCompactJob,
+        job: GcCompactJob,
        ctx: &RequestContext,
        yield_for_l0: bool,
    ) -> Result<CompactionOutcome, CompactionError> {
@@ -3250,28 +3244,6 @@ impl Timeline {
        // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
        // Note that we already acquired the compaction lock when the outer `compact` function gets called.

-        // If the job is not configured to compact the metadata key range, shrink the key range
-        // to exclude the metadata key range. The check is done by checking if the end of the key range
-        // is larger than the start of the metadata key range. Note that metadata keys cover the entire
-        // second half of the keyspace, so it's enough to only check the end of the key range.
-        if !job.do_metadata_compaction
-            && job.compact_key_range.end > Key::metadata_key_range().start
-        {
-            tracing::info!(
-                "compaction for metadata key range is not supported yet, overriding compact_key_range from {} to {}",
-                job.compact_key_range.end,
-                Key::metadata_key_range().start
-            );
-            // Shrink the key range to exclude the metadata key range.
-            job.compact_key_range.end = Key::metadata_key_range().start;
-
-            // Skip the job if the key range completely lies within the metadata key range.
-            if job.compact_key_range.start >= job.compact_key_range.end {
-                tracing::info!("compact_key_range is empty, skipping compaction");
-                return Ok(CompactionOutcome::Done);
-            }
-        }
-
        let timer = Instant::now();
        let begin_timer = timer;

--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -184,7 +184,7 @@ pub(super) async fn connection_manager_loop_step(

            // If we've not received any updates from the broker from a while, are waiting for WAL
            // and have no safekeeper connection or connection candidates, then it might be that
-            // the broker subscription is wedged. Drop the current subscription and re-subscribe
+            // the broker subscription is wedged. Drop the current subscription and re-subscribe
            // with the goal of unblocking it.
            _ = broker_reset_interval.tick() => {
                let awaiting_lsn = wait_lsn_status.borrow().is_some();
@@ -192,7 +192,7 @@ pub(super) async fn connection_manager_loop_step(
                let no_connection = connection_manager_state.wal_connection.is_none();

                if awaiting_lsn && no_candidates && no_connection {
-                    tracing::info!("No broker updates received for a while, but waiting for WAL. Re-setting stream ...");
+                    tracing::warn!("No broker updates received for a while, but waiting for WAL. Re-setting stream ...");
                    broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
                }
            },
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`ALTER ROLE {privileged_role_name} BYPASSRLS;`
				`@@ -0,0 +1 @@`
				`GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;`
				`@@ -1 +0,0 @@`
				`GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;`
				`@@ -0,0 +1 @@`
				`GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;`
				`@@ -0,0 +1 @@`
				`GRANT pg_signal_backend TO neon_superuser WITH ADMIN OPTION;`
				`@@ -1 +0,0 @@`
				`GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;`