revert + add tests

Signed-off-by: Alex Chi Z <chi@neon.tech>
fix(pageserver): do not allow delete to bypass upload metadata
2026-07-03 04:00:37 +00:00 · 2025-07-02 14:38:40 -07:00 · 2025-07-02 13:55:39 -07:00
180 changed files with 2656 additions and 18132 deletions
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -32,14 +32,162 @@ permissions:
  contents: read

 jobs:
-  make-all:
+  build-pgxn:
+    if: |
+      inputs.pg_versions != '[]' || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
+    timeout-minutes: 30
+    runs-on: macos-15
+    strategy:
+      matrix:
+        postgres-version: ${{ inputs.rebuild_everything && fromJSON('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - name: Checkout main repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set pg ${{ matrix.postgres-version }} for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres ${{ matrix.postgres-version }} build
+        id: cache_pg
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: pg_install/${{ matrix.postgres-version }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-${{ matrix.postgres-version }}
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build Postgres ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
+      - name: Build Neon Pg Ext ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
+
+      - name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: pg_install--${{ matrix.postgres-version }}
+          path: pg_install/${{ matrix.postgres-version }}
+          # The artifact is supposed to be used by the next job in the same workflow,
+          # so there’s no need to store it for too long.
+          retention-days: 1
+
+  build-walproposer-lib:
+    if: |
+      contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
+    timeout-minutes: 30
+    runs-on: macos-15
+    needs: [build-pgxn]
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - name: Checkout main repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set pg v17 for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Download "pg_install/v17" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v17
+          path: pg_install/v17
+
+      # `actions/download-artifact` doesn't preserve permissions:
+      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
+      - name: Make pg_install/v*/bin/* executable
+        run: |
+          chmod +x pg_install/v*/bin/*
+
+      - name: Cache walproposer-lib
+        id: cache_walproposer_lib
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: build/walproposer-lib
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-v17
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-v17
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build walproposer-lib (only for v17)
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run:
+          make walproposer-lib -j$(sysctl -n hw.ncpu) PG_INSTALL_CACHED=1
+
+      - name: Upload "build/walproposer-lib" artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: build--walproposer-lib
+          path: build/walproposer-lib
+          # The artifact is supposed to be used by the next job in the same workflow,
+          # so there’s no need to store it for too long.
+          retention-days: 1
+
+  cargo-build:
    if: |
      inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
-    timeout-minutes: 60
+    timeout-minutes: 30
    runs-on: macos-15
+    needs: [build-pgxn, build-walproposer-lib]
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
@@ -55,53 +203,41 @@ jobs:
        with:
          submodules: true

-      - name: Install build dependencies
-        run: |
-          brew install flex bison openssl protobuf icu4c
-
-      - name: Set extra env for macOS
-        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
-
-      - name: Restore "pg_install/" cache
-        id: cache_pg
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      - name: Download "pg_install/v14" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
        with:
-          path: pg_install
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-install-v14-${{ hashFiles('Makefile', 'postgres.mk', 'vendor/revisions.json') }}
+          name: pg_install--v14
+          path: pg_install/v14

-      - name: Checkout vendor/postgres submodules
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          git submodule init
-          git submodule update --depth 1 --recursive
+      - name: Download "pg_install/v15" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v15
+          path: pg_install/v15

-      - name: Build Postgres
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          make postgres -j$(sysctl -n hw.ncpu)
+      - name: Download "pg_install/v16" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v16
+          path: pg_install/v16

-      # This isn't strictly necessary, but it makes the cached and non-cached builds more similar,
-      # When pg_install is restored from cache, there is no 'build/' directory. By removing it
-      # in a non-cached build too, we enforce that the rest of the steps don't depend on it,
-      # so that we notice any build caching bugs earlier.
-      - name: Remove build artifacts
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          rm -rf build
+      - name: Download "pg_install/v17" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v17
+          path: pg_install/v17

-      # Explicitly update the rust toolchain before running 'make'. The parallel make build can
-      # invoke 'cargo build' more than once in parallel, for different crates.  That's OK, 'cargo'
-      # does its own locking to prevent concurrent builds from stepping on each other's
-      # toes. However, it will first try to update the toolchain, and that step is not locked the
-      # same way. To avoid two toolchain updates running in parallel and stepping on each other's
-      # toes, ensure that the toolchain is up-to-date beforehand.
-      - name: Update rust toolchain
+      - name: Download "build/walproposer-lib" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: build--walproposer-lib
+          path: build/walproposer-lib
+
+      # `actions/download-artifact` doesn't preserve permissions:
+      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
+      - name: Make pg_install/v*/bin/* executable
        run: |
-          rustup --version &&
-          rustup update &&
-          rustup show
+          chmod +x pg_install/v*/bin/*

      - name: Cache cargo deps
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
@@ -113,12 +249,17 @@ jobs:
            target
          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

-      # Build the neon-specific postgres extensions, and all the Rust bits.
-      #
-      # Pass PG_INSTALL_CACHED=1 because PostgreSQL was already built and cached
-      # separately.
-      - name: Build all
-        run: PG_INSTALL_CACHED=1 BUILD_TYPE=release make -j$(sysctl -n hw.ncpu) all
+      - name: Install build dependencies
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Run cargo build
+        run: cargo build --all --release -j$(sysctl -n hw.ncpu)

      - name: Check that no warnings are produced
        run: ./run_clippy.sh
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,6 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
-pgxn/neon/communicator/communicator_bindings.h

 # Coverage
 *.profraw
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -247,32 +247,12 @@ dependencies = [
 "syn 2.0.100",
 ]

-[[package]]
-name = "atomic"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
-dependencies = [
- "bytemuck",
-]
-
 [[package]]
 name = "atomic-take"
 version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"

-[[package]]
-name = "atomic_enum"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.100",
-]
-
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -707,40 +687,13 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum"
-version = "0.7.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
-dependencies = [
- "async-trait",
- "axum-core 0.4.5",
- "bytes",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "itoa",
- "matchit 0.7.3",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper 1.0.1",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum"
 version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
 dependencies = [
- "axum-core 0.5.0",
+ "axum-core",
 "base64 0.22.1",
 "bytes",
 "form_urlencoded",
@@ -748,10 +701,10 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "itoa",
- "matchit 0.8.4",
+ "matchit",
 "memchr",
 "mime",
 "percent-encoding",
@@ -771,26 +724,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum-core"
-version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
-dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "rustversion",
- "sync_wrapper 1.0.1",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum-core"
 version = "0.5.0"
@@ -817,8 +750,8 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
 dependencies = [
- "axum 0.8.1",
- "axum-core 0.5.0",
+ "axum",
+ "axum-core",
 "bytes",
 "form_urlencoded",
 "futures-util",
@@ -1096,23 +1029,9 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"

 [[package]]
 name = "bytemuck"
-version = "1.23.1"
+version = "1.16.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
-dependencies = [
- "bytemuck_derive",
-]
-
-[[package]]
-name = "bytemuck_derive"
-version = "1.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.100",
-]
+checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83"

 [[package]]
 name = "byteorder"
@@ -1164,25 +1083,6 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

-[[package]]
-name = "cbindgen"
-version = "0.29.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "975982cdb7ad6a142be15bdf84aea7ec6a9e5d4d797c004d43185b24cfe4e684"
-dependencies = [
- "clap",
- "heck",
- "indexmap 2.9.0",
- "log",
- "proc-macro2",
- "quote",
- "serde",
- "serde_json",
- "syn 2.0.100",
- "tempfile",
- "toml",
-]
-
 [[package]]
 name = "cc"
 version = "1.2.16"
@@ -1367,36 +1267,6 @@ dependencies = [
 "unicode-width",
 ]

-[[package]]
-name = "communicator"
-version = "0.0.0"
-dependencies = [
- "atomic_enum",
- "axum 0.8.1",
- "bytes",
- "cbindgen",
- "clashmap",
- "http 1.1.0",
- "libc",
- "metrics",
- "neon-shmem",
- "nix 0.30.1",
- "pageserver_api",
- "pageserver_client_grpc",
- "pageserver_page_api",
- "prometheus",
- "prost 0.13.5",
- "thiserror 1.0.69",
- "tokio",
- "tokio-pipe",
- "tonic 0.12.3",
- "tracing",
- "tracing-subscriber",
- "uring-common",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "compute_api"
 version = "0.1.0"
@@ -1423,7 +1293,7 @@ dependencies = [
 "aws-sdk-kms",
 "aws-sdk-s3",
 "aws-smithy-types",
- "axum 0.8.1",
+ "axum",
 "axum-extra",
 "base64 0.22.1",
 "bytes",
@@ -1727,9 +1597,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.21"
+version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"

 [[package]]
 name = "crossterm"
@@ -2183,7 +2053,7 @@ name = "endpoint_storage"
 version = "0.0.1"
 dependencies = [
 "anyhow",
- "axum 0.8.1",
+ "axum",
 "axum-extra",
 "camino",
 "camino-tempfile",
@@ -2444,12 +2314,6 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

-[[package]]
-name = "foldhash"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
-
 [[package]]
 name = "form_urlencoded"
 version = "1.2.1"
@@ -2470,7 +2334,7 @@ dependencies = [
 "futures-core",
 "futures-sink",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "pin-project",
 "rand 0.8.5",
@@ -2640,18 +2504,6 @@ dependencies = [
 "wasm-bindgen",
 ]

-[[package]]
-name = "getrandom"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
-dependencies = [
- "cfg-if",
- "libc",
- "r-efi",
- "wasi 0.14.2+wasi-0.2.4",
-]
-
 [[package]]
 name = "gettid"
 version = "0.1.3"
@@ -2817,16 +2669,6 @@ version = "0.15.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"

-[[package]]
-name = "hashbrown"
-version = "0.15.4"
-source = "git+https://github.com/quantumish/hashbrown.git?rev=6610e6d#6610e6d2b1f288ef7b0709a3efefbc846395dc5e"
-dependencies = [
- "allocator-api2",
- "equivalent",
- "foldhash",
-]
-
 [[package]]
 name = "hashlink"
 version = "0.9.1"
@@ -3051,9 +2893,9 @@ dependencies = [

 [[package]]
 name = "httparse"
-version = "1.10.1"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"

 [[package]]
 name = "httpdate"
@@ -3103,9 +2945,9 @@ dependencies = [

 [[package]]
 name = "hyper"
-version = "1.6.0"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
+checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
 dependencies = [
 "bytes",
 "futures-channel",
@@ -3145,7 +2987,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
 dependencies = [
 "futures-util",
 "http 1.1.0",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "rustls 0.22.4",
 "rustls-pki-types",
@@ -3160,7 +3002,7 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
 dependencies = [
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "pin-project-lite",
 "tokio",
@@ -3169,21 +3011,20 @@ dependencies = [

 [[package]]
 name = "hyper-util"
-version = "0.1.14"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb"
+checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
 dependencies = [
 "bytes",
 "futures-channel",
- "futures-core",
 "futures-util",
 "http 1.1.0",
 "http-body 1.0.0",
- "hyper 1.6.0",
- "libc",
+ "hyper 1.4.1",
 "pin-project-lite",
 "socket2",
 "tokio",
+ "tower 0.4.13",
 "tower-service",
 "tracing",
 ]
@@ -3727,9 +3568,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"

 [[package]]
 name = "lock_api"
-version = "0.4.13"
+version = "0.4.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
+checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
 dependencies = [
 "autocfg",
 "scopeguard",
@@ -3772,12 +3613,6 @@ dependencies = [
 "regex-automata 0.1.10",
 ]

-[[package]]
-name = "matchit"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
-
 [[package]]
 name = "matchit"
 version = "0.8.4"
@@ -3885,8 +3720,8 @@ dependencies = [
 "procfs",
 "prometheus",
 "rand 0.8.5",
- "rand_distr 0.4.3",
- "twox-hash 1.6.3",
+ "rand_distr",
+ "twox-hash",
 ]

 [[package]]
@@ -3973,35 +3808,10 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
 name = "neon-shmem"
 version = "0.1.0"
 dependencies = [
- "ahash",
- "atomic",
- "bytemuck",
- "criterion",
- "foldhash",
- "hashbrown 0.15.4",
- "libc",
- "lock_api",
 "nix 0.30.1",
- "rand 0.9.1",
- "rand_distr 0.5.1",
- "rustc-hash 2.1.1",
- "seahash",
 "tempfile",
 "thiserror 1.0.69",
- "twox-hash 2.1.1",
 "workspace_hack",
- "xxhash-rust",
-]
-
-[[package]]
-name = "neonart"
-version = "0.1.0"
-dependencies = [
- "crossbeam-utils",
- "rand 0.9.1",
- "rand_distr 0.5.1",
- "spin",
- "tracing",
 ]

 [[package]]
@@ -4437,19 +4247,15 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-trait",
- "axum 0.8.1",
 "bytes",
 "camino",
 "clap",
 "futures",
 "hdrhistogram",
- "http 1.1.0",
 "humantime",
 "humantime-serde",
- "metrics",
 "pageserver_api",
 "pageserver_client",
- "pageserver_client_grpc",
 "pageserver_page_api",
 "rand 0.8.5",
 "reqwest",
@@ -4533,7 +4339,6 @@ dependencies = [
 "pageserver_client",
 "pageserver_compaction",
 "pageserver_page_api",
- "peekable",
 "pem",
 "pin-project-lite",
 "postgres-protocol",
@@ -4547,7 +4352,6 @@ dependencies = [
 "pprof",
 "pq_proto",
 "procfs",
- "prost 0.13.5",
 "rand 0.8.5",
 "range-set-blaze",
 "regex",
@@ -4584,7 +4388,7 @@ dependencies = [
 "tower 0.5.2",
 "tracing",
 "tracing-utils",
- "twox-hash 1.6.3",
+ "twox-hash",
 "url",
 "utils",
 "uuid",
@@ -4651,38 +4455,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "pageserver_client_grpc"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "arc-swap",
- "async-trait",
- "bytes",
- "chrono",
- "compute_api",
- "dashmap 5.5.0",
- "futures",
- "http 1.1.0",
- "hyper 1.6.0",
- "hyper-util",
- "metrics",
- "pageserver_api",
- "pageserver_page_api",
- "priority-queue",
- "rand 0.8.5",
- "scopeguard",
- "thiserror 1.0.69",
- "tokio",
- "tokio-stream",
- "tokio-util",
- "tonic 0.13.1",
- "tower 0.4.13",
- "tracing",
- "utils",
- "uuid",
-]
-
 [[package]]
 name = "pageserver_compaction"
 version = "0.1.0"
@@ -4808,7 +4580,7 @@ dependencies = [
 "paste",
 "seq-macro",
 "thrift",
- "twox-hash 1.6.3",
+ "twox-hash",
 "zstd",
 "zstd-sys",
 ]
@@ -4854,15 +4626,6 @@ dependencies = [
 "sha2",
 ]

-[[package]]
-name = "peekable"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
-dependencies = [
- "smallvec",
-]
-
 [[package]]
 name = "pem"
 version = "3.0.3"
@@ -5296,17 +5059,6 @@ dependencies = [
 "elliptic-curve 0.13.8",
 ]

-[[package]]
-name = "priority-queue"
-version = "2.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5676d703dda103cbb035b653a9f11448c0a7216c7926bd35fcb5865475d0c970"
-dependencies = [
- "autocfg",
- "equivalent",
- "indexmap 2.9.0",
-]
-
 [[package]]
 name = "proc-macro2"
 version = "1.0.94"
@@ -5507,7 +5259,7 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "hyper 0.14.30",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "indexmap 2.9.0",
 "ipnet",
@@ -5531,7 +5283,7 @@ dependencies = [
 "postgres_backend",
 "pq_proto",
 "rand 0.8.5",
- "rand_distr 0.4.3",
+ "rand_distr",
 "rcgen",
 "redis",
 "regex",
@@ -5635,12 +5387,6 @@ dependencies = [
 "proc-macro2",
 ]

-[[package]]
-name = "r-efi"
-version = "5.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
-
 [[package]]
 name = "rand"
 version = "0.7.3"
@@ -5665,16 +5411,6 @@ dependencies = [
 "rand_core 0.6.4",
 ]

-[[package]]
-name = "rand"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
-dependencies = [
- "rand_chacha 0.9.0",
- "rand_core 0.9.3",
-]
-
 [[package]]
 name = "rand_chacha"
 version = "0.2.2"
@@ -5695,16 +5431,6 @@ dependencies = [
 "rand_core 0.6.4",
 ]

-[[package]]
-name = "rand_chacha"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.9.3",
-]
-
 [[package]]
 name = "rand_core"
 version = "0.5.1"
@@ -5723,15 +5449,6 @@ dependencies = [
 "getrandom 0.2.11",
 ]

-[[package]]
-name = "rand_core"
-version = "0.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
-dependencies = [
- "getrandom 0.3.3",
-]
-
 [[package]]
 name = "rand_distr"
 version = "0.4.3"
@@ -5742,16 +5459,6 @@ dependencies = [
 "rand 0.8.5",
 ]

-[[package]]
-name = "rand_distr"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
-dependencies = [
- "num-traits",
- "rand 0.9.1",
-]
-
 [[package]]
 name = "rand_hc"
 version = "0.2.0"
@@ -5948,7 +5655,7 @@ dependencies = [
 "http-body-util",
 "http-types",
 "humantime-serde",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "itertools 0.10.5",
 "metrics",
 "once_cell",
@@ -5988,7 +5695,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-rustls 0.26.0",
 "hyper-util",
 "ipnet",
@@ -6045,7 +5752,7 @@ dependencies = [
 "futures",
 "getrandom 0.2.11",
 "http 1.1.0",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "parking_lot 0.11.2",
 "reqwest",
 "reqwest-middleware",
@@ -6066,7 +5773,7 @@ dependencies = [
 "async-trait",
 "getrandom 0.2.11",
 "http 1.1.0",
- "matchit 0.8.4",
+ "matchit",
 "opentelemetry",
 "reqwest",
 "reqwest-middleware",
@@ -6553,12 +6260,6 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "621e3680f3e07db4c9c2c3fb07c6223ab2fab2e54bd3c04c3ae037990f428c32"

-[[package]]
-name = "seahash"
-version = "4.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
-
 [[package]]
 name = "sec1"
 version = "0.3.0"
@@ -7020,12 +6721,12 @@ dependencies = [

 [[package]]
 name = "socket2"
-version = "0.5.10"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
 dependencies = [
 "libc",
- "windows-sys 0.52.0",
+ "windows-sys 0.48.0",
 ]

 [[package]]
@@ -7033,9 +6734,6 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]

 [[package]]
 name = "spinning_top"
@@ -7094,7 +6792,7 @@ dependencies = [
 "http-body-util",
 "http-utils",
 "humantime",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "metrics",
 "once_cell",
@@ -7703,16 +7401,6 @@ dependencies = [
 "syn 2.0.100",
 ]

-[[package]]
-name = "tokio-pipe"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
-dependencies = [
- "libc",
- "tokio",
-]
-
 [[package]]
 name = "tokio-postgres"
 version = "0.7.10"
@@ -7907,25 +7595,16 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
- "async-stream",
 "async-trait",
- "axum 0.7.9",
 "base64 0.22.1",
 "bytes",
- "h2 0.4.4",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
- "hyper-timeout",
- "hyper-util",
 "percent-encoding",
 "pin-project",
 "prost 0.13.5",
- "socket2",
- "tokio",
 "tokio-stream",
- "tower 0.4.13",
 "tower-layer",
 "tower-service",
 "tracing",
@@ -7938,7 +7617,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
 dependencies = [
 "async-trait",
- "axum 0.8.1",
+ "axum",
 "base64 0.22.1",
 "bytes",
 "flate2",
@@ -7946,7 +7625,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-timeout",
 "hyper-util",
 "percent-encoding",
@@ -7999,16 +7678,11 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
 dependencies = [
 "futures-core",
 "futures-util",
- "indexmap 1.9.3",
 "pin-project",
 "pin-project-lite",
- "rand 0.8.5",
- "slab",
 "tokio",
- "tokio-util",
 "tower-layer",
 "tower-service",
- "tracing",
 ]

 [[package]]
@@ -8279,15 +7953,6 @@ dependencies = [
 "static_assertions",
 ]

-[[package]]
-name = "twox-hash"
-version = "2.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
-dependencies = [
- "rand 0.9.1",
-]
-
 [[package]]
 name = "typed-json"
 version = "0.1.1"
@@ -8501,7 +8166,7 @@ name = "vm_monitor"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "axum 0.8.1",
+ "axum",
 "cgroups-rs",
 "clap",
 "futures",
@@ -8613,15 +8278,6 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

-[[package]]
-name = "wasi"
-version = "0.14.2+wasi-0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
-dependencies = [
- "wit-bindgen-rt",
-]
-
 [[package]]
 name = "wasite"
 version = "0.1.0"
@@ -8979,15 +8635,6 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

-[[package]]
-name = "wit-bindgen-rt"
-version = "0.39.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
-dependencies = [
- "bitflags 2.8.0",
-]
-
 [[package]]
 name = "workspace_hack"
 version = "0.1.0"
@@ -8995,8 +8642,8 @@ dependencies = [
 "ahash",
 "anstream",
 "anyhow",
- "axum 0.8.1",
- "axum-core 0.5.0",
+ "axum",
+ "axum-core",
 "base64 0.21.7",
 "base64ct",
 "bytes",
@@ -9028,7 +8675,7 @@ dependencies = [
 "hex",
 "hmac",
 "hyper 0.14.30",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "indexmap 2.9.0",
 "itertools 0.12.1",
@@ -9046,7 +8693,6 @@ dependencies = [
 "num-iter",
 "num-rational",
 "num-traits",
- "once_cell",
 "p256 0.13.2",
 "parquet",
 "prettyplease",
@@ -9153,12 +8799,6 @@ version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"

-[[package]]
-name = "xxhash-rust"
-version = "0.8.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
-
 [[package]]
 name = "yasna"
 version = "0.5.2"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,6 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
-    "pageserver/client_grpc",
    "pageserver/pagebench",
    "pageserver/page_api",
    "proxy",
@@ -35,7 +34,6 @@ members = [
    "libs/pq_proto",
    "libs/tenant_size_model",
    "libs/metrics",
-    "libs/neonart",
    "libs/postgres_connection",
    "libs/remote_storage",
    "libs/tracing-utils",
@@ -48,7 +46,6 @@ members = [
    "libs/proxy/postgres-types2",
    "libs/proxy/tokio-postgres2",
    "endpoint_storage",
-    "pgxn/neon/communicator",
 ]

 [workspace.package]
@@ -92,7 +89,6 @@ clap = { version = "4.0", features = ["derive", "env"] }
 clashmap = { version = "1.0", features = ["raw-api"] }
 comfy-table = "7.1"
 const_format = "0.2"
-crossbeam-utils = "0.8.21"
 crc32c = "0.6"
 diatomic-waker = { version = "0.2.3" }
 either = "1.8"
@@ -151,7 +147,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pem = "3.0.3"
-peekable = "0.3.0"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
@@ -188,7 +183,6 @@ smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
 spki = "0.7.3"
-spin = "0.9.8"
 strum = "0.26"
 strum_macros = "0.26"
 "subtle"  = "2.5.0"
@@ -200,6 +194,7 @@ thiserror = "1.0"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
+tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
@@ -241,9 +236,6 @@ x509-cert = { version = "0.2.5" }
 env_logger = "0.11"
 log = "0.4"

-tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
@@ -263,11 +255,9 @@ desim = { version = "0.1", path = "./libs/desim" }
 endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
-neon-shmem = { version = "0.1", path = "./libs/neon-shmem/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
-pageserver_client_grpc = { path = "./pageserver/client_grpc" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 pageserver_page_api = { path = "./pageserver/page_api" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
@@ -294,7 +284,6 @@ walproposer = { version = "0.1", path = "./libs/walproposer/" }
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
-cbindgen = "0.29.0"
 criterion = "0.5.1"
 rcgen = "0.13"
 rstest = "0.18"
--- a/53
+++ b/53
@@ -30,18 +30,7 @@ ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}
 ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}
 ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}

-# Naive way:
-#
-# 1. COPY . .
-# 1. make neon-pg-ext
-# 2. cargo build <storage binaries>
-#
-# But to enable docker to cache intermediate layers, we perform a few preparatory steps:
-#
-# - Build all postgres versions, depending on just the contents of vendor/
-# - Use cargo chef to build all rust dependencies
-
-# 1. Build all postgres versions
+# Build Postgres
 FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
 WORKDIR /home/nonroot

@@ -49,15 +38,17 @@ COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
 COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
 COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
 COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
+COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot postgres.mk postgres.mk
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh

 ENV BUILD_TYPE=release
 RUN set -e \
-    && mold -run make -j $(nproc) -s postgres
+    && mold -run make -j $(nproc) -s neon-pg-ext \
+    && tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .

-# 2. Prepare cargo-chef recipe
+# Prepare cargo-chef recipe
 FROM $REPOSITORY/$IMAGE:$TAG AS plan
 WORKDIR /home/nonroot

@@ -65,22 +56,23 @@ COPY --chown=nonroot . .

 RUN cargo chef prepare --recipe-path recipe.json

-# Main build image
+# Build neon binaries
 FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
+
+COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
+COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
+
 ARG ADDITIONAL_RUSTFLAGS=""

-# 3. Build cargo dependencies. Note that this step doesn't depend on anything else than
-# `recipe.json`, so the layer can be reused as long as none of the dependencies change.
-COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
 RUN set -e \
    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo chef cook --locked --release --recipe-path recipe.json

-# Perform the main build. We reuse the Postgres build artifacts from the intermediate 'pg-build'
-# layer, and the cargo dependencies built in the previous step.
-COPY --chown=nonroot --from=pg-build /home/nonroot/pg_install/ pg_install
 COPY --chown=nonroot . .

 RUN set -e \
@@ -95,10 +87,10 @@ RUN set -e \
      --bin endpoint_storage \
      --bin neon_local \
      --bin storage_scrubber \
-      --locked --release \
-    && mold -run make -j $(nproc) -s neon-pg-ext
+      --locked --release

-# Assemble the final image
+# Build final image
+#
 FROM $BASE_IMAGE_SHA
 WORKDIR /data

@@ -138,15 +130,12 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/endpoint_storage    /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber    /usr/local/bin
-COPY --from=build /home/nonroot/pg_install/v14 /usr/local/v14/
-COPY --from=build /home/nonroot/pg_install/v15 /usr/local/v15/
-COPY --from=build /home/nonroot/pg_install/v16 /usr/local/v16/
-COPY --from=build /home/nonroot/pg_install/v17 /usr/local/v17/

-# Deprecated: Old deployment scripts use this tarball which contains all the Postgres binaries.
-# That's obsolete, since all the same files are also present under /usr/local/v*. But to keep the
-# old scripts working for now, create the tarball.
-RUN tar -C /usr/local -cvzf /data/postgres_install.tar.gz v14 v15 v16 v17
+COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
+COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
+COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
+COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
+COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/

 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
 # Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
--- a/16
+++ b/16
@@ -30,18 +30,11 @@ ifeq ($(BUILD_TYPE),release)
 	PG_CFLAGS += -O2 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	CARGO_PROFILE ?= --profile=release
-	# NEON_CARGO_ARTIFACT_TARGET_DIR is the directory where `cargo build` places
-	# the final build artifacts. There is unfortunately no easy way of changing
-	# it to a fully predictable path, nor to extract the path with a simple
-	# command. See https://github.com/rust-lang/cargo/issues/9661 and
-	# https://github.com/rust-lang/cargo/issues/6790.
-	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
 	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	CARGO_PROFILE ?= --profile=dev
-	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif
@@ -109,7 +102,7 @@ all: neon postgres-install neon-pg-ext

 ### Neon Rust bits
 #
-# The 'postgres_ffi' crate depends on the Postgres headers.
+# The 'postgres_ffi' depends on the Postgres headers.
 .PHONY: neon
 neon: postgres-headers-install walproposer-lib cargo-target-dir
 	+@echo "Compiling Neon"
@@ -122,13 +115,10 @@ cargo-target-dir:
 	test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG

 .PHONY: neon-pg-ext-%
-neon-pg-ext-%: postgres-install-% cargo-target-dir
+neon-pg-ext-%: postgres-install-%
 	+@echo "Compiling neon-specific Postgres extensions for $*"
 	mkdir -p $(BUILD_DIR)/pgxn-$*
-	$(MAKE) PG_CONFIG="$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config" COPT='$(COPT)' \
-		NEON_CARGO_ARTIFACT_TARGET_DIR="$(NEON_CARGO_ARTIFACT_TARGET_DIR)" \
-		CARGO_BUILD_FLAGS="$(CARGO_BUILD_FLAGS)" \
-		CARGO_PROFILE="$(CARGO_PROFILE)" \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(BUILD_DIR)/pgxn-$*\
 		-f $(ROOT_PROJECT_DIR)/pgxn/Makefile  install

--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1636,14 +1636,11 @@ RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
 # compile neon extensions
 #
 #########################################################################################
-FROM pg-build-with-cargo AS neon-ext-build
+FROM pg-build AS neon-ext-build
 ARG PG_VERSION

-USER root
-COPY . .
-
-RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute \
-      BUILD_TYPE=release CARGO_BUILD_FLAGS="--locked --release" NEON_CARGO_ARTIFACT_TARGET_DIR="$(pwd)/target/release"
+COPY pgxn/ pgxn/
+RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute

 #########################################################################################
 #
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,4 +1,4 @@
-use anyhow::{Context, Result, anyhow};
+use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
 use compute_api::responses::{
@@ -6,8 +6,7 @@ use compute_api::responses::{
    LfcPrewarmState, TlsConfig,
 };
 use compute_api::spec::{
-    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverConnectionInfo,
-    PageserverShardConnectionInfo, PgIdent,
+    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
 };
 use futures::StreamExt;
 use futures::future::join_all;
@@ -30,8 +29,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::task::JoinHandle;
-use tokio::{spawn, time};
+use tokio::spawn;
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
@@ -109,8 +107,6 @@ pub struct ComputeNodeParams {
    pub installed_extensions_collection_interval: Arc<AtomicU64>,
 }

-type TaskHandle = Mutex<Option<JoinHandle<()>>>;
-
 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
    pub params: ComputeNodeParams,
@@ -133,8 +129,7 @@ pub struct ComputeNode {
    pub compute_ctl_config: ComputeCtlConfig,

    /// Handle to the extension stats collection task
-    extension_stats_task: TaskHandle,
-    lfc_offload_task: TaskHandle,
+    extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
 }

 // store some metrics about download size that might impact startup time
@@ -225,7 +220,7 @@ pub struct ParsedSpec {
    pub spec: ComputeSpec,
    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
-    pub pageserver_conninfo: PageserverConnectionInfo,
+    pub pageserver_connstr: String,
    pub safekeeper_connstrings: Vec<String>,
    pub storage_auth_token: Option<String>,
    /// k8s dns name and port
@@ -272,27 +267,6 @@ impl ParsedSpec {
    }
 }

-fn extract_pageserver_conninfo_from_guc(
-    pageserver_connstring_guc: &str,
-) -> PageserverConnectionInfo {
-    PageserverConnectionInfo {
-        shards: pageserver_connstring_guc
-            .split(',')
-            .enumerate()
-            .map(|(i, connstr)| {
-                (
-                    i as u32,
-                    PageserverShardConnectionInfo {
-                        libpq_url: Some(connstr.to_string()),
-                        grpc_url: None,
-                    },
-                )
-            })
-            .collect(),
-        prefer_grpc: false,
-    }
-}
-
 impl TryFrom<ComputeSpec> for ParsedSpec {
    type Error = String;
    fn try_from(spec: ComputeSpec) -> Result<Self, String> {
@@ -302,17 +276,11 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
        // For backwards-compatibility, the top-level fields in the spec file
        // may be empty. In that case, we need to dig them from the GUCs in the
        // cluster.settings field.
-        let pageserver_conninfo = match &spec.pageserver_connection_info {
-            Some(x) => x.clone(),
-            None => {
-                if let Some(guc) = spec.cluster.settings.find("neon.pageserver_connstring") {
-                    extract_pageserver_conninfo_from_guc(&guc)
-                } else {
-                    return Err("pageserver connstr should be provided".to_string());
-                }
-            }
-        };
-
+        let pageserver_connstr = spec
+            .pageserver_connstring
+            .clone()
+            .or_else(|| spec.cluster.settings.find("neon.pageserver_connstring"))
+            .ok_or("pageserver connstr should be provided")?;
        let safekeeper_connstrings = if spec.safekeeper_connstrings.is_empty() {
            if matches!(spec.mode, ComputeMode::Primary) {
                spec.cluster
@@ -362,7 +330,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {

        let res = ParsedSpec {
            spec,
-            pageserver_conninfo,
+            pageserver_connstr,
            safekeeper_connstrings,
            storage_auth_token,
            tenant_id,
@@ -400,7 +368,7 @@ fn maybe_cgexec(cmd: &str) -> Command {

 struct PostgresHandle {
    postgres: std::process::Child,
-    log_collector: JoinHandle<Result<()>>,
+    log_collector: tokio::task::JoinHandle<Result<()>>,
 }

 impl PostgresHandle {
@@ -414,7 +382,7 @@ struct StartVmMonitorResult {
    #[cfg(target_os = "linux")]
    token: tokio_util::sync::CancellationToken,
    #[cfg(target_os = "linux")]
-    vm_monitor: Option<JoinHandle<Result<()>>>,
+    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
 }

 impl ComputeNode {
@@ -452,7 +420,7 @@ impl ComputeNode {

        let mut new_state = ComputeState::new();
        if let Some(spec) = config.spec {
-            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow!(msg))?;
+            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
            new_state.pspec = Some(pspec);
        }

@@ -465,7 +433,6 @@ impl ComputeNode {
            ext_download_progress: RwLock::new(HashMap::new()),
            compute_ctl_config: config.compute_ctl_config,
            extension_stats_task: Mutex::new(None),
-            lfc_offload_task: Mutex::new(None),
        })
    }

@@ -553,8 +520,8 @@ impl ComputeNode {
            None
        };

+        // Terminate the extension stats collection task
        this.terminate_extension_stats_task();
-        this.terminate_lfc_offload_task();

        // Terminate the vm_monitor so it releases the file watcher on
        // /sys/fs/cgroup/neon-postgres.
@@ -884,15 +851,12 @@ impl ComputeNode {
        // Log metrics so that we can search for slow operations in logs
        info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

+        // Spawn the extension stats background task
        self.spawn_extension_stats_task();

        if pspec.spec.autoprewarm {
-            info!("autoprewarming on startup as requested");
            self.prewarm_lfc(None);
        }
-        if let Some(seconds) = pspec.spec.offload_lfc_interval_seconds {
-            self.spawn_lfc_offload_task(Duration::from_secs(seconds.into()));
-        };
        Ok(())
    }

@@ -1053,11 +1017,12 @@ impl ComputeNode {
    fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");

+        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
        let started = Instant::now();
-        let (connected, size) = if spec.pageserver_conninfo.prefer_grpc {
-            self.try_get_basebackup_grpc(spec, lsn)?
-        } else {
-            self.try_get_basebackup_libpq(spec, lsn)?
+
+        let (connected, size) = match PageserverProtocol::from_connstring(shard0_connstr)? {
+            PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
+            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
        };

        let mut state = self.state.lock().unwrap();
@@ -1072,21 +1037,20 @@ impl ComputeNode {
    /// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
    /// the connection was established, and the (compressed) size of the basebackup.
    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
-        let shard0 = spec
-            .pageserver_conninfo
-            .shards
-            .get(&0)
-            .expect("shard 0 connection info missing");
-        let shard0_url = shard0.grpc_url.clone().expect("no grpc_url for shard 0");
-
-        let shard_index = match spec.pageserver_conninfo.shards.len() as u8 {
+        let shard0_connstr = spec
+            .pageserver_connstr
+            .split(',')
+            .next()
+            .unwrap()
+            .to_string();
+        let shard_index = match spec.pageserver_connstr.split(',').count() as u8 {
            0 | 1 => ShardIndex::unsharded(),
            count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
        };

        let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
-            let mut client = page_api::Client::connect(
-                shard0_url,
+            let mut client = page_api::Client::new(
+                shard0_connstr,
                spec.tenant_id,
                spec.timeline_id,
                shard_index,
@@ -1121,13 +1085,8 @@ impl ComputeNode {
    /// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
    /// when the connection was established, and the (compressed) size of the basebackup.
    fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
-        let shard0 = spec
-            .pageserver_conninfo
-            .shards
-            .get(&0)
-            .expect("shard 0 connection info missing");
-        let shard0_connstr = shard0.libpq_url.clone().expect("no libpq_url for shard 0");
-        let mut config = postgres::Config::from_str(&shard0_connstr)?;
+        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
+        let mut config = postgres::Config::from_str(shard0_connstr)?;

        // Use the storage auth token from the config file, if given.
        // Note: this overrides any password set in the connection string.
@@ -1213,7 +1172,10 @@ impl ComputeNode {
                    return result;
                }
                Err(ref e) if attempts < max_attempts => {
-                    warn!("Failed to get basebackup: {e:?} (attempt {attempts}/{max_attempts})");
+                    warn!(
+                        "Failed to get basebackup: {} (attempt {}/{})",
+                        e, attempts, max_attempts
+                    );
                    std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));
                    retry_period_ms *= 1.5;
                }
@@ -1422,8 +1384,16 @@ impl ComputeNode {
            }
        };

-        self.get_basebackup(compute_state, lsn)
-            .with_context(|| format!("failed to get basebackup@{lsn}"))?;
+        info!(
+            "getting basebackup@{} from pageserver {}",
+            lsn, &pspec.pageserver_connstr
+        );
+        self.get_basebackup(compute_state, lsn).with_context(|| {
+            format!(
+                "failed to get basebackup@{} from pageserver {}",
+                lsn, &pspec.pageserver_connstr
+            )
+        })?;

        // Update pg_hba.conf received with basebackup.
        update_pg_hba(pgdata_path)?;
@@ -2087,7 +2057,7 @@ LIMIT 100",
            self.params
                .remote_ext_base_url
                .as_ref()
-                .ok_or(DownloadError::BadInput(anyhow!(
+                .ok_or(DownloadError::BadInput(anyhow::anyhow!(
                    "Remote extensions storage is not configured",
                )))?;

@@ -2283,7 +2253,7 @@ LIMIT 100",
        let remote_extensions = spec
            .remote_extensions
            .as_ref()
-            .ok_or(anyhow!("Remote extensions are not configured"))?;
+            .ok_or(anyhow::anyhow!("Remote extensions are not configured"))?;

        info!("parse shared_preload_libraries from spec.cluster.settings");
        let mut libs_vec = Vec::new();
@@ -2362,22 +2332,22 @@ LIMIT 100",
    /// The operation will time out after a specified duration.
    pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
        let state = self.state.lock().unwrap();
-        let old_pageserver_conninfo = state
+        let old_pageserver_connstr = state
            .pspec
            .as_ref()
            .expect("spec must be set")
-            .pageserver_conninfo
+            .pageserver_connstr
            .clone();
        let mut unchanged = true;
        let _ = self
            .state_changed
            .wait_timeout_while(state, duration, |s| {
-                let pageserver_conninfo = &s
+                let pageserver_connstr = &s
                    .pspec
                    .as_ref()
                    .expect("spec must be set")
-                    .pageserver_conninfo;
-                unchanged = pageserver_conninfo == &old_pageserver_conninfo;
+                    .pageserver_connstr;
+                unchanged = pageserver_connstr == &old_pageserver_connstr;
                unchanged
            })
            .unwrap();
@@ -2387,7 +2357,10 @@ LIMIT 100",
    }

    pub fn spawn_extension_stats_task(&self) {
-        self.terminate_extension_stats_task();
+        // Cancel any existing task
+        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
+            handle.abort();
+        }

        let conf = self.tokio_conn_conf.clone();
        let atomic_interval = self.params.installed_extensions_collection_interval.clone();
@@ -2398,23 +2371,24 @@ LIMIT 100",
            installed_extensions_collection_interval
        );
        let handle = tokio::spawn(async move {
+            // An initial sleep is added to ensure that two collections don't happen at the same time.
+            // The first collection happens during compute startup.
+            tokio::time::sleep(tokio::time::Duration::from_secs(
+                installed_extensions_collection_interval,
+            ))
+            .await;
+            let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(
+                installed_extensions_collection_interval,
+            ));
            loop {
-                info!(
-                    "[NEON_EXT_INT_SLEEP]: Interval: {}",
-                    installed_extensions_collection_interval
-                );
-                // Sleep at the start of the loop to ensure that two collections don't happen at the same time.
-                // The first collection happens during compute startup.
-                tokio::time::sleep(tokio::time::Duration::from_secs(
-                    installed_extensions_collection_interval,
-                ))
-                .await;
+                interval.tick().await;
                let _ = installed_extensions(conf.clone()).await;
                // Acquire a read lock on the compute spec and then update the interval if necessary
-                installed_extensions_collection_interval = std::cmp::max(
+                interval = tokio::time::interval(tokio::time::Duration::from_secs(std::cmp::max(
                    installed_extensions_collection_interval,
                    2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst),
-                );
+                )));
+                installed_extensions_collection_interval = interval.period().as_secs();
            }
        });

@@ -2423,30 +2397,8 @@ LIMIT 100",
    }

    fn terminate_extension_stats_task(&self) {
-        if let Some(h) = self.extension_stats_task.lock().unwrap().take() {
-            h.abort()
-        }
-    }
-
-    pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
-        self.terminate_lfc_offload_task();
-        let secs = interval.as_secs();
-        info!("spawning lfc offload worker with {secs}s interval");
-        let this = self.clone();
-        let handle = spawn(async move {
-            let mut interval = time::interval(interval);
-            interval.tick().await; // returns immediately
-            loop {
-                interval.tick().await;
-                this.offload_lfc_async().await;
-            }
-        });
-        *self.lfc_offload_task.lock().unwrap() = Some(handle);
-    }
-
-    fn terminate_lfc_offload_task(&self) {
-        if let Some(h) = self.lfc_offload_task.lock().unwrap().take() {
-            h.abort()
+        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
+            handle.abort();
        }
    }

--- a/compute_tools/src/compute_prewarm.rs
+++ b/compute_tools/src/compute_prewarm.rs
@@ -5,7 +5,6 @@ use compute_api::responses::LfcOffloadState;
 use compute_api::responses::LfcPrewarmState;
 use http::StatusCode;
 use reqwest::Client;
-use std::mem::replace;
 use std::sync::Arc;
 use tokio::{io::AsyncReadExt, spawn};
 use tracing::{error, info};
@@ -89,15 +88,17 @@ impl ComputeNode {
        self.state.lock().unwrap().lfc_offload_state.clone()
    }

-    /// If there is a prewarm request ongoing, return false, true otherwise
+    /// Returns false if there is a prewarm request ongoing, true otherwise
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
+        crate::metrics::LFC_PREWARM_REQUESTS.inc();
        {
            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
-            if let LfcPrewarmState::Prewarming = replace(state, LfcPrewarmState::Prewarming) {
+            if let LfcPrewarmState::Prewarming =
+                std::mem::replace(state, LfcPrewarmState::Prewarming)
+            {
                return false;
            }
        }
-        crate::metrics::LFC_PREWARMS.inc();

        let cloned = self.clone();
        spawn(async move {
@@ -151,39 +152,30 @@ impl ComputeNode {
            .map(|_| ())
    }

-    /// If offload request is ongoing, return false, true otherwise
+    /// Returns false if there is an offload request ongoing, true otherwise
    pub fn offload_lfc(self: &Arc<Self>) -> bool {
+        crate::metrics::LFC_OFFLOAD_REQUESTS.inc();
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+            if let LfcOffloadState::Offloading =
+                std::mem::replace(state, LfcOffloadState::Offloading)
+            {
                return false;
            }
        }
+
        let cloned = self.clone();
-        spawn(async move { cloned.offload_lfc_with_state_update().await });
-        true
-    }
-
-    pub async fn offload_lfc_async(self: &Arc<Self>) {
-        {
-            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+        spawn(async move {
+            let Err(err) = cloned.offload_lfc_impl().await else {
+                cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
                return;
-            }
-        }
-        self.offload_lfc_with_state_update().await
-    }
-
-    async fn offload_lfc_with_state_update(&self) {
-        crate::metrics::LFC_OFFLOADS.inc();
-        let Err(err) = self.offload_lfc_impl().await else {
-            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
-            return;
-        };
-        error!(%err);
-        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
-            error: err.to_string(),
-        };
+            };
+            error!(%err);
+            cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
+                error: err.to_string(),
+            };
+        });
+        true
    }

    async fn offload_lfc_impl(&self) -> Result<()> {
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -56,51 +56,9 @@ pub fn write_postgres_conf(

    // Add options for connecting to storage
    writeln!(file, "# Neon storage settings")?;
-
-    if let Some(conninfo) = &spec.pageserver_connection_info {
-        let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
-        let mut grpc_urls: Option<Vec<String>> = Some(Vec::new());
-
-        for shardno in 0..conninfo.shards.len() {
-            let info = conninfo.shards.get(&(shardno as u32)).ok_or_else(|| {
-                anyhow::anyhow!("shard {shardno} missing from pageserver_connection_info shard map")
-            })?;
-
-            if let Some(url) = &info.libpq_url {
-                if let Some(ref mut urls) = libpq_urls {
-                    urls.push(url.clone());
-                }
-            } else {
-                libpq_urls = None
-            }
-            if let Some(url) = &info.grpc_url {
-                if let Some(ref mut urls) = grpc_urls {
-                    urls.push(url.clone());
-                }
-            } else {
-                grpc_urls = None
-            }
-        }
-        if let Some(libpq_urls) = libpq_urls {
-            writeln!(
-                file,
-                "neon.pageserver_connstring={}",
-                escape_conf_value(&libpq_urls.join(","))
-            )?;
-        } else {
-            writeln!(file, "# no neon.pageserver_connstring")?;
-        }
-        if let Some(grpc_urls) = grpc_urls {
-            writeln!(
-                file,
-                "neon.pageserver_grpc_urls={}",
-                escape_conf_value(&grpc_urls.join(","))
-            )?;
-        } else {
-            writeln!(file, "# no neon.pageserver_grpc_urls")?;
-        }
+    if let Some(s) = &spec.pageserver_connstring {
+        writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
    }
-
    if let Some(stripe_size) = spec.shard_stripe_size {
        writeln!(file, "neon.stripe_size={stripe_size}")?;
    }
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -4,7 +4,8 @@ use std::thread;
 use std::time::{Duration, SystemTime};

 use anyhow::{Result, bail};
-use compute_api::spec::{ComputeMode, PageserverConnectionInfo};
+use compute_api::spec::{ComputeMode, PageserverProtocol};
+use itertools::Itertools as _;
 use pageserver_page_api as page_api;
 use postgres::{NoTls, SimpleQueryMessage};
 use tracing::{info, warn};
@@ -77,16 +78,17 @@ fn acquire_lsn_lease_with_retry(

    loop {
        // Note: List of pageservers is dynamic, need to re-read configs before each attempt.
-        let (conninfo, auth) = {
+        let (connstrings, auth) = {
            let state = compute.state.lock().unwrap();
            let spec = state.pspec.as_ref().expect("spec must be set");
            (
-                spec.pageserver_conninfo.clone(),
+                spec.pageserver_connstr.clone(),
                spec.storage_auth_token.clone(),
            )
        };

-        let result = try_acquire_lsn_lease(conninfo, auth.as_deref(), tenant_id, timeline_id, lsn);
+        let result =
+            try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn);
        match result {
            Ok(Some(res)) => {
                return Ok(res);
@@ -110,16 +112,17 @@ fn acquire_lsn_lease_with_retry(

 /// Tries to acquire LSN leases on all Pageserver shards.
 fn try_acquire_lsn_lease(
-    conninfo: PageserverConnectionInfo,
+    connstrings: &str,
    auth: Option<&str>,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    lsn: Lsn,
 ) -> Result<Option<SystemTime>> {
-    let shard_count = conninfo.shards.len();
+    let connstrings = connstrings.split(',').collect_vec();
+    let shard_count = connstrings.len();
    let mut leases = Vec::new();

-    for (shard_number, shard) in conninfo.shards.into_iter() {
+    for (shard_number, &connstring) in connstrings.iter().enumerate() {
        let tenant_shard_id = match shard_count {
            0 | 1 => TenantShardId::unsharded(tenant_id),
            shard_count => TenantShardId {
@@ -129,22 +132,13 @@ fn try_acquire_lsn_lease(
            },
        };

-        let lease = if conninfo.prefer_grpc {
-            acquire_lsn_lease_grpc(
-                &shard.grpc_url.unwrap(),
-                auth,
-                tenant_shard_id,
-                timeline_id,
-                lsn,
-            )?
-        } else {
-            acquire_lsn_lease_libpq(
-                &shard.libpq_url.unwrap(),
-                auth,
-                tenant_shard_id,
-                timeline_id,
-                lsn,
-            )?
+        let lease = match PageserverProtocol::from_connstring(connstring)? {
+            PageserverProtocol::Libpq => {
+                acquire_lsn_lease_libpq(connstring, auth, tenant_shard_id, timeline_id, lsn)?
+            }
+            PageserverProtocol::Grpc => {
+                acquire_lsn_lease_grpc(connstring, auth, tenant_shard_id, timeline_id, lsn)?
+            }
        };
        leases.push(lease);
    }
@@ -198,7 +192,7 @@ fn acquire_lsn_lease_grpc(
    lsn: Lsn,
 ) -> Result<Option<SystemTime>> {
    tokio::runtime::Handle::current().block_on(async move {
-        let mut client = page_api::Client::connect(
+        let mut client = page_api::Client::new(
            connstring.to_string(),
            tenant_shard_id.tenant_id,
            timeline_id,
--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -97,18 +97,20 @@ pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::
    .expect("failed to define a metric")
 });

-pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
+/// Needed because neon.file_cache_prewarm_batch == 0 doesn't mean we never tried to prewarm.
+/// On the other hand, an LFC_PREWARMED_PAGES metric would be excessive, as we can GET /lfc/prewarm
+pub(crate) static LFC_PREWARM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "compute_ctl_lfc_prewarms_total",
-        "Total number of LFC prewarms requested by compute_ctl or autoprewarm option",
+        "compute_ctl_lfc_prewarm_requests_total",
+        "Total number of LFC prewarm requests made by compute_ctl",
    )
    .expect("failed to define a metric")
 });

-pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static LFC_OFFLOAD_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "compute_ctl_lfc_offloads_total",
-        "Total number of LFC offloads requested by compute_ctl or lfc_offload_period_seconds option",
+        "compute_ctl_lfc_offload_requests_total",
+        "Total number of LFC offload requests made by compute_ctl",
    )
    .expect("failed to define a metric")
 });
@@ -122,7 +124,7 @@ pub fn collect() -> Vec<MetricFamily> {
    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
    metrics.extend(PG_CURR_DOWNTIME_MS.collect());
    metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
-    metrics.extend(LFC_PREWARMS.collect());
-    metrics.extend(LFC_OFFLOADS.collect());
+    metrics.extend(LFC_PREWARM_REQUESTS.collect());
+    metrics.extend(LFC_OFFLOAD_REQUESTS.collect());
    metrics
 }
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -31,7 +31,6 @@ mod pg_helpers_tests {
 wal_level = logical
 hot_standby = on
 autoprewarm = off
-offload_lfc_interval_seconds = 20
 neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
 wal_log_hints = on
 log_connections = on
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -16,7 +16,7 @@ use std::time::Duration;
 use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::requests::ComputeClaimsScope;
-use compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverShardConnectionInfo};
+use compute_api::spec::{ComputeMode, PageserverProtocol};
 use control_plane::broker::StorageBroker;
 use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
@@ -64,9 +64,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);

-#[allow(dead_code)]
 const DEFAULT_PG_VERSION: PgMajorVersion = PgMajorVersion::PG17;
-const DEFAULT_PG_VERSION_NUM: &str = "17";

 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

@@ -169,7 +167,7 @@ struct TenantCreateCmdArgs {
    #[clap(short = 'c')]
    config: Vec<String>,

-    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
    #[clap(long, help = "Postgres version to use for the initial timeline")]
    pg_version: PgMajorVersion,

@@ -292,7 +290,7 @@ struct TimelineCreateCmdArgs {
    #[clap(long, help = "Human-readable alias for the new timeline")]
    branch_name: String,

-    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
    #[clap(long, help = "Postgres version")]
    pg_version: PgMajorVersion,
 }
@@ -324,7 +322,7 @@ struct TimelineImportCmdArgs {
    #[clap(long, help = "Lsn the basebackup ends at")]
    end_lsn: Option<Lsn>,

-    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
    #[clap(long, help = "Postgres version of the backup being imported")]
    pg_version: PgMajorVersion,
 }
@@ -603,7 +601,7 @@ struct EndpointCreateCmdArgs {
    )]
    config_only: bool,

-    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
    #[clap(long, help = "Postgres version")]
    pg_version: PgMajorVersion,

@@ -675,16 +673,6 @@ struct EndpointStartCmdArgs {
    #[arg(default_value = "90s")]
    start_timeout: Duration,

-    #[clap(
-        long,
-        help = "Download LFC cache from endpoint storage on endpoint startup",
-        default_value = "false"
-    )]
-    autoprewarm: bool,
-
-    #[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
-    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
-
    #[clap(
        long,
        help = "Run in development mode, skipping VM-specific operations like process termination",
@@ -1516,35 +1504,29 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                )?;
            }

-            let (shards, stripe_size) = if let Some(ps_id) = pageserver_id {
-                let conf = env.get_pageserver_conf(ps_id).unwrap();
-                let libpq_url = Some({
-                    let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
-                    let port = port.unwrap_or(5432);
-                    format!("postgres://no_user@{host}:{port}")
-                });
-                let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
+            let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
+                let conf = env.get_pageserver_conf(pageserver_id).unwrap();
+                // Use gRPC if requested.
+                let pageserver = if endpoint.grpc {
+                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
                    let (host, port) = parse_host_port(grpc_addr)?;
                    let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
-                    Some(format!("grpc://no_user@{host}:{port}"))
+                    (PageserverProtocol::Grpc, host, port)
                } else {
-                    None
+                    let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
+                    let port = port.unwrap_or(5432);
+                    (PageserverProtocol::Libpq, host, port)
                };
-                let pageserver = PageserverShardConnectionInfo {
-                    libpq_url,
-                    grpc_url,
-                };
-
                // If caller is telling us what pageserver to use, this is not a tenant which is
                // fully managed by storage controller, therefore not sharded.
-                (vec![(0, pageserver)], DEFAULT_STRIPE_SIZE)
+                (vec![pageserver], DEFAULT_STRIPE_SIZE)
            } else {
                // Look up the currently attached location of the tenant, and its striping metadata,
                // to pass these on to postgres.
                let storage_controller = StorageController::from_env(env);
                let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
-                let shards = futures::future::try_join_all(locate_result.shards.into_iter().map(
-                    |shard| async move {
+                let pageservers = futures::future::try_join_all(
+                    locate_result.shards.into_iter().map(|shard| async move {
                        if let ComputeMode::Static(lsn) = endpoint.mode {
                            // Initialize LSN leases for static computes.
                            let conf = env.get_pageserver_conf(shard.node_id).unwrap();
@@ -1556,34 +1538,28 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                                .await?;
                        }

-                        let libpq_host = Host::parse(&shard.listen_pg_addr)?;
-                        let libpq_port = shard.listen_pg_port;
-                        let libpq_url =
-                            Some(format!("postgres://no_user@{libpq_host}:{libpq_port}"));
-
-                        let grpc_url = if let Some(grpc_host) = shard.listen_grpc_addr {
-                            let grpc_port = shard.listen_grpc_port.expect("no gRPC port");
-                            Some(format!("grpc://no_user@{grpc_host}:{grpc_port}"))
+                        let pageserver = if endpoint.grpc {
+                            (
+                                PageserverProtocol::Grpc,
+                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
+                                shard.listen_grpc_port.expect("no gRPC port"),
+                            )
                        } else {
-                            None
+                            (
+                                PageserverProtocol::Libpq,
+                                Host::parse(&shard.listen_pg_addr)?,
+                                shard.listen_pg_port,
+                            )
                        };
-                        let pageserver = PageserverShardConnectionInfo {
-                            libpq_url,
-                            grpc_url,
-                        };
-                        anyhow::Ok((shard.shard_id.shard_number.0 as u32, pageserver))
-                    },
-                ))
+                        anyhow::Ok(pageserver)
+                    }),
+                )
                .await?;
                let stripe_size = locate_result.shard_params.stripe_size;

-                (shards, stripe_size)
-            };
-            assert!(!shards.is_empty());
-            let pageserver_conninfo = PageserverConnectionInfo {
-                shards: shards.into_iter().collect(),
-                prefer_grpc: endpoint.grpc,
+                (pageservers, stripe_size)
            };
+            assert!(!pageservers.is_empty());

            let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;
            let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
@@ -1607,24 +1583,22 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            let endpoint_storage_token = env.generate_auth_token(&claims)?;
            let endpoint_storage_addr = env.endpoint_storage.listen_addr.to_string();

-            let args = control_plane::endpoint::EndpointStartArgs {
-                auth_token,
-                endpoint_storage_token,
-                endpoint_storage_addr,
-                safekeepers_generation,
-                safekeepers,
-                pageserver_conninfo,
-                remote_ext_base_url: remote_ext_base_url.clone(),
-                shard_stripe_size: stripe_size.0 as usize,
-                create_test_user: args.create_test_user,
-                start_timeout: args.start_timeout,
-                autoprewarm: args.autoprewarm,
-                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
-                dev: args.dev,
-            };
-
            println!("Starting existing endpoint {endpoint_id}...");
-            endpoint.start(args).await?;
+            endpoint
+                .start(
+                    &auth_token,
+                    endpoint_storage_token,
+                    endpoint_storage_addr,
+                    safekeepers_generation,
+                    safekeepers,
+                    pageservers,
+                    remote_ext_base_url.as_ref(),
+                    stripe_size.0 as usize,
+                    args.create_test_user,
+                    args.start_timeout,
+                    args.dev,
+                )
+                .await?;
        }
        EndpointCmd::Reconfigure(args) => {
            let endpoint_id = &args.endpoint_id;
@@ -1632,27 +1606,20 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                .endpoints
                .get(endpoint_id.as_str())
                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-            let shards = if let Some(ps_id) = args.endpoint_pageserver_id {
+            let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
                let conf = env.get_pageserver_conf(ps_id)?;
-                let libpq_url = Some({
-                    let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
-                    let port = port.unwrap_or(5432);
-                    format!("postgres://no_user@{host}:{port}")
-                });
-                let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
+                // Use gRPC if requested.
+                let pageserver = if endpoint.grpc {
+                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
                    let (host, port) = parse_host_port(grpc_addr)?;
                    let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
-                    Some(format!("grpc://no_user@{host}:{port}"))
+                    (PageserverProtocol::Grpc, host, port)
                } else {
-                    None
+                    let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
+                    let port = port.unwrap_or(5432);
+                    (PageserverProtocol::Libpq, host, port)
                };
-                let pageserver = PageserverShardConnectionInfo {
-                    libpq_url,
-                    grpc_url,
-                };
-                // If caller is telling us what pageserver to use, this is not a tenant which is
-                // fully managed by storage controller, therefore not sharded.
-                vec![(0, pageserver)]
+                vec![pageserver]
            } else {
                let storage_controller = StorageController::from_env(env);
                storage_controller
@@ -1662,36 +1629,28 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                    .into_iter()
                    .map(|shard| {
                        // Use gRPC if requested.
-                        let libpq_host = Host::parse(&shard.listen_pg_addr).expect("bad hostname");
-                        let libpq_port = shard.listen_pg_port;
-                        let libpq_url =
-                            Some(format!("postgres://no_user@{libpq_host}:{libpq_port}"));
-
-                        let grpc_url = if let Some(grpc_host) = shard.listen_grpc_addr {
-                            let grpc_port = shard.listen_grpc_port.expect("no gRPC port");
-                            Some(format!("grpc://no_user@{grpc_host}:{grpc_port}"))
+                        if endpoint.grpc {
+                            (
+                                PageserverProtocol::Grpc,
+                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))
+                                    .expect("bad hostname"),
+                                shard.listen_grpc_port.expect("no gRPC port"),
+                            )
                        } else {
-                            None
-                        };
-                        (
-                            shard.shard_id.shard_number.0 as u32,
-                            PageserverShardConnectionInfo {
-                                libpq_url,
-                                grpc_url,
-                            },
-                        )
+                            (
+                                PageserverProtocol::Libpq,
+                                Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
+                                shard.listen_pg_port,
+                            )
+                        }
                    })
                    .collect::<Vec<_>>()
            };
-            let pageserver_conninfo = PageserverConnectionInfo {
-                shards: shards.into_iter().collect(),
-                prefer_grpc: endpoint.grpc,
-            };
            // If --safekeepers argument is given, use only the listed
            // safekeeper nodes; otherwise all from the env.
            let safekeepers = parse_safekeepers(&args.safekeepers)?;
            endpoint
-                .reconfigure(Some(pageserver_conninfo), None, safekeepers, None)
+                .reconfigure(Some(pageservers), None, safekeepers, None)
                .await?;
        }
        EndpointCmd::Stop(args) => {
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -56,13 +56,9 @@ use compute_api::responses::{
    TlsConfig,
 };
 use compute_api::spec::{
-    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
-    RemoteExtSpec, Role,
+    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
+    PgIdent, RemoteExtSpec, Role,
 };
-
-// re-export these, because they're used in the reconfigure() function
-pub use compute_api::spec::{PageserverConnectionInfo, PageserverShardConnectionInfo};
-
 use jsonwebtoken::jwk::{
    AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
    OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
@@ -78,6 +74,7 @@ use sha2::{Digest, Sha256};
 use spki::der::Decode;
 use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
 use tracing::debug;
+use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};

 use crate::local_env::LocalEnv;
@@ -376,22 +373,6 @@ impl std::fmt::Display for EndpointTerminateMode {
    }
 }

-pub struct EndpointStartArgs {
-    pub auth_token: Option<String>,
-    pub endpoint_storage_token: String,
-    pub endpoint_storage_addr: String,
-    pub safekeepers_generation: Option<SafekeeperGeneration>,
-    pub safekeepers: Vec<NodeId>,
-    pub pageserver_conninfo: PageserverConnectionInfo,
-    pub remote_ext_base_url: Option<String>,
-    pub shard_stripe_size: usize,
-    pub create_test_user: bool,
-    pub start_timeout: Duration,
-    pub autoprewarm: bool,
-    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
-    pub dev: bool,
-}
-
 impl Endpoint {
    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
        if !entry.file_type()?.is_dir() {
@@ -656,6 +637,14 @@ impl Endpoint {
        }
    }

+    fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
+        pageservers
+            .iter()
+            .map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
+            .collect::<Vec<_>>()
+            .join(",")
+    }
+
    /// Map safekeepers ids to the actual connection strings.
    fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
        let mut safekeeper_connstrings = Vec::new();
@@ -688,7 +677,21 @@ impl Endpoint {
        })
    }

-    pub async fn start(&self, args: EndpointStartArgs) -> Result<()> {
+    #[allow(clippy::too_many_arguments)]
+    pub async fn start(
+        &self,
+        auth_token: &Option<String>,
+        endpoint_storage_token: String,
+        endpoint_storage_addr: String,
+        safekeepers_generation: Option<SafekeeperGeneration>,
+        safekeepers: Vec<NodeId>,
+        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        remote_ext_base_url: Option<&String>,
+        shard_stripe_size: usize,
+        create_test_user: bool,
+        start_timeout: Duration,
+        dev: bool,
+    ) -> Result<()> {
        if self.status() == EndpointStatus::Running {
            anyhow::bail!("The endpoint is already running");
        }
@@ -701,7 +704,10 @@ impl Endpoint {
            std::fs::remove_dir_all(self.pgdata())?;
        }

-        let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;
+        let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
+        assert!(!pageserver_connstring.is_empty());
+
+        let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;

        // check for file remote_extensions_spec.json
        // if it is present, read it and pass to compute_ctl
@@ -729,7 +735,7 @@ impl Endpoint {
                    cluster_id: None, // project ID: not used
                    name: None,       // project name: not used
                    state: None,
-                    roles: if args.create_test_user {
+                    roles: if create_test_user {
                        vec![Role {
                            name: PgIdent::from_str("test").unwrap(),
                            encrypted_password: None,
@@ -738,7 +744,7 @@ impl Endpoint {
                    } else {
                        Vec::new()
                    },
-                    databases: if args.create_test_user {
+                    databases: if create_test_user {
                        vec![Database {
                            name: PgIdent::from_str("neondb").unwrap(),
                            owner: PgIdent::from_str("test").unwrap(),
@@ -759,22 +765,21 @@ impl Endpoint {
                branch_id: None,
                endpoint_id: Some(self.endpoint_id.clone()),
                mode: self.mode,
-                pageserver_connection_info: Some(args.pageserver_conninfo),
-                safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
+                pageserver_connstring: Some(pageserver_connstring),
+                safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
                safekeeper_connstrings,
-                storage_auth_token: args.auth_token.clone(),
+                storage_auth_token: auth_token.clone(),
                remote_extensions,
                pgbouncer_settings: None,
-                shard_stripe_size: Some(args.shard_stripe_size),
+                shard_stripe_size: Some(shard_stripe_size),
                local_proxy_config: None,
                reconfigure_concurrency: self.reconfigure_concurrency,
                drop_subscriptions_before_start: self.drop_subscriptions_before_start,
                audit_log_level: ComputeAudit::Disabled,
                logs_export_host: None::<String>,
-                endpoint_storage_addr: Some(args.endpoint_storage_addr),
-                endpoint_storage_token: Some(args.endpoint_storage_token),
-                autoprewarm: args.autoprewarm,
-                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
+                endpoint_storage_addr: Some(endpoint_storage_addr),
+                endpoint_storage_token: Some(endpoint_storage_token),
+                autoprewarm: false,
                suspend_timeout_seconds: -1, // Only used in neon_local.
            };

@@ -786,7 +791,7 @@ impl Endpoint {
                debug!("spec.cluster {:?}", spec.cluster);

                // fill missing fields again
-                if args.create_test_user {
+                if create_test_user {
                    spec.cluster.roles.push(Role {
                        name: PgIdent::from_str("test").unwrap(),
                        encrypted_password: None,
@@ -821,7 +826,7 @@ impl Endpoint {
        // Launch compute_ctl
        let conn_str = self.connstr("cloud_admin", "postgres");
        println!("Starting postgres node at '{conn_str}'");
-        if args.create_test_user {
+        if create_test_user {
            let conn_str = self.connstr("test", "neondb");
            println!("Also at '{conn_str}'");
        }
@@ -853,11 +858,11 @@ impl Endpoint {
        .stderr(logfile.try_clone()?)
        .stdout(logfile);

-        if let Some(remote_ext_base_url) = args.remote_ext_base_url {
-            cmd.args(["--remote-ext-base-url", &remote_ext_base_url]);
+        if let Some(remote_ext_base_url) = remote_ext_base_url {
+            cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
        }

-        if args.dev {
+        if dev {
            cmd.arg("--dev");
        }

@@ -889,11 +894,10 @@ impl Endpoint {
                Ok(state) => {
                    match state.status {
                        ComputeStatus::Init => {
-                            let timeout = args.start_timeout;
-                            if Instant::now().duration_since(start_at) > timeout {
+                            if Instant::now().duration_since(start_at) > start_timeout {
                                bail!(
                                    "compute startup timed out {:?}; still in Init state",
-                                    timeout
+                                    start_timeout
                                );
                            }
                            // keep retrying
@@ -921,10 +925,9 @@ impl Endpoint {
                    }
                }
                Err(e) => {
-                    if Instant::now().duration_since(start_at) > args.start_timeout {
+                    if Instant::now().duration_since(start_at) > start_timeout {
                        return Err(e).context(format!(
-                            "timed out {:?} waiting to connect to compute_ctl HTTP",
-                            args.start_timeout
+                            "timed out {start_timeout:?} waiting to connect to compute_ctl HTTP",
                        ));
                    }
                }
@@ -972,7 +975,7 @@ impl Endpoint {

    pub async fn reconfigure(
        &self,
-        pageserver_conninfo: Option<PageserverConnectionInfo>,
+        pageservers: Option<Vec<(PageserverProtocol, Host, u16)>>,
        stripe_size: Option<ShardStripeSize>,
        safekeepers: Option<Vec<NodeId>>,
        safekeeper_generation: Option<SafekeeperGeneration>,
@@ -988,17 +991,15 @@ impl Endpoint {
        let postgresql_conf = self.read_postgresql_conf()?;
        spec.cluster.postgresql_conf = Some(postgresql_conf);

-        if let Some(pageserver_conninfo) = pageserver_conninfo {
-            // If pageservers are provided, we need to ensure that they are not empty.
-            // This is a requirement for the compute_ctl configuration.
-            anyhow::ensure!(
-                !pageserver_conninfo.shards.is_empty(),
-                "no pageservers provided"
-            );
-            spec.pageserver_connection_info = Some(pageserver_conninfo);
-        }
-        if stripe_size.is_some() {
-            spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
+        // If pageservers are not specified, don't change them.
+        if let Some(pageservers) = pageservers {
+            anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
+
+            let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
+            spec.pageserver_connstring = Some(pageserver_connstr);
+            if stripe_size.is_some() {
+                spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
+            }
        }

        // If safekeepers are not specified, don't change them.
@@ -1047,7 +1048,7 @@ impl Endpoint {

    pub async fn reconfigure_pageservers(
        &self,
-        pageservers: PageserverConnectionInfo,
+        pageservers: Vec<(PageserverProtocol, Host, u16)>,
        stripe_size: Option<ShardStripeSize>,
    ) -> Result<()> {
        self.reconfigure(Some(pageservers), stripe_size, None, None)
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -65,27 +65,12 @@ enum Command {
        #[arg(long)]
        scheduling: Option<NodeSchedulingPolicy>,
    },
-    /// Exists for backup usage and will be removed in future.
-    /// Use [`Command::NodeStartDelete`] instead, if possible.
+    // Mark a node as deleted.
    NodeDelete {
        #[arg(long)]
        node_id: NodeId,
    },
-    /// Start deletion of the specified pageserver.
-    NodeStartDelete {
-        #[arg(long)]
-        node_id: NodeId,
-    },
-    /// Cancel deletion of the specified pageserver and wait for `timeout`
-    /// for the operation to be canceled. May be retried.
-    NodeCancelDelete {
-        #[arg(long)]
-        node_id: NodeId,
-        #[arg(long)]
-        timeout: humantime::Duration,
-    },
    /// Delete a tombstone of node from the storage controller.
-    /// This is used when we want to allow the node to be re-registered.
    NodeDeleteTombstone {
        #[arg(long)]
        node_id: NodeId,
@@ -927,43 +912,10 @@ async fn main() -> anyhow::Result<()> {
                .await?;
        }
        Command::NodeDelete { node_id } => {
-            eprintln!("Warning: This command is obsolete and will be removed in a future version");
-            eprintln!("Use `NodeStartDelete` instead, if possible");
            storcon_client
                .dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
                .await?;
        }
-        Command::NodeStartDelete { node_id } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::PUT,
-                    format!("control/v1/node/{node_id}/delete"),
-                    None,
-                )
-                .await?;
-            println!("Delete started for {node_id}");
-        }
-        Command::NodeCancelDelete { node_id, timeout } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::DELETE,
-                    format!("control/v1/node/{node_id}/delete"),
-                    None,
-                )
-                .await?;
-
-            println!("Waiting for node {node_id} to quiesce on scheduling policy ...");
-
-            let final_policy =
-                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {
-                    !matches!(sched, NodeSchedulingPolicy::Deleting)
-                })
-                .await?;
-
-            println!(
-                "Delete was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
-            );
-        }
        Command::NodeDeleteTombstone { node_id } => {
            storcon_client
                .dispatch::<(), ()>(
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -58,7 +58,7 @@ pub enum LfcPrewarmState {
    },
 }

-#[derive(Serialize, Default, Debug, Clone, PartialEq)]
+#[derive(Serialize, Default, Debug, Clone)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcOffloadState {
    #[default]
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -105,11 +105,7 @@ pub struct ComputeSpec {
    // updated to fill these fields, we can make these non optional.
    pub tenant_id: Option<TenantId>,
    pub timeline_id: Option<TimelineId>,
-
-    // Pageserver information can be passed in two different ways:
-    // 1. Here
-    // 2. in cluster.settings. This is legacy, we are switching to method 1.
-    pub pageserver_connection_info: Option<PageserverConnectionInfo>,
+    pub pageserver_connstring: Option<String>,

    // More neon ids that we expose to the compute_ctl
    // and to postgres as neon extension GUCs.
@@ -185,14 +181,10 @@ pub struct ComputeSpec {
    /// JWT for authorizing requests to endpoint storage service
    pub endpoint_storage_token: Option<String>,

+    /// Download LFC state from endpoint_storage and pass it to Postgres on startup
    #[serde(default)]
-    /// Download LFC state from endpoint storage and pass it to Postgres on compute startup
    pub autoprewarm: bool,

-    #[serde(default)]
-    /// Upload LFC state to endpoint storage periodically. Default value (None) means "don't upload"
-    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
-
    /// Suspend timeout in seconds.
    ///
    /// We use this value to derive other values, such as the installed extensions metric.
@@ -218,20 +210,6 @@ pub enum ComputeFeature {
    UnknownFeature,
 }

-/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
-#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
-pub struct PageserverConnectionInfo {
-    pub shards: HashMap<u32, PageserverShardConnectionInfo>,
-
-    pub prefer_grpc: bool,
-}
-
-#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
-pub struct PageserverShardConnectionInfo {
-    pub libpq_url: Option<String>,
-    pub grpc_url: Option<String>,
-}
-
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct RemoteExtSpec {
    pub public_extensions: Option<Vec<String>>,
@@ -349,12 +327,6 @@ impl ComputeMode {
    }
 }

-impl Display for ComputeMode {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.to_type_str())
-    }
-}
-
 /// Log level for audit logging
 #[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
 pub enum ComputeAudit {
@@ -466,7 +438,7 @@ pub struct JwksSettings {
 }

 /// Protocol used to connect to a Pageserver. Parsed from the connstring scheme.
-#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Copy, Debug, Default)]
 pub enum PageserverProtocol {
    /// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.
    #[default]
--- a/libs/compute_api/tests/cluster_spec.json
+++ b/libs/compute_api/tests/cluster_spec.json
@@ -90,11 +90,6 @@
                "value": "off",
                "vartype": "bool"
            },
-            {
-                "name": "offload_lfc_interval_seconds",
-                "value": "20",
-                "vartype": "integer"
-            },
            {
                "name": "neon.safekeepers",
                "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -6,29 +6,8 @@ license.workspace = true

 [dependencies]
 thiserror.workspace = true
-nix.workspace = true
+nix.workspace = true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
-rustc-hash = { version = "2.1.1" }
-rand = "0.9.1"
-libc.workspace = true
-lock_api = "0.4.13"
-atomic = "0.6.1"
-bytemuck = { version = "1.23.1", features = ["derive"] }
-
-[dev-dependencies]
-criterion = { workspace = true, features = ["html_reports"] }
-rand_distr = "0.5.1"
-xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
-ahash.workspace = true
-twox-hash = { version = "2.1.1" }
-seahash = "4.1.0"
-hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" }
-foldhash = "0.1.5"
-

 [target.'cfg(target_os = "macos")'.dependencies]
 tempfile = "3.14.0"
-
-[[bench]]
-name = "hmap_resize"
-harness = false
--- a/libs/neon-shmem/benches/hmap_resize.rs
+++ b/libs/neon-shmem/benches/hmap_resize.rs
@@ -1,330 +0,0 @@
-use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
-use neon_shmem::hash::HashMapAccess;
-use neon_shmem::hash::HashMapInit;
-use neon_shmem::hash::entry::Entry;
-use rand::distr::{Distribution, StandardUniform};
-use rand::prelude::*;
-use std::default::Default;
-use std::hash::BuildHasher;
-
-// Taken from bindings to C code
-
-#[derive(Clone, Debug, Hash, Eq, PartialEq)]
-#[repr(C)]
-pub struct FileCacheKey {
-    pub _spc_id: u32,
-    pub _db_id: u32,
-    pub _rel_number: u32,
-    pub _fork_num: u32,
-    pub _block_num: u32,
-}
-
-impl Distribution<FileCacheKey> for StandardUniform {
-    // questionable, but doesn't need to be good randomness
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
-        FileCacheKey {
-            _spc_id: rng.random(),
-            _db_id: rng.random(),
-            _rel_number: rng.random(),
-            _fork_num: rng.random(),
-            _block_num: rng.random(),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-#[repr(C)]
-pub struct FileCacheEntry {
-    pub _offset: u32,
-    pub _access_count: u32,
-    pub _prev: *mut FileCacheEntry,
-    pub _next: *mut FileCacheEntry,
-    pub _state: [u32; 8],
-}
-
-impl FileCacheEntry {
-    fn dummy() -> Self {
-        Self {
-            _offset: 0,
-            _access_count: 0,
-            _prev: std::ptr::null_mut(),
-            _next: std::ptr::null_mut(),
-            _state: [0; 8],
-        }
-    }
-}
-
-// Utilities for applying operations.
-
-#[derive(Clone, Debug)]
-struct TestOp<K, V>(K, Option<V>);
-
-fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
-    op: TestOp<K, V>,
-    map: &mut HashMapAccess<K, V, S>,
-) {
-    let entry = map.entry(op.0);
-
-    match op.1 {
-        Some(new) => match entry {
-            Entry::Occupied(mut e) => Some(e.insert(new)),
-            Entry::Vacant(e) => {
-                _ = e.insert(new).unwrap();
-                None
-            }
-        },
-        None => match entry {
-            Entry::Occupied(e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
-        },
-    };
-}
-
-// Hash utilities
-
-struct SeaRandomState {
-    k1: u64,
-    k2: u64,
-    k3: u64,
-    k4: u64,
-}
-
-impl std::hash::BuildHasher for SeaRandomState {
-    type Hasher = seahash::SeaHasher;
-
-    fn build_hasher(&self) -> Self::Hasher {
-        seahash::SeaHasher::with_seeds(self.k1, self.k2, self.k3, self.k4)
-    }
-}
-
-impl SeaRandomState {
-    fn new() -> Self {
-        let mut rng = rand::rng();
-        Self {
-            k1: rng.random(),
-            k2: rng.random(),
-            k3: rng.random(),
-            k4: rng.random(),
-        }
-    }
-}
-
-fn small_benchs(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Small maps");
-    group.sample_size(10);
-
-    group.bench_function("small_rehash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2).attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_xxhash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(twox_hash::xxhash64::RandomState::default())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_ahash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(ahash::RandomState::default())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_seahash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(SeaRandomState::new())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.finish();
-}
-
-fn real_benchs(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Realistic workloads");
-    group.sample_size(10);
-    group.bench_function("real_bulk_insert", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut rng = rand::rng();
-        b.iter_batched(
-            || HashMapInit::new_resizeable(size, size * 2).attach_writer(),
-            |writer| {
-                for _ in 0..ideal_filled {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    let entry = writer.entry(key);
-                    std::hint::black_box(match entry {
-                        Entry::Occupied(mut e) => {
-                            e.insert(val);
-                        }
-                        Entry::Vacant(e) => {
-                            _ = e.insert(val).unwrap();
-                        }
-                    })
-                }
-            },
-            BatchSize::SmallInput,
-        )
-    });
-
-    group.bench_function("real_rehash", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("real_rehash_hashbrown", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut writer = hashbrown::raw::RawTable::new();
-        let mut rng = rand::rng();
-        let hasher = rustc_hash::FxBuildHasher::default();
-        unsafe {
-            writer
-                .resize(
-                    size,
-                    |(k, _)| hasher.hash_one(&k),
-                    hashbrown::raw::Fallibility::Infallible,
-                )
-                .unwrap();
-        }
-        while writer.len() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
-                hasher.hash_one(&k)
-            });
-        }
-        b.iter(|| unsafe {
-            writer.table.rehash_in_place(
-                &|table, index| {
-                    hasher.hash_one(
-                        &table
-                            .bucket::<(FileCacheKey, FileCacheEntry)>(index)
-                            .as_ref()
-                            .0,
-                    )
-                },
-                std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
-                if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
-                    Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
-                } else {
-                    None
-                },
-            )
-        });
-    });
-
-    for elems in [2, 4, 8, 16, 32, 64, 96, 112] {
-        group.bench_with_input(
-            BenchmarkId::new("real_rehash_varied", elems),
-            &elems,
-            |b, &size| {
-                let ideal_filled = size * 1_000_000;
-                let size = 125_000_000;
-                let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
-                let mut rng = rand::rng();
-                while writer.get_num_buckets_in_use() < ideal_filled as usize {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    apply_op(TestOp(key, Some(val)), &mut writer);
-                }
-                b.iter(|| writer.shuffle());
-            },
-        );
-        group.bench_with_input(
-            BenchmarkId::new("real_rehash_varied_hashbrown", elems),
-            &elems,
-            |b, &size| {
-                let ideal_filled = size * 1_000_000;
-                let size = 125_000_000;
-                let mut writer = hashbrown::raw::RawTable::new();
-                let mut rng = rand::rng();
-                let hasher = rustc_hash::FxBuildHasher::default();
-                unsafe {
-                    writer
-                        .resize(
-                            size,
-                            |(k, _)| hasher.hash_one(&k),
-                            hashbrown::raw::Fallibility::Infallible,
-                        )
-                        .unwrap();
-                }
-                while writer.len() < ideal_filled as usize {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
-                        hasher.hash_one(&k)
-                    });
-                }
-                b.iter(|| unsafe {
-                    writer.table.rehash_in_place(
-                        &|table, index| {
-                            hasher.hash_one(
-                                &table
-                                    .bucket::<(FileCacheKey, FileCacheEntry)>(index)
-                                    .as_ref()
-                                    .0,
-                            )
-                        },
-                        std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
-                        if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
-                            Some(|ptr| {
-                                std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry))
-                            })
-                        } else {
-                            None
-                        },
-                    )
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-criterion_group!(benches, small_benchs, real_benchs);
-criterion_main!(benches);
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -1,622 +0,0 @@
-use std::cell::UnsafeCell;
-use std::hash::{BuildHasher, Hash};
-use std::mem::MaybeUninit;
-use std::ptr::NonNull;
-use std::sync::atomic::Ordering;
-
-use crate::shmem::ShmemHandle;
-use crate::{shmem, sync::*};
-
-mod core;
-mod bucket;
-pub mod entry;
-
-#[cfg(test)]
-mod tests;
-
-use core::{
-	CoreHashMap, DictShard, EntryKey, EntryTag,
-	FullError, MaybeUninitDictShard
-};
-use bucket::{Bucket, BucketIdx};
-use entry::Entry;
-
-/// Wrapper struct around multiple [`ShmemHandle`]s.
-struct HashMapHandles {
-	keys_shmem: ShmemHandle,
-	idxs_shmem: ShmemHandle,
-	vals_shmem: ShmemHandle,
-}
-
-/// This represents a hash table that (possibly) lives in shared memory.
-/// If a new process is launched with fork(), the child process inherits
-/// this struct.
-#[must_use]
-pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
-    shmem_handles: Option<HashMapHandles>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-    hasher: S,
-    num_buckets: usize,
-	num_shards: usize,
-	resize_lock: Mutex<()>,
-}
-
-/// This is a per-process handle to a hash table that (possibly) lives in shared memory.
-/// If a child process is launched with fork(), the child process should
-/// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().
-///
-/// XXX: We're not making use of it at the moment, but this struct could
-/// hold process-local information in the future.
-pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
-    shmem_handles: Option<HashMapHandles>,
-    shared_ptr: *mut HashMapShared<'a, K, V>,
-    hasher: S,
-	resize_lock: Mutex<()>,
-}
-
-unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
-unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}
-
-impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
-    /// Change the 'hasher' used by the hash table.
-    ///
-    /// NOTE: This must be called right after creating the hash table,
-    /// before inserting any entries and before calling attach_writer/reader.
-    /// Otherwise different accessors could be using different hash function,
-    /// with confusing results.
-	///
-	/// TODO(quantumish): consider splitting out into a separate builder type?
-    pub fn with_hasher<T: BuildHasher>(self, hasher: T) -> HashMapInit<'a, K, V, T> {
-        HashMapInit {
-            hasher,
-            shmem_handles: self.shmem_handles,
-            shared_ptr: self.shared_ptr,
-            num_buckets: self.num_buckets,
-			num_shards: self.num_shards,
-			resize_lock: self.resize_lock,
-        }
-    }
-
-    /// Loosely (over)estimate the size needed to store a hash table with `num_buckets` buckets.
-    pub fn estimate_sizes(num_buckets: usize, num_shards: usize) -> (usize, usize, usize) {
-		(
-			(size_of::<EntryKey<K>>() * num_buckets)
-				+ (size_of::<libc::pthread_rwlock_t>() * num_shards)
-				+ (size_of::<RwLock<DictShard<'_, K>>>() * num_shards)
-				+ size_of::<HashMapShared<K, V>>()
-				+ 1000,
-			(size_of::<BucketIdx>() * num_buckets)+ 1000,
-			(size_of::<Bucket<V>>() * num_buckets) + 1000
-		)
-	}
-
-	fn carve_space<T>(ptr: &mut *mut u8, amount: usize) -> *mut T {
-		*ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<T>())) };
-        let out = ptr.cast();
-        *ptr = unsafe { ptr.add(size_of::<T>() * amount) };
-		out
-	}
-	
-    fn new(
-        num_buckets: usize,
-		num_shards: usize,
-        mut keys_ptr: *mut u8,
-		mut idxs_ptr: *mut u8,
-		mut vals_ptr: *mut u8,
-        shmem_handles: Option<HashMapHandles>,
-        hasher: S,
-    ) -> Self {
-		// Set up the main area: hashmap info at front, keys at back
-		let mutex_ptr = Self::carve_space::<libc::pthread_mutex_t>(&mut keys_ptr, 1);
-		let shared_ptr = Self::carve_space::<HashMapShared<K, V>>(&mut keys_ptr, 1);
-		let shards_ptr = Self::carve_space::<RwLock<DictShard<'_, K>>>(&mut keys_ptr, num_shards);
-		let locks_ptr = Self::carve_space::<libc::pthread_rwlock_t>(&mut keys_ptr, num_shards);
-		let keys_ptr = Self::carve_space::<EntryKey<K>>(&mut keys_ptr, num_buckets);
-		
-		// Set up the area of bucket idxs and the area of buckets. Not much to do!
-		let idxs_ptr = Self::carve_space::<BucketIdx>(&mut idxs_ptr, num_buckets);
-		let vals_ptr = Self::carve_space::<Bucket<V>>(&mut vals_ptr, num_buckets);
-
-		// Initialize the shards.
-		let shards_uninit: &mut [MaybeUninit<RwLock<MaybeUninitDictShard<'_, K>>>] =
-            unsafe { std::slice::from_raw_parts_mut(shards_ptr.cast(), num_shards) };
-		let shard_size = num_buckets / num_shards;
-		for i in 0..num_shards {
-			let size = ((i + 1) * shard_size).min(num_buckets) - (i * shard_size);
-			unsafe {
-				shards_uninit[i].write(RwLock::from_raw(
-					PthreadRwLock::new(NonNull::new_unchecked(locks_ptr.add(i))),
-					MaybeUninitDictShard {
-						keys: std::slice::from_raw_parts_mut(keys_ptr.add(i * shard_size).cast(), size),
-						idxs: std::slice::from_raw_parts_mut(idxs_ptr.add(i * shard_size).cast(), size)
-					}
-				));
-			};
-		}
-		let shards: &mut [RwLock<MaybeUninitDictShard<'_, K>>] =
-            unsafe { std::slice::from_raw_parts_mut(shards_ptr.cast(), num_shards) };
-        let buckets: *const [MaybeUninit<Bucket<V>>] = 
-            unsafe { std::slice::from_raw_parts(vals_ptr.cast(), num_buckets) };
-
-		unsafe { 
-			let hashmap = CoreHashMap::new(&*(buckets as *const UnsafeCell<_>), shards);
-			std::ptr::write(shared_ptr, hashmap);
-		}
-
-		let resize_lock = Mutex::from_raw(
-			unsafe { PthreadMutex::new(NonNull::new_unchecked(mutex_ptr)) }, ()
-		);
-		
-        Self {
-			num_shards,
-            num_buckets,
-            shmem_handles,
-            shared_ptr,
-            hasher,
-			resize_lock, 
-        }
-    }
-
-    /// Attach to a hash table for writing.
-    pub fn attach_writer(self) -> HashMapAccess<'a, K, V, S> {
-        HashMapAccess {
-            shmem_handles: self.shmem_handles,
-            shared_ptr: self.shared_ptr,
-            hasher: self.hasher,
-			resize_lock: self.resize_lock,
-        }
-    }
-
-    /// Initialize a table for reading. Currently identical to [`HashMapInit::attach_writer`].
-    pub fn attach_reader(self) -> HashMapAccess<'a, K, V, S> {
-        self.attach_writer()
-    }
-}
-
-type HashMapShared<'a, K, V> = CoreHashMap<'a, K, V>;
-
-impl<'a, K, V> HashMapInit<'a, K, V, rustc_hash::FxBuildHasher>
-where
-    K: Clone + Hash + Eq,
-{
-    /// Place the hash table within a user-supplied fixed memory area.
-    pub fn with_fixed(
-		num_buckets: usize,
-		num_shards: usize,
-		area: &'a mut [MaybeUninit<u8>]
-	) -> Self {
-		let (keys_size, idxs_size, _) = Self::estimate_sizes(num_buckets, num_shards);
-		let ptr = area.as_mut_ptr().cast();
-        Self::new(
-            num_buckets,
-			num_shards,
-            ptr,
-			unsafe { ptr.add(keys_size) },
-			unsafe { ptr.add(keys_size).add(idxs_size) },
-            None,
-            rustc_hash::FxBuildHasher,
-        )
-    }
-
-    /// Place a new hash map in the given shared memory area
-    ///
-    /// # Panics
-    /// Will panic on failure to resize area to expected map size.
-    pub fn with_shmems(
-		num_buckets: usize,
-		num_shards: usize,
-		keys_shmem: ShmemHandle,
-		idxs_shmem: ShmemHandle,
-		vals_shmem: ShmemHandle,
-	) -> Self {
-		let (keys_size, idxs_size, vals_size) = Self::estimate_sizes(num_buckets, num_shards);
-        keys_shmem.set_size(keys_size).expect("could not resize shared memory area");
-        idxs_shmem.set_size(idxs_size).expect("could not resize shared memory area");
-        vals_shmem.set_size(vals_size).expect("could not resize shared memory area");
-        Self::new(
-            num_buckets,
-			num_shards,
-            keys_shmem.data_ptr.as_ptr().cast(),
-			idxs_shmem.data_ptr.as_ptr().cast(),
-			vals_shmem.data_ptr.as_ptr().cast(),
-            Some(HashMapHandles { keys_shmem, idxs_shmem, vals_shmem }),
-            rustc_hash::FxBuildHasher,
-        )
-    }
-
-    /// Make a resizable hash map within a new shared memory area with the given name.
-    pub fn new_resizeable_named(
-		num_buckets: usize,
-		max_buckets: usize,
-		num_shards: usize,
-		name: &str
-	) -> Self {
-		let (keys_size, idxs_size, vals_size) = Self::estimate_sizes(num_buckets, num_shards);
-		let (keys_max, idxs_max, vals_max) = Self::estimate_sizes(max_buckets, num_shards);
-        let keys_shmem = ShmemHandle::new(&format!("{name}_keys"), keys_size, keys_max)
-			.expect("failed to make shared memory area");
-		let idxs_shmem = ShmemHandle::new(&format!("{name}_idxs"), idxs_size, idxs_max)
-			.expect("failed to make shared memory area");
-		let vals_shmem = ShmemHandle::new(&format!("{name}_vals"), vals_size, vals_max)
-			.expect("failed to make shared memory area");
-        Self::new(
-            num_buckets,
-			num_shards,
-            keys_shmem.data_ptr.as_ptr().cast(),
-			idxs_shmem.data_ptr.as_ptr().cast(),
-			vals_shmem.data_ptr.as_ptr().cast(),
-            Some(HashMapHandles { keys_shmem, idxs_shmem, vals_shmem }),
-            rustc_hash::FxBuildHasher,
-        )
-    }
-
-    /// Make a resizable hash map within a new anonymous shared memory area.
-    pub fn new_resizeable(
-		num_buckets: usize,
-		max_buckets: usize,
-		num_shards: usize,
-	) -> Self {
-        use std::sync::atomic::{AtomicUsize, Ordering};
-        static COUNTER: AtomicUsize = AtomicUsize::new(0);
-        let val = COUNTER.fetch_add(1, Ordering::Relaxed);
-        let name = format!("neon_shmem_hmap{val}");
-        Self::new_resizeable_named(num_buckets, max_buckets, num_shards, &name)
-    }
-}
-
-impl<'a, K, V, S: BuildHasher> HashMapAccess<'a, K, V, S>
-where
-    K: Clone + Hash + Eq,
-{
-    /// Hash a key using the map's hasher.
-    #[inline]
-    fn get_hash_value(&self, key: &K) -> u64 {
-        self.hasher.hash_one(key)
-    }
-
-    /// Get a reference to the corresponding value for a key.
-    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, V>> {
-        let hash = self.get_hash_value(key);
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-		map.get_with_hash(key, hash)
-    }
-
-    /// Get a reference to the entry containing a key.
-    pub fn entry(&self, key: K) -> Result<Entry<'a, K, V>, FullError> {
-        let hash = self.get_hash_value(&key);
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        map.entry_with_hash(key, hash)
-    }
-
-    /// Remove a key given its hash. Returns the associated value if it existed.
-    pub fn remove(&self, key: &K) -> Option<V> {
-		let hash = self.get_hash_value(key);
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        match map.entry_with_hash(key.clone(), hash) {
-            Ok(Entry::Occupied(mut e)) => Some(e.remove()),
-            _ => None,
-        }
-    }
-
-    /// Insert/update a key. Returns the previous associated value if it existed.
-    ///
-    /// # Errors
-    /// Will return [`core::FullError`] if there is no more space left in the map.
-    pub fn insert(&self, key: K, value: V) -> Result<Option<V>, core::FullError> {
-        let hash = self.get_hash_value(&key);
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        match map.entry_with_hash(key.clone(), hash)? {
-            Entry::Occupied(mut e) => Ok(Some(e.insert(value))),
-            Entry::Vacant(e) => {
-                _ = e.insert(value);
-                Ok(None)
-            }
-        }
-    }
-
-    pub unsafe fn get_at_bucket(&self, pos: usize) -> Option<&V> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        if pos >= map.bucket_arr.len() {
-            return None;
-        }
-
-		let bucket = &map.bucket_arr[pos];
-		if bucket.next.load(Ordering::Relaxed).full_checked().is_some() {
-			Some(unsafe { bucket.val.assume_init_ref() })
-		} else {
-			None
-		}
-    }
-
-	pub unsafe fn entry_at_bucket(&self, pos: usize) -> Option<entry::OccupiedEntry<'a, K, V>> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        if pos >= map.bucket_arr.len() {
-            return None;
-        }
-
-		let bucket = &map.bucket_arr[pos];
-		bucket.next.load(Ordering::Relaxed).full_checked().map(|entry_pos| {
-			let shard_size = map.get_num_buckets() / map.dict_shards.len();
-			let shard_index = entry_pos / shard_size;
-			let shard_off = entry_pos % shard_size;
-			entry::OccupiedEntry {
-				shard: map.dict_shards[shard_index].write(),
-				shard_pos: shard_off,
-				bucket_pos: pos,
-				bucket_arr: &map.bucket_arr,
-				key_pos: entry_pos,
-			}		
-		})
-    }
-	
-    /// bucket the number of buckets in the table.
-    pub fn get_num_buckets(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.get_num_buckets()
-    }
-
-    /// Maps a pointer to a stored value back to the index of the bucket that contains it.
-    ///
-    /// The index is the byte distance from the start of the bucket array divided by the
-    /// size of one bucket.
-    ///
-    /// # Panics
-    /// Panics if the computed index is not within the bucket array.
-    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-
-        let base_addr = map.bucket_arr.as_mut_ptr() as usize;
-        let byte_offset = val_ptr as usize - base_addr;
-        let idx = byte_offset / size_of::<Bucket<V>>();
-        assert!(idx < map.bucket_arr.len());
-
-        idx
-    }
-
-    /// Returns the number of occupied buckets in the table.
-    pub fn get_num_buckets_in_use(&self) -> usize {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.bucket_arr.buckets_in_use.load(Ordering::Relaxed)
-    }
-
-    /// Clears all entries in a table. Does not reset any shrinking operations.
-    pub fn clear(&self) {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        map.clear();
-	}
-
-	/// Begin a rehash operation.
-	///
-	/// Retags every existing entry so it is treated as belonging to the previous layout
-	/// (`Occupied` -> `Rehash`, `Tombstone` -> `RehashTombstone`) and resets the rehash
-	/// cursor so that positions `0..rehash_buckets` are pending.
-	///
-	/// `shards` are the write guards whose keys get retagged; callers in this file pass
-	/// guards for every shard.
-	///
-	/// Returns `false` without changing anything if a rehash is already underway
-	/// (`rehash_index < rehash_end`), and `true` once the new rehash has been set up.
-	///
-	/// # Panics
-	/// Panics if `rehash_buckets` exceeds the current number of buckets.
-	// TODO: refusing to start while another rehash is pending means the second resize is
-	// never rehashed. One future feature could be to allow interruptible resizes. We wouldn't
-	// pay much of a space penalty if we used something like https://crates.io/crates/u4 inside
-	// EntryTag to allow for many tiers of older chains (we would have to track previous sizes
-	// within a sliding window at the front of the memory region or something)
-	fn begin_rehash(
-		&self,
-		shards: &mut Vec<RwLockWriteGuard<'_, DictShard<'_, K>>>,
-		rehash_buckets: usize
-	) -> bool {
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		assert!(rehash_buckets <= map.get_num_buckets(), "rehashing subset of buckets");
-
-		// A rehash is pending exactly while the cursor has not reached the end. The idle
-		// state (including a freshly created map) has `rehash_index >= rehash_end`, so that
-		// is the state in which a new rehash must be allowed to start.
-		if map.rehash_index.load(Ordering::Relaxed) < map.rehash_end.load(Ordering::Relaxed) {
-			return false;
-		}
-
-		for shard in shards.iter_mut() {
-			for key in shard.keys.iter_mut() {
-				match key.tag {
-					EntryTag::Occupied => key.tag = EntryTag::Rehash,
-					EntryTag::Tombstone => key.tag = EntryTag::RehashTombstone,
-					_ => (),
-				}
-			}
-		}
-
-		map.rehash_index.store(0, Ordering::Relaxed);
-		map.rehash_end.store(rehash_buckets, Ordering::Relaxed);
-		true
-	}
-
-	/// Incrementally rehash one chunk (`REHASH_CHUNK_SIZE` positions) of the pending range.
-	///
-	/// Returns `true` when nothing is left to rehash (`rehash_index >= rehash_end`) and
-	/// `false` otherwise, including when the resize lock could not be acquired.
-	///
-	/// Unfinished, final large-ish piece standing in the way of a prototype: the body ends
-	/// in `todo!()`, so reaching any entry tagged `Rehash` currently panics.
-	//
-	// Based off the hashbrown implementation but adapted to an incremental context. See below:
-	// https://github.com/quantumish/hashbrown/blob/6610e6d2b1f288ef7b0709a3efefbc846395dc5e/src/raw/mod.rs#L2866
-	fn do_rehash(&self) -> bool {
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		// TODO(quantumish): refactor these out into settable quantities
-		const REHASH_CHUNK_SIZE: usize = 10;
-
-		let end = map.rehash_end.load(Ordering::Relaxed);
-		let ind = map.rehash_index.load(Ordering::Relaxed);
-		// Cursor already at the end: no rehash pending.
-		if ind >= end { return true }
-
-		// We have to use a mutex to prevent concurrent rehashes as they provide a pretty
-		// obvious chance at a deadlock: one thread wants to rehash an entry into a shard
-		// which is held by another thread which wants to rehash its block into the shard
-		// held by the first. Doesn't seem like there's an obvious way around this?
-		let _guard = self.resize_lock.try_lock();
-		if _guard.is_none() { return false }
-		
-		// Claim this chunk by advancing the cursor before processing it.
-		map.rehash_index.store((ind+REHASH_CHUNK_SIZE).min(end), Ordering::Relaxed);
-		
-		let shard_size = map.get_num_buckets() / map.dict_shards.len();
-		for i in ind..(ind+REHASH_CHUNK_SIZE).min(end) {
-			// Split the global position into (shard, offset within shard).
-			let (shard_index, shard_off) = (i / shard_size, i % shard_size);
-			let mut shard = map.dict_shards[shard_index].write();
-			// Only entries still tagged `Rehash` need to be moved.
-			if shard.keys[shard_off].tag != EntryTag::Rehash {
-				continue;
-			}
-			loop {
-				let hash = self.get_hash_value(unsafe {
-					shard.keys[shard_off].val.assume_init_ref()
-				});
-
-				let key = unsafe { shard.keys[shard_off].val.assume_init_ref() }.clone();
-				// Look up the destination: current-generation tombstones are skipped and
-				// every old-generation tag is treated as a reusable tombstone.
-				let new = map.entry(key, hash, |tag| match tag {
-					EntryTag::Empty => core::MapEntryType::Empty,
-					EntryTag::Occupied => core::MapEntryType::Occupied,
-					EntryTag::Tombstone => core::MapEntryType::Skip,
-					_ => core::MapEntryType::Tombstone,
-				}).unwrap();
-
-				// I believe the blocker here is that this unfortunately would require
-				// duplicating a lot of the logic of a write lookup again but with the caveat
-				// that we're already holding one of the shard locks and need to pass that
-				// context on. One thing I was considering at the time was using a hashmap to
-				// manage the lock guards and passing that around?
-				todo!("finish rehash implementation")
-				// match new.tag() {
-				// 	EntryTag::Empty | EntryTag::RehashTombstone => {
-				// 		shard.keys[shard_off].tag = EntryTag::Empty;
-				// 		unsafe {
-				// 			std::mem::swap(
-				// 				shard.keys[shard_off].val.assume_init_mut(),
-				// 				new.
-				// 	},
-				// 	EntryTag::Rehash => {
-						
-				// 	},
-				// 	_ => unreachable!()
-				// }
-			}
-		}
-		false
-	}
-
-	/// Drive the incremental rehash to completion.
-	///
-	/// `do_rehash` returns `true` once nothing is left to rehash and `false` after processing
-	/// a chunk (or when the resize lock was busy), so keep calling it until it reports `true`.
-	pub fn finish_rehash(&self) {
-		while !self.do_rehash() {}
-	}
-
-	/// Write-locks every shard and starts a rehash covering all buckets of the table.
-	pub fn shuffle(&self) {
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		let mut guards = Vec::with_capacity(map.dict_shards.len());
-		for shard in map.dict_shards.iter() {
-			guards.push(shard.write());
-		}
-		let total_buckets = map.get_num_buckets();
-		self.begin_rehash(&mut guards, total_buckets);
-	}
-	
-	/// Re-partition the dictionary into equally sized shards for a table of `num_buckets`.
-	///
-	/// Every shard gets `num_buckets / shard_count` slots and shard `i` starts at global
-	/// position `i * shard_size`, which is the layout the lookup code relies on
-	/// (`shard_size * shard_idx + shard_pos`). Both the start pointer and the length of each
-	/// shard's `keys`/`idxs` slices are therefore rewritten; changing only the length would
-	/// leave shards overlapping after a resize.
-	///
-	/// `shards` must contain the write guard of every shard, in shard order.
-	// NOTE(review): assumes all shards' `keys` (and `idxs`) live in one contiguous region
-	// that begins at shard 0, as `grow` does when it initializes new slots relative to the
-	// first shard's pointers - confirm against the init code.
-	fn reshard(&self, shards: &mut Vec<RwLockWriteGuard<'_, DictShard<'_, K>>>, num_buckets: usize) {
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		let shard_size = num_buckets / map.dict_shards.len();
-		let keys_base = shards[0].keys.as_mut_ptr();
-		let idxs_base = shards[0].idxs.as_mut_ptr();
-		for i in 0..map.dict_shards.len() {
-			let start = i * shard_size;
-			unsafe {
-				shards[i].keys = std::slice::from_raw_parts_mut(keys_base.add(start), shard_size);
-				shards[i].idxs = std::slice::from_raw_parts_mut(idxs_base.add(start), shard_size);
-			}
-		}
-	}
-
-	/// Resizes the three shared-memory areas (keys, indices, values) to the sizes estimated
-	/// for `num_buckets` buckets and the current number of shards.
-	///
-	/// # Errors
-	/// Propagates the first error returned by `set_size`.
-	///
-	/// # Panics
-	/// Panics if the table has no shared-memory handles (fixed-size table).
-	fn resize_shmem(&self, num_buckets: usize) -> Result<(), shmem::Error> {
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		let handles = self
-			.shmem_handles
-			.as_ref()
-			.expect("grow called on a fixed-size hash table");
-
-		let shard_count = map.dict_shards.len();
-		let (keys_size, idxs_size, vals_size) =
-			HashMapInit::<K, V, S>::estimate_sizes(num_buckets, shard_count);
-		handles.keys_shmem.set_size(keys_size)?;
-		handles.idxs_shmem.set_size(idxs_size)?;
-		handles.vals_shmem.set_size(vals_size)?;
-		Ok(())
-	}
-
-    /// Grows the table to `num_buckets` buckets.
-    ///
-    /// Takes the resize lock and the write lock of every shard, enlarges the shared-memory
-    /// areas, initializes the new buckets / keys / indices, re-partitions the shards and
-    /// finally starts a rehash over the old range. A no-op if the size is unchanged.
-    ///
-    /// # Errors
-    /// Returns the error from resizing the shared-memory areas.
-    ///
-    /// # Panics
-    /// Panics if `num_buckets` is smaller than the current number of buckets.
-    pub fn grow(&self, num_buckets: usize) -> Result<(), shmem::Error> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		let _resize_guard = self.resize_lock.lock();
-		let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
-
-		let old_num_buckets = map.bucket_arr.len();
-        assert!(
-            num_buckets >= old_num_buckets,
-            "grow called with a smaller number of buckets"
-        );
-        if num_buckets == old_num_buckets {
-            return Ok(());
-        }
-
-		// Grow memory areas and initialize each of them.
-		self.resize_shmem(num_buckets)?;                
-        unsafe {
-			// Chain the new buckets into a list; the last new bucket links to the old
-			// free-list head so the old free buckets stay reachable.
-			let buckets_ptr = map.bucket_arr.as_mut_ptr();
-            for i in old_num_buckets..num_buckets {
-                let bucket = buckets_ptr.add(i);
-                bucket.write(Bucket::empty(
-                    if i < num_buckets - 1 {
-                        BucketIdx::new(i + 1)
-                    } else {
-                        map.bucket_arr.free_head.load(Ordering::Relaxed)
-                    }
-                ));
-            }
-
-			// TODO(quantumish) a bit questionable to use pointers here
-			// New key slots are addressed relative to the first shard's base pointer.
-			let first_shard = &mut shards[0];
-			let keys_ptr = first_shard.keys.as_mut_ptr();			
-			for i in old_num_buckets..num_buckets {
-                let key = keys_ptr.add(i);
-                key.write(EntryKey {
-					tag: EntryTag::Empty,
-					val: MaybeUninit::uninit(),
-				});
-            }
-			
-			// Same for the index slots.
-			let idxs_ptr = first_shard.idxs.as_mut_ptr();
-			for i in old_num_buckets..num_buckets {
-                let idx = idxs_ptr.add(i);
-                idx.write(BucketIdx::INVALID);
-            }
-        }
-
-		self.reshard(&mut shards, num_buckets);
-		// The free list now starts at the first newly added bucket.
-        map.bucket_arr.free_head.store(
-			BucketIdx::new(old_num_buckets), Ordering::Relaxed
-		);
-        self.begin_rehash(&mut shards, old_num_buckets);
-        Ok(())
-    }
-
-    /// Starts shrinking the table to `num_buckets` by lowering the bucket allocation limit.
-    ///
-    /// Only the limit is recorded here (under the resize lock); `finish_shrink` performs the
-    /// actual resize.
-    ///
-    /// # Panics
-    /// Panics if `num_buckets` exceeds the current bucket count or if the table is
-    /// fixed-size (no shared-memory handles).
-    pub fn begin_shrink(&mut self, num_buckets: usize) {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        let _resize_guard = self.resize_lock.lock();
-        let current_buckets = map.get_num_buckets();
-        assert!(
-            num_buckets <= current_buckets,
-            "shrink called with a larger number of buckets"
-        );
-        // Only checked for presence; the handles are not used until `finish_shrink`.
-        let _handles = self
-            .shmem_handles
-            .as_ref()
-            .expect("shrink called on a fixed-size hash table");
-        let limit = BucketIdx::new(num_buckets);
-        map.bucket_arr.alloc_limit.store(limit, Ordering::SeqCst);
-    }
-
-	/// Returns the bucket count the table is being shrunk to, or `None` when the allocation
-	/// limit holds no valid index (no shrink has been started).
-	// TODO(quantumish): Safety? Maybe replace this with expanded version of finish_shrink?
-	pub fn shrink_goal(&self) -> Option<usize> {
-		let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-		map.bucket_arr
-			.alloc_limit
-			.load(Ordering::Relaxed)
-			.next_checked()
-	}
-
-    /// Completes a shrink started with `begin_shrink`.
-    ///
-    /// Under the resize lock and all shard write locks: shrinks the shared-memory areas to
-    /// the recorded goal, re-partitions the shards, clears the allocation limit and starts a
-    /// rehash over the new range. A no-op if the table already has the goal size.
-    ///
-    /// # Errors
-    /// Returns the error from resizing the shared-memory areas.
-    ///
-    /// # Panics
-    /// Panics if no shrink is in progress or if more buckets are in use than the goal allows.
-    pub fn finish_shrink(&self) -> Result<(), shmem::Error> {
-        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
-        let _resize_guard = self.resize_lock.lock();
-        let mut shards: Vec<_> = map.dict_shards.iter().map(|x| x.write()).collect();
-
-        let target = map
-            .bucket_arr
-            .alloc_limit
-            .load(Ordering::Relaxed)
-            .next_checked()
-            .expect("called finish_shrink when no shrink is in progress");
-
-        if target == map.get_num_buckets() {
-            return Ok(());
-        }
-
-        let in_use = map.bucket_arr.buckets_in_use.load(Ordering::Relaxed);
-        assert!(
-            in_use <= target,
-            "called finish_shrink before enough entries were removed"
-        );
-
-        self.resize_shmem(target)?;
-        self.reshard(&mut shards, target);
-
-        // An INVALID limit means "no shrink in progress".
-        map.bucket_arr.alloc_limit.store(BucketIdx::INVALID, Ordering::Relaxed);
-        self.begin_rehash(&mut shards, target);
-
-        Ok(())
-    }
-}
--- a/libs/neon-shmem/src/hash/bucket.rs
+++ b/libs/neon-shmem/src/hash/bucket.rs
@@ -1,301 +0,0 @@
-//! Lock-free stable array of buckets managed with a freelist.
-//!
-//! Since the positions of entries in the dictionary and the bucket array are not correlated,
-//! we either had to separately shard both and deal with the overhead of two lock acquisitions
-//! per read/write, or make the bucket array lock free. This is *generally* fine since most
-//! accesses of the bucket array are done while holding the lock on the corresponding dict shard
-//! and thus synchronized. May not hold up to the removals done by the LFC which is a problem.
-//!
-//! Routines are pretty closely adapted from https://timharris.uk/papers/2001-disc.pdf 
-//! 
-//! Notable caveats:
-//! - Can only store around 2^30 entries, which is actually only 10x our current workload.
-//!  - This is because we need two tag bits to distinguish full/empty and marked/unmarked entries.
-//! - Has not been seriously tested.
-//!
-//! Full entries also store the index to their corresponding dictionary entry in order
-//! to enable .entry_at_bucket() which is needed for the clock eviction algo in the LFC.
-
-use std::cell::UnsafeCell;
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use atomic::Atomic;
-
-#[derive(bytemuck::NoUninit, Clone, Copy, PartialEq, Eq)]
-#[repr(transparent)]
-pub(crate) struct BucketIdx(pub(super) u32);
-
-// This should always be true as `BucketIdx` is a simple newtype.
-const _: () = assert!(Atomic::<BucketIdx>::is_lock_free());
-
-impl BucketIdx {
-    /// Tag for next pointers in free entries.
-    const NEXT_TAG: u32 = 0b00 << 30;
-    /// Tag for marked next pointers in free entries.
-    const MARK_TAG: u32 = 0b01 << 30;
-    /// Tag for full entries.
-    const FULL_TAG: u32 = 0b10 << 30;
-    /// Reserved. Don't use me. Doubles as the mask selecting the two tag bits.
-    const RSVD_TAG: u32 = 0b11 << 30;
-
-    /// Invalid index within the bucket array (can be mixed with any tag).
-    /// Its raw value doubles as the mask selecting the 30 index bits.
-    pub const INVALID: Self = Self(0x3FFFFFFF);
-    /// Max index within the bucket array (can be mixed with any tag).
-    pub const MAX: usize = Self::INVALID.0 as usize - 1;
-
-    /// Whether the tag bits equal the MARK tag.
-    pub(super) fn is_marked(&self) -> bool {
-        self.0 & Self::RSVD_TAG == Self::MARK_TAG
-    }
-
-    /// Same index, with the tag bits replaced by the MARK tag.
-    pub(super) fn as_marked(self) -> Self {
-        Self((self.0 & Self::INVALID.0) | Self::MARK_TAG)
-    }
-
-    /// Same index, with the tag bits cleared (i.e. the NEXT tag).
-    pub(super) fn get_unmarked(self) -> Self {
-        Self(self.0 & Self::INVALID.0)
-    }
-
-    /// Creates a NEXT-tagged index. `val` must not exceed [`Self::MAX`]
-    /// (only checked in debug builds).
-    pub fn new(val: usize) -> Self {
-        // `MAX` itself is a valid index, so the bound is inclusive.
-        debug_assert!(val <= Self::MAX);
-        Self(val as u32)
-    }
-
-    /// Creates a FULL-tagged index. `val` must not exceed [`Self::MAX`]
-    /// (only checked in debug builds).
-    pub fn new_full(val: usize) -> Self {
-        // `MAX` itself is a valid index, so the bound is inclusive.
-        debug_assert!(val <= Self::MAX);
-        Self(val as u32 | Self::FULL_TAG)
-    }
-
-    /// Try to extract a valid index if the tag is NEXT.
-    /// Returns `None` for any other tag and for [`Self::INVALID`].
-    pub fn next_checked(&self) -> Option<usize> {
-        if self.0 & Self::RSVD_TAG == Self::NEXT_TAG && *self != Self::INVALID {
-            Some(self.0 as usize)
-        } else {
-            None
-        }
-    }
-
-    /// Try to extract an index if the tag is FULL. The tag bits are stripped from the result.
-    pub fn full_checked(&self) -> Option<usize> {
-        if self.0 & Self::RSVD_TAG == Self::FULL_TAG {
-            Some((self.0 & Self::INVALID.0) as usize)
-        } else {
-            None
-        }
-    }
-}
-
-/// Entry within the bucket array. The value is only initialized while the `next` field
-/// is tagged as FULL.
-pub(crate) struct Bucket<V> {
-	// Only initialized if `next` field is tagged with FULL.
-	pub val: MaybeUninit<V>,
-	// Either points to next entry in freelist if empty or points
-	// to the corresponding entry in dictionary if full.
-	pub next: Atomic<BucketIdx>,
-}
-
-impl<V> Bucket<V> {
-    /// Creates a bucket with no value whose `next` field holds `next`.
-    pub fn empty(next: BucketIdx) -> Self {
-        let next = Atomic::new(next);
-        let val = MaybeUninit::uninit();
-        Self { val, next }
-    }
-
-    /// Shared access to the stored value. The value is assumed to be initialized.
-    pub fn as_ref(&self) -> &V {
-        let slot = &self.val;
-        unsafe { slot.assume_init_ref() }
-    }
-
-    /// Exclusive access to the stored value. The value is assumed to be initialized.
-    pub fn as_mut(&mut self) -> &mut V {
-        let slot = &mut self.val;
-        unsafe { slot.assume_init_mut() }
-    }
-
-    /// Stores `new_val` and returns the value that was stored before.
-    /// The old value is assumed to be initialized.
-    pub fn replace(&mut self, new_val: V) -> V {
-        let current = unsafe { self.val.assume_init_mut() };
-        std::mem::replace(current, new_val)
-    }
-}
-
-pub(crate) struct BucketArray<'a, V> {
-	/// Buckets containing values.
-    pub(crate) buckets: &'a UnsafeCell<[Bucket<V>]>,
-    /// Head of the freelist.
-    pub(crate) free_head: Atomic<BucketIdx>,
-    /// Maximum index of a bucket allowed to be allocated.
-    pub(crate) alloc_limit: Atomic<BucketIdx>,
-    /// The number of currently occupied buckets.
-    pub(crate) buckets_in_use: AtomicUsize,
-    // Unclear what the purpose of this is.
-    pub(crate) _user_list_head: Atomic<BucketIdx>,
-}
-
-impl <'a, V> std::ops::Index<usize> for BucketArray<'a, V> {
-	type Output = Bucket<V>;
-		
-	fn index(&self, index: usize) -> &Self::Output {
-		let buckets: &[_] = unsafe { &*(self.buckets.get() as *mut _) };
-		&buckets[index]
-	}
-}
-
-impl <'a, V> std::ops::IndexMut<usize> for BucketArray<'a, V> {
-	fn index_mut(&mut self, index: usize) -> &mut Self::Output {
-		let buckets: &mut [_] = unsafe { &mut *(self.buckets.get() as *mut _) };
-		&mut buckets[index]
-	}
-}
-
-impl<'a, V> BucketArray<'a, V> {
-	pub fn new(buckets: &'a UnsafeCell<[Bucket<V>]>) -> Self {		
-		Self {
-			buckets,
-			free_head: Atomic::new(BucketIdx(0)),
-			_user_list_head: Atomic::new(BucketIdx(0)),
-			alloc_limit: Atomic::new(BucketIdx::INVALID),
-			buckets_in_use: 0.into(),
-		}
-	}
-
-	pub fn as_mut_ptr(&self) -> *mut Bucket<V> {
-		unsafe { (&mut *self.buckets.get()).as_mut_ptr() }
-	}
-
-	pub fn get_mut(&self, index: usize) -> &mut Bucket<V> {
-		let buckets: &mut [_] = unsafe { &mut *(self.buckets.get() as *mut _) };
-		&mut buckets[index]
-	}
-	
-	pub fn len(&self) -> usize {
-		unsafe { (&*self.buckets.get()).len() }
-	}
-
-	/// Deallocate a bucket, adding it to the free list, and return the value it held.
-	///
-	/// The bucket at `pos` must currently hold an initialized value.
-	// Adapted from List::insert in https://timharris.uk/papers/2001-disc.pdf
-	pub fn dealloc_bucket(&self, pos: usize) -> V {
-		// Move the value out *before* the bucket becomes reachable through the free list:
-		// once the CAS below succeeds, a concurrent `alloc_bucket` may claim this bucket and
-		// overwrite `val`.
-		let value = unsafe { self[pos].val.assume_init_read() };
-
-		let mut free = self.free_head.load(Ordering::Relaxed);
-		loop {
-			// Link this bucket in front of the current head, then try to become the head.
-			self[pos].next.store(free, Ordering::Relaxed);
-			match self.free_head.compare_exchange_weak(
-				free, BucketIdx::new(pos), Ordering::Relaxed, Ordering::Relaxed
-			) {
-				Ok(_) => {
-					self.buckets_in_use.fetch_sub(1, Ordering::Relaxed);
-					return value;
-				}
-				// Lost the race (or failed spuriously): retry against the head we observed.
-				Err(current) => free = current,
-			}
-		}
-	}
-
-	/// Find a usable bucket at the front of the free list.
-	///
-	/// Returns `(left, right)`: `right` is the candidate free bucket, or
-	/// `BucketIdx::INVALID` if the walk ran off the end of the list; `left` is the last node
-	/// seen whose `next` was unmarked (`BucketIdx::INVALID` stands for `free_head` itself).
-	/// If nodes were skipped between the two, `left` is relinked straight to `right` with a
-	/// CAS before returning.
-	// Adapted from List::search in https://timharris.uk/papers/2001-disc.pdf
-	#[allow(unused_assignments)]
-	fn find_bucket(&self) -> (BucketIdx, BucketIdx) {
-		let mut left_node = BucketIdx::INVALID;
-		let mut right_node = BucketIdx::INVALID;
-		let mut left_node_next = BucketIdx::INVALID;
-		
-		loop { 
-			// `t` is the node under inspection (INVALID = the list head), `t_next` its link.
-			let mut t = BucketIdx::INVALID;
-			let mut t_next = self.free_head.load(Ordering::Relaxed);
-			let alloc_limit = self.alloc_limit.load(Ordering::Relaxed).next_checked();
-			// Keep walking while `t` is marked, is the head placeholder, or lies beyond
-			// the allocation limit (when one is set).
-			while t_next.is_marked() || t.next_checked()
-				.map_or(true, |v| alloc_limit.map_or(false, |l| v > l))
-			{
-				if !t_next.is_marked() {
-					left_node = t;
-					left_node_next = t_next;
-				}
-				t = t_next.get_unmarked();
-				if t == BucketIdx::INVALID { break }
-				t_next = self[t.0 as usize].next.load(Ordering::Relaxed);
-			}
-			right_node = t;
-
-			// Already adjacent: nothing to unlink, but retry if `right` got marked meanwhile.
-			if left_node_next == right_node {
-				if right_node != BucketIdx::INVALID && self[right_node.0 as usize]
-					.next.load(Ordering::Relaxed).is_marked()
-				{					
-					continue;
-				} else {
-					return (left_node, right_node);
-				}
-			}
-
-			let left_ref = if left_node != BucketIdx::INVALID {
-				&self[left_node.0 as usize].next					
-			} else { &self.free_head };
-			
-			// Unlink the skipped nodes by pointing `left` directly at `right`.
-			if left_ref.compare_exchange_weak(
-				left_node_next, right_node, Ordering::Relaxed, Ordering::Relaxed
-			).is_ok() {
-				if right_node != BucketIdx::INVALID && self[right_node.0 as usize]
-					.next.load(Ordering::Relaxed).is_marked()
-				{
-					continue;
-				} else {
-					return (left_node, right_node);
-				}
-			}			
-		}
-	}
-
-	/// Pop a bucket from the free list, store `value` in it and tag it as full with the
-	/// dictionary position `key_pos`.
-	///
-	/// Returns the index of the allocated bucket, or `None` if no usable free bucket exists.
-	// Adapted from List::delete in https://timharris.uk/papers/2001-disc.pdf
-	#[allow(unused_assignments)]
-	pub(crate) fn alloc_bucket(&self, value: V, key_pos: usize) -> Option<BucketIdx> {
-		let mut right_node_next = BucketIdx::INVALID;
-		let mut left_idx = BucketIdx::INVALID;
-		let mut right_idx = BucketIdx::INVALID;
-
-		// Step 1: logically delete a free node by marking its `next` link.
-		loop {
-			(left_idx, right_idx) = self.find_bucket();
-			if right_idx == BucketIdx::INVALID {
-				return None;
-			}
-
-			let right = &self[right_idx.0 as usize];
-			right_node_next = right.next.load(Ordering::Relaxed);
-			if !right_node_next.is_marked() {
-				// A weak CAS is fine here: a spurious failure just retries the loop.
-				if right.next.compare_exchange_weak(
-					right_node_next, right_node_next.as_marked(),
-					Ordering::Relaxed, Ordering::Relaxed
-				).is_ok() {
-					break;
-				}
-			}
-		}
-
-		let left_ref = if left_idx != BucketIdx::INVALID {
-			&self[left_idx.0 as usize].next
-		} else {
-			&self.free_head
-		};
-
-		// Step 2: physically unlink the node. This CAS is attempted exactly once, so it must
-		// be the strong variant: `compare_exchange_weak` may fail spuriously even when the
-		// predecessor still points at `right_idx`, which would hit the `todo!()` below for
-		// no reason.
-		if left_ref.compare_exchange(
-			right_idx, right_node_next,
-			Ordering::Relaxed, Ordering::Relaxed
-		).is_err() {
-			// The predecessor really changed under us; recovering from that (re-searching and
-			// unlinking the marked node) is not implemented yet.
-			todo!()
-		}
-
-		self.buckets_in_use.fetch_add(1, Ordering::Relaxed);
-		self[right_idx.0 as usize].next.store(
-			BucketIdx::new_full(key_pos), Ordering::Relaxed
-		);
-		self.get_mut(right_idx.0 as usize).val.write(value);
-		Some(right_idx)
-	}
-
-	/// Resets every bucket to empty, relinks all of them into one free list starting at
-	/// bucket 0, and zeroes the in-use counter. Stored values are overwritten without being
-	/// dropped.
-	pub fn clear(&mut self) {
-		let bucket_count = self.len();
-		for i in 0..bucket_count {
-			// Each bucket links to its successor; the last one terminates the list.
-			let next = if i + 1 < bucket_count {
-				BucketIdx::new(i + 1)
-			} else {
-				BucketIdx::INVALID
-			};
-			self[i] = Bucket::empty(next);
-		}
-
-		self.free_head.store(BucketIdx(0), Ordering::Relaxed);
-		self.buckets_in_use.store(0, Ordering::Relaxed);
-	}
-}
-
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -1,335 +0,0 @@
-//! Sharded linear probing hash table.
-
-//! NOTE/FIXME: one major bug with this design is that the current hashmap DOES NOT TRACK
-//! the previous size of the hashmap and thus does lookups incorrectly/badly. This should
-//! be a reasonably minor fix?
-
-use std::cell::UnsafeCell;
-use std::hash::Hash;
-use std::mem::MaybeUninit;
-use std::sync::atomic::{Ordering, AtomicUsize};
-
-use crate::sync::*;
-use crate::hash::{
-	entry::*,
-	bucket::{BucketArray, Bucket, BucketIdx}
-};
-
-/// Metadata tag for the type of an entry in the hashmap.
-#[derive(PartialEq, Eq, Clone, Copy)]
-pub(crate) enum EntryTag {
-	/// An occupied entry inserted after a resize operation.
-	Occupied,
-	/// An occupied entry inserted before a resize operation
-	/// a.k.a. an entry that needs to be rehashed at some point.
-	Rehash,
-	/// An entry that was once `Occupied`.
-	Tombstone,
-	/// An entry that was once `Rehash`.
-	RehashTombstone,
-	/// An empty entry.
-	Empty,
-}
-
-/// Searching the chains of a hashmap oftentimes requires interpreting
-/// a set of metadata tags differently. This enum encodes the ways a
-/// metadata tag can be treated during a lookup.
-pub(crate) enum MapEntryType {
-	/// Should be treated as if it were occupied.
-	Occupied,
-	/// Should be treated as if it were a tombstone.
-	Tombstone,
-	/// Should be treated as if it were empty.
-	Empty,
-	/// Should be ignored.
-	Skip
-}
-
-/// A key within the dictionary component of the hashmap.
-pub(crate) struct EntryKey<K> {
-	// NOTE: This could be split out to save 3 bytes per entry!
-	// Wasn't sure it was worth the penalty of another shmem area.
-	pub(crate) tag: EntryTag,
-	pub(crate) val: MaybeUninit<K>,
-}
-
-/// A shard of the dictionary.
-pub(crate) struct DictShard<'a, K> {
-	pub(crate) keys: &'a mut [EntryKey<K>],
-	pub(crate) idxs: &'a mut [BucketIdx],
-}
-
-impl<'a, K> DictShard<'a, K> {
-	fn len(&self) -> usize {
-		self.keys.len()
-	}
-}
-
- pub(crate) struct MaybeUninitDictShard<'a, K> {
-	pub(crate) keys: &'a mut [MaybeUninit<EntryKey<K>>],
-	pub(crate) idxs: &'a mut [MaybeUninit<BucketIdx>],
-}
-
-/// Core hash table implementation.
-pub(crate) struct CoreHashMap<'a, K, V> {
-	/// Dictionary used to map hashes to bucket indices.
-    pub(crate) dict_shards: &'a mut [RwLock<DictShard<'a, K>>],
-	/// Stable bucket array used to store the values.
-	pub(crate) bucket_arr: BucketArray<'a, V>,
-	/// Index of the next entry to process for rehashing.
-	pub(crate) rehash_index: AtomicUsize,
-	/// Index of the end of the range to be rehashed.
-	pub(crate) rehash_end: AtomicUsize,
-}
-
-/// Error for when there are no empty buckets left but one is needed.
-#[derive(Debug, PartialEq)]
-pub struct FullError();
-
-impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
-    /// Builds a core map over caller-provided, still uninitialized storage.
-    ///
-    /// Every bucket is initialized as empty and chained to its successor (the last one ends
-    /// the chain), every dictionary slot is tagged `Empty` with an `INVALID` bucket index,
-    /// and both rehash cursors start at the bucket count, i.e. no rehash is pending.
-    pub fn new(
-        buckets_cell: &'a UnsafeCell<[MaybeUninit<Bucket<V>>]>,
-        dict_shards: &'a mut [RwLock<MaybeUninitDictShard<'a, K>>],
-    ) -> Self {
-        let buckets = unsafe { &mut *buckets_cell.get() };
-        let bucket_count = buckets.len();
-
-        // Initialize the buckets as one long free list.
-        for (i, slot) in buckets.iter_mut().enumerate() {
-            let next = if i + 1 < bucket_count {
-                BucketIdx::new(i + 1)
-            } else {
-                BucketIdx::INVALID
-            };
-            slot.write(Bucket::empty(next));
-        }
-
-        // Initialize the dictionary.
-        for shard in dict_shards.iter_mut() {
-            let mut guard = shard.write();
-            for key_slot in guard.keys.iter_mut() {
-                key_slot.write(EntryKey {
-                    tag: EntryTag::Empty,
-                    val: MaybeUninit::uninit(),
-                });
-            }
-            for idx_slot in guard.idxs.iter_mut() {
-                idx_slot.write(BucketIdx::INVALID);
-            }
-        }
-
-        // Everything is initialized now, so reinterpret the storage as its initialized type.
-        let buckets_cell = unsafe {
-            &*(buckets_cell as *const _ as *const UnsafeCell<_>)
-        };
-        // TODO: use std::slice::assume_init_mut() once it stabilizes
-        let dict_shards = unsafe {
-            std::slice::from_raw_parts_mut(dict_shards.as_mut_ptr().cast(), dict_shards.len())
-        };
-
-        Self {
-            dict_shards,
-            bucket_arr: BucketArray::new(buckets_cell),
-            rehash_index: bucket_count.into(),
-            rehash_end: bucket_count.into(),
-        }
-    }
-
-    /// Get the value associated with a key (if it exists) given its hash.
-    ///
-    /// Current-generation chains are searched first. Only if that misses and a rehash is
-    /// pending (`rehash_index < rehash_end`) are the old-generation chains searched.
-    pub fn get_with_hash(&'a self, key: &K, hash: u64) -> Option<ValueReadGuard<'a, V>> {
-        let rehash_cursor = self.rehash_index.load(Ordering::Relaxed);
-        let rehash_limit = self.rehash_end.load(Ordering::Relaxed);
-
-        // First search the chains from the current context (thus treat
-        // to-be-rehashed entries as tombstones within a current chain).
-        let current = self.get(key, hash, |tag| match tag {
-            EntryTag::Empty => MapEntryType::Empty,
-            EntryTag::Occupied => MapEntryType::Occupied,
-            _ => MapEntryType::Tombstone,
-        });
-        if current.is_some() || rehash_cursor >= rehash_limit {
-            return current;
-        }
-
-        // Search chains from the previous size of the map since a rehash is in progress.
-        // Ignore any entries inserted since the resize operation occurred.
-        self.get(key, hash, |tag| match tag {
-            EntryTag::Empty => MapEntryType::Empty,
-            EntryTag::Rehash => MapEntryType::Occupied,
-            _ => MapEntryType::Tombstone,
-        })
-    }
-	
-	/// Look up `key` for writing, given its hash.
-	///
-	/// Without a pending rehash this is a single current-generation lookup. While a rehash is
-	/// pending, the current generation is searched first; unless that finds the key, the
-	/// old-generation chains (entries tagged `Rehash`) are searched and that result is
-	/// returned instead.
-	///
-	/// # Errors
-	/// Returns [`FullError`] if the lookup whose result is returned found neither the key nor
-	/// a usable slot.
-	pub fn entry_with_hash(&'a mut self, key: K, hash: u64) -> Result<Entry<'a, K, V>, FullError> {
-		let ind = self.rehash_index.load(Ordering::Relaxed);
-		let end = self.rehash_end.load(Ordering::Relaxed);
-
-		let current_gen = |tag: EntryTag| match tag {
-			EntryTag::Empty => MapEntryType::Empty,
-			EntryTag::Occupied => MapEntryType::Occupied,
-			// We can't treat old entries as tombstones here, as we definitely can't
-			// insert over them! Instead we can just skip directly over them.
-			EntryTag::Rehash => MapEntryType::Skip,
-			_ => MapEntryType::Tombstone,
-		};
-
-		// No rehash pending: one lookup suffices and the key need not be cloned.
-		if ind >= end {
-			return self.entry(key, hash, current_gen);
-		}
-
-		let res = self.entry(key.clone(), hash, current_gen);
-		if matches!(res, Ok(Entry::Occupied(_))) {
-			return res;
-		}
-		// A vacant result still owns the write guard of its shard. Release it before probing
-		// again, otherwise the second lookup would try to write-lock a shard this thread
-		// already holds (presumably a non-reentrant lock - verify against `crate::sync`).
-		drop(res);
-
-		self.entry(key, hash, |tag| match tag {
-			EntryTag::Empty => MapEntryType::Empty,
-			EntryTag::Occupied => MapEntryType::Skip,
-			EntryTag::Rehash => MapEntryType::Occupied,
-			_ => MapEntryType::Tombstone
-		})
-	}
-	
-    /// Read-only probe for `key`, starting at the slot its hash maps to.
-    ///
-    /// `f` classifies each slot's tag: `Empty` ends the search with `None`, `Tombstone` and
-    /// `Skip` are stepped over, and `Occupied` slots have their key compared against `key`.
-    /// Shards are read-locked one at a time; on a hit the guard of the matching shard is
-    /// mapped onto the value and returned.
-    fn get<F>(&'a self, key: &K, hash: u64, f: F) -> Option<ValueReadGuard<'a, V>>
-    where
-        F: Fn(EntryTag) -> MapEntryType,
-    {
-        let num_buckets = self.get_num_buckets();
-        let num_shards = self.dict_shards.len();
-        let shard_size = num_buckets / num_shards;
-        let home = hash as usize % num_buckets;
-        let first_shard = home / shard_size;
-
-        for step in 0..num_shards {
-            let guard = self.dict_shards[(first_shard + step) % num_shards].read();
-            // Only the first shard is entered mid-way; later shards are scanned from 0.
-            let begin = if step == 0 { home % shard_size } else { 0 };
-            for slot in begin..guard.len() {
-                match f(guard.keys[slot].tag) {
-                    MapEntryType::Empty => return None,
-                    MapEntryType::Tombstone | MapEntryType::Skip => {}
-                    MapEntryType::Occupied => {
-                        let candidate = unsafe { guard.keys[slot].val.assume_init_ref() };
-                        if candidate != key {
-                            continue;
-                        }
-                        let bucket_idx = guard.idxs[slot]
-                            .next_checked()
-                            .expect("position is valid");
-                        return Some(RwLockReadGuard::map(guard, |_| {
-                            self.bucket_arr[bucket_idx].as_ref()
-                        }));
-                    }
-                }
-            }
-        }
-        None
-    }
-
-	
-    /// Look up `key` for writing, probing linearly from the slot its hash maps to.
-    ///
-    /// `f` decides how each slot's [`EntryTag`] is interpreted during the probe. Returns an
-    /// occupied entry if a slot classified as occupied holds `key`; otherwise a vacant entry
-    /// at the first slot classified as a tombstone or, failing that, at the first empty slot.
-    /// In both cases `key_pos` is the slot's global dictionary position,
-    /// `shard_size * shard_index + position_in_shard`.
-    ///
-    /// # Errors
-    /// Returns [`FullError`] if every shard was probed without finding the key, an empty
-    /// slot, or a reusable tombstone.
-    pub fn entry<F>(&'a self, key: K, hash: u64, f: F) -> Result<Entry<'a, K, V>, FullError>
-    where
-        F: Fn(EntryTag) -> MapEntryType,
-    {
-        // We need to keep holding on the locks for each shard we process since if we don't find the
-        // key anywhere, we want to insert it at the earliest possible position (which may be several
-        // shards away). Ideally cross-shard chains are quite rare, so this shouldn't be a big deal.
-        //
-        // NB: Somewhat real chance of a deadlock! E.g. one thread has a ridiculously long chain that
-        // starts at block N and wraps around the hashmap to N-1, yet another thread begins a lookup at
-        // N-1 during this and has a chain that lasts a few shards. Then thread 1 is blocked on thread 2
-        // to get to shard N-1 but thread 2 is blocked on thread 1 to get to shard N. Pretty fringe case
-        // since chains shouldn't last very long, but still a problem with this somewhat naive sharding
-        // mechanism.
-        //
-        // We could fix this by either refusing to hold locks and only inserting into the earliest entry
-        // within the current shard (which effectively means after a while we forget about certain open
-        // entries at the end of shards) or by pivoting to a more involved concurrency setup?
-        let mut shards = Vec::new();
-        let mut insert_pos = None;
-        let mut insert_shard = None;
-
-        let num_buckets = self.get_num_buckets();
-        let shard_size = num_buckets / self.dict_shards.len();
-        let start_pos = hash as usize % num_buckets;
-        let shard_start = start_pos / shard_size;
-        for off in 0..self.dict_shards.len() {
-            let shard_idx = (shard_start + off) % self.dict_shards.len();
-            let shard = self.dict_shards[shard_idx].write();
-            // Whether the insertion candidate was found in *this* shard.
-            let mut inserted = false;
-            let entry_start = if off == 0 { start_pos % shard_size } else { 0 };
-            for entry_idx in entry_start..shard.len() {
-                match f(shard.keys[entry_idx].tag) {
-                    MapEntryType::Skip => continue,
-                    MapEntryType::Empty => {
-                        // End of the chain: insert at the earliest tombstone seen, if any,
-                        // otherwise right here.
-                        let ((shard, idx), shard_pos) = match (insert_shard, insert_pos) {
-                            (Some((s, i)), Some(p)) => ((s, i), p),
-                            (None, Some(p)) => ((shard, shard_idx), p),
-                            (None, None) => ((shard, shard_idx), entry_idx),
-                            _ => unreachable!()
-                        };
-                        return Ok(Entry::Vacant(VacantEntry {
-                            _key: key,
-                            shard,
-                            shard_pos,
-                            key_pos: (shard_size * idx) + shard_pos,
-                            bucket_arr: &self.bucket_arr,
-                        }))
-                    },
-                    MapEntryType::Tombstone => {
-                        if insert_pos.is_none() {
-                            insert_pos = Some(entry_idx);
-                            inserted = true;
-                        }
-                    },
-                    MapEntryType::Occupied => {
-                        let cand_key = unsafe { shard.keys[entry_idx].val.assume_init_ref() };
-                        if *cand_key == key {
-                            let bucket_pos = shard.idxs[entry_idx].next_checked().unwrap();
-                            return Ok(Entry::Occupied(OccupiedEntry {
-                                shard,
-                                shard_pos: entry_idx,
-                                bucket_pos,
-                                bucket_arr: &self.bucket_arr,
-                                // Global position of the matching slot, computed the same way
-                                // as for vacant entries. A running counter would be off by one
-                                // and would not wrap around with the shard index.
-                                key_pos: (shard_size * shard_idx) + entry_idx,
-                            }));
-                        }
-                    }
-                }
-            }
-            if inserted {
-                insert_shard = Some((shard, shard_idx));
-            } else {
-                // Keep the guard alive so the shard stays locked until we return.
-                shards.push(shard);
-            }
-        }
-
-        if let (Some((shard, idx)), Some(shard_pos)) = (insert_shard, insert_pos) {
-            Ok(Entry::Vacant(VacantEntry {
-                _key: key,
-                shard,
-                shard_pos,
-                key_pos: (shard_size * idx) + shard_pos,
-                bucket_arr: &self.bucket_arr,
-            }))
-        } else {
-            Err(FullError{})
-        }
-    }
-	
-    /// Get number of buckets in map.
-    pub fn get_num_buckets(&self) -> usize {
-        self.bucket_arr.len()
-    }
-
-    /// Empties the map: with every shard write-locked, tags all dictionary slots `Empty`,
-    /// invalidates their bucket indices, then clears the bucket array.
-    pub fn clear(&mut self) {
-        let mut guards = Vec::with_capacity(self.dict_shards.len());
-        for shard in self.dict_shards.iter() {
-            guards.push(shard.write());
-        }
-        for guard in guards.iter_mut() {
-            guard.keys.iter_mut().for_each(|slot| slot.tag = EntryTag::Empty);
-            guard.idxs.iter_mut().for_each(|idx| *idx = BucketIdx::INVALID);
-        }
-
-        self.bucket_arr.clear();
-    }
-}
- 
--- a/libs/neon-shmem/src/hash/entry.rs
+++ b/libs/neon-shmem/src/hash/entry.rs
@@ -1,81 +0,0 @@
-//! Equivalent of [`std::collections::hash_map::Entry`] for this hashmap.
-
-use crate::hash::{
-	core::{DictShard, EntryTag},
-	bucket::{BucketArray, BucketIdx}
-};
-use crate::sync::{RwLockWriteGuard, ValueWriteGuard};
-
-use std::hash::Hash;
-
-pub enum Entry<'a, K, V> {
-    Occupied(OccupiedEntry<'a, K, V>),
-    Vacant(VacantEntry<'a, K, V>),
-}
-
-pub struct OccupiedEntry<'a, K, V> {
-    /// Mutable reference to the shard of the map the entry is in.
-    pub(crate) shard: RwLockWriteGuard<'a, DictShard<'a, K>>,
-	/// The position of the entry in the shard.
-    pub(crate) shard_pos: usize,
-	/// True logical position of the entry in the map.
-	pub(crate) key_pos: usize,
-	/// Mutable reference to the bucket array containing entry.
-	pub(crate) bucket_arr: &'a BucketArray<'a, V>,
-    /// The position of the bucket in the [`CoreHashMap`] bucket array.
-    pub(crate) bucket_pos: usize,
-}
-
-impl<K, V> OccupiedEntry<'_, K, V> {
-    pub fn get(&self) -> &V {
-		self.bucket_arr[self.bucket_pos].as_ref()
-    }
-
-    pub fn get_mut(&mut self) -> &mut V {
-		self.bucket_arr.get_mut(self.bucket_pos).as_mut()
-    }
-
-    /// Inserts a value into the entry, replacing (and returning) the existing value.
-    pub fn insert(&mut self, value: V) -> V {
-        self.bucket_arr.get_mut(self.bucket_pos).replace(value)
-    }
-
-    /// Removes the entry from the hash map, returning the value originally stored within it.
-    pub fn remove(&mut self) -> V {
-		self.shard.idxs[self.shard_pos] = BucketIdx::INVALID;
-		self.shard.keys[self.shard_pos].tag = EntryTag::Tombstone;
-        self.bucket_arr.dealloc_bucket(self.bucket_pos)
-    }
-}
-
-/// An abstract view into a vacant entry within the map.
-pub struct VacantEntry<'a, K, V> {
-    /// The key of the occupied entry
-    pub(crate) _key: K,
-    /// Mutable reference to the shard of the map the entry is in.
-    pub(crate) shard: RwLockWriteGuard<'a, DictShard<'a, K>>,
-	/// The position of the entry in the shard.
-    pub(crate) shard_pos: usize,
-	/// True logical position of the entry in the map.
-	pub(crate) key_pos: usize,
-	/// Mutable reference to the bucket array containing entry.
-	pub(crate) bucket_arr: &'a BucketArray<'a, V>,
-}
-
-impl<'a, K: Clone + Hash + Eq, V> VacantEntry<'a, K, V> {
-    /// Insert a value into the vacant entry, finding and populating an empty bucket in the process.
-    pub fn insert(mut self, value: V) -> ValueWriteGuard<'a, V> {
-		let pos = self.bucket_arr.alloc_bucket(value, self.key_pos)
-			.expect("bucket is available if entry is");
-		self.shard.keys[self.shard_pos].tag = EntryTag::Occupied;
-		self.shard.keys[self.shard_pos].val.write(self._key);
-		let idx = pos.next_checked().expect("position is valid");
-		self.shard.idxs[self.shard_pos] = pos;
-
-        RwLockWriteGuard::map(self.shard, |_| {
-            self.bucket_arr.get_mut(idx).as_mut()
-        })
-    }
-}
-	
-
--- a/libs/neon-shmem/src/hash/tests.rs
+++ b/libs/neon-shmem/src/hash/tests.rs
@@ -1,428 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::Debug;
-use std::mem::MaybeUninit;
-
-use crate::hash::Entry;
-use crate::hash::HashMapAccess;
-use crate::hash::HashMapInit;
-use crate::hash::core::FullError;
-
-use rand::seq::SliceRandom;
-use rand::{Rng, RngCore};
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    let w = HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, 100, "test_inserts")
-        .attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let res = w.entry((*k).into());
-        match res.unwrap() {
-            Entry::Occupied(mut e) => {
-                e.insert(idx);
-            }
-            Entry::Vacant(e) => {
-                _ = e.insert(idx);
-            }
-        };
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let x = w.get(&(*k).into());
-        let value = x.as_deref().copied();
-        assert_eq!(value, Some(idx));
-    }
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.contains(&key) {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op(
-    op: &TestOp,
-    map: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    let entry = map.entry(op.0);
-    let hash_existing = match op.1 {
-        Some(new) => match entry.unwrap() {
-            Entry::Occupied(mut e) => Some(e.insert(new)),
-            Entry::Vacant(e) => {
-                _ = e.insert(new);
-                None
-            }
-        },
-        None => match entry.unwrap() {
-            Entry::Occupied(mut e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
-        },
-    };
-
-    assert_eq!(shadow_existing, hash_existing);
-}
-
-fn do_random_ops(
-    num_ops: usize,
-    size: u32,
-    del_prob: f64,
-    writer: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-    rng: &mut rand::rngs::ThreadRng,
-) {
-    for i in 0..num_ops {
-        let key: TestKey = ((rng.next_u32() % size) as u128).into();
-        let op = TestOp(
-            key,
-            if rng.random_bool(del_prob) {
-                Some(i)
-            } else {
-                None
-            },
-        );
-        apply_op(&op, writer, shadow);
-    }
-}
-
-fn do_deletes(
-    num_ops: usize,
-    writer: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    for _ in 0..num_ops {
-        let (k, _) = shadow.pop_first().unwrap();
-        writer.remove(&k);
-    }
-}
-
-fn do_shrink(
-    writer: &mut HashMapAccess<TestKey, usize>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-    to: usize,
-) {
-    assert!(writer.shrink_goal().is_none());
-    writer.begin_shrink(to);
-    assert_eq!(writer.shrink_goal(), Some(to as usize));
-    while writer.get_num_buckets_in_use() > to as usize {
-        let (k, _) = shadow.pop_first().unwrap();
-        let entry = writer.entry(k).unwrap();
-        if let Entry::Occupied(mut e) = entry {
-            e.remove();
-        }
-    }
-    let old_usage = writer.get_num_buckets_in_use();
-    writer.finish_shrink().unwrap();
-    assert!(writer.shrink_goal().is_none());
-    assert_eq!(writer.get_num_buckets_in_use(), old_usage);
-}
-
-#[test]
-fn random_ops() {
-    let mut writer =
-        HashMapInit::<TestKey, usize>::new_resizeable_named(100000, 120000, 10, "test_random")
-            .attach_writer();
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let key: TestKey = (rng.sample(distribution) as u128).into();
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &mut writer, &mut shadow);
-    }
-}
-
-// #[test]
-// fn test_shuffle() {
-//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, 10, "test_shuf")
-//         .attach_writer();
-//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-//     let mut rng = rand::rng();
-
-//     do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-//     writer.shuffle();
-//     do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-// }
-
-// #[test]
-// fn test_grow() {
-//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 2000, 10, "test_grow")
-//         .attach_writer();
-//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-//     let mut rng = rand::rng();
-
-//     do_random_ops(10000, 1000, 0.75, &mut writer, &mut shadow, &mut rng);
-//     let old_usage = writer.get_num_buckets_in_use();
-//     writer.grow(1500).unwrap();
-//     assert_eq!(writer.get_num_buckets_in_use(), old_usage);
-//     assert_eq!(writer.get_num_buckets(), 1500);
-//     do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-// }
-
-#[test]
-fn test_clear() {
-    let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, 10, "test_clear")
-        .attach_writer();
-    // let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-    // let mut rng = rand::rng();
-    // do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    // writer.clear();
-    // assert_eq!(writer.get_num_buckets_in_use(), 0);
-    // assert_eq!(writer.get_num_buckets(), 1500);
-    // while let Some((key, _)) = shadow.pop_first() {
-    //     assert!(writer.get(&key).is_none());
-    // }
-    // do_random_ops(2000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-    // for i in 0..(1500 - writer.get_num_buckets_in_use()) {
-    //     writer.insert((1500 + i as u128).into(), 0).unwrap();
-    // }
-    // assert_eq!(writer.insert(5000.into(), 0), Err(FullError {}));
-    // writer.clear();
-    // assert!(writer.insert(5000.into(), 0).is_ok());
-}
-
-// #[test]
-// fn test_idx_remove() {
-//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, 10, "test_clear")
-//         .attach_writer();
-//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-//     let mut rng = rand::rng();
-//     do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-//     for _ in 0..100 {
-//         let idx = (rng.next_u32() % 1500) as usize;
-//         if let Some(e) = writer.entry_at_bucket(idx) {
-//             shadow.remove(&e._key);
-//             e.remove();
-//         }
-//     }
-//     while let Some((key, val)) = shadow.pop_first() {
-//         assert_eq!(*writer.get(&key).unwrap(), val);
-//     }
-// }
-
-// #[test]
-// fn test_idx_get() {
-//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_clear")
-//         .attach_writer();
-//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-//     let mut rng = rand::rng();
-//     do_random_ops(2000, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-//     for _ in 0..100 {
-//         let idx = (rng.next_u32() % 1500) as usize;
-//         if let Some(pair) = writer.get_at_bucket(idx) {
-//             {
-//                 let v: *const usize = &pair.1;
-//                 assert_eq!(writer.get_bucket_for_value(v), idx);
-//             }
-//             {
-//                 let v: *const usize = &pair.1;
-//                 assert_eq!(writer.get_bucket_for_value(v), idx);
-//             }
-//         }
-//     }
-// }
-
-// #[test]
-// fn test_shrink() {
-//     let mut writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink")
-//         .attach_writer();
-//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-//     let mut rng = rand::rng();
-
-//     do_random_ops(10000, 1500, 0.75, &mut writer, &mut shadow, &mut rng);
-//     do_shrink(&mut writer, &mut shadow, 1000);
-//     assert_eq!(writer.get_num_buckets(), 1000);
-//     do_deletes(500, &mut writer, &mut shadow);
-//     do_random_ops(10000, 500, 0.75, &mut writer, &mut shadow, &mut rng);
-//     assert!(writer.get_num_buckets_in_use() <= 1000);
-// }
-
-// #[test]
-// fn test_shrink_grow_seq() {
-//     let mut writer =
-//         HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 20000, "test_grow_seq")
-//             .attach_writer();
-//     let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-//     let mut rng = rand::rng();
-
-//     do_random_ops(500, 1000, 0.1, &mut writer, &mut shadow, &mut rng);
-//     eprintln!("Shrinking to 750");
-//     do_shrink(&mut writer, &mut shadow, 750);
-//     do_random_ops(200, 1000, 0.5, &mut writer, &mut shadow, &mut rng);
-//     eprintln!("Growing to 1500");
-//     writer.grow(1500).unwrap();
-//     do_random_ops(600, 1500, 0.1, &mut writer, &mut shadow, &mut rng);
-//     eprintln!("Shrinking to 200");
-//     while shadow.len() > 100 {
-//         do_deletes(1, &mut writer, &mut shadow);
-//     }
-//     do_shrink(&mut writer, &mut shadow, 200);
-//     do_random_ops(50, 1500, 0.25, &mut writer, &mut shadow, &mut rng);
-//     eprintln!("Growing to 10k");
-//     writer.grow(10000).unwrap();
-//     do_random_ops(10000, 5000, 0.25, &mut writer, &mut shadow, &mut rng);
-// }
-
-#[test]
-fn test_bucket_ops() {
-
-	let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1000, 1200, 10, "test_bucket_ops")
-        .attach_writer();
-    match writer.entry(1.into()).unwrap() {
-        Entry::Occupied(mut e) => {
-            e.insert(2);
-        }
-        Entry::Vacant(e) => {
-            _ = e.insert(2);
-        },
-    }
-    assert_eq!(writer.get_num_buckets_in_use(), 1);
-    assert_eq!(writer.get_num_buckets(), 1000);
-    assert_eq!(*writer.get(&1.into()).unwrap(), 2);
-    let pos = match writer.entry(1.into()).unwrap() {
-        Entry::Occupied(e) => {
-            assert_eq!(e._key, 1.into());
-            let pos = e.bucket_pos as usize;
-            pos
-        }
-        Entry::Vacant(_) => {
-            panic!("Insert didn't affect entry");
-        }
-    };
-    assert_eq!(unsafe { writer.get_at_bucket(pos).unwrap() }, &2);
-    {
-        let ptr: *const usize = &*writer.get(&1.into()).unwrap();
-        assert_eq!(writer.get_bucket_for_value(ptr), pos);
-    }
-    writer.remove(&1.into());
-    assert!(writer.get(&1.into()).is_none());
-}
-
-// #[test]
-// fn test_shrink_zero() {
-//     let mut writer =
-//         HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_shrink_zero")
-//             .attach_writer();
-//     writer.begin_shrink(0);
-//     for i in 0..1500 {
-//         writer.entry_at_bucket(i).map(|x| x.remove());
-//     }
-//     writer.finish_shrink().unwrap();
-//     assert_eq!(writer.get_num_buckets_in_use(), 0);
-//     let entry = writer.entry(1.into());
-//     if let Entry::Vacant(v) = entry {
-//         assert!(v.insert(2).is_err());
-//     } else {
-//         panic!("Somehow got non-vacant entry in empty map.")
-//     }
-//     writer.grow(50).unwrap();
-//     let entry = writer.entry(1.into());
-//     if let Entry::Vacant(v) = entry {
-//         assert!(v.insert(2).is_ok());
-//     } else {
-//         panic!("Somehow got non-vacant entry in empty map.")
-//     }
-//     assert_eq!(writer.get_num_buckets_in_use(), 1);
-// }
-
-// #[test]
-// #[should_panic]
-// fn test_grow_oom() {
-//     let writer = HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2000, "test_grow_oom")
-//         .attach_writer();
-//     writer.grow(20000).unwrap();
-// }
-
-// #[test]
-// #[should_panic]
-// fn test_shrink_bigger() {
-//     let mut writer =
-//         HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_bigger")
-//             .attach_writer();
-//     writer.begin_shrink(2000);
-// }
-
-// #[test]
-// #[should_panic]
-// fn test_shrink_early_finish() {
-//     let writer =
-//         HashMapInit::<TestKey, usize>::new_resizeable_named(1500, 2500, "test_shrink_early_finish")
-//             .attach_writer();
-//     writer.finish_shrink().unwrap();
-// }
-
-// #[test]
-// #[should_panic]
-// fn test_shrink_fixed_size() {
-//     let mut area = [MaybeUninit::uninit(); 10000];
-//     let init_struct = HashMapInit::<TestKey, usize>::with_fixed(3, &mut area);
-//     let mut writer = init_struct.attach_writer();
-//     writer.begin_shrink(1);
-// }
--- a/libs/neon-shmem/src/lib.rs
+++ b/libs/neon-shmem/src/lib.rs
@@ -1,5 +1,418 @@
 //! Shared memory utilities for neon communicator

-pub mod hash;
-pub mod shmem;
-pub mod sync;
+use std::num::NonZeroUsize;
+use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
+use std::ptr::NonNull;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use nix::errno::Errno;
+use nix::sys::mman::MapFlags;
+use nix::sys::mman::ProtFlags;
+use nix::sys::mman::mmap as nix_mmap;
+use nix::sys::mman::munmap as nix_munmap;
+use nix::unistd::ftruncate as nix_ftruncate;
+
+/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
+/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
+/// specified at creation.
+///
+/// The area is backed by an anonymous file created with memfd_create(). The full address space for
+/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
+/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
+/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
+/// future.
+pub struct ShmemHandle {
+    /// File descriptor of the backing file (memfd on Linux, a temporary file on macOS)
+    fd: OwnedFd,
+
+    max_size: usize, // size of the whole mapping in bytes, *including* the SharedStruct header
+
+    /// Pointer to the beginning of the shared memory area. The header is stored there.
+    shared_ptr: NonNull<SharedStruct>,
+
+    /// Pointer to the beginning of the user data, i.e. HEADER_SIZE bytes past `shared_ptr`
+    pub data_ptr: NonNull<u8>,
+}
+
+/// This is stored at the beginning in the shared memory area.
+struct SharedStruct {
+    max_size: usize, // mapping size incl. header; set_size() asserts it matches ShmemHandle::max_size
+
+    /// Current size of the backing file in bytes, header included. The high-order bit is used
+    /// for the RESIZE_IN_PROGRESS flag
+    current_size: AtomicUsize,
+}
+
+const RESIZE_IN_PROGRESS: usize = 1 << 63; // top bit of 'current_size'; NOTE(review): assumes 64-bit usize
+
+const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>(); // bytes the header occupies before the user data
+
+/// Error type returned by the ShmemHandle functions.
+#[derive(thiserror::Error, Debug)]
+#[error("{msg}: {errno}")]
+pub struct Error {
+    pub msg: String, // description of the operation that failed
+    pub errno: Errno, // errno from the failing call (UnknownErrno when no OS error applies)
+}
+
+impl Error {
+    // Build an Error from a description of what failed plus the errno that caused it.
+    // The Display impl derived on the struct renders it as "<msg>: <errno>".
+    fn new(msg: &str, errno: Errno) -> Error {
+        let msg = msg.to_owned();
+        Self { msg, errno }
+    }
+}
+
+impl ShmemHandle {
+    /// Create a new shared memory area. To communicate between processes, the processes need to be
+    /// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
+    ///
+    /// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
+    /// processes can continue using it, however.
+    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
+        // First create the anonymous backing file, then map it and set up the header;
+        // an error from either step is returned to the caller unchanged.
+        create_backing_file(name)
+            .and_then(|backing_fd| Self::new_with_fd(backing_fd, initial_size, max_size))
+    }
+
+    fn new_with_fd(
+        fd: OwnedFd,
+        initial_size: usize,
+        max_size: usize,
+    ) -> Result<ShmemHandle, Error> {
+        // We reserve the high-order bit for the RESIZE_IN_PROGRESS flag, and the actual size
+        // is a little larger than this because of the SharedStruct header. Make the upper limit
+        // somewhat smaller than that, because with anything close to that, you'll run out of
+        // memory anyway.
+        if max_size >= 1 << 48 {
+            panic!("max size {max_size} too large");
+        }
+        if initial_size > max_size {
+            panic!("initial size {initial_size} larger than max size {max_size}");
+        }
+
+        // The actual initial / max size is the one given by the caller, plus the size of
+        // 'SharedStruct'.
+        let initial_size = HEADER_SIZE + initial_size;
+        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
+
+        // Reserve space for the initial size. Do this before mmap(), so that a failure here
+        // returns without leaving behind a mapping that nothing would ever unmap.
+        enlarge_file(fd.as_fd(), initial_size as u64)?;
+
+        // Reserve address space for the full 'max_size' with mmap
+        // TODO: Use MAP_HUGETLB if possible
+        let start_ptr = unsafe {
+            nix_mmap(
+                None,
+                max_size,
+                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
+                MapFlags::MAP_SHARED,
+                &fd,
+                0,
+            )
+        }
+        .map_err(|e| Error::new("mmap failed", e))?;
+
+        // Initialize the header
+        let shared: NonNull<SharedStruct> = start_ptr.cast();
+        unsafe {
+            shared.write(SharedStruct {
+                max_size: max_size.into(),
+                current_size: AtomicUsize::new(initial_size),
+            })
+        };
+
+        // The user data begins after the header
+        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
+
+        Ok(ShmemHandle {
+            fd,
+            max_size: max_size.into(),
+            shared_ptr: shared,
+            data_ptr,
+        })
+    }
+
+    // return reference to the header
+    fn shared(&self) -> &SharedStruct {
+        unsafe { &*self.shared_ptr.as_ptr() } // the header stays mapped until Drop unmaps it
+    }
+
+    /// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
+    /// when creating the area; the call panics if it is.
+    ///
+    /// Only one process/thread may resize the area at a time. A concurrent call is meant to be
+    /// detected and rejected with an Error.
+    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
+        let new_size = new_size + HEADER_SIZE;
+        let shared = self.shared();
+
+        if new_size > self.max_size {
+            panic!(
+                "new size ({}) is greater than max size ({})",
+                new_size, self.max_size
+            );
+        }
+        assert_eq!(self.max_size, shared.max_size);
+
+        // Lock the area by setting the RESIZE_IN_PROGRESS bit on top of the old size.
+        //
+        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
+        // and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
+        // since this is not performance-critical, better safe than sorry.
+        let mut old_size = shared.current_size.load(Ordering::Acquire);
+        loop {
+            if (old_size & RESIZE_IN_PROGRESS) != 0 {
+                return Err(Error::new(
+                    "concurrent resize detected",
+                    Errno::UnknownErrno,
+                ));
+            }
+            match shared.current_size.compare_exchange(
+                old_size,
+                old_size | RESIZE_IN_PROGRESS,
+                Ordering::Acquire,
+                Ordering::Relaxed,
+            ) {
+                Ok(_) => break,
+                Err(x) => old_size = x,
+            }
+        }
+
+        // Ok, we got the lock. 'old_size' is the size before the resize, without the flag bit.
+        //
+        // NB: If anything goes wrong, we *must* clear the bit!
+        let result = {
+            use std::cmp::Ordering::{Equal, Greater, Less};
+            match new_size.cmp(&old_size) {
+                Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
+                    Error::new("could not shrink shmem segment, ftruncate failed", e)
+                }),
+                Equal => Ok(()),
+                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
+            }
+        };
+
+        // Unlock: publish the new size on success, restore the old one on failure
+        shared.current_size.store(
+            if result.is_ok() { new_size } else { old_size },
+            Ordering::Release,
+        );
+
+        result
+    }
+
+    /// Returns the current user-visible size of the shared memory segment.
+    ///
+    /// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
+    /// responsibility not to access the area beyond the current size.
+    pub fn current_size(&self) -> usize {
+        let raw = self.shared().current_size.load(Ordering::Relaxed);
+        // Strip the lock flag, then the header, to get the size visible to the user.
+        (raw & !RESIZE_IN_PROGRESS) - HEADER_SIZE
+    }
+}
+
+impl Drop for ShmemHandle {
+    fn drop(&mut self) {
+        let len = self.max_size;
+        // SAFETY: 'shared_ptr' is the start of the mmap()ed region and 'len' is its full length.
+        // A munmap failure is ignored. The fd is closed automatically when OwnedFd is dropped.
+        let _ = unsafe { nix_munmap(self.shared_ptr.cast(), len) };
+    }
+}
+
+/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
+/// anonymous in-memory file. On macos, fall back to a regular file. That's good enough for
+/// development and testing, but in production we want the file to stay in memory.
+///
+/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
+#[allow(unused_variables)]
+fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
+    #[cfg(not(target_os = "macos"))]
+    {
+        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
+            .map_err(|e| Error::new("memfd_create failed", e))
+    }
+    #[cfg(target_os = "macos")]
+    {
+        let file = tempfile::tempfile().map_err(|e| {
+            Error::new(
+                "could not create temporary file to back shmem area",
+                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
+            )
+        })?;
+        Ok(OwnedFd::from(file))
+    }
+}
+
+fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
+    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
+    // we don't get a segfault later when trying to actually use it.
+    #[cfg(not(target_os = "macos"))]
+    {
+        nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
+            Error::new(
+                "could not grow shmem segment, posix_fallocate failed",
+                e,
+            )
+        })
+    }
+    // As a fallback on macos, which doesn't have posix_fallocate, use plain 'ftruncate'
+    #[cfg(target_os = "macos")]
+    {
+        nix::unistd::ftruncate(fd, size as i64)
+            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use nix::unistd::ForkResult;
+    use std::ops::Range;
+
+    /// check that all bytes in given range have the expected value.
+    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
+        range.for_each(|i| {
+            let actual = unsafe { ptr.add(i).read() };
+            assert_eq!(expected, actual, "unexpected byte at offset {i}");
+        });
+    }
+
+    /// Write 'b' to all bytes in the given range
+    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
+        unsafe { ptr.add(range.start).write_bytes(b, range.end - range.start) };
+    }
+
+    // simple single-process test of growing and shrinking
+    #[test]
+    fn test_shmem_resize() -> Result<(), Error> {
+        let max_size = 1024 * 1024;
+        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
+
+        assert_eq!(init_struct.current_size(), 0);
+
+        // Initial grow
+        let size1 = 10000;
+        init_struct.set_size(size1).unwrap();
+        assert_eq!(init_struct.current_size(), size1);
+
+        // Write some data, and read it back through the same mapping
+        let data_ptr = init_struct.data_ptr.as_ptr();
+        write_range(data_ptr, 0xAA, 0..size1);
+        assert_range(data_ptr, 0xAA, 0..size1);
+
+        // Shrink
+        let size2 = 5000;
+        init_struct.set_size(size2).unwrap();
+        assert_eq!(init_struct.current_size(), size2);
+
+        // Grow again, beyond the original size
+        let size3 = 20000;
+        init_struct.set_size(size3).unwrap();
+        assert_eq!(init_struct.current_size(), size3);
+
+        // Try to read it. The area that was shrunk and grown again should read as all zeros now
+        assert_range(data_ptr, 0xAA, 0..5000);
+        assert_range(data_ptr, 0, 5000..size1);
+
+        // Try to grow beyond max_size (disabled: set_size() panics here instead of returning Err)
+        //let size4 = max_size + 1;
+        //assert!(init_struct.set_size(size4).is_err());
+
+        // Dropping init_struct should unmap the memory
+        drop(init_struct);
+
+        Ok(())
+    }
+
+    /// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
+    /// but is stored in the shared memory area and works across processes. It's implemented by
+    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
+    struct SimpleBarrier {
+        num_procs: usize, // number of participants that must arrive before anyone proceeds
+        count: AtomicUsize, // total number of wait() calls so far, across all rounds
+    }
+
+    impl SimpleBarrier {
+        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
+            unsafe {
+                ptr.write(SimpleBarrier {
+                    num_procs,
+                    count: AtomicUsize::new(0),
+                })
+            }
+        }
+
+        pub fn wait(&self) {
+            let old = self.count.fetch_add(1, Ordering::AcqRel);
+            // Arrivals are counted cumulatively: 'old / num_procs' is the round we belong to.
+            let generation = old / self.num_procs;
+            // Release on arrival + Acquire on the reads make pre-barrier writes visible after it.
+            let mut current = old + 1;
+            while current < (generation + 1) * self.num_procs {
+                std::thread::sleep(std::time::Duration::from_millis(10));
+                current = self.count.load(Ordering::Acquire);
+            }
+        }
+    }
+
+    #[test]
+    fn test_multi_process() {
+        // Create an area with a max size of 10^12 bytes, far more than the test ever touches
+        let max_size = 1_000_000_000_000;
+        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
+        let ptr = init_struct.data_ptr.as_ptr();
+
+        // Grow to 10000 bytes and store the SimpleBarrier at the first suitably aligned address.
+        init_struct.set_size(10000).unwrap();
+        let barrier_ptr: *mut SimpleBarrier = unsafe {
+            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
+                .cast()
+        };
+        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
+        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
+
+        // Fork another test process. The code after this runs in both processes concurrently.
+        let fork_result = unsafe { nix::unistd::fork().unwrap() };
+
+        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
+        if fork_result.is_parent() {
+            write_range(ptr, 0xAA, 1000..2000);
+        } else {
+            write_range(ptr, 0xBB, 2000..3000);
+        }
+        barrier.wait();
+        // Verify the contents: each process must see its own and the other process's writes
+        assert_range(ptr, 0xAA, 1000..2000);
+        assert_range(ptr, 0xBB, 2000..3000);
+
+        // Grow, from the child this time
+        let size = 10_000_000;
+        if !fork_result.is_parent() {
+            init_struct.set_size(size).unwrap();
+        }
+        barrier.wait();
+
+        // Make some writes at the end of the newly grown area, from both processes
+        if fork_result.is_parent() {
+            write_range(ptr, 0xAA, (size - 10)..size);
+        } else {
+            write_range(ptr, 0xBB, (size - 20)..(size - 10));
+        }
+        barrier.wait();
+
+        // Verify the contents. (This runs in both processes)
+        assert_range(ptr, 0, (size - 1000)..(size - 20));
+        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
+        assert_range(ptr, 0xAA, (size - 10)..size);
+        // The parent waits for the child to exit. NOTE(review): the child's exit status is not checked
+        if let ForkResult::Parent { child } = fork_result {
+            nix::sys::wait::waitpid(child, None).unwrap();
+        }
+    }
+}
--- a/libs/neon-shmem/src/shmem.rs
+++ b/libs/neon-shmem/src/shmem.rs
@@ -1,409 +0,0 @@
-//! Dynamically resizable contiguous chunk of shared memory
-
-use std::num::NonZeroUsize;
-use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use nix::errno::Errno;
-use nix::sys::mman::MapFlags;
-use nix::sys::mman::ProtFlags;
-use nix::sys::mman::mmap as nix_mmap;
-use nix::sys::mman::munmap as nix_munmap;
-use nix::unistd::ftruncate as nix_ftruncate;
-
-/// `ShmemHandle` represents a shared memory area that can be shared by processes over `fork()`.
-/// Unlike shared memory allocated by Postgres, this area is resizable, up to `max_size` that's
-/// specified at creation.
-///
-/// The area is backed by an anonymous file created with `memfd_create()` (on macOS, a temporary
-/// file is used instead). The full address space for `max_size` is reserved up-front with
-/// `mmap()`, but whenever you call [`ShmemHandle::set_size`], the underlying file is resized. Do
-/// not access the area beyond the current size. Currently, that will cause the file to be
-/// expanded, but we might use `mprotect()` etc. to enforce that in the future.
-///
-/// NOTE(review): touching a shared file mapping past the end of the file normally raises
-/// `SIGBUS` instead of growing the file; confirm the "file is expanded" claim above.
-pub struct ShmemHandle {
-    /// File descriptor of the backing file (a memfd, or a temporary file on macOS)
-    fd: OwnedFd,
-
-    /// Total size of the mapping in bytes, including the `SharedStruct` header.
-    max_size: usize,
-
-    /// Pointer to the beginning of the shared memory area. The header is stored there.
-    shared_ptr: NonNull<SharedStruct>,
-
-    /// Pointer to the beginning of the user data, right after the header.
-    pub data_ptr: NonNull<u8>,
-}
-
-/// Header stored at the very beginning of the shared memory area, so that every process that
-/// maps the area sees the same copy.
-struct SharedStruct {
-    /// Total size of the mapping in bytes (header included), fixed when the area is created.
-    /// `ShmemHandle::set_size` cross-checks it against the handle's own copy.
-    max_size: usize,
-
-    /// Current size of the backing file in bytes (header included). The high-order bit is used
-    /// for the [`RESIZE_IN_PROGRESS`] flag.
-    current_size: AtomicUsize,
-}
-
-/// Flag kept in the most significant bit of `SharedStruct::current_size` while a resize is in
-/// progress. Derived from `usize::BITS` so that the constant is the top bit on every target,
-/// instead of overflowing where `usize` is narrower than 64 bits.
-const RESIZE_IN_PROGRESS: usize = 1 << (usize::BITS - 1);
-
-const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
-
-/// Error type returned by the [`ShmemHandle`] functions.
-///
-/// It is displayed as `{msg}: {errno}`.
-#[derive(thiserror::Error, Debug)]
-#[error("{msg}: {errno}")]
-pub struct Error {
-    /// Human-readable description of the operation that failed.
-    pub msg: String,
-    /// The `errno` associated with the failure. `ShmemHandle::set_size` reports
-    /// `Errno::UnknownErrno` when it detects a concurrent resize, as no system call failed.
-    pub errno: Errno,
-}
-
-impl Error {
-    /// Bundle a description of the failed operation with the `errno` it produced.
-    fn new(msg: &str, errno: Errno) -> Self {
-        let msg = msg.to_owned();
-        Self { msg, errno }
-    }
-}
-
-impl ShmemHandle {
-    /// Create a new shared memory area named `name`, with `initial_size` usable bytes and room
-    /// to grow up to `max_size` bytes.
-    ///
-    /// To communicate between processes, the processes need to be `fork()`'d after calling
-    /// this, so that the `ShmemHandle` is inherited by all processes.
-    ///
-    /// If the `ShmemHandle` is dropped, the memory is unmapped from the current process. Other
-    /// processes can continue using it, however.
-    pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<Self, Error> {
-        // The anonymous backing file comes first; the mapping is then built on top of it.
-        create_backing_file(name).and_then(|fd| Self::new_with_fd(fd, initial_size, max_size))
-    }
-
-    /// Map the backing file `fd` into this process and initialize the header at the start of
-    /// the mapping.
-    ///
-    /// `initial_size` and `max_size` are the sizes visible to the user; the size of the
-    /// `SharedStruct` header is added on top of both.
-    ///
-    /// Returns an error if `mmap()` fails or the file cannot be grown to the initial size. In
-    /// the latter case the mapping is released again before returning.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `max_size` is not below `1 << 48`, or if `initial_size` exceeds `max_size`.
-    fn new_with_fd(fd: OwnedFd, initial_size: usize, max_size: usize) -> Result<Self, Error> {
-        // We reserve the high-order bit for the `RESIZE_IN_PROGRESS` flag, and the actual size
-        // is a little larger than this because of the SharedStruct header. Make the upper limit
-        // somewhat smaller than that, because with anything close to that, you'll run out of
-        // memory anyway.
-        assert!(max_size < 1 << 48, "max size {max_size} too large");
-
-        assert!(
-            initial_size <= max_size,
-            "initial size {initial_size} larger than max size {max_size}"
-        );
-
-        // The actual initial / max size is the one given by the caller, plus the size of
-        // 'SharedStruct'.
-        let initial_size = HEADER_SIZE + initial_size;
-        let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
-
-        // Reserve address space for it with mmap
-        //
-        // TODO: Use MAP_HUGETLB if possible
-        let start_ptr = unsafe {
-            nix_mmap(
-                None,
-                max_size,
-                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
-                MapFlags::MAP_SHARED,
-                &fd,
-                0,
-            )
-        }
-        .map_err(|e| Error::new("mmap failed", e))?;
-
-        // Reserve space for the initial size. No `ShmemHandle` exists yet, so its `Drop` cannot
-        // clean up for us: on failure, unmap the region here instead of leaking it.
-        if let Err(e) = enlarge_file(fd.as_fd(), initial_size as u64) {
-            // SAFETY: `start_ptr` was returned by the mmap() call above for exactly `max_size`
-            // bytes, and no reference into the mapping has been created yet.
-            let _ = unsafe { nix_munmap(start_ptr, max_size.get()) };
-            return Err(e);
-        }
-
-        // Initialize the header
-        let shared: NonNull<SharedStruct> = start_ptr.cast();
-        unsafe {
-            shared.write(SharedStruct {
-                max_size: max_size.into(),
-                current_size: AtomicUsize::new(initial_size),
-            });
-        }
-
-        // The user data begins after the header
-        let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
-
-        Ok(Self {
-            fd,
-            max_size: max_size.into(),
-            shared_ptr: shared,
-            data_ptr,
-        })
-    }
-
-    /// Borrow the header that lives at the start of the shared memory area.
-    fn shared(&self) -> &SharedStruct {
-        let header: *const SharedStruct = self.shared_ptr.as_ptr();
-        unsafe { &*header }
-    }
-
-    /// Resize the shared memory area. `new_size` must not be larger than the `max_size` specified
-    /// when creating the area.
-    ///
-    /// This may only be called from one process/thread concurrently. We detect that case
-    /// and return an [`shmem::Error`](Error).
-    ///
-    /// While the resize is running, [`ShmemHandle::current_size`] keeps reporting the old size;
-    /// the new size becomes visible only once the backing file has actually been resized.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `new_size` exceeds the maximum size, or if the header's `max_size` does not
-    /// match the one recorded in this handle.
-    pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
-        // Use checked arithmetic: a wrapped sum would slip through the max-size assertion below.
-        let new_size = new_size
-            .checked_add(HEADER_SIZE)
-            .expect("new size overflows when adding the header size");
-        let shared = self.shared();
-
-        assert!(
-            new_size <= self.max_size,
-            "new size ({new_size}) is greater than max size ({})",
-            self.max_size
-        );
-
-        assert_eq!(self.max_size, shared.max_size);
-
-        // Lock the area by setting the bit in `current_size`
-        //
-        // Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
-        // and the `posix_fallocate`/`ftruncate` call is surely a synchronization point anyway. But
-        // since this is not performance-critical, better safe than sorry.
-        let mut old_size = shared.current_size.load(Ordering::Acquire);
-        loop {
-            if (old_size & RESIZE_IN_PROGRESS) != 0 {
-                return Err(Error::new(
-                    "concurrent resize detected",
-                    Errno::UnknownErrno,
-                ));
-            }
-            // Keep the old size in place and only add the flag: the new size must not be
-            // published before the file has been resized, and the flag is what makes a
-            // concurrent caller bail out above.
-            match shared.current_size.compare_exchange(
-                old_size,
-                old_size | RESIZE_IN_PROGRESS,
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => break,
-                Err(x) => old_size = x,
-            }
-        }
-
-        // Ok, we got the lock.
-        //
-        // NB: If anything goes wrong, we *must* clear the bit!
-        let result = {
-            use std::cmp::Ordering::{Equal, Greater, Less};
-            match new_size.cmp(&old_size) {
-                Less => nix_ftruncate(&self.fd, new_size as i64)
-                    .map_err(|e| Error::new("could not shrink shmem segment, ftruncate failed", e)),
-                Equal => Ok(()),
-                Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
-            }
-        };
-
-        // Unlock: neither value carries the flag, so this store both publishes the resulting
-        // size and releases the lock.
-        shared.current_size.store(
-            if result.is_ok() { new_size } else { old_size },
-            Ordering::Release,
-        );
-
-        result
-    }
-
-    /// Returns the current user-visible size of the shared memory segment, i.e. the size of the
-    /// backing file minus the header.
-    ///
-    /// NOTE: a concurrent [`ShmemHandle::set_size()`] call can change the size at any time.
-    /// It is the caller's responsibility not to access the area beyond the current size.
-    pub fn current_size(&self) -> usize {
-        let raw = self.shared().current_size.load(Ordering::Relaxed);
-        // Strip the lock bit before taking the header off.
-        (raw & !RESIZE_IN_PROGRESS) - HEADER_SIZE
-    }
-}
-
-impl Drop for ShmemHandle {
-    /// Unmap the whole region from this process. A failure of `munmap()` is ignored.
-    fn drop(&mut self) {
-        let base = self.shared_ptr.cast();
-        let len = self.max_size;
-        // SAFETY: `base` is the address mmap() returned for a mapping of `len` bytes, and the
-        // entire region is unmapped in one go.
-        let _ = unsafe { nix_munmap(base, len) };
-        // `fd` is an `OwnedFd` and closes itself when the struct's fields are dropped.
-    }
-}
-
-/// Create a "backing file" for the shared memory area. On every target except macOS, use
-/// `memfd_create()` to create an anonymous in-memory file. On macOS, fall back to a regular
-/// temporary file. That's good enough for development and testing, but in production we want
-/// the file to stay in memory.
-///
-/// Disable unused variables warnings because `name` is unused in the macos path.
-#[allow(unused_variables)]
-fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
-    // Exactly one of the two cfg-gated blocks below is compiled in, and it forms the function's
-    // tail expression.
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
-            .map_err(|e| Error::new("memfd_create failed", e))
-    }
-    #[cfg(target_os = "macos")]
-    {
-        let file = tempfile::tempfile().map_err(|e| {
-            Error::new(
-                "could not create temporary file to back shmem area",
-                // An I/O error without an OS error code is reported with raw errno 0.
-                nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
-            )
-        })?;
-        Ok(OwnedFd::from(file))
-    }
-}
-
-/// Grow the backing file behind `fd` so that it is at least `size` bytes long.
-fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
-    // Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
-    // we don't get a segfault later when trying to actually use it.
-    #[cfg(not(target_os = "macos"))]
-    {
-        nix::fcntl::posix_fallocate(fd, 0, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, posix_fallocate failed", e))
-    }
-    // As a fallback on macos, which doesn't have posix_fallocate, use plain `ftruncate`.
-    #[cfg(target_os = "macos")]
-    {
-        nix::unistd::ftruncate(fd, size as i64)
-            .map_err(|e| Error::new("could not grow shmem segment, ftruncate failed", e))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use nix::unistd::ForkResult;
-    use std::ops::Range;
-
-    /// Assert that every byte at the offsets in `range`, counted from `ptr`, equals `expected`.
-    fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
-        range.for_each(|i| {
-            let actual = unsafe { ptr.add(i).read() };
-            assert_eq!(expected, actual, "unexpected byte at offset {i}");
-        });
-    }
-
-    /// Fill the bytes at the offsets in `range`, counted from `ptr`, with the value `b`.
-    fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
-        let len = range.end - range.start;
-        unsafe { ptr.add(range.start).write_bytes(b, len) };
-    }
-
-    /// Simple single-process test of growing and shrinking: grow from empty, write a pattern,
-    /// shrink, grow again, and check which bytes survived.
-    #[test]
-    fn test_shmem_resize() -> Result<(), Error> {
-        let max_size = 1024 * 1024;
-        let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
-
-        assert_eq!(init_struct.current_size(), 0);
-
-        // Initial grow
-        let size1 = 10000;
-        init_struct.set_size(size1).unwrap();
-        assert_eq!(init_struct.current_size(), size1);
-
-        // Write some data
-        let data_ptr = init_struct.data_ptr.as_ptr();
-        write_range(data_ptr, 0xAA, 0..size1);
-        assert_range(data_ptr, 0xAA, 0..size1);
-
-        // Shrink
-        let size2 = 5000;
-        init_struct.set_size(size2).unwrap();
-        assert_eq!(init_struct.current_size(), size2);
-
-        // Grow again
-        let size3 = 20000;
-        init_struct.set_size(size3).unwrap();
-        assert_eq!(init_struct.current_size(), size3);
-
-        // Try to read it. The area that was shrunk and grown again should read as all zeros now
-        // (5000 is `size2`, the size the area was shrunk to).
-        assert_range(data_ptr, 0xAA, 0..5000);
-        assert_range(data_ptr, 0, 5000..size1);
-
-        // Try to grow beyond max_size
-        // Disabled: `set_size` asserts that the new size fits within the maximum, so this would
-        // panic rather than return an error.
-        //let size4 = max_size + 1;
-        //assert!(init_struct.set_size(size4).is_err());
-
-        // Dropping init_struct should unmap the memory
-        drop(init_struct);
-
-        Ok(())
-    }
-
-    /// This is used in tests to coordinate between test processes. It's like `std::sync::Barrier`,
-    /// but is stored in the shared memory area and works across processes. It's implemented by
-    /// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
-    struct SimpleBarrier {
-        /// Number of participants that must arrive before any of them is released.
-        num_procs: usize,
-        /// Total number of `wait()` calls so far, across all generations of the barrier.
-        count: AtomicUsize,
-    }
-
-    impl SimpleBarrier {
-        /// Initialize a barrier for `num_procs` participants in place at `ptr`.
-        ///
-        /// # Safety
-        ///
-        /// `ptr` must be valid for writes and properly aligned for `SimpleBarrier`. The memory
-        /// does not need to be initialized.
-        unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
-            // `write` stores the value without reading or dropping whatever bytes were at
-            // `ptr` before, which is the right operation for uninitialized memory.
-            unsafe {
-                ptr.write(SimpleBarrier {
-                    num_procs,
-                    count: AtomicUsize::new(0),
-                })
-            }
-        }
-
-        /// Block, by polling every 10 ms, until `num_procs` participants have called `wait()`
-        /// in the current generation.
-        ///
-        /// Arriving is a release operation and leaving is an acquire operation, so memory
-        /// writes made before `wait()` by one participant are visible to the others after it.
-        pub fn wait(&self) {
-            let old = self.count.fetch_add(1, Ordering::AcqRel);
-
-            // Every `num_procs` arrivals make up one generation of the barrier.
-            let generation = old / self.num_procs;
-
-            let mut current = old + 1;
-            while current < (generation + 1) * self.num_procs {
-                std::thread::sleep(std::time::Duration::from_millis(10));
-                current = self.count.load(Ordering::Acquire);
-            }
-        }
-    }
-
-    /// Fork a child process and check that both processes see each other's writes to the shared
-    /// area, before and after the child grows it.
-    ///
-    /// NOTE(review): the parent only checks that `waitpid()` itself succeeded, not the child's
-    /// exit status, and the child returns into the test harness once the test body is done.
-    /// Confirm that a failed assertion in the child still fails the test run.
-    #[test]
-    fn test_multi_process() {
-        // Initialize
-        let max_size = 1_000_000_000_000;
-        let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
-        let ptr = init_struct.data_ptr.as_ptr();
-
-        // Store the SimpleBarrier in the first 1k of the area.
-        init_struct.set_size(10000).unwrap();
-        let barrier_ptr: *mut SimpleBarrier = unsafe {
-            // Round the data pointer up to the alignment that SimpleBarrier requires.
-            ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
-                .cast()
-        };
-        // Two participants: the parent and the child forked below.
-        unsafe { SimpleBarrier::init(barrier_ptr, 2) };
-        let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
-
-        // Fork another test process. The code after this runs in both processes concurrently.
-        let fork_result = unsafe { nix::unistd::fork().unwrap() };
-
-        // In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, 1000..2000);
-        } else {
-            write_range(ptr, 0xBB, 2000..3000);
-        }
-        barrier.wait();
-        // Verify the contents. (in both processes)
-        assert_range(ptr, 0xAA, 1000..2000);
-        assert_range(ptr, 0xBB, 2000..3000);
-
-        // Grow, from the child this time
-        let size = 10_000_000;
-        if !fork_result.is_parent() {
-            init_struct.set_size(size).unwrap();
-        }
-        // The parent must not touch the new area before the child has finished growing it.
-        barrier.wait();
-
-        // make some writes at the end
-        if fork_result.is_parent() {
-            write_range(ptr, 0xAA, (size - 10)..size);
-        } else {
-            write_range(ptr, 0xBB, (size - 20)..(size - 10));
-        }
-        barrier.wait();
-
-        // Verify the contents. (This runs in both processes)
-        assert_range(ptr, 0, (size - 1000)..(size - 20));
-        assert_range(ptr, 0xBB, (size - 20)..(size - 10));
-        assert_range(ptr, 0xAA, (size - 10)..size);
-
-        // Only the parent has a child to wait for.
-        if let ForkResult::Parent { child } = fork_result {
-            nix::sys::wait::waitpid(child, None).unwrap();
-        }
-    }
-}
--- a/libs/neon-shmem/src/sync.rs
+++ b/libs/neon-shmem/src/sync.rs
@@ -1,169 +0,0 @@
-//! Simple utilities akin to what's in [`std::sync`] but designed to work with shared memory.
-
-use std::mem::MaybeUninit;
-use std::ptr::NonNull;
-
-use nix::errno::Errno;
-
-/// Reader-writer lock built on [`PthreadRwLock`].
-pub type RwLock<T> = lock_api::RwLock<PthreadRwLock, T>;
-/// Mutex built on [`PthreadMutex`].
-pub type Mutex<T> = lock_api::Mutex<PthreadMutex, T>;
-/// Guard for shared (read) access to an [`RwLock`]. Crate-private, unlike the write guard.
-pub(crate) type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, PthreadRwLock, T>;
-/// Guard for exclusive (write) access to an [`RwLock`].
-pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, PthreadRwLock, T>;
-/// Read guard that has been mapped to a part of the locked data.
-pub type ValueReadGuard<'a, T> = lock_api::MappedRwLockReadGuard<'a, PthreadRwLock, T>;
-/// Write guard that has been mapped to a part of the locked data.
-pub type ValueWriteGuard<'a, T> = lock_api::MappedRwLockWriteGuard<'a, PthreadRwLock, T>;
-
-/// Wrapper around a pointer to a [`libc::pthread_rwlock_t`].
-///
-/// `PthreadRwLock(None)` is an invalid state for this type. It only exists because the
-/// [`lock_api::RawRwLock`] trait has a mandatory `INIT` const member to allow for static
-/// initialization of the lock. Unfortunately, pthread seemingly does not support any way
-/// to statically initialize a `pthread_rwlock_t` with `PTHREAD_PROCESS_SHARED` set. However,
-/// `lock_api` allows manual construction and seemingly doesn't use `INIT` itself so for
-/// now it's set to this invalid value to satisfy the trait constraints.
-pub struct PthreadRwLock(Option<NonNull<libc::pthread_rwlock_t>>);
-
-impl PthreadRwLock {
-	/// Initialize the `pthread_rwlock_t` that `lock` points to as a process-shared lock, and
-	/// wrap the pointer.
-	///
-	/// NOTE(review): this function is safe to call but writes through `lock`; the caller must
-	/// make sure it points to writable memory that outlives the returned value.
-	///
-	/// # Panics
-	///
-	/// Panics if any of the pthread initialization calls reports an error, because the lock
-	/// would otherwise be used without having been initialized.
-	pub fn new(lock: NonNull<libc::pthread_rwlock_t>) -> Self {
-		unsafe {
-			let mut attrs = MaybeUninit::uninit();
-			let res = libc::pthread_rwlockattr_init(attrs.as_mut_ptr());
-			assert!(res == 0, "rwlockattr_init failed with {}", Errno::from_raw(res));
-			let res = libc::pthread_rwlockattr_setpshared(
-				attrs.as_mut_ptr(),
-				libc::PTHREAD_PROCESS_SHARED
-			);
-			assert!(res == 0, "rwlockattr_setpshared failed with {}", Errno::from_raw(res));
-			// TODO(quantumish): worth making this function fallible?
-			let res = libc::pthread_rwlock_init(lock.as_ptr(), attrs.as_mut_ptr());
-			// Safety: POSIX specifies that "any function affecting the attributes
-			// object (including destruction) shall not affect any previously
-			// initialized read-write locks".
-			libc::pthread_rwlockattr_destroy(attrs.as_mut_ptr());
-			assert!(res == 0, "rwlock_init failed with {}", Errno::from_raw(res));
-			Self(Some(lock))
-		}
-	}
-
-	/// Return the wrapped pointer, panicking if this value is the invalid `INIT` placeholder.
-	fn inner(&self) -> NonNull<libc::pthread_rwlock_t> {
-		self.0.unwrap_or_else(
-			|| panic!("PthreadRwLock constructed badly - something likely used RawRwLock::INIT")
-		)
-	}
-
-	/// Release the lock; pthread uses the same call for shared and exclusive holders.
-	fn unlock(&self) {
-		unsafe {
-			let res = libc::pthread_rwlock_unlock(self.inner().as_ptr());
-			assert!(res == 0, "unlock failed with {}", Errno::from_raw(res));
-		}
-	}
-}
-
-unsafe impl lock_api::RawRwLock for PthreadRwLock {
-	// NOTE(review): `GuardSend` lets guards move to another thread, while POSIX leaves
-	// unlocking from a thread that does not hold the lock undefined; confirm this is intended.
-	type GuardMarker = lock_api::GuardSend;
-
-	/// *DO NOT USE THIS.* See [`PthreadRwLock`] for the full explanation.
-	const INIT: Self = Self(None);
-
-	/// Block until a shared (read) lock is acquired. Panics if pthread reports an error.
-	fn lock_shared(&self) {
-		unsafe {
-			let res = libc::pthread_rwlock_rdlock(self.inner().as_ptr());
-			assert!(res == 0, "rdlock failed with {}", Errno::from_raw(res));
-		}
-	}
-
-	/// Try to acquire a shared (read) lock without blocking; returns whether it was acquired.
-	fn try_lock_shared(&self) -> bool {
-		unsafe {
-			let res = libc::pthread_rwlock_tryrdlock(self.inner().as_ptr());
-			match res {
-				0 => true,
-				// EBUSY: a writer holds the lock (or writers are waiting).
-				// EAGAIN: the maximum number of read locks has been exceeded.
-				libc::EBUSY | libc::EAGAIN => false,
-				o => panic!("try_rdlock failed with {}", Errno::from_raw(o)),
-			}
-		}
-	}
-
-	/// Block until the exclusive (write) lock is acquired. Panics if pthread reports an error.
-	fn lock_exclusive(&self) {
-		unsafe {
-			let res = libc::pthread_rwlock_wrlock(self.inner().as_ptr());
-			assert!(res == 0, "wrlock failed with {}", Errno::from_raw(res));
-		}
-	}
-
-	/// Try to acquire the exclusive (write) lock without blocking; returns whether it was
-	/// acquired.
-	fn try_lock_exclusive(&self) -> bool {
-		unsafe {
-			let res = libc::pthread_rwlock_trywrlock(self.inner().as_ptr());
-			match res {
-				0 => true,
-				// EBUSY is what POSIX specifies for a lock that is already held. EAGAIN is
-				// still accepted so that anything treated as "busy" before stays that way.
-				libc::EBUSY | libc::EAGAIN => false,
-				o => panic!("try_wrlock failed with {}", Errno::from_raw(o)),
-			}
-		}
-	}
-
-	unsafe fn unlock_exclusive(&self) {
-		self.unlock();
-	}
-
-	unsafe fn unlock_shared(&self) {
-		self.unlock();
-	}
-}
-
-/// Wrapper around a pointer to a [`libc::pthread_mutex_t`].
-///
-/// `PthreadMutex(None)` is an invalid state that exists only to give [`lock_api::RawMutex`]
-/// its mandatory `INIT` constant; using a lock in that state panics.
-pub struct PthreadMutex(Option<NonNull<libc::pthread_mutex_t>>);
-
-impl PthreadMutex {
-	/// Initialize the `pthread_mutex_t` that `lock` points to as a process-shared mutex, and
-	/// wrap the pointer.
-	///
-	/// NOTE(review): this function is safe to call but writes through `lock`; the caller must
-	/// make sure it points to writable memory that outlives the returned value.
-	///
-	/// # Panics
-	///
-	/// Panics if any of the pthread initialization calls reports an error, because the mutex
-	/// would otherwise be used without having been initialized.
-	pub fn new(lock: NonNull<libc::pthread_mutex_t>) -> Self {
-		unsafe {
-			let mut attrs = MaybeUninit::uninit();
-			let res = libc::pthread_mutexattr_init(attrs.as_mut_ptr());
-			assert!(res == 0, "mutexattr_init failed with {}", Errno::from_raw(res));
-			let res = libc::pthread_mutexattr_setpshared(
-				attrs.as_mut_ptr(),
-				libc::PTHREAD_PROCESS_SHARED
-			);
-			assert!(res == 0, "mutexattr_setpshared failed with {}", Errno::from_raw(res));
-			let res = libc::pthread_mutex_init(lock.as_ptr(), attrs.as_mut_ptr());
-			// The attributes object is no longer needed once the mutex has been initialized
-			// from it; destroying it is expected to leave the mutex untouched.
-			libc::pthread_mutexattr_destroy(attrs.as_mut_ptr());
-			assert!(res == 0, "mutex_init failed with {}", Errno::from_raw(res));
-			Self(Some(lock))
-		}
-	}
-
-	/// Return the wrapped pointer, panicking if this value is the invalid `INIT` placeholder.
-	fn inner(&self) -> NonNull<libc::pthread_mutex_t> {
-		self.0.unwrap_or_else(
-			|| panic!("PthreadMutex constructed badly - something likely used RawMutex::INIT")
-		)
-	}
-
-}
-
-unsafe impl lock_api::RawMutex for PthreadMutex {
-	// NOTE(review): `GuardSend` lets guards move to another thread, while POSIX requires a
-	// mutex to be unlocked by the thread that locked it; confirm this is intended.
-	type GuardMarker = lock_api::GuardSend;
-
-	/// *DO NOT USE THIS.* `Self(None)` is an invalid placeholder; every operation on it panics.
-	const INIT: Self = Self(None);
-
-	/// Block until the mutex is acquired. Panics if pthread reports an error.
-	fn lock(&self) {
-		unsafe {
-			let res = libc::pthread_mutex_lock(self.inner().as_ptr());
-			assert!(res == 0, "lock failed with {}", Errno::from_raw(res));
-		}
-	}
-
-	/// Try to acquire the mutex without blocking; returns whether it was acquired.
-	fn try_lock(&self) -> bool {
-		unsafe {
-			let res = libc::pthread_mutex_trylock(self.inner().as_ptr());
-			match res {
-				0 => true,
-				// EBUSY is what POSIX specifies for a mutex that is already locked. EAGAIN is
-				// still accepted so that anything treated as "busy" before stays that way.
-				libc::EBUSY | libc::EAGAIN => false,
-				o => panic!("try_lock failed with {}", Errno::from_raw(o)),
-			}
-		}
-	}
-
-	unsafe fn unlock(&self) {
-		unsafe {
-			let res = libc::pthread_mutex_unlock(self.inner().as_ptr());
-			assert!(res == 0, "unlock failed with {}", Errno::from_raw(res));
-		}
-	}
-}
--- a/libs/neonart/Cargo.toml
+++ b/libs/neonart/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "neonart"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-crossbeam-utils.workspace = true
-spin.workspace = true
-tracing.workspace = true
-
-[dev-dependencies]
-rand = "0.9.1"
-rand_distr = "0.5.1"
--- a/libs/neonart/src/algorithm.rs
+++ b/libs/neonart/src/algorithm.rs
@@ -1,599 +0,0 @@
-mod lock_and_version;
-pub(crate) mod node_ptr;
-mod node_ref;
-
-use std::vec::Vec;
-
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::algorithm::node_ptr::MAX_PREFIX_LEN;
-use crate::algorithm::node_ref::{NewNodeRef, NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
-use crate::allocator::OutOfMemoryError;
-
-use crate::TreeWriteGuard;
-use crate::UpdateAction;
-use crate::allocator::ArtAllocator;
-use crate::epoch::EpochPin;
-use crate::{Key, Value};
-
-pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
-
-/// Errors that can interrupt an operation on the tree.
-#[derive(Debug)]
-pub enum ArtError {
-    /// Another update interfered with the operation; the caller needs to retry.
-    ConcurrentUpdate, // need to retry
-    /// The allocator could not provide memory.
-    OutOfMemory,
-}
-
-// Lets `?` turn a lock/version conflict into `ArtError::ConcurrentUpdate`.
-impl From<ConcurrentUpdateError> for ArtError {
-    fn from(_: ConcurrentUpdateError) -> ArtError {
-        ArtError::ConcurrentUpdate
-    }
-}
-
-// Lets `?` turn an allocation failure into `ArtError::OutOfMemory`.
-impl From<OutOfMemoryError> for ArtError {
-    fn from(_: OutOfMemoryError) -> ArtError {
-        ArtError::OutOfMemory
-    }
-}
-
-/// Allocate a new root node with `allocator`. Thin wrapper around `node_ptr::new_root`.
-///
-/// Returns `OutOfMemoryError` if the allocation fails.
-pub fn new_root<V: Value>(
-    allocator: &impl ArtAllocator<V>,
-) -> Result<RootPtr<V>, OutOfMemoryError> {
-    node_ptr::new_root(allocator)
-}
-
-/// Look up `key` in the tree rooted at `root`.
-///
-/// The traversal is restarted from the root whenever `lookup_recurse` reports a concurrent
-/// update. The returned reference is tied to the lifetime of `epoch_pin`.
-pub(crate) fn search<'e, K: Key, V: Value>(
-    key: &K,
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<&'e V> {
-    loop {
-        let start = NodeRef::from_root_ptr(root);
-        match lookup_recurse(key.as_bytes(), start, None, epoch_pin) {
-            Ok(found) => return found,
-            // A concurrent update got in the way: start over from the root.
-            Err(_) => continue,
-        }
-    }
-}
-
-/// Search the tree rooted at `root` for an entry at or after `key`, as located by
-/// `next_recurse`, and return its full key bytes together with the value.
-///
-/// The search is restarted from the root whenever a concurrent update is detected. The key that
-/// is found is asserted to have the same length as `key`.
-pub(crate) fn iter_next<'e, V: Value>(
-    key: &[u8],
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<(Vec<u8>, &'e V)> {
-    loop {
-        // Each attempt collects the path from scratch.
-        let mut path = Vec::new();
-        let outcome = next_recurse(key, &mut path, NodeRef::from_root_ptr(root), epoch_pin);
-
-        match outcome {
-            // Concurrent update: throw the partial path away and try again.
-            Err(_) => continue,
-            Ok(found) => {
-                return found.map(|v| {
-                    assert_eq!(path.len(), key.len());
-                    (path, v)
-                });
-            }
-        }
-    }
-}
-
-/// Apply `value_fn` to the entry for `key` in the tree rooted at `root`.
-///
-/// `value_fn` is called with the current value (`None` if the key is absent) and decides what to
-/// do via the returned [`UpdateAction`]. The traversal is retried from the root whenever it
-/// fails with a concurrent update; running out of memory is returned to the caller.
-pub(crate) fn update_fn<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &K,
-    value_fn: F,
-    root: RootPtr<V>,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), OutOfMemoryError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    // `value_fn` is FnOnce, but every retry needs a callable to pass down. Park it in a Cell and
-    // take it out only when it is actually invoked.
-    // NOTE(review): the `unwrap()` below panics if an attempt invokes `value_fn` and then still
-    // fails with ConcurrentUpdate; presumably that cannot happen - confirm.
-    let value_fn_cell = std::cell::Cell::new(Some(value_fn));
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg);
-        let key_bytes = key.as_bytes();
-
-        match update_recurse(
-            key_bytes,
-            this_value_fn,
-            root_ref,
-            None,
-            None,
-            guard,
-            0,
-            key_bytes,
-        ) {
-            Ok(()) => break Ok(()),
-            Err(ArtError::ConcurrentUpdate) => {
-                continue; // retry
-            }
-            Err(ArtError::OutOfMemory) => break Err(OutOfMemoryError()),
-        }
-    }
-}
-
-// Look up the remaining `key` bytes in the subtree under `node`.
-//
-// Error means you must retry.
-//
-// This corresponds to the 'lookupOpt' function in the paper
-#[allow(clippy::only_used_in_recursion)]
-fn lookup_recurse<'e, V: Value>(
-    key: &[u8],
-    node: NodeRef<'e, V>,
-    parent: Option<ReadLockedNodeRef<V>>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    // Read-lock this node first, and only then release the parent.
-    let rnode = node.read_lock_or_restart()?;
-    if let Some(parent) = parent {
-        parent.read_unlock_or_restart()?;
-    }
-
-    // check if the prefix matches; on success, `prefix_len` bytes of the key are consumed
-    let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) {
-        prefix_len
-    } else {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    };
-
-    if rnode.is_leaf() {
-        // At a leaf, the prefix must have consumed everything that was left of the key.
-        assert_eq!(key.len(), prefix_len);
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let key = &key[prefix_len..];
-
-    // find child (or leaf value)
-    // NOTE(review): `key[0]` panics if the key is exhausted at an inner node; presumably all
-    // keys have the same length so this cannot happen - confirm.
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    match next_node {
-        None => Ok(None), // key not found
-        Some(child) => lookup_recurse(&key[1..], child, Some(rnode), epoch_pin),
-    }
-}
-
-// Search the subtree under `node` for a leaf whose full key is not smaller than `min_key`,
-// trying children in the order returned by `find_next_child_or_restart`.
-//
-// `path` holds the key bytes from the root down to `node`; this call appends to it. On success
-// it holds the key bytes of the leaf that was found.
-//
-// Error means you must retry.
-#[allow(clippy::only_used_in_recursion)]
-fn next_recurse<'e, V: Value>(
-    min_key: &[u8],
-    path: &mut Vec<u8>,
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    let prefix = rnode.get_prefix();
-    if !prefix.is_empty() {
-        path.extend_from_slice(prefix);
-    }
-
-    use std::cmp::Ordering;
-    // Compare the path so far against the equally long head of `min_key`.
-    // NOTE(review): the slice panics if `path` is longer than `min_key`; presumably all keys
-    // have the same length - confirm.
-    let comparison = path.as_slice().cmp(&min_key[0..path.len()]);
-    if comparison == Ordering::Less {
-        // Everything below this node sorts before `min_key`.
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    }
-
-    if rnode.is_leaf() {
-        assert_eq!(path.len(), min_key.len());
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    // Smallest child key byte worth looking at. If the path is already greater than `min_key`,
-    // every child qualifies.
-    let mut min_key_byte = match comparison {
-        Ordering::Less => unreachable!(), // checked this above already
-        Ordering::Equal => min_key[path.len()],
-        Ordering::Greater => 0,
-    };
-
-    loop {
-        match rnode.find_next_child_or_restart(min_key_byte)? {
-            None => {
-                return Ok(None);
-            }
-            Some((key_byte, child_ref)) => {
-                let path_len = path.len();
-                path.push(key_byte);
-                let result = next_recurse(min_key, path, child_ref, epoch_pin)?;
-                if result.is_some() {
-                    return Ok(result);
-                }
-                // Nothing under that child. Stop if it was the last possible key byte, so that
-                // `key_byte + 1` below cannot overflow.
-                if key_byte == u8::MAX {
-                    return Ok(None);
-                }
-                // Undo whatever the failed attempt appended, and move on to the next child.
-                path.truncate(path_len);
-                min_key_byte = key_byte + 1;
-            }
-        }
-    }
-}
-
-// Descend from `node` along `key` and apply `value_fn` where the key is, or would be, stored.
-//
-// `rparent` and `rgrandparent` are the read-locked nodes above `node`, each paired with the key
-// byte that leads from it to its child on the path. `level` and `orig_key` are only passed along
-// to the recursive call.
-//
-// Returns ArtError::ConcurrentUpdate if the caller must retry from the root.
-//
-// NOTE(review): when one of the insert helpers fails after the write locks have been taken, `?`
-// returns without the explicit write_unlock() calls; confirm the guards release on drop.
-//
-// This corresponds to the 'insertOpt' function in the paper
-#[allow(clippy::only_used_in_recursion)]
-#[allow(clippy::too_many_arguments)]
-pub(crate) fn update_recurse<'e, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &[u8],
-    value_fn: F,
-    node: NodeRef<'e, V>,
-    rparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    rgrandparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    guard: &'_ mut TreeWriteGuard<'e, K, V, A>,
-    level: usize,
-    orig_key: &[u8],
-) -> Result<(), ArtError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let rnode = node.read_lock_or_restart()?;
-
-    // Case 1: the node's prefix does not match the key, so the key is not in the tree. An
-    // insertion has to split the prefix, which also modifies the parent.
-    let prefix_match_len = rnode.prefix_matches(key);
-    if prefix_match_len.is_none() {
-        let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        match value_fn(None) {
-            UpdateAction::Nothing => {}
-            UpdateAction::Insert(new_value) => {
-                insert_split_prefix(key, new_value, &mut wnode, &mut wparent, parent_key, guard)?;
-            }
-            UpdateAction::Remove => {
-                panic!("unexpected Remove action on insertion");
-            }
-        }
-        wnode.write_unlock();
-        wparent.write_unlock();
-        return Ok(());
-    }
-    let prefix_match_len = prefix_match_len.unwrap();
-    let key = &key[prefix_match_len..];
-    let level = level + prefix_match_len;
-
-    // Case 2: we reached a leaf, so the key exists and `value_fn` gets to see its value.
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), 0);
-        let (rparent, parent_key) = rparent.expect("root cannot be leaf");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        // safety: Now that we have acquired the write lock, we have exclusive access to the
-        // value. XXX: There might be concurrent reads though?
-        let value_mut = wnode.get_leaf_value_mut();
-
-        match value_fn(Some(value_mut)) {
-            UpdateAction::Nothing => {
-                wparent.write_unlock();
-                wnode.write_unlock();
-            }
-            UpdateAction::Insert(_) => panic!("cannot insert over existing value"),
-            UpdateAction::Remove => {
-                // Unlink the leaf from its parent and mark it obsolete. The node is handed to
-                // the guard rather than freed right here.
-                guard.remember_obsolete_node(wnode.as_ptr());
-                wparent.delete_child(parent_key);
-                wnode.write_unlock_obsolete();
-
-                if let Some(rgrandparent) = rgrandparent {
-                    // FIXME: Ignore concurrency error. It doesn't lead to
-                    // corruption, but it means we might leak something. Until
-                    // another update cleans it up.
-                    let _ = cleanup_parent(wparent, rgrandparent, guard);
-                }
-            }
-        }
-
-        return Ok(());
-    }
-
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    if next_node.is_none() {
-        // Case 3: inner node without a child for the next key byte, so the key is not in the
-        // tree.
-        if rnode.is_full() {
-            // No room for another child: the node has to be replaced by a bigger one, which
-            // also modifies the parent.
-            let (rparent, parent_key) = rparent.expect("root node cannot become full");
-            let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-            let wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-            match value_fn(None) {
-                UpdateAction::Nothing => {
-                    wnode.write_unlock();
-                    wparent.write_unlock();
-                }
-                UpdateAction::Insert(new_value) => {
-                    // `wnode` is passed by value here; only the parent is unlocked afterwards.
-                    insert_and_grow(key, new_value, wnode, &mut wparent, parent_key, guard)?;
-                    wparent.write_unlock();
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-        } else {
-            // There is room in this node: only the node itself needs the write lock.
-            let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-            if let Some((rparent, _)) = rparent {
-                rparent.read_unlock_or_restart()?;
-            }
-            match value_fn(None) {
-                UpdateAction::Nothing => {}
-                UpdateAction::Insert(new_value) => {
-                    insert_to_node(&mut wnode, key, new_value, guard)?;
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-            wnode.write_unlock();
-        }
-        Ok(())
-    } else {
-        // Case 4: a child exists for the next key byte; descend into it.
-        let next_child = next_node.unwrap(); // checked above it's not None
-        if let Some((ref rparent, _)) = rparent {
-            rparent.check_or_restart()?;
-        }
-
-        // recurse to next level
-        update_recurse(
-            &key[1..],
-            value_fn,
-            next_child,
-            Some((rnode, key[0])),
-            rparent,
-            guard,
-            level + 1,
-            orig_key,
-        )
-    }
-}
-
-/// One step on the path from the root to a node, collected while dumping the tree.
-#[derive(Clone)]
-enum PathElement {
-    /// Prefix bytes stored in a node.
-    Prefix(Vec<u8>),
-    /// Key byte under which a child is stored in its parent.
-    KeyByte(u8),
-}
-
-impl std::fmt::Debug for PathElement {
-    /// Prefixes are printed as a byte list, key bytes as a plain decimal number.
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        match *self {
-            PathElement::KeyByte(key_byte) => write!(fmt, "{key_byte}"),
-            PathElement::Prefix(ref prefix) => write!(fmt, "{prefix:?}"),
-        }
-    }
-}
-
-/// Write a textual dump of the whole tree, starting at `root`, to `dst`.
-///
-/// The dump stops at the first concurrent-update error; that error is discarded and whatever
-/// was written up to that point stays in `dst`.
-pub(crate) fn dump_tree<V: Value + std::fmt::Debug>(
-    root: RootPtr<V>,
-    epoch_pin: &'_ EpochPin,
-    dst: &mut dyn std::io::Write,
-) {
-    let _ = dump_recurse(&[], NodeRef::from_root_ptr(root), epoch_pin, 0, dst);
-}
-
-// TODO: return an Err if writeln!() returns error, instead of unwrapping
-//
-// Print the subtree below 'node', indented by 'level' spaces. 'path' lists the prefixes and
-// key bytes that lead from the root to 'node'. Returns an error as soon as any node on the
-// way fails its optimistic lock or version check.
-#[allow(clippy::only_used_in_recursion)]
-fn dump_recurse<'e, V: Value + std::fmt::Debug>(
-    path: &[PathElement],
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-    level: usize,
-    dst: &mut dyn std::io::Write,
-) -> Result<(), ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    let indent = " ".repeat(level);
-
-    // The path of this node is the caller's path plus the node's own prefix, if it has one.
-    let mut path = path.to_vec();
-    let prefix = rnode.get_prefix();
-    if !prefix.is_empty() {
-        path.push(PathElement::Prefix(prefix.to_vec()));
-    }
-
-    if rnode.is_leaf() {
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let val = unsafe { vptr.as_ref().unwrap() };
-        writeln!(dst, "{indent} {path:?}: {val:?}").unwrap();
-        return Ok(());
-    }
-
-    // Internal node: probe every possible key byte in ascending order.
-    for key_byte in u8::MIN..=u8::MAX {
-        if let Some(child_ref) = rnode.find_child_or_restart(key_byte)? {
-            let rchild = child_ref.read_lock_or_restart()?;
-            let child_prefix = rchild.get_prefix();
-            writeln!(
-                dst,
-                "{} {:?}, {}: prefix {:?}",
-                indent, &path, key_byte, child_prefix
-            )
-            .unwrap();
-
-            let child_path: Vec<PathElement> = path
-                .iter()
-                .cloned()
-                .chain(std::iter::once(PathElement::KeyByte(key_byte)))
-                .collect();
-
-            dump_recurse(&child_path, child_ref, epoch_pin, level + 1, dst)?;
-        }
-    }
-
-    Ok(())
-}
-
-/// Insert `value` under `key` by splitting `node`'s prefix at the first byte where it differs
-/// from `key`. A new internal node holding the common prefix takes `node`'s place in `parent`
-/// (under `parent_key`), with the old node and a newly allocated value node as its children:
-///
-///```text
-///        [fooba]r -> value
-///
-/// [foo]b -> [a]r  -> value
-///      e -> [ls]e -> value
-///```
-///
-/// Both `node` and `parent` are passed in write-locked and are not unlocked here.
-/// Returns `OutOfMemoryError` if one of the two allocations fails.
-fn insert_split_prefix<K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    node: &mut WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key: u8,
-    guard: &'_ TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let old_node = node;
-    let old_prefix = old_node.get_prefix();
-    // Number of leading bytes shared by the key and the old prefix; common_prefix() panics
-    // if it finds no differing byte.
-    let common_prefix_len = common_prefix(key, old_prefix);
-
-    // Allocate a node for the new value.
-    // The byte at 'common_prefix_len' becomes the child's key byte, so only the bytes after
-    // it are passed on.
-    let new_value_node = allocate_node_for_value(
-        &key[common_prefix_len + 1..],
-        value,
-        guard.tree_writer.allocator,
-    )?;
-
-    // Allocate a new internal node with the common prefix
-    // FIXME: deallocate 'new_value_node' on OOM
-    // NOTE(review): NewNodeRef's Drop impl looks like it already frees 'new_value_node' when
-    // the '?' below returns early - confirm whether the FIXME is still needed.
-    let mut prefix_node =
-        node_ref::new_internal(&key[..common_prefix_len], guard.tree_writer.allocator)?;
-
-    // Add the old node and the new nodes to the new internal node
-    prefix_node.insert_old_child(old_prefix[common_prefix_len], old_node);
-    prefix_node.insert_new_child(key[common_prefix_len], new_value_node);
-
-    // Modify the prefix of the old child in place
-    // (the common part and the one byte now used as key byte are no longer stored in it)
-    old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1);
-
-    // replace the pointer in the parent
-    parent.replace_child(parent_key, prefix_node.into_ptr());
-
-    Ok(())
-}
-
-/// Add `value` as a new child of the write-locked `wnode`.
-///
-/// The first byte of `key` becomes the child's key byte; the remaining bytes go into the newly
-/// allocated value node(s). Returns `OutOfMemoryError` if that allocation fails.
-fn insert_to_node<K: Key, V: Value, A: ArtAllocator<V>>(
-    wnode: &mut WriteLockedNodeRef<V>,
-    key: &[u8],
-    value: V,
-    guard: &'_ TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let suffix = &key[1..];
-    let new_child = allocate_node_for_value(suffix, value, guard.tree_writer.allocator)?;
-    let new_child_ptr = new_child.into_ptr();
-    wnode.insert_child(key[0], new_child_ptr);
-    Ok(())
-}
-
-// Insert 'value' under 'key' into a node that has no room left: build a bigger node via
-// grow(), add the new value to it, and swap it into the parent in place of the old node.
-//
-// On entry: 'parent' and 'node' are locked
-// On success: 'wnode' has been registered with the guard as obsolete and unlocked as
-// obsolete; 'parent' is still locked and must be unlocked by the caller.
-fn insert_and_grow<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    wnode: WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key_byte: u8,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let mut bigger_node = wnode.grow(guard.tree_writer.allocator)?;
-
-    // FIXME: deallocate 'bigger_node' on OOM
-    // NOTE(review): NewNodeRef's Drop impl looks like it already frees 'bigger_node' when the
-    // '?' below returns early - confirm whether the FIXME is still needed.
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    bigger_node.insert_new_child(key[0], value_child);
-
-    // Replace the pointer in the parent
-    parent.replace_child(parent_key_byte, bigger_node.into_ptr());
-
-    // The old node is no longer reachable from the parent. Hand it to the guard and mark it
-    // obsolete while releasing its lock.
-    guard.remember_obsolete_node(wnode.as_ptr());
-    wnode.write_unlock_obsolete();
-
-    Ok(())
-}
-
-/// Try to simplify `wparent` after one of its children has been removed.
-///
-/// `wparent` is passed in write-locked and is consumed: it is either unlocked unchanged, or
-/// replaced in the grandparent and unlocked as obsolete. `rgrandparent` is the read-locked
-/// grandparent together with the key byte under which it stores the parent.
-///
-/// Any failure (a lock that could not be taken, or a failed allocation in `shrink`) is
-/// returned through `?`. On that path `wparent` goes out of scope while still locked.
-fn cleanup_parent<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    wparent: WriteLockedNodeRef<V>,
-    rgrandparent: (ReadLockedNodeRef<V>, u8),
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let (rgrandparent, grandparent_key_byte) = rgrandparent;
-
-    // If the parent becomes completely empty after the deletion, remove the parent from the
-    // grandparent. (This case is possible because we reserve only 8 bytes for the prefix.)
-    // TODO: not implemented.
-
-    // If the parent has only one child, replace the parent with the remaining child. (This is not
-    // possible if the child's prefix field cannot absorb the parent's)
-    if wparent.num_children() == 1 {
-        // Try to lock the remaining child. This can fail if the child is updated
-        // concurrently.
-        let (key_byte, remaining_child) = wparent.find_remaining_child();
-
-        let mut wremaining_child = remaining_child.write_lock_or_restart()?;
-
-        // The merged prefix is: parent's prefix + the child's key byte + child's own prefix.
-        if 1 + wremaining_child.get_prefix().len() + wparent.get_prefix().len() <= MAX_PREFIX_LEN {
-            let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-
-            // Ok, we have locked the leaf, the parent, the grandparent, and the parent's only
-            // remaining leaf. Proceed with the updates.
-
-            // Update the prefix on the remaining leaf
-            wremaining_child.prepend_prefix(wparent.get_prefix(), key_byte);
-
-            // Replace the pointer in the grandparent to point directly to the remaining leaf
-            wgrandparent.replace_child(grandparent_key_byte, wremaining_child.as_ptr());
-
-            // Mark the parent as deleted.
-            guard.remember_obsolete_node(wparent.as_ptr());
-            wparent.write_unlock_obsolete();
-            return Ok(());
-        }
-        // Prefix does not fit: fall through to the shrink check below.
-    }
-
-    // If the parent's children would fit on a smaller node type after the deletion, replace it with
-    // a smaller node.
-    if wparent.can_shrink() {
-        let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-        let smaller_node = wparent.shrink(guard.tree_writer.allocator)?;
-
-        // Replace the pointer in the grandparent
-        wgrandparent.replace_child(grandparent_key_byte, smaller_node.into_ptr());
-
-        guard.remember_obsolete_node(wparent.as_ptr());
-        wparent.write_unlock_obsolete();
-        return Ok(());
-    }
-
-    // nothing to do
-    wparent.write_unlock();
-    Ok(())
-}
-
-// Allocate a new leaf node to hold 'value'. If the key is long, we
-// may need to allocate new internal nodes to hold it too
-//
-// The chain is built bottom-up, from the leaf towards the start of 'key', and the topmost
-// node of the chain is returned. Returns OutOfMemoryError if any allocation fails.
-fn allocate_node_for_value<'a, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError> {
-    // The leaf takes at most the last MAX_PREFIX_LEN bytes of the key as its prefix.
-    // 'prefix_off' is the number of leading key bytes not yet stored in any node.
-    let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN);
-
-    let leaf_node = node_ref::new_leaf(&key[prefix_off..key.len()], value, allocator)?;
-
-    let mut node = leaf_node;
-    while prefix_off > 0 {
-        // Need another internal node
-        let remain_prefix = &key[0..prefix_off];
-
-        // The last remaining byte becomes the key byte of the node built so far; up to
-        // MAX_PREFIX_LEN bytes before it become the new internal node's prefix.
-        prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1);
-        let mut internal_node = node_ref::new_internal(
-            &remain_prefix[prefix_off..remain_prefix.len() - 1],
-            allocator,
-        )?;
-        internal_node.insert_new_child(*remain_prefix.last().unwrap(), node);
-        node = internal_node;
-    }
-
-    Ok(node)
-}
-
-/// Return the index of the first byte at which `a` and `b` differ, looking at no more than the
-/// first `MAX_PREFIX_LEN` bytes.
-///
-/// Panics with "prefixes are equal" if no differing byte is found. The comparison is bounded by
-/// the shorter slice, so a slice that is a strict prefix of the other produces that same panic
-/// instead of an index-out-of-bounds panic.
-fn common_prefix(a: &[u8], b: &[u8]) -> usize {
-    a.iter()
-        .zip(b)
-        .take(MAX_PREFIX_LEN)
-        .position(|(x, y)| x != y)
-        .expect("prefixes are equal")
-}
--- a/libs/neonart/src/algorithm/lock_and_version.rs
+++ b/libs/neonart/src/algorithm/lock_and_version.rs
@@ -1,117 +0,0 @@
-//! Each node in the tree contains one atomic word that stores three things:
-//!
-//! Bit 0: set if the node is "obsolete". An obsolete node has been removed from the tree,
-//!        but might still be accessed by concurrent readers until the epoch expires.
-//! Bit 1: set if the node is currently write-locked. Used as a spinlock.
-//! Bits 2-63: Version number, incremented every time the node is modified.
-//!
-//! AtomicLockAndVersion represents that.
-
-use std::sync::atomic::{AtomicU64, Ordering};
-
-/// Returned by the lock operations when the lock word is not in the expected state (changed
-/// version, already locked, or obsolete). The `*_or_restart` naming suggests callers restart
-/// their operation on this error.
-pub(crate) struct ConcurrentUpdateError();
-
-/// The per-node lock word: obsolete flag in bit 0, write-lock flag in bit 1, version counter
-/// in the remaining bits.
-pub(crate) struct AtomicLockAndVersion {
-    inner: AtomicU64,
-}
-
-impl AtomicLockAndVersion {
-    /// Create a lock word that is unlocked, not obsolete, and at version 0.
-    pub(crate) fn new() -> AtomicLockAndVersion {
-        let inner = AtomicU64::new(0);
-        Self { inner }
-    }
-}
-
-impl AtomicLockAndVersion {
-    /// Begin an optimistic read: wait until no writer holds the node, then return the lock
-    /// word that was observed. Fails if the node is marked obsolete.
-    pub(crate) fn read_lock_or_restart(&self) -> Result<u64, ConcurrentUpdateError> {
-        match self.await_node_unlocked() {
-            word if is_obsolete(word) => Err(ConcurrentUpdateError()),
-            word => Ok(word),
-        }
-    }
-
-    /// Verify that the lock word still equals `version`. Same check as
-    /// `read_unlock_or_restart`.
-    pub(crate) fn check_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        self.read_unlock_or_restart(version)
-    }
-
-    /// End an optimistic read: succeeds only if the lock word still equals `version`.
-    pub(crate) fn read_unlock_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        let current = self.inner.load(Ordering::Acquire);
-        if current == version {
-            Ok(())
-        } else {
-            Err(ConcurrentUpdateError())
-        }
-    }
-
-    /// Turn an optimistic read taken at `version` into a write lock. Fails if the lock word
-    /// has changed since then.
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        &self,
-        version: u64,
-    ) -> Result<(), ConcurrentUpdateError> {
-        self.inner
-            .compare_exchange(
-                version,
-                set_locked_bit(version),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .map(|_| ())
-            .map_err(|_| ConcurrentUpdateError())
-    }
-
-    /// Take the write lock without a preceding read lock. Does not wait: fails right away if
-    /// the node is locked or obsolete, or if the lock word changes underneath us.
-    pub(crate) fn write_lock_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        let old = self.inner.load(Ordering::Relaxed);
-        if is_obsolete(old) || is_locked(old) {
-            return Err(ConcurrentUpdateError());
-        }
-        // Same compare-and-swap as the upgrade path, starting from the word we just loaded.
-        self.upgrade_to_write_lock_or_restart(old)
-    }
-
-    /// Release the write lock and bump the version.
-    pub(crate) fn write_unlock(&self) {
-        // reset locked bit and overflow into version
-        self.inner.fetch_add(2, Ordering::Release);
-    }
-
-    /// Release the write lock, bump the version, and mark the node obsolete.
-    pub(crate) fn write_unlock_obsolete(&self) {
-        // set obsolete, reset locked, overflow into version
-        self.inner.fetch_add(3, Ordering::Release);
-    }
-
-    // Helper functions
-
-    /// Spin, yielding the thread between attempts, until the locked bit is clear. Returns the
-    /// lock word seen at that point.
-    fn await_node_unlocked(&self) -> u64 {
-        loop {
-            let word = self.inner.load(Ordering::Acquire);
-            if !is_locked(word) {
-                return word;
-            }
-            // spinlock
-            std::thread::yield_now();
-        }
-    }
-}
-
-/// Lock word for `version` with the write-lock bit (bit 1) set. Implemented as an addition,
-/// so `version` must not already have that bit set.
-fn set_locked_bit(version: u64) -> u64 {
-    const LOCKED: u64 = 0b10;
-    version + LOCKED
-}
-
-/// True if the obsolete flag (bit 0) is set in the lock word.
-fn is_obsolete(version: u64) -> bool {
-    const OBSOLETE: u64 = 0b01;
-    version & OBSOLETE != 0
-}
-
-/// True if the write-lock flag (bit 1) is set in the lock word.
-fn is_locked(version: u64) -> bool {
-    const LOCKED: u64 = 0b10;
-    version & LOCKED != 0
-}
--- a/libs/neonart/src/algorithm/node_ptr.rs
+++ b/libs/neonart/src/algorithm/node_ptr.rs
--- a/libs/neonart/src/algorithm/node_ref.rs
+++ b/libs/neonart/src/algorithm/node_ref.rs
@@ -1,349 +0,0 @@
-use std::fmt::Debug;
-use std::marker::PhantomData;
-
-use super::node_ptr;
-use super::node_ptr::NodePtr;
-use crate::EpochPin;
-use crate::Value;
-use crate::algorithm::lock_and_version::AtomicLockAndVersion;
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::allocator::ArtAllocator;
-use crate::allocator::OutOfMemoryError;
-
-/// A pointer to a tree node that has not been locked yet. The `'e` lifetime ties the
-/// reference to an `EpochPin`.
-pub struct NodeRef<'e, V> {
-    ptr: NodePtr<V>,
-
-    // Carries the 'e lifetime only; no data is stored.
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V> Debug for NodeRef<'e, V> {
-    /// Prints the Debug form of the underlying node pointer.
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.write_fmt(format_args!("{:?}", self.ptr))
-    }
-}
-
-impl<'e, V: Value> NodeRef<'e, V> {
-    /// Wrap the tree's root pointer.
-    pub(crate) fn from_root_ptr(root_ptr: NodePtr<V>) -> NodeRef<'e, V> {
-        Self {
-            ptr: root_ptr,
-            phantom: PhantomData,
-        }
-    }
-
-    /// Take an optimistic read lock on the node, remembering the version that was observed.
-    pub(crate) fn read_lock_or_restart(
-        &self,
-    ) -> Result<ReadLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.lockword()
-            .read_lock_or_restart()
-            .map(|version| ReadLockedNodeRef {
-                ptr: self.ptr,
-                version,
-                phantom: self.phantom,
-            })
-    }
-
-    /// Take the write lock on the node directly, without a preceding read lock.
-    pub(crate) fn write_lock_or_restart(
-        &self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.lockword()
-            .write_lock_or_restart()
-            .map(|()| WriteLockedNodeRef {
-                ptr: self.ptr,
-                phantom: self.phantom,
-            })
-    }
-
-    /// The node's lock word.
-    fn lockword(&self) -> &AtomicLockAndVersion {
-        self.ptr.lockword()
-    }
-}
-
-/// A reference to a node that has been optimistically read-locked. The functions re-check
-/// the version after each read.
-pub struct ReadLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>,
-    /// Lock word observed when the read lock was taken; later checks compare against it.
-    version: u64,
-
-    // Carries the 'e lifetime only; no data is stored.
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
-    /// Whether the node is a leaf. No version check is made here.
-    pub(crate) fn is_leaf(&self) -> bool {
-        self.ptr.is_leaf()
-    }
-
-    /// Whether the node is full. No version check is made here.
-    pub(crate) fn is_full(&self) -> bool {
-        self.ptr.is_full()
-    }
-
-    /// The node's prefix bytes. No version check is made here.
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    /// Note: because we're only holding a read lock, the prefix can change concurrently.
-    /// You must be prepared to restart, if read_unlock() returns error later.
-    ///
-    /// Returns the length of the prefix, or None if it's not a match
-    pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
-        self.ptr.prefix_matches(key)
-    }
-
-    /// Look up the child stored under `key_byte`, then validate the version before returning
-    /// the result.
-    pub(crate) fn find_child_or_restart(
-        &self,
-        key_byte: u8,
-    ) -> Result<Option<NodeRef<'e, V>>, ConcurrentUpdateError> {
-        let found = self.ptr.find_child(key_byte);
-        self.check_or_restart()?;
-
-        Ok(found.map(|ptr| NodeRef {
-            ptr,
-            phantom: self.phantom,
-        }))
-    }
-
-    /// Look up the next child starting from `min_key_byte`, then validate the version before
-    /// returning it together with its key byte.
-    pub(crate) fn find_next_child_or_restart(
-        &self,
-        min_key_byte: u8,
-    ) -> Result<Option<(u8, NodeRef<'e, V>)>, ConcurrentUpdateError> {
-        let found = self.ptr.find_next_child(min_key_byte);
-        self.check_or_restart()?;
-
-        Ok(found.map(|(k, ptr)| {
-            let child = NodeRef {
-                ptr,
-                phantom: self.phantom,
-            };
-            (k, child)
-        }))
-    }
-
-    /// Fetch the leaf's value, validate the version, and return the value as a raw pointer.
-    pub(crate) fn get_leaf_value_ptr(&self) -> Result<*const V, ConcurrentUpdateError> {
-        let value_ref = self.ptr.get_leaf_value();
-        self.check_or_restart()?;
-
-        // Extend the lifetime.
-        Ok(std::ptr::from_ref(value_ref))
-    }
-
-    /// Trade the read lock for a write lock. Fails if the node changed since it was read-locked.
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        let ReadLockedNodeRef {
-            ptr,
-            version,
-            phantom,
-        } = self;
-        ptr.lockword().upgrade_to_write_lock_or_restart(version)?;
-
-        Ok(WriteLockedNodeRef { ptr, phantom })
-    }
-
-    /// Give up the read lock, reporting whether the node changed while it was held.
-    pub(crate) fn read_unlock_or_restart(self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)
-    }
-
-    /// Validate that the node has not changed since it was read-locked, keeping the lock.
-    pub(crate) fn check_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)
-    }
-}
-
-/// A reference to a node whose write lock is held. The lock is released by `write_unlock()`
-/// or `write_unlock_obsolete()`, or when the reference is dropped.
-pub struct WriteLockedNodeRef<'e, V> {
-    // Set to null once the lock has been released explicitly, so that Drop does not unlock
-    // a second time.
-    ptr: NodePtr<V>,
-    phantom: PhantomData<&'e EpochPin<'e>>,
-}
-
-impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
-    /// Whether the node reports that it can be replaced by a smaller node.
-    pub(crate) fn can_shrink(&self) -> bool {
-        self.ptr.can_shrink()
-    }
-
-    /// Number of children currently stored in the node.
-    pub(crate) fn num_children(&self) -> usize {
-        self.ptr.num_children()
-    }
-
-    /// Release the write lock. The reference is consumed; its pointer is nulled so that Drop
-    /// does not unlock again.
-    pub(crate) fn write_unlock(mut self) {
-        let locked = std::mem::replace(&mut self.ptr, NodePtr::null());
-        locked.lockword().write_unlock();
-    }
-
-    /// Release the write lock and mark the node obsolete. The reference is consumed; its
-    /// pointer is nulled so that Drop does not unlock again.
-    pub(crate) fn write_unlock_obsolete(mut self) {
-        let locked = std::mem::replace(&mut self.ptr, NodePtr::null());
-        locked.lockword().write_unlock_obsolete();
-    }
-
-    /// The node's prefix bytes.
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    /// Forwards to the node's `truncate_prefix` with `new_prefix_len`.
-    pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
-        self.ptr.truncate_prefix(new_prefix_len)
-    }
-
-    /// Forwards to the node's `prepend_prefix` with `prefix` and `prefix_byte`.
-    pub(crate) fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) {
-        self.ptr.prepend_prefix(prefix, prefix_byte)
-    }
-
-    /// Store `child` in the node under `key_byte`.
-    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
-        self.ptr.insert_child(key_byte, child)
-    }
-
-    /// Mutable access to the value stored in the leaf.
-    pub(crate) fn get_leaf_value_mut(&mut self) -> &mut V {
-        self.ptr.get_leaf_value_mut()
-    }
-
-    /// Allocate the node's grown replacement. This node itself is left in place and locked.
-    pub(crate) fn grow<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        self.ptr.grow(allocator).map(|ptr| NewNodeRef {
-            ptr,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    /// Allocate the node's shrunk replacement. This node itself is left in place and locked.
-    pub(crate) fn shrink<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        self.ptr.shrink(allocator).map(|ptr| NewNodeRef {
-            ptr,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    /// The raw node pointer. The lock stays held by this reference.
-    pub(crate) fn as_ptr(&self) -> NodePtr<V> {
-        self.ptr
-    }
-
-    /// Point the entry for `key_byte` at `replacement`.
-    pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
-        self.ptr.replace_child(key_byte, replacement);
-    }
-
-    /// Remove the entry for `key_byte`.
-    pub(crate) fn delete_child(&mut self, key_byte: u8) {
-        self.ptr.delete_child(key_byte);
-    }
-
-    /// Return the single child of this node together with its key byte. Panics if the node
-    /// does not have exactly one child.
-    pub(crate) fn find_remaining_child(&self) -> (u8, NodeRef<'e, V>) {
-        assert_eq!(self.num_children(), 1);
-
-        let (k, child_ptr) = self
-            .ptr
-            .find_next_child(0)
-            .expect("could not find only child in node");
-        let child = NodeRef {
-            ptr: child_ptr,
-            phantom: self.phantom,
-        };
-        (k, child)
-    }
-}
-
-impl<'e, V> Drop for WriteLockedNodeRef<'e, V> {
-    /// Release the write lock, unless the pointer has already been nulled.
-    fn drop(&mut self) {
-        if self.ptr.is_null() {
-            return;
-        }
-        self.ptr.lockword().write_unlock();
-    }
-}
-
-/// Owner of a freshly allocated node that has not been handed over yet. If it is dropped
-/// before `into_ptr()` is called, the node and everything in `extra_nodes` is deallocated.
-pub(crate) struct NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    // Null once into_ptr() has been called.
-    ptr: NodePtr<V>,
-    allocator: &'a A,
-
-    // Newly allocated nodes attached below 'ptr' via insert_new_child(); deallocated together
-    // with 'ptr' on drop.
-    extra_nodes: Vec<NodePtr<V>>,
-}
-
-impl<'a, V, A> NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    /// Attach an already existing, write-locked node under `key_byte`. The child is not
-    /// recorded in `extra_nodes`, so it is never deallocated together with this node.
-    pub(crate) fn insert_old_child(&mut self, key_byte: u8, child: &WriteLockedNodeRef<V>) {
-        self.ptr.insert_child(key_byte, child.as_ptr())
-    }
-
-    /// Give up ownership and return the raw pointer. After this, dropping the reference no
-    /// longer deallocates anything.
-    pub(crate) fn into_ptr(mut self) -> NodePtr<V> {
-        let ptr = self.ptr;
-        self.ptr = NodePtr::null();
-        ptr
-    }
-
-    /// Attach another newly allocated node under `key_byte` and take over responsibility for
-    /// deallocating it if this node is dropped before `into_ptr()` is called.
-    pub(crate) fn insert_new_child(&mut self, key_byte: u8, mut child: NewNodeRef<'a, V, A>) {
-        // The child may itself own newly allocated nodes. Take them over before into_ptr()
-        // consumes the child; otherwise that list is simply forgotten and those nodes would
-        // never be deallocated if this node is dropped on an error path.
-        let descendants = std::mem::take(&mut child.extra_nodes);
-
-        let child_ptr = child.into_ptr();
-        self.ptr.insert_child(key_byte, child_ptr);
-        self.extra_nodes.push(child_ptr);
-        self.extra_nodes.extend(descendants);
-    }
-}
-
-impl<'a, V, A> Drop for NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    /// This drop implementation deallocates the newly allocated node, if into_ptr() was not called.
-    /// The nodes recorded in `extra_nodes` are deallocated along with it.
-    fn drop(&mut self) {
-        if self.ptr.is_null() {
-            // Ownership was handed over through into_ptr(); nothing to free.
-            return;
-        }
-
-        self.ptr.deallocate(self.allocator);
-        for extra in &self.extra_nodes {
-            extra.deallocate(self.allocator);
-        }
-    }
-}
-
-/// Allocate a new internal node with the given `prefix`, wrapped in a `NewNodeRef` that owns
-/// it. Returns `OutOfMemoryError` if the allocation fails.
-pub(crate) fn new_internal<'a, V, A>(
-    prefix: &[u8],
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    let ptr = node_ptr::new_internal(prefix, allocator)?;
-    let extra_nodes = Vec::new();
-    Ok(NewNodeRef {
-        ptr,
-        allocator,
-        extra_nodes,
-    })
-}
-
-/// Allocate a new leaf node holding `value` with the given `prefix`, wrapped in a
-/// `NewNodeRef` that owns it. Returns `OutOfMemoryError` if the allocation fails.
-pub(crate) fn new_leaf<'a, V, A>(
-    prefix: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    let ptr = node_ptr::new_leaf(prefix, value, allocator)?;
-    let extra_nodes = Vec::new();
-    Ok(NewNodeRef {
-        ptr,
-        allocator,
-        extra_nodes,
-    })
-}
--- a/libs/neonart/src/allocator.rs
+++ b/libs/neonart/src/allocator.rs
@@ -1,156 +0,0 @@
-pub mod block;
-mod multislab;
-mod slab;
-pub mod r#static;
-
-use std::alloc::Layout;
-use std::marker::PhantomData;
-use std::mem::MaybeUninit;
-use std::sync::atomic::Ordering;
-
-use crate::allocator::multislab::MultiSlabAllocator;
-use crate::allocator::r#static::alloc_from_slice;
-
-use spin;
-
-use crate::Tree;
-pub use crate::algorithm::node_ptr::{
-    NodeInternal4, NodeInternal16, NodeInternal48, NodeInternal256, NodeLeaf,
-};
-
-/// Error returned by node allocation functions when no node could be allocated.
-#[derive(Debug)]
-pub struct OutOfMemoryError();
-
-/// Memory source for a tree: one allocation function for the `Tree` struct itself, plus an
-/// alloc/dealloc pair for each node type. All functions work with raw pointers.
-pub trait ArtAllocator<V: crate::Value> {
-    /// Provide memory for the `Tree` struct.
-    fn alloc_tree(&self) -> *mut Tree<V>;
-
-    // Allocation, one function per node type
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V>;
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V>;
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V>;
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V>;
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V>;
-
-    // Deallocation, one function per node type
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>);
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>);
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>);
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>);
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>);
-}
-
-/// `ArtAllocator` implementation backed by a `MultiSlabAllocator` with one slab per node type.
-pub struct ArtMultiSlabAllocator<'t, V>
-where
-    V: crate::Value,
-{
-    /// Space reserved for the one `Tree` struct. `alloc_tree()` takes it out, leaving `None`.
-    tree_area: spin::Mutex<Option<&'t mut MaybeUninit<Tree<V>>>>,
-
-    /// The five slabs, indexed in the same order as `LAYOUTS`.
-    pub(crate) inner: MultiSlabAllocator<'t, 5>,
-
-    phantom_val: PhantomData<V>,
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    /// Memory layout of each slab's chunks. The position in this array is the slab index that
-    /// the `alloc_node_*` / `dealloc_node_*` functions pass to the inner allocator.
-    const LAYOUTS: [Layout; 5] = [
-        Layout::new::<NodeInternal4<V>>(),
-        Layout::new::<NodeInternal16<V>>(),
-        Layout::new::<NodeInternal48<V>>(),
-        Layout::new::<NodeInternal256<V>>(),
-        Layout::new::<NodeLeaf<V>>(),
-    ];
-
-    /// Build the allocator inside `area`: space for the allocator struct itself is carved
-    /// out first, then space for the `Tree`, and whatever remains is given to the slabs.
-    pub fn new(area: &'t mut [MaybeUninit<u8>]) -> &'t mut ArtMultiSlabAllocator<'t, V> {
-        let (allocator_area, remain) = alloc_from_slice::<ArtMultiSlabAllocator<V>>(area);
-        let (tree_area, remain) = alloc_from_slice::<Tree<V>>(remain);
-
-        allocator_area.write(ArtMultiSlabAllocator {
-            tree_area: spin::Mutex::new(Some(tree_area)),
-            inner: MultiSlabAllocator::new(remain, &Self::LAYOUTS),
-            phantom_val: PhantomData,
-        })
-    }
-}
-
-impl<'t, V: crate::Value> ArtAllocator<V> for ArtMultiSlabAllocator<'t, V> {
-    /// Hand out the space reserved for the tree. Panics on any call after the first.
-    fn alloc_tree(&self) -> *mut Tree<V> {
-        match self.tree_area.lock().take() {
-            Some(tree_area) => tree_area.as_mut_ptr(),
-            None => panic!("cannot allocate more than one tree"),
-        }
-    }
-
-    // Allocation: the slab index is the node type's position in LAYOUTS.
-
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V> {
-        self.inner.alloc_slab(0).cast::<NodeInternal4<V>>()
-    }
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V> {
-        self.inner.alloc_slab(1).cast::<NodeInternal16<V>>()
-    }
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V> {
-        self.inner.alloc_slab(2).cast::<NodeInternal48<V>>()
-    }
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V> {
-        self.inner.alloc_slab(3).cast::<NodeInternal256<V>>()
-    }
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V> {
-        self.inner.alloc_slab(4).cast::<NodeLeaf<V>>()
-    }
-
-    // Deallocation: same slab indexes as above.
-
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>) {
-        self.inner.dealloc_slab(0, ptr.cast::<u8>())
-    }
-
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>) {
-        self.inner.dealloc_slab(1, ptr.cast::<u8>())
-    }
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>) {
-        self.inner.dealloc_slab(2, ptr.cast::<u8>())
-    }
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>) {
-        self.inner.dealloc_slab(3, ptr.cast::<u8>())
-    }
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>) {
-        self.inner.dealloc_slab(4, ptr.cast::<u8>())
-    }
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    /// Read the per-slab counters. Each counter is loaded separately with relaxed ordering,
-    /// so the numbers are not a consistent snapshot while allocations are in progress.
-    pub(crate) fn get_statistics(&self) -> ArtMultiSlabStats {
-        let slabs = &self.inner.slab_descs;
-        let allocated = |idx: usize| slabs[idx].num_allocated.load(Ordering::Relaxed);
-        let blocks = |idx: usize| slabs[idx].num_blocks.load(Ordering::Relaxed);
-
-        ArtMultiSlabStats {
-            num_internal4: allocated(0),
-            num_internal16: allocated(1),
-            num_internal48: allocated(2),
-            num_internal256: allocated(3),
-            num_leaf: allocated(4),
-
-            num_blocks_internal4: blocks(0),
-            num_blocks_internal16: blocks(1),
-            num_blocks_internal48: blocks(2),
-            num_blocks_internal256: blocks(3),
-            num_blocks_leaf: blocks(4),
-        }
-    }
-}
-
-/// Counters read from the five node slabs by `ArtMultiSlabAllocator::get_statistics()`.
-#[derive(Clone, Debug)]
-pub struct ArtMultiSlabStats {
-    // 'num_allocated' counter of each slab
-    pub num_internal4: u64,
-    pub num_internal16: u64,
-    pub num_internal48: u64,
-    pub num_internal256: u64,
-    pub num_leaf: u64,
-
-    // 'num_blocks' counter of each slab
-    pub num_blocks_internal4: u64,
-    pub num_blocks_internal16: u64,
-    pub num_blocks_internal48: u64,
-    pub num_blocks_internal256: u64,
-    pub num_blocks_leaf: u64,
-}
--- a/libs/neonart/src/allocator/block.rs
+++ b/libs/neonart/src/allocator/block.rs
@@ -1,191 +0,0 @@
-//! Simple allocator of fixed-size blocks
-
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use spin;
-
-pub const BLOCK_SIZE: usize = 16 * 1024;
-
-const INVALID_BLOCK: u64 = u64::MAX;
-
-/// Hands out BLOCK_SIZE-sized blocks from one contiguous memory area.
-pub(crate) struct BlockAllocator<'t> {
-    /// The memory area, starting at the first block.
-    blocks_ptr: &'t [MaybeUninit<u8>],
-    /// Number of whole blocks that fit in the area.
-    num_blocks: u64,
-    /// Number of blocks handed out so far from the never-used part of the area; also the
-    /// number of the next never-used block.
-    num_initialized: AtomicU64,
-
-    /// Block number of the first free-list block, or INVALID_BLOCK if the free list is empty.
-    freelist_head: spin::Mutex<u64>,
-}
-
-/// A released block that has been turned into a node of the free list. Its contents are
-/// protected by their own lock.
-struct FreeListBlock {
-    inner: spin::Mutex<FreeListBlockInner>,
-}
-
-/// Contents of a free-list block.
-struct FreeListBlockInner {
-    /// Block number of the next free-list block, or INVALID_BLOCK at the end of the list.
-    next: u64,
-
-    /// Number of valid entries at the start of `free_blocks`.
-    num_free_blocks: u64,
-    /// Block numbers of free blocks recorded in this free-list block.
-    free_blocks: [u64; 100], // FIXME: fill the rest of the block
-}
-
-impl<'t> BlockAllocator<'t> {
-    /// Create an allocator over `area`. Bytes before the first BLOCK_SIZE-aligned address are
-    /// skipped, and a trailing partial block is not counted.
-    pub(crate) fn new(area: &'t mut [MaybeUninit<u8>]) -> Self {
-        // Use all the space for the blocks
-        let padding = area.as_ptr().align_offset(BLOCK_SIZE);
-        let remain = &mut area[padding..];
-
-        let num_blocks = (remain.len() / BLOCK_SIZE) as u64;
-
-        BlockAllocator {
-            blocks_ptr: remain,
-            num_blocks,
-            num_initialized: AtomicU64::new(0),
-            freelist_head: spin::Mutex::new(INVALID_BLOCK),
-        }
-    }
-
-    /// Interpret block `blkno` as a free-list block.
-    ///
-    /// safety: you must hold a lock on the pointer to this block, otherwise it might get
-    /// reused for another kind of block
-    fn read_freelist_block(&self, blkno: u64) -> &FreeListBlock {
-        let ptr: *const FreeListBlock = self.get_block_ptr(blkno).cast();
-        unsafe { ptr.as_ref().unwrap() }
-    }
-
-    /// Pointer to the first byte of block `blkno`. Panics if `blkno` is out of range.
-    fn get_block_ptr(&self, blkno: u64) -> *mut u8 {
-        assert!(blkno < self.num_blocks);
-        unsafe {
-            self.blocks_ptr
-                .as_ptr()
-                .byte_offset(blkno as isize * BLOCK_SIZE as isize)
-        }
-        .cast_mut()
-        .cast()
-    }
-
-    /// Allocate one block and return it as a BLOCK_SIZE-long slice. Panics with
-    /// "out of memory" when no block is available.
-    #[allow(clippy::mut_from_ref)]
-    pub(crate) fn alloc_block(&self) -> &mut [MaybeUninit<u8>] {
-        // FIXME: handle OOM
-        let blkno = self.alloc_block_internal();
-        if blkno == INVALID_BLOCK {
-            panic!("out of memory");
-        }
-
-        let ptr: *mut MaybeUninit<u8> = self.get_block_ptr(blkno).cast();
-        unsafe { std::slice::from_raw_parts_mut(ptr, BLOCK_SIZE) }
-    }
-
-    /// Pick a block number to hand out: from the free list if it is not empty, otherwise the
-    /// next never-used block. Returns INVALID_BLOCK if neither is available.
-    fn alloc_block_internal(&self) -> u64 {
-        //  check the free list.
-        {
-            let mut freelist_head = self.freelist_head.lock();
-            if *freelist_head != INVALID_BLOCK {
-                let freelist_block = self.read_freelist_block(*freelist_head);
-
-                // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-                let mut g = freelist_block.inner.lock();
-
-                if g.num_free_blocks > 0 {
-                    // Hand out the most recently recorded entry
-                    g.num_free_blocks -= 1;
-                    let result = g.free_blocks[g.num_free_blocks as usize];
-                    return result;
-                } else {
-                    // consume the freelist block itself
-                    let result = *freelist_head;
-                    *freelist_head = g.next;
-                    // This freelist block is now unlinked and can be repurposed
-                    drop(g);
-                    return result;
-                }
-            }
-        }
-
-        // If there are some blocks left that we've never used, pick next such block
-        // (compare-and-swap loop: retry with the fresh counter value if another thread
-        // claimed the block first)
-        let mut next_uninitialized = self.num_initialized.load(Ordering::Relaxed);
-        while next_uninitialized < self.num_blocks {
-            match self.num_initialized.compare_exchange(
-                next_uninitialized,
-                next_uninitialized + 1,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => {
-                    return next_uninitialized;
-                }
-                Err(old) => {
-                    next_uninitialized = old;
-                    continue;
-                }
-            }
-        }
-
-        // out of blocks
-        INVALID_BLOCK
-    }
-
-    /// Return the block starting at `block_ptr` to the free list.
-    // TODO: this is currently unused. The slab allocator never releases blocks
-    #[allow(dead_code)]
-    pub(crate) fn release_block(&self, block_ptr: *mut u8) {
-        let blockno = unsafe { block_ptr.byte_offset_from(self.blocks_ptr) / BLOCK_SIZE as isize };
-        self.release_block_internal(blockno as u64);
-    }
-
-    /// Record block `blockno` as free: either as an entry in the first free-list block, or,
-    /// if there is no free-list block or the first one has no room, by turning the released
-    /// block itself into the new head of the free list.
-    fn release_block_internal(&self, blockno: u64) {
-        let mut freelist_head = self.freelist_head.lock();
-        if *freelist_head != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(*freelist_head);
-
-            // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-            let mut g = freelist_block.inner.lock();
-
-            let num_free_blocks = g.num_free_blocks;
-            if num_free_blocks < g.free_blocks.len() as u64 {
-                g.free_blocks[num_free_blocks as usize] = blockno;
-                g.num_free_blocks += 1;
-                return;
-            }
-        }
-
-        // Convert the block into a new freelist block
-        let block_ptr: *mut FreeListBlock = self.get_block_ptr(blockno).cast();
-        let init = FreeListBlock {
-            inner: spin::Mutex::new(FreeListBlockInner {
-                next: *freelist_head,
-                num_free_blocks: 0,
-                free_blocks: [INVALID_BLOCK; 100],
-            }),
-        };
-        unsafe { (*block_ptr) = init };
-        *freelist_head = blockno;
-    }
-
-    /// Walk the free list and collect counters.
-    ///
-    /// `num_free_blocks` sums the entries recorded in the free-list blocks; the free-list
-    /// blocks themselves are not included in that count.
-    // for debugging
-    pub(crate) fn get_statistics(&self) -> BlockAllocatorStats {
-        let mut num_free_blocks = 0;
-
-        let mut _prev_lock = None;
-        let head_lock = self.freelist_head.lock();
-        let mut next_blk = *head_lock;
-        let mut _head_lock = Some(head_lock);
-        while next_blk != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(next_blk);
-            let lock = freelist_block.inner.lock();
-            num_free_blocks += lock.num_free_blocks;
-            next_blk = lock.next;
-            _prev_lock = Some(lock); // hold the lock until we've read the next block
-            _head_lock = None;
-        }
-
-        BlockAllocatorStats {
-            num_blocks: self.num_blocks,
-            num_initialized: self.num_initialized.load(Ordering::Relaxed),
-            num_free_blocks,
-        }
-    }
-}
-
-/// Counters returned by `BlockAllocator::get_statistics()`.
-#[derive(Clone, Debug)]
-pub struct BlockAllocatorStats {
-    /// Total number of blocks in the area.
-    pub num_blocks: u64,
-    /// Number of blocks that have been handed out from the never-used part of the area.
-    pub num_initialized: u64,
-    /// Sum of the free-block entries recorded in the free-list blocks.
-    pub num_free_blocks: u64,
-}
--- a/libs/neonart/src/allocator/multislab.rs
+++ b/libs/neonart/src/allocator/multislab.rs
@@ -1,33 +0,0 @@
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-
-use crate::allocator::block::BlockAllocator;
-use crate::allocator::slab::SlabDesc;
-
-pub struct MultiSlabAllocator<'t, const N: usize> {
-    pub(crate) block_allocator: BlockAllocator<'t>,
-
-    pub(crate) slab_descs: [SlabDesc; N],
-}
-
-impl<'t, const N: usize> MultiSlabAllocator<'t, N> {
-    pub(crate) fn new(
-        area: &'t mut [MaybeUninit<u8>],
-        layouts: &[Layout; N],
-    ) -> MultiSlabAllocator<'t, N> {
-        let block_allocator = BlockAllocator::new(area);
-        MultiSlabAllocator {
-            block_allocator,
-
-            slab_descs: std::array::from_fn(|i| SlabDesc::new(&layouts[i])),
-        }
-    }
-
-    pub(crate) fn alloc_slab(&self, slab_idx: usize) -> *mut u8 {
-        self.slab_descs[slab_idx].alloc_chunk(&self.block_allocator)
-    }
-
-    pub(crate) fn dealloc_slab(&self, slab_idx: usize, ptr: *mut u8) {
-        self.slab_descs[slab_idx].dealloc_chunk(ptr, &self.block_allocator)
-    }
-}
--- a/libs/neonart/src/allocator/slab.rs
+++ b/libs/neonart/src/allocator/slab.rs
@@ -1,433 +0,0 @@
-//! A slab allocator that carves out fixed-size chunks from larger blocks.
-//!
-//!
-
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-use std::ops::Deref;
-use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
-
-use spin;
-
-use super::alloc_from_slice;
-use super::block::BlockAllocator;
-
-use crate::allocator::block::BLOCK_SIZE;
-
-pub(crate) struct SlabDesc {
-    pub(crate) layout: Layout,
-
-    block_lists: spin::RwLock<BlockLists>,
-
-    pub(crate) num_blocks: AtomicU64,
-    pub(crate) num_allocated: AtomicU64,
-}
-
-// FIXME: Not sure if SlabDesc is really Sync or Send. It probably is when it's empty, but
-// 'block_lists' contains pointers when it's not empty. In the current use as part of the
-// the art tree, SlabDescs are only moved during initialization.
-unsafe impl Sync for SlabDesc {}
-unsafe impl Send for SlabDesc {}
-
-#[derive(Default, Debug)]
-struct BlockLists {
-    full_blocks: BlockList,
-    nonfull_blocks: BlockList,
-}
-
-impl BlockLists {
-    // Unlink a node. It must be in either one of the two lists.
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        let list = unsafe {
-            if (*elem).next.is_null() {
-                if self.full_blocks.tail == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else if (*elem).prev.is_null() {
-                if self.full_blocks.head == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else {
-                None
-            }
-        };
-        unsafe { unlink_slab_block(list, elem) };
-    }
-}
-
-unsafe fn unlink_slab_block(mut list: Option<&mut BlockList>, elem: *mut SlabBlockHeader) {
-    unsafe {
-        if (*elem).next.is_null() {
-            assert_eq!(list.as_ref().unwrap().tail, elem);
-            list.as_mut().unwrap().tail = (*elem).prev;
-        } else {
-            assert_eq!((*(*elem).next).prev, elem);
-            (*(*elem).next).prev = (*elem).prev;
-        }
-        if (*elem).prev.is_null() {
-            assert_eq!(list.as_ref().unwrap().head, elem);
-            list.as_mut().unwrap().head = (*elem).next;
-        } else {
-            assert_eq!((*(*elem).prev).next, elem);
-            (*(*elem).prev).next = (*elem).next;
-        }
-    }
-}
-
-#[derive(Debug)]
-struct BlockList {
-    head: *mut SlabBlockHeader,
-    tail: *mut SlabBlockHeader,
-}
-
-impl Default for BlockList {
-    fn default() -> Self {
-        BlockList {
-            head: std::ptr::null_mut(),
-            tail: std::ptr::null_mut(),
-        }
-    }
-}
-
-impl BlockList {
-    unsafe fn push_head(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe {
-            if self.is_empty() {
-                self.tail = elem;
-                (*elem).next = std::ptr::null_mut();
-            } else {
-                (*elem).next = self.head;
-                (*self.head).prev = elem;
-            }
-            (*elem).prev = std::ptr::null_mut();
-            self.head = elem;
-        }
-    }
-
-    fn is_empty(&self) -> bool {
-        self.head.is_null()
-    }
-
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe { unlink_slab_block(Some(self), elem) }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        let mut next = self.head;
-
-        while !next.is_null() {
-            let n = unsafe { next.as_ref() }.unwrap();
-            eprintln!(
-                "  blk {:?} (free {}/{})",
-                next,
-                n.num_free_chunks.load(Ordering::Relaxed),
-                n.num_chunks
-            );
-            next = n.next;
-        }
-    }
-}
-
-impl SlabDesc {
-    pub(crate) fn new(layout: &Layout) -> SlabDesc {
-        SlabDesc {
-            layout: *layout,
-            block_lists: spin::RwLock::new(BlockLists::default()),
-            num_allocated: AtomicU64::new(0),
-            num_blocks: AtomicU64::new(0),
-        }
-    }
-}
-
-#[derive(Debug)]
-struct SlabBlockHeader {
-    free_chunks_head: spin::Mutex<*mut FreeChunk>,
-    num_free_chunks: AtomicU32,
-    num_chunks: u32, // this is really a constant for a given Layout
-
-    // these fields are protected by the lock on the BlockLists
-    prev: *mut SlabBlockHeader,
-    next: *mut SlabBlockHeader,
-}
-
-struct FreeChunk {
-    next: *mut FreeChunk,
-}
-
-enum ReadOrWriteGuard<'a, T> {
-    Read(spin::RwLockReadGuard<'a, T>),
-    Write(spin::RwLockWriteGuard<'a, T>),
-}
-
-impl<'a, T> Deref for ReadOrWriteGuard<'a, T> {
-    type Target = T;
-
-    fn deref(&self) -> &<Self as Deref>::Target {
-        match self {
-            ReadOrWriteGuard::Read(g) => g.deref(),
-            ReadOrWriteGuard::Write(g) => g.deref(),
-        }
-    }
-}
-
-impl SlabDesc {
-    pub fn alloc_chunk(&self, block_allocator: &BlockAllocator) -> *mut u8 {
-        // Are there any free chunks?
-        let mut acquire_write = false;
-        'outer: loop {
-            let mut block_lists_guard = if acquire_write {
-                ReadOrWriteGuard::Write(self.block_lists.write())
-            } else {
-                ReadOrWriteGuard::Read(self.block_lists.read())
-            };
-            'inner: loop {
-                let block_ptr = block_lists_guard.nonfull_blocks.head;
-                if block_ptr.is_null() {
-                    break 'outer;
-                }
-                unsafe {
-                    let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-                    if !(*free_chunks_head).is_null() {
-                        let result = *free_chunks_head;
-                        (*free_chunks_head) = (*result).next;
-                        let _old = (*block_ptr).num_free_chunks.fetch_sub(1, Ordering::Relaxed);
-
-                        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-                        return result.cast();
-                    }
-                }
-
-                // The block at the head of the list was full. Grab write lock and retry
-                match block_lists_guard {
-                    ReadOrWriteGuard::Read(_) => {
-                        acquire_write = true;
-                        continue 'outer;
-                    }
-                    ReadOrWriteGuard::Write(ref mut g) => {
-                        // move the node to the list of full blocks
-                        unsafe {
-                            g.nonfull_blocks.unlink(block_ptr);
-                            g.full_blocks.push_head(block_ptr);
-                        };
-                        continue 'inner;
-                    }
-                }
-            }
-        }
-
-        // no free chunks. Allocate a new block (and the chunk from that)
-        let (new_block, new_chunk) = self.alloc_block_and_chunk(block_allocator);
-        self.num_blocks.fetch_add(1, Ordering::Relaxed);
-
-        // Add the block to the list in the SlabDesc
-        unsafe {
-            let mut block_lists_guard = self.block_lists.write();
-            block_lists_guard.nonfull_blocks.push_head(new_block);
-        }
-        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-        new_chunk
-    }
-
-    pub fn dealloc_chunk(&self, chunk_ptr: *mut u8, _block_allocator: &BlockAllocator) {
-        // Find the block it belongs to. You can find the block from the address. (And knowing the
-        // layout, you could calculate the chunk number too.)
-        let block_ptr: *mut SlabBlockHeader = {
-            let block_addr = (chunk_ptr.addr() / BLOCK_SIZE) * BLOCK_SIZE;
-            chunk_ptr.with_addr(block_addr).cast()
-        };
-        let chunk_ptr: *mut FreeChunk = chunk_ptr.cast();
-
-        // Mark the chunk as free in 'freechunks' list
-        let num_chunks;
-        let num_free_chunks;
-        unsafe {
-            let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-            (*chunk_ptr).next = *free_chunks_head;
-            *free_chunks_head = chunk_ptr;
-
-            num_free_chunks = (*block_ptr).num_free_chunks.fetch_add(1, Ordering::Relaxed) + 1;
-            num_chunks = (*block_ptr).num_chunks;
-        }
-
-        if num_free_chunks == 1 {
-            // If the block was full previously, add it to the nonfull blocks list. Note that
-            // we're not holding the lock anymore, so it can immediately become full again.
-            // That's harmless, it will be moved back to the full list again when a call
-            // to alloc_chunk() sees it.
-            let mut block_lists = self.block_lists.write();
-            unsafe {
-                block_lists.unlink(block_ptr);
-                block_lists.nonfull_blocks.push_head(block_ptr);
-            };
-        } else if num_free_chunks == num_chunks {
-            // If the block became completely empty, move it to the free list
-            // TODO
-            // FIXME: we're still holding the spinlock. It's not exactly safe to return it to
-            // the free blocks list, is it? Defer it as garbage to wait out concurrent updates?
-            //block_allocator.release_block()
-        }
-
-        // update stats
-        self.num_allocated.fetch_sub(1, Ordering::Relaxed);
-    }
-
-    fn alloc_block_and_chunk(
-        &self,
-        block_allocator: &BlockAllocator,
-    ) -> (*mut SlabBlockHeader, *mut u8) {
-        // fixme: handle OOM
-        let block_slice: &mut [MaybeUninit<u8>] = block_allocator.alloc_block();
-        let (block_header, remain) = alloc_from_slice::<SlabBlockHeader>(block_slice);
-
-        let padding = remain.as_ptr().align_offset(self.layout.align());
-
-        let num_chunks = (remain.len() - padding) / self.layout.size();
-
-        let first_chunk_ptr: *mut FreeChunk = remain[padding..].as_mut_ptr().cast();
-
-        unsafe {
-            let mut chunk_ptr = first_chunk_ptr;
-            for _ in 0..num_chunks - 1 {
-                let next_chunk_ptr = chunk_ptr.byte_add(self.layout.size());
-                (*chunk_ptr).next = next_chunk_ptr;
-                chunk_ptr = next_chunk_ptr;
-            }
-            (*chunk_ptr).next = std::ptr::null_mut();
-
-            let result_chunk = first_chunk_ptr;
-
-            let block_header = block_header.write(SlabBlockHeader {
-                free_chunks_head: spin::Mutex::new((*first_chunk_ptr).next),
-                prev: std::ptr::null_mut(),
-                next: std::ptr::null_mut(),
-                num_chunks: num_chunks as u32,
-                num_free_chunks: AtomicU32::new(num_chunks as u32 - 1),
-            });
-
-            (block_header, result_chunk.cast())
-        }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        eprintln!(
-            "slab dump ({} blocks, {} allocated chunks)",
-            self.num_blocks.load(Ordering::Relaxed),
-            self.num_allocated.load(Ordering::Relaxed)
-        );
-        let lists = self.block_lists.read();
-
-        eprintln!("nonfull blocks:");
-        lists.nonfull_blocks.dump();
-        eprintln!("full blocks:");
-        lists.full_blocks.dump();
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use rand::Rng;
-    use rand_distr::Zipf;
-
-    struct TestObject {
-        val: usize,
-        _dummy: [u8; BLOCK_SIZE / 4],
-    }
-
-    struct TestObjectSlab<'a>(SlabDesc, BlockAllocator<'a>);
-    impl<'a> TestObjectSlab<'a> {
-        fn new(block_allocator: BlockAllocator) -> TestObjectSlab {
-            TestObjectSlab(SlabDesc::new(&Layout::new::<TestObject>()), block_allocator)
-        }
-
-        fn alloc(&self, val: usize) -> *mut TestObject {
-            let obj: *mut TestObject = self.0.alloc_chunk(&self.1).cast();
-            unsafe { (*obj).val = val };
-            obj
-        }
-
-        fn dealloc(&self, obj: *mut TestObject) {
-            self.0.dealloc_chunk(obj.cast(), &self.1)
-        }
-    }
-
-    #[test]
-    fn test_slab_alloc() {
-        const MEM_SIZE: usize = 100000000;
-        let mut area = Box::new_uninit_slice(MEM_SIZE);
-        let block_allocator = BlockAllocator::new(&mut area);
-
-        let slab = TestObjectSlab::new(block_allocator);
-
-        let mut all: Vec<*mut TestObject> = Vec::new();
-        for i in 0..11 {
-            all.push(slab.alloc(i));
-        }
-        #[allow(clippy::needless_range_loop)]
-        for i in 0..11 {
-            assert!(unsafe { (*all[i]).val == i });
-        }
-
-        let distribution = Zipf::new(10.0, 1.1).unwrap();
-        let mut rng = rand::rng();
-        for _ in 0..100000 {
-            slab.0.dump();
-            let idx = rng.sample(distribution) as usize;
-            let ptr: *mut TestObject = all[idx];
-            if !ptr.is_null() {
-                assert_eq!(unsafe { (*ptr).val }, idx);
-                slab.dealloc(ptr);
-                all[idx] = std::ptr::null_mut();
-            } else {
-                all[idx] = slab.alloc(idx);
-            }
-        }
-    }
-
-    fn new_test_blk(i: u32) -> *mut SlabBlockHeader {
-        Box::into_raw(Box::new(SlabBlockHeader {
-            free_chunks_head: spin::Mutex::new(std::ptr::null_mut()),
-            num_free_chunks: AtomicU32::new(0),
-            num_chunks: i,
-            prev: std::ptr::null_mut(),
-            next: std::ptr::null_mut(),
-        }))
-    }
-
-    #[test]
-    fn test_block_linked_list() {
-        // note: these are leaked, but that's OK for tests
-        let a = new_test_blk(0);
-        let b = new_test_blk(1);
-
-        let mut list = BlockList::default();
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(a);
-            assert!(!list.is_empty());
-            list.unlink(a);
-        }
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(b);
-            list.push_head(a);
-            assert_eq!(list.head, a);
-            assert_eq!((*a).next, b);
-            assert_eq!((*b).prev, a);
-            assert_eq!(list.tail, b);
-
-            list.unlink(a);
-            list.unlink(b);
-            assert!(list.is_empty());
-        }
-    }
-}
--- a/libs/neonart/src/allocator/static.rs
+++ b/libs/neonart/src/allocator/static.rs
@@ -1,44 +0,0 @@
-use std::mem::MaybeUninit;
-
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size());
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { result_ptr.as_mut().unwrap() };
-
-    (result, remain)
-}
-
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() * len > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size() * len);
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) };
-
-    (result, remain)
-}
--- a/libs/neonart/src/epoch.rs
+++ b/libs/neonart/src/epoch.rs
@@ -1,142 +0,0 @@
-//! This is similar to crossbeam_epoch crate, but works in shared memory
-
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use crossbeam_utils::CachePadded;
-
-const NUM_SLOTS: usize = 1000;
-
-/// This is the struct that is stored in shmem
-///
-/// bit 0: is it pinned or not?
-/// rest of the bits are the epoch counter.
-pub struct EpochShared {
-    global_epoch: AtomicU64,
-    participants: [CachePadded<AtomicU64>; NUM_SLOTS],
-
-    broadcast_lock: spin::Mutex<()>,
-}
-
-impl EpochShared {
-    pub fn new() -> EpochShared {
-        EpochShared {
-            global_epoch: AtomicU64::new(2),
-            participants: [const { CachePadded::new(AtomicU64::new(2)) }; NUM_SLOTS],
-            broadcast_lock: spin::Mutex::new(()),
-        }
-    }
-
-    pub fn register(&self) -> LocalHandle {
-        LocalHandle {
-            global: self,
-            last_slot: AtomicUsize::new(0), // todo: choose more intelligently
-        }
-    }
-
-    fn release_pin(&self, slot: usize, _epoch: u64) {
-        let global_epoch = self.global_epoch.load(Ordering::Relaxed);
-        self.participants[slot].store(global_epoch, Ordering::Relaxed);
-    }
-
-    fn pin_internal(&self, slot_hint: usize) -> (usize, u64) {
-        // pick a slot
-        let mut slot = slot_hint;
-        let epoch = loop {
-            let old = self.participants[slot].fetch_or(1, Ordering::Relaxed);
-            if old & 1 == 0 {
-                // Got this slot
-                break old;
-            }
-
-            // the slot was busy by another thread / process. try a different slot
-            slot += 1;
-            if slot == NUM_SLOTS {
-                slot = 0;
-            }
-            continue;
-        };
-        (slot, epoch)
-    }
-
-    pub(crate) fn advance(&self) -> u64 {
-        // Advance the global epoch
-        let old_epoch = self.global_epoch.fetch_add(2, Ordering::Relaxed);
-        // Anyone that release their pin after this will update their slot.
-        old_epoch + 2
-    }
-
-    pub(crate) fn broadcast(&self) {
-        let Some(_guard) = self.broadcast_lock.try_lock() else {
-            return;
-        };
-
-        let epoch = self.global_epoch.load(Ordering::Relaxed);
-        let old_epoch = epoch.wrapping_sub(2);
-
-        // Update all free slots.
-        for i in 0..NUM_SLOTS {
-            // TODO: check result, as a sanity check. It should either be the old epoch, or pinned
-            let _ = self.participants[i].compare_exchange(
-                old_epoch,
-                epoch,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            );
-        }
-
-        // FIXME: memory fence here, since we used Relaxed?
-    }
-
-    pub(crate) fn get_oldest(&self) -> u64 {
-        // Read all slots.
-        let now = self.global_epoch.load(Ordering::Relaxed);
-        let mut oldest = now;
-        for i in 0..NUM_SLOTS {
-            let this_epoch = self.participants[i].load(Ordering::Relaxed);
-            let delta = now.wrapping_sub(this_epoch);
-            if delta > u64::MAX / 2 {
-                // this is very recent
-            } else if delta > now.wrapping_sub(oldest) {
-                oldest = this_epoch;
-            }
-        }
-        oldest
-    }
-
-    pub(crate) fn get_current(&self) -> u64 {
-        self.global_epoch.load(Ordering::Relaxed)
-    }
-}
-
-pub(crate) struct EpochPin<'e> {
-    slot: usize,
-    pub(crate) epoch: u64,
-
-    handle: &'e LocalHandle<'e>,
-}
-
-impl<'e> Drop for EpochPin<'e> {
-    fn drop(&mut self) {
-        self.handle.global.release_pin(self.slot, self.epoch);
-    }
-}
-
-pub struct LocalHandle<'g> {
-    global: &'g EpochShared,
-
-    last_slot: AtomicUsize,
-}
-
-impl<'g> LocalHandle<'g> {
-    pub fn pin(&self) -> EpochPin {
-        let (slot, epoch) = self
-            .global
-            .pin_internal(self.last_slot.load(Ordering::Relaxed));
-        self.last_slot.store(slot, Ordering::Relaxed);
-        EpochPin {
-            handle: self,
-            epoch,
-            slot,
-        }
-    }
-}
--- a/libs/neonart/src/lib.rs
+++ b/libs/neonart/src/lib.rs
@@ -1,583 +0,0 @@
-//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling.
-//!
-//! The data structure is described in these two papers:
-//!
-//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013).
-//!     The adaptive radix tree: ARTful indexing for main-memory databases.
-//!     Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812.
-//!     https://db.in.tum.de/~leis/papers/ART.pdf
-//!
-//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016).
-//!     The ART of practical synchronization.
-//!     1-8. 10.1145/2933349.2933352.
-//!     https://db.in.tum.de/~leis/papers/artsync.pdf
-//!
-//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we
-//! use.
-//!
-//! The papers mention a few different variants. We have made the following choices in this
-//! implementation:
-//!
-//! - All keys have the same length
-//!
-//! - Single-value leaves.
-//!
-//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a
-//!   variable length "prefix", which stores the keys of all the one-way nodes which have been
-//!   removed. However, similar to the "hybrid" approach described in the paper, each node only has
-//!   space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we
-//!   create create one-way nodes to store them. (There was no particular reason for this choice,
-//!   the "hybrid" approach described in the paper might be better.)
-//!
-//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method,
-//!   ROWEX, which generally performs better when there is contention, but that is not important
-//!   for use and Optimisic Lock Coupling is simpler to implement.
-//!
-//! ## Requirements
-//!
-//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache
-//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique
-//! requirements, which is why we had to write our own. Namely:
-//!
-//! - The data structure has to live in fixed-sized shared memory segment. That rules out any
-//!   built-in Rust collections and most crates. (Except possibly with the 'allocator_api' rust
-//!   feature, which still nightly-only experimental as of this writing).
-//!
-//! - The data structure is accessed from multiple processes. Only one process updates the data
-//!   structure, but other processes perform reads. That rules out using built-in Rust locking
-//!   primitives like Mutex and RwLock, and most crates too.
-//!
-//! - Within the one process with write-access, multiple threads can perform updates concurrently.
-//!   That rules out using PostgreSQL LWLocks for the locking.
-//!
-//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been
-//! written with that usage and the above constraints in mind. Some noteworthy assumptions:
-//!
-//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level
-//!   locking in the PostgreSQL buffer manager, which ensures that two backends should not try to
-//!   read / write the same page at the same time. (Prefetching can conflict with actual reads,
-//!   however.)
-//!
-//!  - The keys in the integrated cache are 17 bytes long.
-//!
-//! ## Usage
-//!
-//! Because this is designed to be used as a Postgres shared memory data structure, initialization
-//! happens in three stages:
-//!
-//! 0. A fixed area of shared memory is allocated at postmaster startup.
-//!
-//! 1. TreeInitStruct::new() is called to initialize it, still in Postmaster process, before any
-//!    other process or thread is running. It returns a TreeInitStruct, which is inherited by all
-//!    the processes through fork().
-//!
-//! 2. One process may have write-access to the struct, by calling
-//!    [TreeInitStruct::attach_writer]. (That process is the communicator process.)
-//!
-//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader]
-//!
-//! "Write access" means that you can insert / update / delete values in the tree.
-//!
-//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new
-//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data
-//! structure stays consistent, but if the Value has interior mutability, like atomic fields,
-//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a
-//! problem, the version check could be passed up to the caller, so that the caller could detect the
-//! lost updates and retry the operation.
-//!
-//! ## Implementation
-//!
-//! node_ptr: Provides low-level implementations of the four different node types (eight actually,
-//! since there is an Internal and Leaf variant of each)
-//!
-//! lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each
-//! node.
-//!
-//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe
-//!   abstractions on top.
-//!
-//! algorithm.rs: Contains the functions to implement lookups and updates in the tree
-//!
-//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our
-//!   own abstraction for that because we need the data structure to live in a pre-allocated shared
-//!   memory segment).
-//!
-//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not
-//!   immediately deallocated, but stays around for as long as concurrent readers might still have
-//!   pointers to them. This is enforced by an epoch system. This is similar to
-//!   e.g. crossbeam_epoch, but we couldn't use that either because it has to work across processes
-//!   communicating over the shared memory segment.
-//!
-//! ## See also
-//!
-//! There are some existing Rust ART implementations out there, but none of them filled all
-//! the requirements:
-//!
-//! - https://github.com/XiangpengHao/congee
-//! - https://github.com/declanvk/blart
-//!
-//! ## TODO
-//!
-//! - Removing values has not been implemented
-
-mod algorithm;
-pub mod allocator;
-mod epoch;
-
-use algorithm::RootPtr;
-use algorithm::node_ptr::NodePtr;
-
-use std::collections::VecDeque;
-use std::fmt::Debug;
-use std::marker::PhantomData;
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicBool, Ordering};
-
-use crate::epoch::EpochPin;
-
-#[cfg(test)]
-mod tests;
-
-use allocator::ArtAllocator;
-pub use allocator::ArtMultiSlabAllocator;
-pub use allocator::OutOfMemoryError;
-
-/// Fixed-length key type.
-///
-pub trait Key: Debug {
-    const KEY_LEN: usize;
-
-    fn as_bytes(&self) -> &[u8];
-}
-
-/// Values stored in the tree
-///
-/// Values need to be Cloneable, because when a node "grows", the value is copied to a new node and
-/// the old sticks around until all readers that might see the old value are gone.
-// fixme obsolete, no longer needs Clone
-pub trait Value {}
-
-const MAX_GARBAGE: usize = 1024;
-
-/// The root of the tree, plus other tree-wide data. This is stored in the shared memory.
-pub struct Tree<V: Value> {
-    /// For simplicity, so that we never need to grow or shrink the root, the root node is always an
-    /// Internal256 node. Also, it never has a prefix (that's actually a bit wasteful, incurring one
-    /// indirection to every lookup)
-    root: RootPtr<V>,
-
-    writer_attached: AtomicBool,
-
-    epoch: epoch::EpochShared,
-}
-
-unsafe impl<V: Value + Sync> Sync for Tree<V> {}
-unsafe impl<V: Value + Send> Send for Tree<V> {}
-
-struct GarbageQueue<V>(VecDeque<(NodePtr<V>, u64)>);
-
-unsafe impl<V: Value + Sync> Sync for GarbageQueue<V> {}
-unsafe impl<V: Value + Send> Send for GarbageQueue<V> {}
-
-impl<V> GarbageQueue<V> {
-    fn new() -> GarbageQueue<V> {
-        GarbageQueue(VecDeque::with_capacity(MAX_GARBAGE))
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>, epoch: u64) {
-        self.0.push_front((ptr, epoch));
-    }
-
-    fn next_obsolete(&mut self, cutoff_epoch: u64) -> Option<NodePtr<V>> {
-        if let Some(back) = self.0.back() {
-            if back.1 < cutoff_epoch {
-                return Some(self.0.pop_back().unwrap().0);
-            }
-        }
-        None
-    }
-}
-
-/// Struct created at postmaster startup
-pub struct TreeInitStruct<'t, K: Key, V: Value, A: ArtAllocator<V>> {
-    tree: &'t Tree<V>,
-
-    allocator: &'t A,
-
-    phantom_key: PhantomData<K>,
-}
-
-/// The worker process has a reference to this. The write operations are only safe
-/// from the worker process
-pub struct TreeWriteAccess<'t, K: Key, V: Value, A: ArtAllocator<V>>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    pub allocator: &'t A,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-
-    /// Obsolete nodes that cannot be recycled until their epoch expires.
-    garbage: spin::Mutex<GarbageQueue<V>>,
-}
-
-/// The backends have a reference to this. It cannot be used to modify the tree
-pub struct TreeReadAccess<'t, K: Key, V: Value>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeInitStruct<'t, K, V, A> {
-    pub fn new(allocator: &'t A) -> TreeInitStruct<'t, K, V, A> {
-        let tree_ptr = allocator.alloc_tree();
-        let tree_ptr = NonNull::new(tree_ptr).expect("out of memory");
-        let init = Tree {
-            root: algorithm::new_root(allocator).expect("out of memory"),
-            writer_attached: AtomicBool::new(false),
-            epoch: epoch::EpochShared::new(),
-        };
-        unsafe { tree_ptr.write(init) };
-
-        TreeInitStruct {
-            tree: unsafe { tree_ptr.as_ref() },
-            allocator,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V, A> {
-        let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
-        if previously_attached {
-            panic!("writer already attached");
-        }
-        TreeWriteAccess {
-            tree: self.tree,
-            allocator: self.allocator,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-            garbage: spin::Mutex::new(GarbageQueue::new()),
-        }
-    }
-
-    pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
-        TreeReadAccess {
-            tree: self.tree,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteAccess<'t, K, V, A> {
-    pub fn start_write<'g>(&'t self) -> TreeWriteGuard<'g, K, V, A>
-    where
-        't: 'g,
-    {
-        TreeWriteGuard {
-            tree_writer: self,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-            created_garbage: false,
-        }
-    }
-
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value> TreeReadAccess<'t, K, V> {
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-pub struct TreeReadGuard<'e, K, V>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'e Tree<V>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-}
-
-impl<'e, K: Key, V: Value> TreeReadGuard<'e, K, V> {
-    pub fn get(&'e self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree.root, &self.epoch_pin)
-    }
-}
-
-pub struct TreeWriteGuard<'e, K, V, A>
-where
-    K: Key,
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    tree_writer: &'e TreeWriteAccess<'e, K, V, A>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-
-    created_garbage: bool,
-}
-
-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
-}
-
-impl<'e, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    /// Get a value
-    pub fn get(&'e mut self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin)
-    }
-
-    /// Insert a value
-    pub fn insert(self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
-
-        self.update_with_fn(key, |existing| {
-            if existing.is_some() {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
-    }
-
-    /// Remove value. Returns true if it existed
-    pub fn remove(self, key: &K) -> bool {
-        let mut result = false;
-        // FIXME: It's not clear if OOM is expected while removing. It seems
-        // not nice, but shrinking a node can OOM. Then again, we could opt
-        // to not shrink a node if we cannot allocate, to live a little longer.
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
-            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
-    }
-
-    /// Try to remove value and return the old value.
-    pub fn remove_and_return(self, key: &K) -> Option<V>
-    where
-        V: Clone,
-    {
-        let mut old = None;
-        self.update_with_fn(key, |existing| {
-            old = existing.cloned();
-            UpdateAction::Remove
-        })
-        .expect("out of memory while removing");
-        old
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    ///
-    /// The function is passed a reference to the existing value, if any. If the function
-    /// returns None, the value is removed from the tree (or if there was no existing value,
-    /// does nothing). If the function returns Some, the existing value is replaced, of if there
-    /// was no existing value, it is inserted. FIXME: update comment
-    pub fn update_with_fn<F>(mut self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self)?;
-
-        if self.created_garbage {
-            let _ = self.collect_garbage();
-        }
-        Ok(())
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>) {
-        self.tree_writer
-            .garbage
-            .lock()
-            .remember_obsolete_node(ptr, self.epoch_pin.epoch);
-        self.created_garbage = true;
-    }
-
-    // returns number of nodes recycled
-    fn collect_garbage(&self) -> usize {
-        self.tree_writer.tree.epoch.advance();
-        self.tree_writer.tree.epoch.broadcast();
-
-        let cutoff_epoch = self.tree_writer.tree.epoch.get_oldest();
-
-        let mut result = 0;
-        let mut garbage_queue = self.tree_writer.garbage.lock();
-        while let Some(ptr) = garbage_queue.next_obsolete(cutoff_epoch) {
-            ptr.deallocate(self.tree_writer.allocator);
-            result += 1;
-        }
-        result
-    }
-}
-
-pub struct TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    done: bool,
-    pub next_key: Vec<u8>,
-    max_key: Option<Vec<u8>>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<K> TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    pub fn new_wrapping() -> TreeIterator<K> {
-        TreeIterator {
-            done: false,
-            next_key: vec![0; K::KEY_LEN],
-            max_key: None,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn new(range: &std::ops::Range<K>) -> TreeIterator<K> {
-        let result = TreeIterator {
-            done: false,
-            next_key: Vec::from(range.start.as_bytes()),
-            max_key: Some(Vec::from(range.end.as_bytes())),
-            phantom_key: PhantomData,
-        };
-        assert_eq!(result.next_key.len(), K::KEY_LEN);
-        assert_eq!(result.max_key.as_ref().unwrap().len(), K::KEY_LEN);
-
-        result
-    }
-
-    pub fn next<'g, V>(&mut self, read_guard: &'g TreeReadGuard<'g, K, V>) -> Option<(K, &'g V)>
-    where
-        V: Value,
-    {
-        if self.done {
-            return None;
-        }
-
-        let mut wrapped_around = false;
-        loop {
-            assert_eq!(self.next_key.len(), K::KEY_LEN);
-            if let Some((k, v)) =
-                algorithm::iter_next(&self.next_key, read_guard.tree.root, &read_guard.epoch_pin)
-            {
-                assert_eq!(k.len(), K::KEY_LEN);
-                assert_eq!(self.next_key.len(), K::KEY_LEN);
-
-                // Check if we reached the end of the range
-                if let Some(max_key) = &self.max_key {
-                    if k.as_slice() >= max_key.as_slice() {
-                        self.done = true;
-                        break None;
-                    }
-                }
-
-                // increment the key
-                self.next_key = k.clone();
-                increment_key(self.next_key.as_mut_slice());
-                let k = k.as_slice().into();
-
-                break Some((k, v));
-            } else {
-                if self.max_key.is_some() {
-                    self.done = true;
-                } else {
-                    // Start from beginning
-                    if !wrapped_around {
-                        for i in 0..K::KEY_LEN {
-                            self.next_key[i] = 0;
-                        }
-                        wrapped_around = true;
-                        continue;
-                    } else {
-                        // The tree is completely empty
-                        // FIXME: perhaps we should remember the starting point instead.
-                        // Currently this will scan some ranges twice.
-                        break None;
-                    }
-                }
-                break None;
-            }
-        }
-    }
-}
-
-fn increment_key(key: &mut [u8]) -> bool {
-    for i in (0..key.len()).rev() {
-        let (byte, overflow) = key[i].overflowing_add(1);
-        key[i] = byte;
-        if !overflow {
-            return false;
-        }
-    }
-    true
-}
-
-// Debugging functions
-impl<'e, K: Key, V: Value + Debug, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree_writer.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value + Debug> TreeReadGuard<'e, K, V> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value> TreeWriteAccess<'e, K, V, ArtMultiSlabAllocator<'e, V>> {
-    pub fn get_statistics(&self) -> ArtTreeStatistics {
-        self.allocator.get_statistics();
-        ArtTreeStatistics {
-            blocks: self.allocator.inner.block_allocator.get_statistics(),
-            slabs: self.allocator.get_statistics(),
-            epoch: self.tree.epoch.get_current(),
-            oldest_epoch: self.tree.epoch.get_oldest(),
-            num_garbage: self.garbage.lock().0.len() as u64,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtTreeStatistics {
-    pub blocks: allocator::block::BlockAllocatorStats,
-    pub slabs: allocator::ArtMultiSlabStats,
-
-    pub epoch: u64,
-    pub oldest_epoch: u64,
-    pub num_garbage: u64,
-}
--- a/libs/neonart/src/tests.rs
+++ b/libs/neonart/src/tests.rs
@@ -1,236 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use crate::ArtAllocator;
-use crate::ArtMultiSlabAllocator;
-use crate::TreeInitStruct;
-use crate::TreeIterator;
-use crate::TreeWriteAccess;
-use crate::UpdateAction;
-
-use crate::{Key, Value};
-
-use rand::Rng;
-use rand::seq::SliceRandom;
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl TestKey {
-    const MIN: TestKey = TestKey([0; TEST_KEY_LEN]);
-    const MAX: TestKey = TestKey([u8::MAX; TEST_KEY_LEN]);
-}
-
-impl Key for TestKey {
-    const KEY_LEN: usize = TEST_KEY_LEN;
-    fn as_bytes(&self) -> &[u8] {
-        &self.0
-    }
-}
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-impl Value for usize {}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, usize, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let w = tree_writer.start_write();
-        let res = w.insert(&(*k).into(), idx);
-        assert!(res.is_ok());
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let r = tree_writer.start_read();
-        let value = r.get(&(*k).into());
-        assert_eq!(value, Some(idx).as_ref());
-    }
-
-    eprintln!("stats: {:?}", tree_writer.get_statistics());
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.contains(&key) {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-struct TestValue(AtomicUsize);
-
-impl TestValue {
-    fn new(val: usize) -> TestValue {
-        TestValue(AtomicUsize::new(val))
-    }
-
-    fn load(&self) -> usize {
-        self.0.load(Ordering::Relaxed)
-    }
-}
-
-impl Value for TestValue {}
-
-impl Clone for TestValue {
-    fn clone(&self) -> TestValue {
-        TestValue::new(self.load())
-    }
-}
-
-impl Debug for TestValue {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.load())
-    }
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op<A: ArtAllocator<TestValue>>(
-    op: &TestOp,
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    eprintln!("applying op: {op:?}");
-
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    // apply to Art tree
-    let w = tree.start_write();
-    w.update_with_fn(&op.0, |existing| {
-        assert_eq!(existing.map(TestValue::load), shadow_existing);
-
-        match (existing, op.1) {
-            (None, None) => UpdateAction::Nothing,
-            (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
-            (Some(_old_val), None) => UpdateAction::Remove,
-            (Some(old_val), Some(new_val)) => {
-                old_val.0.store(new_val, Ordering::Relaxed);
-                UpdateAction::Nothing
-            }
-        }
-    })
-    .expect("out of memory");
-}
-
-fn test_iter<A: ArtAllocator<TestValue>>(
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &BTreeMap<TestKey, usize>,
-) {
-    let mut shadow_iter = shadow.iter();
-    let mut iter = TreeIterator::new(&(TestKey::MIN..TestKey::MAX));
-
-    loop {
-        let shadow_item = shadow_iter.next().map(|(k, v)| (*k, *v));
-        let r = tree.start_read();
-        let item = iter.next(&r);
-
-        if shadow_item != item.map(|(k, v)| (k, v.load())) {
-            eprintln!("FAIL: iterator returned {item:?}, expected {shadow_item:?}");
-            tree.start_read().dump(&mut std::io::stderr());
-
-            eprintln!("SHADOW:");
-            for si in shadow {
-                eprintln!("key: {:?}, val: {}", si.0, si.1);
-            }
-            panic!("FAIL: iterator returned {item:?}, expected {shadow_item:?}");
-        }
-        if item.is_none() {
-            break;
-        }
-    }
-}
-
-#[test]
-fn random_ops() {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, TestValue, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let mut key: TestKey = (rng.sample(distribution) as u128).into();
-
-        if rng.random_bool(0.10) {
-            key = TestKey::from(u128::from(&key) | 0xffffffff);
-        }
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &tree_writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            eprintln!("stats: {:?}", tree_writer.get_statistics());
-            test_iter(&tree_writer, &shadow);
-        }
-    }
-}
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -386,7 +386,6 @@ pub enum NodeSchedulingPolicy {
    Pause,
    PauseForRestart,
    Draining,
-    Deleting,
 }

 impl FromStr for NodeSchedulingPolicy {
@@ -399,7 +398,6 @@ impl FromStr for NodeSchedulingPolicy {
            "pause" => Ok(Self::Pause),
            "pause_for_restart" => Ok(Self::PauseForRestart),
            "draining" => Ok(Self::Draining),
-            "deleting" => Ok(Self::Deleting),
            _ => Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
        }
    }
@@ -414,7 +412,6 @@ impl From<NodeSchedulingPolicy> for String {
            Pause => "pause",
            PauseForRestart => "pause_for_restart",
            Draining => "draining",
-            Deleting => "deleting",
        }
        .to_string()
    }
@@ -423,7 +420,6 @@ impl From<NodeSchedulingPolicy> for String {
 #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
 pub enum SkSchedulingPolicy {
    Active,
-    Activating,
    Pause,
    Decomissioned,
 }
@@ -434,7 +430,6 @@ impl FromStr for SkSchedulingPolicy {
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(match s {
            "active" => Self::Active,
-            "activating" => Self::Activating,
            "pause" => Self::Pause,
            "decomissioned" => Self::Decomissioned,
            _ => {
@@ -451,7 +446,6 @@ impl From<SkSchedulingPolicy> for String {
        use SkSchedulingPolicy::*;
        match value {
            Active => "active",
-            Activating => "activating",
            Pause => "pause",
            Decomissioned => "decomissioned",
        }
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -332,11 +332,7 @@ fn hash_combine(mut a: u32, mut b: u32) -> u32 {
 ///
 /// The mapping of key to shard is not stable across changes to ShardCount: this is intentional
 /// and will be handled at higher levels when shards are split.
-pub fn key_to_shard_number(
-    count: ShardCount,
-    stripe_size: ShardStripeSize,
-    key: &Key,
-) -> ShardNumber {
+fn key_to_shard_number(count: ShardCount, stripe_size: ShardStripeSize, key: &Key) -> ShardNumber {
    // Fast path for un-sharded tenants or broadcast keys
    if count < ShardCount(2) || key_is_shard0(key) {
        return ShardNumber(0);
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -78,13 +78,7 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
    use io::ErrorKind::*;
    matches!(
        e.kind(),
-        HostUnreachable
-            | NetworkUnreachable
-            | BrokenPipe
-            | ConnectionRefused
-            | ConnectionAborted
-            | ConnectionReset
-            | TimedOut,
+        BrokenPipe | ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
    )
 }

--- a/libs/proxy/postgres-protocol2/src/authentication/sasl.rs
+++ b/libs/proxy/postgres-protocol2/src/authentication/sasl.rs
@@ -52,7 +52,7 @@ pub(crate) async fn hi(str: &[u8], salt: &[u8], iterations: u32) -> [u8; 32] {
        }
        // yield every ~250us
        // hopefully reduces tail latencies
-        if i.is_multiple_of(1024) {
+        if i % 1024 == 0 {
            yield_now().await
        }
    }
--- a/libs/proxy/tokio-postgres2/src/client.rs
+++ b/libs/proxy/tokio-postgres2/src/client.rs
@@ -90,7 +90,7 @@ pub struct InnerClient {
 }

 impl InnerClient {
-    pub fn start(&mut self) -> Result<PartialQuery<'_>, Error> {
+    pub fn start(&mut self) -> Result<PartialQuery, Error> {
        self.responses.waiting += 1;
        Ok(PartialQuery(Some(self)))
    }
@@ -227,7 +227,7 @@ impl Client {
        &mut self,
        statement: &str,
        params: I,
-    ) -> Result<RowStream<'_>, Error>
+    ) -> Result<RowStream, Error>
    where
        S: AsRef<str>,
        I: IntoIterator<Item = Option<S>>,
@@ -262,7 +262,7 @@ impl Client {
    pub(crate) async fn simple_query_raw(
        &mut self,
        query: &str,
-    ) -> Result<SimpleQueryStream<'_>, Error> {
+    ) -> Result<SimpleQueryStream, Error> {
        simple_query::simple_query(self.inner_mut(), query).await
    }

--- a/libs/proxy/tokio-postgres2/src/generic_client.rs
+++ b/libs/proxy/tokio-postgres2/src/generic_client.rs
@@ -12,11 +12,7 @@ mod private {
 /// This trait is "sealed", and cannot be implemented outside of this crate.
 pub trait GenericClient: private::Sealed {
    /// Like `Client::query_raw_txt`.
-    async fn query_raw_txt<S, I>(
-        &mut self,
-        statement: &str,
-        params: I,
-    ) -> Result<RowStream<'_>, Error>
+    async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str> + Sync + Send,
        I: IntoIterator<Item = Option<S>> + Sync + Send,
@@ -26,11 +22,7 @@ pub trait GenericClient: private::Sealed {
 impl private::Sealed for Client {}

 impl GenericClient for Client {
-    async fn query_raw_txt<S, I>(
-        &mut self,
-        statement: &str,
-        params: I,
-    ) -> Result<RowStream<'_>, Error>
+    async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str> + Sync + Send,
        I: IntoIterator<Item = Option<S>> + Sync + Send,
@@ -43,11 +35,7 @@ impl GenericClient for Client {
 impl private::Sealed for Transaction<'_> {}

 impl GenericClient for Transaction<'_> {
-    async fn query_raw_txt<S, I>(
-        &mut self,
-        statement: &str,
-        params: I,
-    ) -> Result<RowStream<'_>, Error>
+    async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
    where
        S: AsRef<str> + Sync + Send,
        I: IntoIterator<Item = Option<S>> + Sync + Send,
--- a/libs/proxy/tokio-postgres2/src/transaction.rs
+++ b/libs/proxy/tokio-postgres2/src/transaction.rs
@@ -47,7 +47,7 @@ impl<'a> Transaction<'a> {
        &mut self,
        statement: &str,
        params: I,
-    ) -> Result<RowStream<'_>, Error>
+    ) -> Result<RowStream, Error>
    where
        S: AsRef<str>,
        I: IntoIterator<Item = Option<S>>,
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -24,28 +24,12 @@ macro_rules! critical {
        if cfg!(debug_assertions) {
            panic!($($arg)*);
        }
-        // Increment both metrics
        $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();
        let backtrace = std::backtrace::Backtrace::capture();
        tracing::error!("CRITICAL: {}\n{backtrace}", format!($($arg)*));
    }};
 }

-#[macro_export]
-macro_rules! critical_timeline {
-    ($tenant_shard_id:expr, $timeline_id:expr, $($arg:tt)*) => {{
-        if cfg!(debug_assertions) {
-            panic!($($arg)*);
-        }
-        // Increment both metrics
-        $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();
-        $crate::logging::HADRON_CRITICAL_STORAGE_EVENT_COUNT_METRIC.inc(&$tenant_shard_id.to_string(), &$timeline_id.to_string());
-        let backtrace = std::backtrace::Backtrace::capture();
-        tracing::error!("CRITICAL: [tenant_shard_id: {}, timeline_id: {}] {}\n{backtrace}",
-                       $tenant_shard_id, $timeline_id, format!($($arg)*));
-    }};
-}
-
 #[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
 #[strum(serialize_all = "snake_case")]
 pub enum LogFormat {
@@ -77,36 +61,6 @@ pub struct TracingEventCountMetric {
    trace: IntCounter,
 }

-// Begin Hadron: Add a HadronCriticalStorageEventCountMetric metric that is sliced by tenant_id and timeline_id
-pub struct HadronCriticalStorageEventCountMetric {
-    critical: IntCounterVec,
-}
-
-pub static HADRON_CRITICAL_STORAGE_EVENT_COUNT_METRIC: Lazy<HadronCriticalStorageEventCountMetric> =
-    Lazy::new(|| {
-        let vec = metrics::register_int_counter_vec!(
-            "hadron_critical_storage_event_count",
-            "Number of critical storage events, by tenant_id and timeline_id",
-            &["tenant_shard_id", "timeline_id"]
-        )
-        .expect("failed to define metric");
-        HadronCriticalStorageEventCountMetric::new(vec)
-    });
-
-impl HadronCriticalStorageEventCountMetric {
-    fn new(vec: IntCounterVec) -> Self {
-        Self { critical: vec }
-    }
-
-    // Allow public access from `critical!` macro.
-    pub fn inc(&self, tenant_shard_id: &str, timeline_id: &str) {
-        self.critical
-            .with_label_values(&[tenant_shard_id, timeline_id])
-            .inc();
-    }
-}
-// End Hadron
-
 pub static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
    let vec = metrics::register_int_counter_vec!(
        "libmetrics_tracing_event_count",
--- a/libs/utils/src/shard.rs
+++ b/libs/utils/src/shard.rs
@@ -49,12 +49,6 @@ pub struct TenantShardId {
    pub shard_count: ShardCount,
 }

-impl std::fmt::Display for ShardCount {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        self.0.fmt(f)
-    }
-}
-
 impl ShardCount {
    pub const MAX: Self = Self(u8::MAX);
    pub const MIN: Self = Self(0);
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -54,7 +54,6 @@ pageserver_api.workspace = true
 pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
 pageserver_page_api.workspace = true
-peekable.workspace = true
 pem.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
@@ -67,7 +66,6 @@ postgres-types.workspace = true
 posthog_client_lite.workspace = true
 pprof.workspace = true
 pq_proto.workspace = true
-prost.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
--- a/pageserver/client_grpc/Cargo.toml
+++ b/pageserver/client_grpc/Cargo.toml
@@ -1,44 +0,0 @@
-[package]
-name = "pageserver_client_grpc"
-version = "0.1.0"
-edition = "2024"
-
-[features]
-testing = ["pageserver_api/testing"]
-
-[dependencies]
-anyhow.workspace = true
-arc-swap.workspace = true
-bytes.workspace = true
-futures.workspace = true
-http.workspace = true
-thiserror.workspace = true
-tonic.workspace = true
-tracing.workspace = true
-tokio = { version = "1.43.1", features = [
-    "full",
-    "macros",
-    "net",
-    "io-util",
-    "rt",
-    "rt-multi-thread",
-] }
-uuid = { version = "1", features = ["v4"] }
-tower = { version = "0.4", features = ["timeout", "util"] }
-rand = "0.8"
-tokio-util = { version = "0.7", features = ["compat"] }
-hyper-util = "0.1.9"
-hyper = "1.6.0"
-metrics.workspace = true
-priority-queue = "2.3.1"
-scopeguard.workspace = true
-async-trait = { version = "0.1" }
-tokio-stream = "0.1"
-dashmap = "5"
-chrono = { version = "0.4", features = ["serde"] }
-compute_api.workspace = true
-
-
-pageserver_page_api.workspace = true
-pageserver_api.workspace = true
-utils.workspace = true
--- a/pageserver/client_grpc/src/client.rs
+++ b/pageserver/client_grpc/src/client.rs
@@ -1,471 +0,0 @@
-use std::collections::HashMap;
-use std::num::NonZero;
-use std::sync::Arc;
-
-use anyhow::anyhow;
-use arc_swap::ArcSwap;
-use futures::stream::FuturesUnordered;
-use futures::{FutureExt as _, StreamExt as _};
-use tracing::instrument;
-
-use crate::pool::{ChannelPool, ClientGuard, ClientPool, StreamGuard, StreamPool};
-use crate::retry::Retry;
-use crate::split::GetPageSplitter;
-use compute_api::spec::PageserverProtocol;
-use pageserver_api::shard::ShardStripeSize;
-use pageserver_page_api as page_api;
-use utils::id::{TenantId, TimelineId};
-use utils::shard::{ShardCount, ShardIndex, ShardNumber};
-
-/// Max number of concurrent clients per channel (i.e. TCP connection). New channels will be spun up
-/// when full.
-///
-/// TODO: tune all of these constants, and consider making them configurable.
-/// TODO: consider separate limits for unary and streaming clients, so we don't fill up channels
-/// with only streams.
-const MAX_CLIENTS_PER_CHANNEL: NonZero<usize> = NonZero::new(16).unwrap();
-
-/// Max number of concurrent unary request clients per shard.
-const MAX_UNARY_CLIENTS: NonZero<usize> = NonZero::new(64).unwrap();
-
-/// Max number of concurrent GetPage streams per shard. The max number of concurrent GetPage
-/// requests is given by `MAX_STREAMS * MAX_STREAM_QUEUE_DEPTH`.
-const MAX_STREAMS: NonZero<usize> = NonZero::new(64).unwrap();
-
-/// Max number of pipelined requests per stream.
-const MAX_STREAM_QUEUE_DEPTH: NonZero<usize> = NonZero::new(2).unwrap();
-
-/// Max number of concurrent bulk GetPage streams per shard, used e.g. for prefetches. Because these
-/// are more throughput-oriented, we have a smaller limit but higher queue depth.
-const MAX_BULK_STREAMS: NonZero<usize> = NonZero::new(16).unwrap();
-
-/// Max number of pipelined requests per bulk stream. These are more throughput-oriented and thus
-/// get a larger queue depth.
-const MAX_BULK_STREAM_QUEUE_DEPTH: NonZero<usize> = NonZero::new(4).unwrap();
-
-/// A rich Pageserver gRPC client for a single tenant timeline. This client is more capable than the
-/// basic `page_api::Client` gRPC client, and supports:
-///
-/// * Sharded tenants across multiple Pageservers.
-/// * Pooling of connections, clients, and streams for efficient resource use.
-/// * Concurrent use by many callers.
-/// * Internal handling of GetPage bidirectional streams, with pipelining and error handling.
-/// * Automatic retries.
-/// * Observability.
-///
-/// TODO: this client does not support base backups or LSN leases, as these are only used by
-/// compute_ctl. Consider adding this, but LSN leases need concurrent requests on all shards.
-pub struct PageserverClient {
-    /// The tenant ID.
-    tenant_id: TenantId,
-    /// The timeline ID.
-    timeline_id: TimelineId,
-    /// The JWT auth token for this tenant, if any.
-    auth_token: Option<String>,
-    /// The shards for this tenant.
-    shards: ArcSwap<Shards>,
-    /// The retry configuration.
-    retry: Retry,
-}
-
-impl PageserverClient {
-    /// Creates a new Pageserver client for a given tenant and timeline. Uses the Pageservers given
-    /// in the shard spec, which must be complete and must use gRPC URLs.
-    pub fn new(
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        shard_spec: ShardSpec,
-        auth_token: Option<String>,
-    ) -> anyhow::Result<Self> {
-        let shards = Shards::new(tenant_id, timeline_id, shard_spec, auth_token.clone())?;
-        Ok(Self {
-            tenant_id,
-            timeline_id,
-            auth_token,
-            shards: ArcSwap::new(Arc::new(shards)),
-            retry: Retry,
-        })
-    }
-
-    /// Updates the shards from the given shard spec. In-flight requests will complete using the
-    /// existing shards, but may retry with the new shards if they fail.
-    ///
-    /// TODO: verify that in-flight requests are allowed to complete, and that the old pools are
-    /// properly spun down and dropped afterwards.
-    pub fn update_shards(&self, shard_spec: ShardSpec) -> anyhow::Result<()> {
-        let shards = Shards::new(
-            self.tenant_id,
-            self.timeline_id,
-            shard_spec,
-            self.auth_token.clone(),
-        )?;
-        self.shards.store(Arc::new(shards));
-        Ok(())
-    }
-
-    /// Returns whether a relation exists.
-    #[instrument(skip_all, fields(rel=%req.rel, lsn=%req.read_lsn))]
-    pub async fn check_rel_exists(
-        &self,
-        req: page_api::CheckRelExistsRequest,
-    ) -> tonic::Result<page_api::CheckRelExistsResponse> {
-        self.retry
-            .with(async || {
-                // Relation metadata is only available on shard 0.
-                let mut client = self.shards.load_full().get_zero().client().await?;
-                client.check_rel_exists(req).await
-            })
-            .await
-    }
-
-    /// Returns the total size of a database, as # of bytes.
-    #[instrument(skip_all, fields(db_oid=%req.db_oid, lsn=%req.read_lsn))]
-    pub async fn get_db_size(
-        &self,
-        req: page_api::GetDbSizeRequest,
-    ) -> tonic::Result<page_api::GetDbSizeResponse> {
-        self.retry
-            .with(async || {
-                // Relation metadata is only available on shard 0.
-                let mut client = self.shards.load_full().get_zero().client().await?;
-                client.get_db_size(req).await
-            })
-            .await
-    }
-
-    /// Fetches pages. The `request_id` must be unique across all in-flight requests. Automatically
-    /// splits requests that straddle shard boundaries, and assembles the responses.
-    ///
-    /// Unlike `page_api::Client`, this automatically converts `status_code` into `tonic::Status`
-    /// errors. All responses will have `GetPageStatusCode::Ok`.
-    #[instrument(skip_all, fields(
-        req_id = %req.request_id,
-        class = %req.request_class,
-        rel = %req.rel,
-        blkno = %req.block_numbers[0],
-        blks = %req.block_numbers.len(),
-        lsn = %req.read_lsn,
-    ))]
-    pub async fn get_page(
-        &self,
-        req: page_api::GetPageRequest,
-    ) -> tonic::Result<page_api::GetPageResponse> {
-        // Make sure we have at least one page.
-        if req.block_numbers.is_empty() {
-            return Err(tonic::Status::invalid_argument("no block number"));
-        }
-
-        // The shards may change while we're fetching pages. We execute the request using a stable
-        // view of the shards (especially important for requests that span shards), but retry the
-        // top-level (pre-split) request to pick up shard changes. This can lead to unnecessary
-        // retries and re-splits in some cases where requests span shards, but these are expected to
-        // be rare.
-        //
-        // TODO: the gRPC server and client doesn't yet properly support shard splits. Revisit this
-        // once we figure out how to handle these.
-        self.retry
-            .with(async || Self::get_page_with_shards(req.clone(), &self.shards.load_full()).await)
-            .await
-    }
-
-    /// Fetches pages using the given shards. This uses a stable view of the shards, regardless of
-    /// concurrent shard updates. Does not retry internally, but is retried by `get_page()`.
-    async fn get_page_with_shards(
-        req: page_api::GetPageRequest,
-        shards: &Shards,
-    ) -> tonic::Result<page_api::GetPageResponse> {
-        // Fast path: request is for a single shard.
-        if let Some(shard_id) =
-            GetPageSplitter::is_single_shard(&req, shards.count, shards.stripe_size)
-        {
-            return Self::get_page_with_shard(req, shards.get(shard_id)?).await;
-        }
-
-        // Request spans multiple shards. Split it, dispatch concurrent per-shard requests, and
-        // reassemble the responses.
-        let mut splitter = GetPageSplitter::split(req, shards.count, shards.stripe_size);
-
-        let mut shard_requests = FuturesUnordered::new();
-        for (shard_id, shard_req) in splitter.drain_requests() {
-            let future = Self::get_page_with_shard(shard_req, shards.get(shard_id)?)
-                .map(move |result| result.map(|resp| (shard_id, resp)));
-            shard_requests.push(future);
-        }
-
-        while let Some((shard_id, shard_response)) = shard_requests.next().await.transpose()? {
-            splitter.add_response(shard_id, shard_response);
-        }
-
-        splitter.assemble_response()
-    }
-
-    /// Fetches pages on the given shard. Does not retry internally.
-    async fn get_page_with_shard(
-        req: page_api::GetPageRequest,
-        shard: &Shard,
-    ) -> tonic::Result<page_api::GetPageResponse> {
-        let expected = req.block_numbers.len();
-        let stream = shard.stream(req.request_class.is_bulk()).await;
-        let resp = stream.send(req).await?;
-
-        // Convert per-request errors into a tonic::Status.
-        if resp.status_code != page_api::GetPageStatusCode::Ok {
-            return Err(tonic::Status::new(
-                resp.status_code.into(),
-                resp.reason.unwrap_or_else(|| String::from("unknown error")),
-            ));
-        }
-
-        // Check that we received the expected number of pages.
-        let actual = resp.page_images.len();
-        if expected != actual {
-            return Err(tonic::Status::data_loss(format!(
-                "expected {expected} pages, got {actual}",
-            )));
-        }
-
-        Ok(resp)
-    }
-
-    /// Returns the size of a relation, as # of blocks.
-    #[instrument(skip_all, fields(rel=%req.rel, lsn=%req.read_lsn))]
-    pub async fn get_rel_size(
-        &self,
-        req: page_api::GetRelSizeRequest,
-    ) -> tonic::Result<page_api::GetRelSizeResponse> {
-        self.retry
-            .with(async || {
-                // Relation metadata is only available on shard 0.
-                let mut client = self.shards.load_full().get_zero().client().await?;
-                client.get_rel_size(req).await
-            })
-            .await
-    }
-
-    /// Fetches an SLRU segment.
-    #[instrument(skip_all, fields(kind=%req.kind, segno=%req.segno, lsn=%req.read_lsn))]
-    pub async fn get_slru_segment(
-        &self,
-        req: page_api::GetSlruSegmentRequest,
-    ) -> tonic::Result<page_api::GetSlruSegmentResponse> {
-        self.retry
-            .with(async || {
-                // SLRU segments are only available on shard 0.
-                let mut client = self.shards.load_full().get_zero().client().await?;
-                client.get_slru_segment(req).await
-            })
-            .await
-    }
-}
-
-/// Shard specification for a PageserverClient.
-pub struct ShardSpec {
-    /// Maps shard indices to gRPC URLs.
-    ///
-    /// INVARIANT: every shard 0..count is present, and shard 0 is always present.
-    /// INVARIANT: every URL is valid and uses grpc:// scheme.
-    urls: HashMap<ShardIndex, String>,
-    /// The shard count.
-    ///
-    /// NB: this is 0 for unsharded tenants, following `ShardIndex::unsharded()` convention.
-    count: ShardCount,
-    /// The stripe size for these shards.
-    stripe_size: ShardStripeSize,
-}
-
-impl ShardSpec {
-    /// Creates a new shard spec with the given URLs and stripe size. All shards must be given.
-    /// The stripe size may be omitted for unsharded tenants.
-    pub fn new(
-        urls: HashMap<ShardIndex, String>,
-        stripe_size: Option<ShardStripeSize>,
-    ) -> anyhow::Result<Self> {
-        // Compute the shard count.
-        let count = match urls.len() {
-            0 => return Err(anyhow!("no shards provided")),
-            1 => ShardCount::new(0), // NB: unsharded tenants use 0, like `ShardIndex::unsharded()`
-            n if n > u8::MAX as usize => return Err(anyhow!("too many shards: {n}")),
-            n => ShardCount::new(n as u8),
-        };
-
-        // Determine the stripe size. It doesn't matter for unsharded tenants.
-        if stripe_size.is_none() && !count.is_unsharded() {
-            return Err(anyhow!("stripe size must be given for sharded tenants"));
-        }
-        let stripe_size = stripe_size.unwrap_or_default();
-
-        // Validate the shard spec.
-        for (shard_id, url) in &urls {
-            // The shard index must match the computed shard count, even for unsharded tenants.
-            if shard_id.shard_count != count {
-                return Err(anyhow!("invalid shard index {shard_id}, expected {count}"));
-            }
-            // The shard index' number and count must be consistent.
-            if !shard_id.is_unsharded() && shard_id.shard_number.0 >= shard_id.shard_count.0 {
-                return Err(anyhow!("invalid shard index {shard_id}"));
-            }
-            // The above conditions guarantee that we have all shards 0..count: len() matches count,
-            // shard number < count, and numbers are unique (via hashmap).
-
-            // Validate the URL.
-            if PageserverProtocol::from_connstring(url)? != PageserverProtocol::Grpc {
-                return Err(anyhow!("invalid shard URL {url}: must use gRPC"));
-            }
-        }
-
-        Ok(Self {
-            urls,
-            count,
-            stripe_size,
-        })
-    }
-}
-
-/// Tracks the tenant's shards.
-struct Shards {
-    /// Shards by shard index.
-    ///
-    /// INVARIANT: every shard 0..count is present.
-    /// INVARIANT: shard 0 is always present.
-    by_index: HashMap<ShardIndex, Shard>,
-    /// The shard count.
-    ///
-    /// NB: this is 0 for unsharded tenants, following `ShardIndex::unsharded()` convention.
-    count: ShardCount,
-    /// The stripe size. Only used for sharded tenants.
-    stripe_size: ShardStripeSize,
-}
-
-impl Shards {
-    /// Creates a new set of shards based on a shard spec.
-    fn new(
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        shard_spec: ShardSpec,
-        auth_token: Option<String>,
-    ) -> anyhow::Result<Self> {
-        // NB: the shard spec has already been validated when constructed.
-        let mut shards = HashMap::with_capacity(shard_spec.urls.len());
-        for (shard_id, url) in shard_spec.urls {
-            shards.insert(
-                shard_id,
-                Shard::new(url, tenant_id, timeline_id, shard_id, auth_token.clone())?,
-            );
-        }
-
-        Ok(Self {
-            by_index: shards,
-            count: shard_spec.count,
-            stripe_size: shard_spec.stripe_size,
-        })
-    }
-
-    /// Looks up the given shard.
-    #[allow(clippy::result_large_err)] // TODO: check perf impact
-    fn get(&self, shard_id: ShardIndex) -> tonic::Result<&Shard> {
-        self.by_index
-            .get(&shard_id)
-            .ok_or_else(|| tonic::Status::not_found(format!("unknown shard {shard_id}")))
-    }
-
-    /// Returns shard 0.
-    fn get_zero(&self) -> &Shard {
-        self.get(ShardIndex::new(ShardNumber(0), self.count))
-            .expect("always present")
-    }
-}
-
-/// A single shard. Uses dedicated resource pools with the following structure:
-///
-/// * Channel pool: unbounded.
-///   * Unary client pool: MAX_UNARY_CLIENTS.
-///   * Stream client pool: unbounded.
-///     * Stream pool: MAX_STREAMS and MAX_STREAM_QUEUE_DEPTH.
-/// * Bulk channel pool: unbounded.
-///   * Bulk client pool: unbounded.
-///     * Bulk stream pool: MAX_BULK_STREAMS and MAX_BULK_STREAM_QUEUE_DEPTH.
-struct Shard {
-    /// Unary gRPC client pool.
-    client_pool: Arc<ClientPool>,
-    /// GetPage stream pool.
-    stream_pool: Arc<StreamPool>,
-    /// GetPage stream pool for bulk requests, e.g. prefetches.
-    bulk_stream_pool: Arc<StreamPool>,
-}
-
-impl Shard {
-    /// Creates a new shard. It has its own dedicated resource pools.
-    fn new(
-        url: String,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        shard_id: ShardIndex,
-        auth_token: Option<String>,
-    ) -> anyhow::Result<Self> {
-        // Common channel pool for unary and stream requests. Bounded by client/stream pools.
-        let channel_pool = ChannelPool::new(url.clone(), MAX_CLIENTS_PER_CHANNEL)?;
-
-        // Client pool for unary requests.
-        let client_pool = ClientPool::new(
-            channel_pool.clone(),
-            tenant_id,
-            timeline_id,
-            shard_id,
-            auth_token.clone(),
-            Some(MAX_UNARY_CLIENTS),
-        );
-
-        // GetPage stream pool. Uses a dedicated client pool to avoid starving out unary clients,
-        // but shares a channel pool with it (as it's unbounded).
-        let stream_pool = StreamPool::new(
-            ClientPool::new(
-                channel_pool.clone(),
-                tenant_id,
-                timeline_id,
-                shard_id,
-                auth_token.clone(),
-                None, // unbounded, limited by stream pool
-            ),
-            Some(MAX_STREAMS),
-            MAX_STREAM_QUEUE_DEPTH,
-        );
-
-        // Bulk GetPage stream pool, e.g. for prefetches. Uses dedicated channel/client/stream pools
-        // to avoid head-of-line blocking of latency-sensitive requests.
-        let bulk_stream_pool = StreamPool::new(
-            ClientPool::new(
-                ChannelPool::new(url, MAX_CLIENTS_PER_CHANNEL)?,
-                tenant_id,
-                timeline_id,
-                shard_id,
-                auth_token,
-                None, // unbounded, limited by stream pool
-            ),
-            Some(MAX_BULK_STREAMS),
-            MAX_BULK_STREAM_QUEUE_DEPTH,
-        );
-
-        Ok(Self {
-            client_pool,
-            stream_pool,
-            bulk_stream_pool,
-        })
-    }
-
-    /// Returns a pooled client for this shard.
-    async fn client(&self) -> tonic::Result<ClientGuard> {
-        self.client_pool
-            .get()
-            .await
-            .map_err(|err| tonic::Status::internal(format!("failed to get client: {err}")))
-    }
-
-    /// Returns a pooled stream for this shard. If `bulk` is `true`, uses the dedicated bulk stream
-    /// pool (e.g. for prefetches).
-    async fn stream(&self, bulk: bool) -> StreamGuard {
-        match bulk {
-            false => self.stream_pool.get().await,
-            true => self.bulk_stream_pool.get().await,
-        }
-    }
-}
--- a/pageserver/client_grpc/src/lib.rs
+++ b/pageserver/client_grpc/src/lib.rs
@@ -1,6 +0,0 @@
-mod client;
-mod pool;
-mod retry;
-mod split;
-
-pub use client::{PageserverClient, ShardSpec};
--- a/pageserver/client_grpc/src/pool.rs
+++ b/pageserver/client_grpc/src/pool.rs
@@ -1,755 +0,0 @@
-//! This module provides various Pageserver gRPC client resource pools.
-//!
-//! These pools are designed to reuse gRPC resources (connections, clients, and streams) across
-//! multiple concurrent callers (i.e. Postgres backends). This avoids the resource cost and latency
-//! of creating dedicated TCP connections and server tasks for every Postgres backend.
-//!
-//! Each resource has its own, nested pool. The pools are custom-built for the properties of each
-//! resource -- they are different enough that a generic pool isn't suitable.
-//!
-//! * ChannelPool: manages gRPC channels (TCP connections) to a single Pageserver. Multiple clients
-//!   can acquire and use the same channel concurrently (via HTTP/2 stream multiplexing), up to a
-//!   per-channel client limit. Channels may be closed when they are no longer used by any clients.
-//!
-//! * ClientPool: manages gRPC clients for a single tenant shard. Each client acquires a (shared)
-//!   channel from the ChannelPool for the client's lifetime. A client can only be acquired by a
-//!   single caller at a time, and is returned to the pool when dropped. Idle clients may be removed
-//!   from the pool after some time, to free up the channel.
-//!
-//! * StreamPool: manages bidirectional gRPC GetPage streams. Each stream acquires a client from the
-//!   ClientPool for the stream's lifetime. Internal streams are not exposed to callers; instead, it
-//!   returns a guard that can be used to send a single request, to properly enforce queue depth and
-//!   route responses. Internally, the pool will reuse or spin up a suitable stream for the request,
-//!   possibly pipelining multiple requests from multiple callers on the same stream (up to some
-//!   queue depth). Idle streams may be removed from the pool after a while to free up the client.
-//!
-//! Each channel corresponds to one TCP connection. Each client unary request and each stream
-//! corresponds to one HTTP/2 stream and server task.
-//!
-//! TODO: error handling (including custom error types).
-//! TODO: observability.
-
-use std::collections::{BTreeMap, HashMap};
-use std::num::NonZero;
-use std::ops::{Deref, DerefMut};
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::{Arc, Mutex, Weak};
-use std::time::{Duration, Instant};
-
-use futures::StreamExt as _;
-use tokio::sync::mpsc::{Receiver, Sender};
-use tokio::sync::{OwnedSemaphorePermit, Semaphore, mpsc, oneshot};
-use tokio_util::sync::CancellationToken;
-use tonic::transport::{Channel, Endpoint};
-use tracing::{error, warn};
-
-use pageserver_page_api as page_api;
-use utils::id::{TenantId, TimelineId};
-use utils::shard::ShardIndex;
-
-/// Reap channels/clients/streams that have been idle for this long.
-///
-/// TODO: this is per-pool. For nested pools, it can take up to 3x as long for a TCP connection to
-/// be reaped. First, we must wait for an idle stream to be reaped, which marks its client as idle.
-/// Then, we must wait for the idle client to be reaped, which marks its channel as idle. Then, we
-/// must wait for the idle channel to be reaped. Is that a problem? Maybe not, we just have to
-/// account for it when setting the reap threshold. Alternatively, we can immediately reap empty
-/// channels, and/or stream pool clients.
-const REAP_IDLE_THRESHOLD: Duration = match cfg!(any(test, feature = "testing")) {
-    false => Duration::from_secs(180),
-    true => Duration::from_secs(1), // exercise reaping in tests
-};
-
-/// Reap idle resources with this interval.
-const REAP_IDLE_INTERVAL: Duration = match cfg!(any(test, feature = "testing")) {
-    false => Duration::from_secs(10),
-    true => Duration::from_secs(1), // exercise reaping in tests
-};
-
-/// A gRPC channel pool, for a single Pageserver. A channel is shared by many clients (via HTTP/2
-/// stream multiplexing), up to `clients_per_channel` -- a new channel will be spun up beyond this.
-/// The pool does not limit the number of channels, and instead relies on `ClientPool` or
-/// `StreamPool` to limit the number of concurrent clients.
-///
-/// The pool is always wrapped in an outer `Arc`, to allow long-lived guards across tasks/threads.
-///
-/// TODO: consider prewarming a set of channels, to avoid initial connection latency.
-/// TODO: consider adding a circuit breaker for errors and fail fast.
-pub struct ChannelPool {
-    /// Pageserver endpoint to connect to.
-    endpoint: Endpoint,
-    /// Max number of clients per channel. Beyond this, a new channel will be created.
-    max_clients_per_channel: NonZero<usize>,
-    /// Open channels.
-    channels: Mutex<BTreeMap<ChannelID, ChannelEntry>>,
-    /// Reaps idle channels.
-    idle_reaper: Reaper,
-    /// Channel ID generator.
-    next_channel_id: AtomicUsize,
-}
-
-type ChannelID = usize;
-
-struct ChannelEntry {
-    /// The gRPC channel (i.e. TCP connection). Shared by multiple clients.
-    channel: Channel,
-    /// Number of clients using this channel.
-    clients: usize,
-    /// The channel has been idle (no clients) since this time. None if channel is in use.
-    /// INVARIANT: Some if clients == 0, otherwise None.
-    idle_since: Option<Instant>,
-}
-
-impl ChannelPool {
-    /// Creates a new channel pool for the given Pageserver endpoint.
-    pub fn new<E>(endpoint: E, max_clients_per_channel: NonZero<usize>) -> anyhow::Result<Arc<Self>>
-    where
-        E: TryInto<Endpoint> + Send + Sync + 'static,
-        <E as TryInto<Endpoint>>::Error: std::error::Error + Send + Sync,
-    {
-        let pool = Arc::new(Self {
-            endpoint: endpoint.try_into()?,
-            max_clients_per_channel,
-            channels: Mutex::default(),
-            idle_reaper: Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL),
-            next_channel_id: AtomicUsize::default(),
-        });
-        pool.idle_reaper.spawn(&pool);
-        Ok(pool)
-    }
-
-    /// Acquires a gRPC channel for a client. Multiple clients may acquire the same channel.
-    ///
-    /// This never blocks (except for mutex acquisition). The channel is connected lazily on first
-    /// use, and the `ChannelPool` does not have a channel limit. Channels will be re-established
-    /// automatically on failure (TODO: verify).
-    ///
-    /// Callers should not clone the returned channel, and must hold onto the returned guard as long
-    /// as the channel is in use. It is unfortunately not possible to enforce this: the Protobuf
-    /// client requires an owned `Channel` and we don't have access to the channel's internal
-    /// refcount.
-    ///
-    /// This is not performance-sensitive. It is only called when creating a new client, and clients
-    /// are pooled and reused by `ClientPool`. The total number of channels will also be small. O(n)
-    /// performance is therefore okay.
-    pub fn get(self: &Arc<Self>) -> ChannelGuard {
-        let mut channels = self.channels.lock().unwrap();
-
-        // Try to find an existing channel with available capacity. We check entries in BTreeMap
-        // order, to fill up the lower-ordered channels first. The ClientPool also prefers clients
-        // with lower-ordered channel IDs first. This will cluster clients in lower-ordered
-        // channels, and free up higher-ordered channels such that they can be reaped.
-        for (&id, entry) in channels.iter_mut() {
-            assert!(
-                entry.clients <= self.max_clients_per_channel.get(),
-                "channel overflow"
-            );
-            assert_eq!(
-                entry.idle_since.is_some(),
-                entry.clients == 0,
-                "incorrect channel idle state"
-            );
-            if entry.clients < self.max_clients_per_channel.get() {
-                entry.clients += 1;
-                entry.idle_since = None;
-                return ChannelGuard {
-                    pool: Arc::downgrade(self),
-                    id,
-                    channel: Some(entry.channel.clone()),
-                };
-            }
-        }
-
-        // Create a new channel. We connect lazily on first use, such that we don't block here and
-        // other clients can join onto the same channel while it's connecting.
-        let channel = self.endpoint.connect_lazy();
-
-        let id = self.next_channel_id.fetch_add(1, Ordering::Relaxed);
-        let entry = ChannelEntry {
-            channel: channel.clone(),
-            clients: 1, // account for the guard below
-            idle_since: None,
-        };
-        channels.insert(id, entry);
-
-        ChannelGuard {
-            pool: Arc::downgrade(self),
-            id,
-            channel: Some(channel),
-        }
-    }
-}
-
-impl Reapable for ChannelPool {
-    /// Reaps channels that have been idle since before the cutoff.
-    fn reap_idle(&self, cutoff: Instant) {
-        self.channels.lock().unwrap().retain(|_, entry| {
-            let Some(idle_since) = entry.idle_since else {
-                assert_ne!(entry.clients, 0, "empty channel not marked idle");
-                return true;
-            };
-            assert_eq!(entry.clients, 0, "idle channel has clients");
-            idle_since >= cutoff
-        })
-    }
-}
-
-/// Tracks a channel acquired from the pool. The owned inner channel can be obtained with `take()`,
-/// since the gRPC client requires an owned `Channel`.
-pub struct ChannelGuard {
-    pool: Weak<ChannelPool>,
-    id: ChannelID,
-    channel: Option<Channel>,
-}
-
-impl ChannelGuard {
-    /// Returns the inner owned channel. Panics if called more than once. The caller must hold onto
-    /// the guard as long as the channel is in use, and should not clone it.
-    pub fn take(&mut self) -> Channel {
-        self.channel.take().expect("channel already taken")
-    }
-}
-
-/// Returns the channel to the pool.
-impl Drop for ChannelGuard {
-    fn drop(&mut self) {
-        let Some(pool) = self.pool.upgrade() else {
-            return; // pool was dropped
-        };
-
-        let mut channels = pool.channels.lock().unwrap();
-        let entry = channels.get_mut(&self.id).expect("unknown channel");
-        assert!(entry.idle_since.is_none(), "active channel marked idle");
-        assert!(entry.clients > 0, "channel underflow");
-        entry.clients -= 1;
-        if entry.clients == 0 {
-            entry.idle_since = Some(Instant::now()); // mark channel as idle
-        }
-    }
-}
-
-/// A pool of gRPC clients for a single tenant shard. Each client acquires a channel from the inner
-/// `ChannelPool`. A client is only given out to single caller at a time. The pool limits the total
-/// number of concurrent clients to `max_clients` via semaphore.
-///
-/// The pool is always wrapped in an outer `Arc`, to allow long-lived guards across tasks/threads.
-pub struct ClientPool {
-    /// Tenant ID.
-    tenant_id: TenantId,
-    /// Timeline ID.
-    timeline_id: TimelineId,
-    /// Shard ID.
-    shard_id: ShardIndex,
-    /// Authentication token, if any.
-    auth_token: Option<String>,
-    /// Channel pool to acquire channels from.
-    channel_pool: Arc<ChannelPool>,
-    /// Limits the max number of concurrent clients for this pool. None if the pool is unbounded.
-    limiter: Option<Arc<Semaphore>>,
-    /// Idle pooled clients. Acquired clients are removed from here and returned on drop.
-    ///
-    /// The first client in the map will be acquired next. The map is sorted by client ID, which in
-    /// turn is sorted by its channel ID, such that we prefer acquiring idle clients from
-    /// lower-ordered channels. This allows us to free up and reap higher-numbered channels as idle
-    /// clients are reaped.
-    idle: Mutex<BTreeMap<ClientID, ClientEntry>>,
-    /// Reaps idle clients.
-    idle_reaper: Reaper,
-    /// Unique client ID generator.
-    next_client_id: AtomicUsize,
-}
-
-type ClientID = (ChannelID, usize);
-
-struct ClientEntry {
-    /// The pooled gRPC client.
-    client: page_api::Client,
-    /// The channel guard for the channel used by the client.
-    channel_guard: ChannelGuard,
-    /// The client has been idle since this time. All clients in `ClientPool::idle` are idle by
-    /// definition, so this is the time when it was added back to the pool.
-    idle_since: Instant,
-}
-
-impl ClientPool {
-    /// Builds a client pool for a single tenant shard and starts its idle reaper task.
-    ///
-    /// Clients are created on channels taken from `channel_pool`, which must point to a Pageserver
-    /// that hosts the tenant shard. At most `max_clients` clients may be acquired concurrently;
-    /// `None` leaves the pool unbounded.
-    pub fn new(
-        channel_pool: Arc<ChannelPool>,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        shard_id: ShardIndex,
-        auth_token: Option<String>,
-        max_clients: Option<NonZero<usize>>,
-    ) -> Arc<Self> {
-        // Only bounded pools get a semaphore, sized to the client limit.
-        let limiter = max_clients.map(|limit| Arc::new(Semaphore::new(limit.get())));
-        let idle_reaper = Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL);
-
-        let this = Arc::new(Self {
-            tenant_id,
-            timeline_id,
-            shard_id,
-            auth_token,
-            channel_pool,
-            limiter,
-            idle: Mutex::default(),
-            idle_reaper,
-            next_client_id: AtomicUsize::default(),
-        });
-
-        // The reaper takes the `Arc`, so it can only be started once the pool exists.
-        this.idle_reaper.spawn(&this);
-        this
-    }
-
-    /// Hands out a client, reusing an idle one when available and constructing one otherwise.
-    /// Connections are established lazily and do not block, but the call waits for a permit when
-    /// the pool is bounded and already at `max_clients`. Dropping the returned guard puts the
-    /// client back in the pool.
-    ///
-    /// This is moderately performance-sensitive. It is called for every unary request, but these
-    /// establish a new gRPC stream per request so they're already expensive. GetPage requests use
-    /// the `StreamPool` instead.
-    pub async fn get(self: &Arc<Self>) -> anyhow::Result<ClientGuard> {
-        // Bounded pools hand out one permit per acquired client; unbounded pools have none.
-        let permit = match self.limiter.clone() {
-            Some(limiter) => Some(limiter.acquire_owned().await.expect("never closed")),
-            None => None,
-        };
-
-        // Take the first idle client in map order, if any.
-        let reused = self.idle.lock().unwrap().pop_first();
-
-        let (id, client, channel_guard) = match reused {
-            // Fast path: an idle client was available.
-            Some((id, entry)) => (id, entry.client, entry.channel_guard),
-
-            // Slow path: build a new client on a channel from the channel pool.
-            None => {
-                let mut channel_guard = self.channel_pool.get();
-                let client = page_api::Client::new(
-                    channel_guard.take(),
-                    self.tenant_id,
-                    self.timeline_id,
-                    self.shard_id,
-                    self.auth_token.clone(),
-                    None,
-                )?;
-                // Allocate the client sequence number only once construction has succeeded.
-                let seq = self.next_client_id.fetch_add(1, Ordering::Relaxed);
-                ((channel_guard.id, seq), client, channel_guard)
-            }
-        };
-
-        Ok(ClientGuard {
-            pool: Arc::downgrade(self),
-            id,
-            client: Some(client),
-            channel_guard: Some(channel_guard),
-            permit,
-        })
-    }
-}
-
-impl Reapable for ClientPool {
-    /// Drops every idle client whose `idle_since` is earlier than `cutoff`.
-    fn reap_idle(&self, cutoff: Instant) {
-        let mut idle = self.idle.lock().unwrap();
-        idle.retain(|_, entry| entry.idle_since >= cutoff);
-    }
-}
-
-/// A client acquired from the pool. The inner client can be accessed via Deref. The client is
-/// returned to the pool when dropped.
-pub struct ClientGuard {
-    // Weak reference back to the owning pool; if the pool is gone on drop, the client is discarded.
-    pool: Weak<ClientPool>,
-    // Key under which the client is re-inserted into `ClientPool::idle` on drop.
-    id: ClientID,
-    client: Option<page_api::Client>,     // Some until dropped
-    channel_guard: Option<ChannelGuard>,  // Some until dropped
-    permit: Option<OwnedSemaphorePermit>, // None if pool is unbounded
-}
-
-impl Deref for ClientGuard {
-    type Target = page_api::Client;
-
-    /// Borrows the pooled client. The client is only taken out of the guard when it is dropped.
-    fn deref(&self) -> &Self::Target {
-        let client = self.client.as_ref();
-        client.expect("not dropped")
-    }
-}
-
-impl DerefMut for ClientGuard {
-    /// Mutably borrows the pooled client. The client is only taken out of the guard on drop.
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        let client = self.client.as_mut();
-        client.expect("not dropped")
-    }
-}
-
-/// Returns the client to the pool.
-impl Drop for ClientGuard {
-    fn drop(&mut self) {
-        // If the pool is already gone there is nowhere to return the client to; it is simply
-        // dropped together with the guard.
-        if let Some(pool) = self.pool.upgrade() {
-            let client = self.client.take().expect("dropped once");
-            let channel_guard = self.channel_guard.take().expect("dropped once");
-            let idle_since = Instant::now();
-
-            let mut idle = pool.idle.lock().unwrap();
-            idle.insert(
-                self.id,
-                ClientEntry {
-                    client,
-                    channel_guard,
-                    idle_since,
-                },
-            );
-        }
-
-        _ = self.permit; // returned on drop, referenced for visibility
-    }
-}
-
-/// A pool of bidirectional gRPC streams. Currently only used for GetPage streams. Each stream
-/// acquires a client from the inner `ClientPool` for the stream's lifetime.
-///
-/// Individual streams are not exposed to callers -- instead, the returned guard can be used to send
-/// a single request and await the response. Internally, requests are multiplexed across streams and
-/// channels. This allows proper queue depth enforcement and response routing.
-///
-/// TODO: consider making this generic over request and response types; not currently needed.
-pub struct StreamPool {
-    /// The client pool to acquire clients from. Must be unbounded.
-    client_pool: Arc<ClientPool>,
-    /// All pooled streams.
-    ///
-    /// Incoming requests will be sent over an existing stream with available capacity. If all
-    /// streams are full, a new one is spun up and added to the pool (up to `max_streams`). Each
-    /// stream has an associated Tokio task that processes requests and responses.
-    streams: Mutex<HashMap<StreamID, StreamEntry>>,
-    /// The max number of concurrent streams, or None if unbounded.
-    max_streams: Option<NonZero<usize>>,
-    /// The max number of concurrent requests per stream.
-    max_queue_depth: NonZero<usize>,
-    /// Limits the max number of concurrent requests, given by `max_streams * max_queue_depth`.
-    /// None if the pool is unbounded. Each `StreamGuard` holds one permit until it is dropped.
-    limiter: Option<Arc<Semaphore>>,
-    /// Reaps idle streams.
-    idle_reaper: Reaper,
-    /// Stream ID generator.
-    next_stream_id: AtomicUsize,
-}
-
-/// Pool-unique stream identifier, allocated from `StreamPool::next_stream_id`.
-type StreamID = usize;
-/// Caller-to-stream-task channel: each item is a request plus the channel to answer it on.
-type RequestSender = Sender<(page_api::GetPageRequest, ResponseSender)>;
-/// Receiving half of `RequestSender`, owned by the stream task.
-type RequestReceiver = Receiver<(page_api::GetPageRequest, ResponseSender)>;
-/// One-shot channel on which the stream task delivers the result of a single request.
-type ResponseSender = oneshot::Sender<tonic::Result<page_api::GetPageResponse>>;
-
-/// Pool-side bookkeeping for one stream. The stream itself runs in a separately spawned task.
-struct StreamEntry {
-    /// Sends caller requests to the stream task. The stream task exits when this is dropped.
-    sender: RequestSender,
-    /// Number of in-flight requests on this stream.
-    queue_depth: usize,
-    /// The time when this stream went idle (queue_depth == 0).
-    /// INVARIANT: Some if queue_depth == 0, otherwise None.
-    idle_since: Option<Instant>,
-}
-
-impl StreamPool {
-    /// Builds a stream pool on top of `client_pool` and starts its idle reaper task. Each stream
-    /// carries up to `max_queue_depth` concurrent requests, and at most `max_streams` streams run
-    /// concurrently (no limit if `None`).
-    ///
-    /// The client pool must be unbounded; this panics otherwise. The stream pool enforces its own
-    /// limits, and because streams are long-lived they can cause persistent starvation if they
-    /// exhaust the client pool. The stream pool should generally have its own dedicated client
-    /// pool (but it can share a channel pool with others since these are always unbounded).
-    pub fn new(
-        client_pool: Arc<ClientPool>,
-        max_streams: Option<NonZero<usize>>,
-        max_queue_depth: NonZero<usize>,
-    ) -> Arc<Self> {
-        assert!(client_pool.limiter.is_none(), "bounded client pool");
-
-        // A bounded pool admits `max_streams * max_queue_depth` requests in total.
-        let limiter = match max_streams {
-            Some(max_streams) => {
-                let permits = max_streams.get() * max_queue_depth.get();
-                Some(Arc::new(Semaphore::new(permits)))
-            }
-            None => None,
-        };
-
-        let this = Arc::new(Self {
-            client_pool,
-            streams: Mutex::default(),
-            max_streams,
-            max_queue_depth,
-            limiter,
-            idle_reaper: Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL),
-            next_stream_id: AtomicUsize::default(),
-        });
-
-        // The reaper takes the `Arc`, so it can only be started once the pool exists.
-        this.idle_reaper.spawn(&this);
-        this
-    }
-
-    /// Acquires an available stream from the pool, or spins up a new stream async if all streams
-    /// are full. Returns a guard that can be used to send a single request on the stream and await
-    /// the response, with queue depth quota already acquired. Blocks if the pool is bounded and at
-    /// capacity (i.e. `max_streams * max_queue_depth` requests in flight).
-    ///
-    /// Panics if a stream's queue depth or idle state violates the pool invariants, or if the
-    /// number of streams exceeds `max_streams`.
-    ///
-    /// This is very performance-sensitive, as it is on the GetPage hot path.
-    ///
-    /// TODO: this must do something more sophisticated for performance. We want:
-    ///
-    /// * Cheap, concurrent access in the common case where we can use a pooled stream.
-    /// * Quick acquisition of pooled streams with available capacity.
-    /// * Prefer streams that belong to lower-numbered channels, to reap idle channels.
-    /// * Prefer filling up existing streams' queue depth before spinning up new streams.
-    /// * Don't hold a lock while spinning up new streams.
-    /// * Allow concurrent clients to join onto streams while they're spun up.
-    /// * Allow spinning up multiple streams concurrently, but don't overshoot limits.
-    ///
-    /// For now, we just do something simple but inefficient (linear scan under mutex).
-    pub async fn get(self: &Arc<Self>) -> StreamGuard {
-        // Acquire a permit if the pool is bounded.
-        let mut permit = None;
-        if let Some(limiter) = self.limiter.clone() {
-            permit = Some(limiter.acquire_owned().await.expect("never closed"));
-        }
-        let mut streams = self.streams.lock().unwrap();
-
-        // Look for a pooled stream with available capacity.
-        for (&id, entry) in streams.iter_mut() {
-            assert!(
-                entry.queue_depth <= self.max_queue_depth.get(),
-                "stream queue overflow"
-            );
-            assert_eq!(
-                entry.idle_since.is_some(),
-                entry.queue_depth == 0,
-                "incorrect stream idle state"
-            );
-            if entry.queue_depth < self.max_queue_depth.get() {
-                entry.queue_depth += 1;
-                entry.idle_since = None;
-                return StreamGuard {
-                    pool: Arc::downgrade(self),
-                    id,
-                    sender: entry.sender.clone(),
-                    permit,
-                };
-            }
-        }
-
-        // No available stream, spin up a new one. We install the stream entry in the pool first and
-        // return the guard, while spinning up the stream task async. This allows other callers to
-        // join onto this stream and also create additional streams concurrently if this fills up.
-        let id = self.next_stream_id.fetch_add(1, Ordering::Relaxed);
-        let (req_tx, req_rx) = mpsc::channel(self.max_queue_depth.get());
-        let entry = StreamEntry {
-            sender: req_tx.clone(),
-            queue_depth: 1, // reserve quota for this caller
-            idle_since: None,
-        };
-        streams.insert(id, entry);
-
-        if let Some(max_streams) = self.max_streams {
-            assert!(streams.len() <= max_streams.get(), "stream overflow");
-        };
-
-        let client_pool = self.client_pool.clone();
-        let pool = Arc::downgrade(self);
-
-        tokio::spawn(async move {
-            if let Err(err) = Self::run_stream(client_pool, req_rx).await {
-                error!("stream failed: {err}");
-            }
-            // Remove stream from pool on exit. Weak reference to avoid holding the pool alive.
-            //
-            // NB: the entry may already be gone. The idle reaper removes idle entries from the
-            // pool, and dropping the entry's sender is what closes the request channel and makes
-            // this task exit, so a missing entry is the normal outcome for a reaped stream and
-            // must not be treated as an error.
-            if let Some(pool) = pool.upgrade() {
-                _ = pool.streams.lock().unwrap().remove(&id);
-            }
-        });
-
-        StreamGuard {
-            pool: Arc::downgrade(self),
-            id,
-            sender: req_tx,
-            permit,
-        }
-    }
-
-    /// Runs a stream task. This acquires a client from the `ClientPool` and establishes a
-    /// bidirectional GetPage stream, then forwards requests and responses between callers and the
-    /// stream. It does not track or enforce queue depths -- that's done by `get()` since it must be
-    /// atomic with pool stream acquisition.
-    ///
-    /// The task exits when the request channel is closed, or on a stream error. The caller is
-    /// responsible for removing the stream from the pool on exit.
-    ///
-    /// Returns `Ok(())` when either the caller request channel or the response stream is closed,
-    /// and an error if acquiring the client, opening the stream, sending a request, or receiving a
-    /// response fails.
-    async fn run_stream(
-        client_pool: Arc<ClientPool>,
-        mut caller_rx: RequestReceiver,
-    ) -> anyhow::Result<()> {
-        // Acquire a client from the pool and create a stream.
-        let mut client = client_pool.get().await?;
-
-        // Requests are fed to the gRPC stream through a capacity-1 channel wrapped as a `Stream`.
-        let (req_tx, req_rx) = mpsc::channel(1);
-        let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
-        let mut resp_stream = client.get_pages(req_stream).await?;
-
-        // Track caller response channels by request ID. If the task returns early, these response
-        // channels will be dropped and the waiting callers will receive an error.
-        let mut callers = HashMap::new();
-
-        // Process requests and responses.
-        loop {
-            tokio::select! {
-                // Receive requests from callers and send them to the stream.
-                req = caller_rx.recv() => {
-                    // Shut down if request channel is closed.
-                    let Some((req, resp_tx)) = req else {
-                        return Ok(());
-                    };
-
-                    // Store the response channel by request ID.
-                    if callers.contains_key(&req.request_id) {
-                        // Error on request ID duplicates. Ignore callers that went away.
-                        _ = resp_tx.send(Err(tonic::Status::invalid_argument(
-                            format!("duplicate request ID: {}", req.request_id),
-                        )));
-                        continue;
-                    }
-                    callers.insert(req.request_id, resp_tx);
-
-                    // Send the request on the stream. Bail out if the send fails.
-                    // NB: responses are not polled while this send is pending, since the await
-                    // happens inside the select arm.
-                    req_tx.send(req).await.map_err(|_| {
-                        tonic::Status::unavailable("stream closed")
-                    })?;
-                }
-
-                // Receive responses from the stream and send them to callers.
-                resp = resp_stream.next() => {
-                    // Shut down if the stream is closed, and bail out on stream errors.
-                    let Some(resp) = resp.transpose()? else {
-                        return Ok(())
-                    };
-
-                    // Send the response to the caller. Ignore errors if the caller went away.
-                    let Some(resp_tx) = callers.remove(&resp.request_id) else {
-                        warn!("received response for unknown request ID: {}", resp.request_id);
-                        continue;
-                    };
-                    _ = resp_tx.send(Ok(resp));
-                }
-            }
-        }
-    }
-}
-
-impl Reapable for StreamPool {
-    /// Removes streams that went idle before `cutoff`; streams with in-flight requests are kept.
-    /// Panics if an entry's idle marker and queue depth disagree.
-    fn reap_idle(&self, cutoff: Instant) {
-        let mut streams = self.streams.lock().unwrap();
-        streams.retain(|_, entry| match entry.idle_since {
-            // Idle stream: keep it only if it went idle at or after the cutoff.
-            Some(idle_since) => {
-                assert_eq!(entry.queue_depth, 0, "idle stream has requests");
-                idle_since >= cutoff
-            }
-            // Active stream: never reaped.
-            None => {
-                assert_ne!(entry.queue_depth, 0, "empty stream not marked idle");
-                true
-            }
-        });
-    }
-}
-
-/// A pooled stream reference. Can be used to send a single request, to properly enforce queue
-/// depth. Queue depth is already reserved and will be returned on drop.
-pub struct StreamGuard {
-    // Weak reference back to the owning pool, used on drop to release the queue depth slot.
-    pool: Weak<StreamPool>,
-    // ID of the stream whose queue depth this guard has reserved.
-    id: StreamID,
-    // Clone of the stream's request sender, used by `send()`.
-    sender: RequestSender,
-    permit: Option<OwnedSemaphorePermit>, // None if pool is unbounded
-}
-
-impl StreamGuard {
-    /// Submits one request on the stream and waits for its response. The guard is consumed, as it
-    /// represents quota for exactly one request; the queue depth quota goes back to the pool when
-    /// the guard is dropped on return.
-    ///
-    /// The `GetPageRequest::request_id` must be unique across in-flight requests.
-    ///
-    /// Returns `Unavailable` ("stream closed") if the stream task is no longer accepting requests
-    /// or goes away before responding.
-    ///
-    /// NB: errors are often returned as `GetPageResponse::status_code` instead of `tonic::Status`
-    /// to avoid tearing down the stream for per-request errors. Callers must check this.
-    pub async fn send(
-        self,
-        req: page_api::GetPageRequest,
-    ) -> tonic::Result<page_api::GetPageResponse> {
-        let (resp_tx, resp_rx) = oneshot::channel();
-
-        // Hand the request, and the channel to answer it on, to the stream task.
-        if self.sender.send((req, resp_tx)).await.is_err() {
-            return Err(tonic::Status::unavailable("stream closed"));
-        }
-
-        // The stream task dropping the response channel means the stream went away.
-        match resp_rx.await {
-            Ok(result) => result,
-            Err(_) => Err(tonic::Status::unavailable("stream closed")),
-        }
-    }
-}
-
-impl Drop for StreamGuard {
-    /// Returns this guard's queue depth reservation to its stream and marks the stream idle when
-    /// the last reservation is released. Does nothing if the pool or the stream entry is gone.
-    fn drop(&mut self) {
-        let Some(pool) = self.pool.upgrade() else {
-            return; // pool was dropped
-        };
-
-        // Release the queue depth reservation on drop. This can prematurely decrement it if dropped
-        // before the response is received, but that's okay.
-        let mut streams = pool.streams.lock().unwrap();
-
-        // The entry can legitimately be missing: when the stream fails, its task removes the
-        // entry from the pool while guards for it may still be outstanding. There is no quota left
-        // to return in that case. Panicking here would also poison the pool mutex, since the lock
-        // is held, and break every subsequent caller.
-        let Some(entry) = streams.get_mut(&self.id) else {
-            return; // stream was already removed
-        };
-        assert!(entry.idle_since.is_none(), "active stream marked idle");
-        assert!(entry.queue_depth > 0, "stream queue underflow");
-        entry.queue_depth -= 1;
-        if entry.queue_depth == 0 {
-            entry.idle_since = Some(Instant::now()); // mark stream as idle
-        }
-
-        _ = self.permit; // returned on drop, referenced for visibility
-    }
-}
-
-/// Periodically reaps idle resources from a pool. The reaping itself is delegated to the pool's
-/// `Reapable::reap_idle` implementation.
-struct Reaper {
-    /// The task check interval.
-    interval: Duration,
-    /// The threshold for reaping idle resources.
-    threshold: Duration,
-    /// Cancels the reaper task. Cancelled when the reaper is dropped.
-    cancel: CancellationToken,
-}
-
-impl Reaper {
-    /// Builds a reaper that wakes up every `interval` and reaps resources that have been idle for
-    /// longer than `threshold`. No task runs until `spawn()` is called.
-    pub fn new(threshold: Duration, interval: Duration) -> Self {
-        Self {
-            interval,
-            threshold,
-            cancel: CancellationToken::new(),
-        }
-    }
-
-    /// Starts the background task that periodically reaps idle resources from the given pool.
-    /// The task stops when the reaper is dropped, or when the pool itself has been dropped.
-    pub fn spawn(&self, pool: &Arc<impl Reapable>) {
-        // NB: only a weak pool reference is moved into the task, otherwise the task would keep
-        // the pool alive forever.
-        let weak_pool = Arc::downgrade(pool);
-        let cancel = self.cancel.clone();
-        let interval = self.interval;
-        let threshold = self.threshold;
-
-        tokio::spawn(async move {
-            loop {
-                tokio::select! {
-                    _ = tokio::time::sleep(interval) => {
-                        match weak_pool.upgrade() {
-                            // Reap everything that went idle before `now - threshold`.
-                            Some(pool) => pool.reap_idle(Instant::now() - threshold),
-                            // Pool was dropped, nothing left to reap.
-                            None => return,
-                        }
-                    }
-
-                    _ = cancel.cancelled() => return,
-                }
-            }
-        });
-    }
-}
-
-/// Stops the background task (if one was spawned) when the reaper goes away.
-impl Drop for Reaper {
-    fn drop(&mut self) {
-        self.cancel.cancel(); // cancel reaper task
-    }
-}
-
-/// A reapable resource pool.
-///
-/// `Reaper::spawn` moves a weak reference to the pool into a spawned Tokio task, hence the
-/// `Send + Sync + 'static` bounds.
-trait Reapable: Send + Sync + 'static {
-    /// Reaps resources that have been idle since before the given cutoff.
-    fn reap_idle(&self, cutoff: Instant);
-}
--- a/pageserver/client_grpc/src/retry.rs
+++ b/pageserver/client_grpc/src/retry.rs
@@ -1,152 +0,0 @@
-use std::time::Duration;
-
-use tokio::time::Instant;
-use tracing::{error, info, warn};
-
-use utils::backoff::exponential_backoff_duration;
-
-/// A retry handler for Pageserver gRPC requests.
-///
-/// This is used instead of backoff::retry for better control and observability.
-///
-/// The type carries no state: timeouts and backoff bounds are associated constants.
-#[derive(Clone, Copy)]
-pub struct Retry;
-
-impl Retry {
-    /// The per-request timeout. An attempt that exceeds it fails with `DeadlineExceeded`.
-    // TODO: tune these, and/or make them configurable. Should we retry forever?
-    const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
-    /// The total timeout across all attempts, including backoff sleeps.
-    const TOTAL_TIMEOUT: Duration = Duration::from_secs(60);
-    /// The initial backoff duration.
-    const BASE_BACKOFF: Duration = Duration::from_millis(10);
-    /// The maximum backoff duration.
-    const MAX_BACKOFF: Duration = Duration::from_secs(10);
-    /// If true, log successful requests. For debugging.
-    const LOG_SUCCESS: bool = false;
-
-    /// Runs the given async closure with timeouts and retries (exponential backoff). Logs errors,
-    /// using the current tracing span for context.
-    ///
-    /// `f` is called once per attempt to produce a fresh request future. Returns the first
-    /// successful result, the first non-retryable error, or -- once the total timeout expires --
-    /// the last retryable error seen (`DeadlineExceeded` if no attempt had completed yet).
-    ///
-    /// Only certain gRPC status codes are retried, see [`Self::should_retry`]. For default
-    /// timeouts, see [`Self::REQUEST_TIMEOUT`] and [`Self::TOTAL_TIMEOUT`].
-    pub async fn with<T, F, O>(&self, mut f: F) -> tonic::Result<T>
-    where
-        F: FnMut() -> O,
-        O: Future<Output = tonic::Result<T>>,
-    {
-        let started = Instant::now();
-        let deadline = started + Self::TOTAL_TIMEOUT;
-        // The most recent retryable error, returned if the total timeout fires.
-        let mut last_error = None;
-        // Number of failed attempts so far; also determines the backoff before the next attempt.
-        let mut retries = 0;
-        loop {
-            // Set up a future to wait for the backoff (if any) and run the request with a timeout.
-            let backoff_and_try = async {
-                // NB: sleep() always sleeps 1ms, even when given a 0 argument. See:
-                // https://github.com/tokio-rs/tokio/issues/6866
-                if let Some(backoff) = Self::backoff_duration(retries) {
-                    tokio::time::sleep(backoff).await;
-                }
-
-                let request_started = Instant::now();
-                tokio::time::timeout(Self::REQUEST_TIMEOUT, f())
-                    .await
-                    .map_err(|_| {
-                        tonic::Status::deadline_exceeded(format!(
-                            "request timed out after {:.3}s",
-                            request_started.elapsed().as_secs_f64()
-                        ))
-                    })?
-            };
-
-            // Wait for the backoff and request, or bail out if the total timeout is exceeded.
-            // If the deadline fires first, the in-progress backoff/attempt future is dropped.
-            let result = tokio::select! {
-                result = backoff_and_try => result,
-
-                _ = tokio::time::sleep_until(deadline) => {
-                    let last_error = last_error.unwrap_or_else(|| {
-                        tonic::Status::deadline_exceeded(format!(
-                            "request timed out after {:.3}s",
-                            started.elapsed().as_secs_f64()
-                        ))
-                    });
-                    error!(
-                        "giving up after {:.3}s and {retries} retries, last error {:?}: {}",
-                        started.elapsed().as_secs_f64(), last_error.code(), last_error.message(),
-                    );
-                    return Err(last_error);
-                }
-            };
-
-            match result {
-                // Success, return the result.
-                Ok(result) => {
-                    if retries > 0 || Self::LOG_SUCCESS {
-                        info!(
-                            "request succeeded after {retries} retries in {:.3}s",
-                            started.elapsed().as_secs_f64(),
-                        );
-                    }
-
-                    return Ok(result);
-                }
-
-                // Error, retry or bail out.
-                Err(status) => {
-                    let (code, message) = (status.code(), status.message());
-                    let attempt = retries + 1;
-
-                    if !Self::should_retry(code) {
-                        // NB: include the attempt here too. This isn't necessarily the first
-                        // attempt, because the error may change between attempts.
-                        error!(
-                            "request failed with {code:?}: {message}, not retrying (attempt {attempt})"
-                        );
-                        return Err(status);
-                    }
-
-                    warn!("request failed with {code:?}: {message}, retrying (attempt {attempt})");
-
-                    retries += 1;
-                    last_error = Some(status);
-                }
-            }
-        }
-    }
-
-    /// Computes how long to wait before the next attempt, given the number of retries so far.
-    /// Yields `None` when the computed backoff is zero, so that the caller can skip sleeping.
-    fn backoff_duration(retry: usize) -> Option<Duration> {
-        let base = Self::BASE_BACKOFF.as_secs_f64();
-        let max = Self::MAX_BACKOFF.as_secs_f64();
-        let backoff = exponential_backoff_duration(retry as u32, base, max);
-
-        if backoff.is_zero() {
-            None
-        } else {
-            Some(backoff)
-        }
-    }
-
-    /// Returns true if the given status code should be retried. Panics on `Ok`, which is never a
-    /// valid error status.
-    fn should_retry(code: tonic::Code) -> bool {
-        use tonic::Code;
-
-        // NB: deliberately no wildcard arm, so that every code is classified explicitly.
-        match code {
-            Code::Ok => panic!("unexpected Ok status code"),
-
-            // Transient, or possibly transient, conditions: worth another attempt.
-            Code::Aborted
-            | Code::Cancelled
-            | Code::DeadlineExceeded // maybe transient slowness
-            | Code::Internal // maybe transient failure?
-            | Code::ResourceExhausted
-            | Code::Unavailable => true,
-
-            // These will likely continue to fail, so don't retry.
-            Code::AlreadyExists
-            | Code::DataLoss
-            | Code::FailedPrecondition
-            | Code::InvalidArgument
-            | Code::NotFound
-            | Code::OutOfRange
-            | Code::PermissionDenied
-            | Code::Unauthenticated
-            | Code::Unimplemented
-            | Code::Unknown => false,
-        }
-    }
-}
--- a/pageserver/client_grpc/src/split.rs
+++ b/pageserver/client_grpc/src/split.rs
@@ -1,168 +0,0 @@
-use std::collections::HashMap;
-
-use bytes::Bytes;
-
-use pageserver_api::key::rel_block_to_key;
-use pageserver_api::shard::{ShardStripeSize, key_to_shard_number};
-use pageserver_page_api as page_api;
-use utils::shard::{ShardCount, ShardIndex};
-
-/// Splits GetPageRequests that straddle shard boundaries and assembles the responses.
-///
-/// Intended call sequence: `split()`, then `drain_requests()` to obtain the per-shard requests,
-/// `add_response()` once per shard, and finally `assemble_response()`.
-/// TODO: add tests for this.
-pub struct GetPageSplitter {
-    /// The original request ID. Used for all shard requests.
-    request_id: page_api::RequestID,
-    /// Split requests by shard index. Emptied by `drain_requests()`.
-    requests: HashMap<ShardIndex, page_api::GetPageRequest>,
-    /// Maps the offset in `GetPageRequest::block_numbers` to the owning shard. Used to assemble
-    /// the response pages in the same order as the original request.
-    block_shards: Vec<ShardIndex>,
-    /// Page responses by shard index. Will be assembled into a single response.
-    responses: HashMap<ShardIndex, Vec<Bytes>>,
-}
-
-impl GetPageSplitter {
-    /// Returns the owning shard if every block of the request lives on the same shard, or `None`
-    /// if the request spans several shards. Single-shard requests are the common case, so callers
-    /// check this first to avoid the allocations and overhead of splitting.
-    ///
-    /// Panics if a sharded request has no block numbers; the caller must ensure at least one.
-    pub fn is_single_shard(
-        req: &page_api::GetPageRequest,
-        count: ShardCount,
-        stripe_size: ShardStripeSize,
-    ) -> Option<ShardIndex> {
-        // Fast path: unsharded tenant.
-        if count.is_unsharded() {
-            return Some(ShardIndex::unsharded());
-        }
-
-        // Maps a block number of this relation to the number of the shard that owns it.
-        let shard_of = |blkno| {
-            let key = rel_block_to_key(req.rel, blkno);
-            key_to_shard_number(count, stripe_size, &key)
-        };
-
-        // The first block determines the candidate shard; all other blocks must agree with it.
-        let (&first_blkno, rest) = req.block_numbers.split_first().expect("no pages");
-        let first_shard = shard_of(first_blkno);
-
-        if rest.iter().all(|&blkno| shard_of(blkno) == first_shard) {
-            Some(ShardIndex::new(first_shard, count))
-        } else {
-            None
-        }
-    }
-
-    /// Splits the given request into one request per shard touched by its block numbers. Every
-    /// per-shard request keeps the original request ID, class, relation and read LSN. The owning
-    /// shard of each block is recorded in request order, for reassembly of the response.
-    pub fn split(
-        req: page_api::GetPageRequest,
-        count: ShardCount,
-        stripe_size: ShardStripeSize,
-    ) -> Self {
-        // The caller should make sure we don't split requests unnecessarily.
-        debug_assert!(
-            Self::is_single_shard(&req, count, stripe_size).is_none(),
-            "unnecessary request split"
-        );
-
-        let page_api::GetPageRequest {
-            request_id,
-            request_class,
-            rel,
-            read_lsn,
-            block_numbers,
-        } = req;
-
-        let mut block_shards = Vec::with_capacity(block_numbers.len());
-        let mut requests = HashMap::with_capacity(2); // common case
-
-        // Route each block to its shard's request, creating that request on first use.
-        for blkno in block_numbers {
-            let key = rel_block_to_key(rel, blkno);
-            let shard_id = ShardIndex::new(key_to_shard_number(count, stripe_size, &key), count);
-            block_shards.push(shard_id);
-
-            requests
-                .entry(shard_id)
-                .or_insert_with(|| page_api::GetPageRequest {
-                    request_id,
-                    request_class,
-                    rel,
-                    read_lsn,
-                    block_numbers: Vec::new(),
-                })
-                .block_numbers
-                .push(blkno);
-        }
-
-        Self {
-            request_id,
-            responses: HashMap::with_capacity(requests.len()),
-            requests,
-            block_shards,
-        }
-    }
-
-    /// Drains the per-shard requests, moving them out of the hashmap to avoid extra allocations.
-    /// The returned iterator borrows the splitter mutably; any requests not consumed from it are
-    /// dropped along with the iterator.
-    pub fn drain_requests(
-        &mut self,
-    ) -> impl Iterator<Item = (ShardIndex, page_api::GetPageRequest)> {
-        self.requests.drain()
-    }
-
-    /// Records the pages returned by the given shard.
-    ///
-    /// Panics if the response has a non-OK status code, if its request ID differs from the
-    /// original request, or if a response was already recorded for this shard.
-    pub fn add_response(&mut self, shard_id: ShardIndex, response: page_api::GetPageResponse) {
-        // NB: this is called below a `Retry::with()`, so unrecoverable errors should not use a
-        // retryable status code (e.g. `Internal`).
-
-        // The caller should already have converted status codes into tonic::Status.
-        assert_eq!(
-            response.status_code,
-            page_api::GetPageStatusCode::Ok,
-            "non-OK response"
-        );
-
-        // The stream pool ensures the response matches the request ID.
-        assert_eq!(response.request_id, self.request_id, "response ID mismatch");
-
-        // Store the pages. We only dispatch one request per shard, so the slot must be empty.
-        let previous = self.responses.insert(shard_id, response.page_images);
-        if previous.is_some() {
-            panic!("duplicate response for shard {shard_id}");
-        }
-    }
-
-    /// Merges the per-shard responses into one response whose pages are in the same order as the
-    /// block numbers of the original request.
-    ///
-    /// Returns `DataLoss` if a shard's response or one of its pages is missing, and `OutOfRange`
-    /// if a shard returned more pages than were requested from it.
-    #[allow(clippy::result_large_err)]
-    pub fn assemble_response(self) -> tonic::Result<page_api::GetPageResponse> {
-        // NB: this is called below a `Retry::with()`, so unrecoverable errors should not use a
-        // retryable status code (e.g. `Internal`).
-
-        let Self {
-            request_id,
-            block_shards,
-            responses,
-            ..
-        } = self;
-
-        // Turn each shard's pages into an iterator we can pull from in request order.
-        let mut remaining: HashMap<_, _> = responses
-            .into_iter()
-            .map(|(shard_id, pages)| (shard_id, pages.into_iter()))
-            .collect();
-
-        // Pull one page per requested block from the shard that owns the block.
-        let mut page_images = Vec::with_capacity(block_shards.len());
-        for shard_id in &block_shards {
-            let Some(pages) = remaining.get_mut(shard_id) else {
-                return Err(tonic::Status::data_loss(format!(
-                    "missing response for shard {shard_id}"
-                )));
-            };
-            let Some(page) = pages.next() else {
-                return Err(tonic::Status::data_loss(format!(
-                    "missing page from shard {shard_id}"
-                )));
-            };
-            page_images.push(page);
-        }
-
-        // Every shard iterator must now be exhausted, otherwise a shard sent too many pages.
-        for (shard_id, mut pages) in remaining {
-            if pages.next().is_some() {
-                return Err(tonic::Status::out_of_range(format!(
-                    "extra pages returned from shard {shard_id}"
-                )));
-            }
-        }
-
-        Ok(page_api::GetPageResponse {
-            request_id,
-            status_code: page_api::GetPageStatusCode::Ok,
-            reason: None,
-            page_images,
-        })
-    }
-}
--- a/pageserver/page_api/src/client.rs
+++ b/pageserver/page_api/src/client.rs
@@ -1,153 +1,23 @@
-use anyhow::anyhow;
+use anyhow::Result;
 use futures::{Stream, StreamExt as _, TryStreamExt as _};
 use tokio::io::AsyncRead;
 use tokio_util::io::StreamReader;
-use tonic::codec::CompressionEncoding;
 use tonic::metadata::AsciiMetadataValue;
-use tonic::service::Interceptor;
-use tonic::service::interceptor::InterceptedService;
-use tonic::transport::{Channel, Endpoint};
+use tonic::metadata::errors::InvalidMetadataValue;
+use tonic::transport::Channel;
+use tonic::{Request, Streaming};

-use utils::id::{TenantId, TimelineId};
+use utils::id::TenantId;
+use utils::id::TimelineId;
 use utils::shard::ShardIndex;

-use crate::model::*;
+use crate::model;
 use crate::proto;

-/// A basic Pageserver gRPC client, for a single tenant shard. This API uses native Rust domain
-/// types from `model` rather than generated Protobuf types.
-pub struct Client {
-    inner: proto::PageServiceClient<InterceptedService<Channel, AuthInterceptor>>,
-}
-
-impl Client {
-    /// Connects to the given gRPC endpoint.
-    pub async fn connect<E>(
-        endpoint: E,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        shard_id: ShardIndex,
-        auth_token: Option<String>,
-        compression: Option<CompressionEncoding>,
-    ) -> anyhow::Result<Self>
-    where
-        E: TryInto<Endpoint> + Send + Sync + 'static,
-        <E as TryInto<Endpoint>>::Error: std::error::Error + Send + Sync,
-    {
-        let endpoint: Endpoint = endpoint
-            .try_into()
-            .map_err(|err| anyhow!("invalid endpoint: {err}"))?;
-        let channel = endpoint.connect().await?;
-        Self::new(
-            channel,
-            tenant_id,
-            timeline_id,
-            shard_id,
-            auth_token,
-            compression,
-        )
-    }
-
-    /// Creates a new client using the given gRPC channel.
-    pub fn new(
-        channel: Channel,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        shard_id: ShardIndex,
-        auth_token: Option<String>,
-        compression: Option<CompressionEncoding>,
-    ) -> anyhow::Result<Self> {
-        let auth = AuthInterceptor::new(tenant_id, timeline_id, shard_id, auth_token)?;
-        let mut inner = proto::PageServiceClient::with_interceptor(channel, auth);
-
-        if let Some(compression) = compression {
-            // TODO: benchmark this (including network latency).
-            inner = inner
-                .accept_compressed(compression)
-                .send_compressed(compression);
-        }
-
-        Ok(Self { inner })
-    }
-
-    /// Returns whether a relation exists.
-    pub async fn check_rel_exists(
-        &mut self,
-        req: CheckRelExistsRequest,
-    ) -> tonic::Result<CheckRelExistsResponse> {
-        let req = proto::CheckRelExistsRequest::from(req);
-        let resp = self.inner.check_rel_exists(req).await?.into_inner();
-        Ok(resp.into())
-    }
-
-    /// Fetches a base backup.
-    pub async fn get_base_backup(
-        &mut self,
-        req: GetBaseBackupRequest,
-    ) -> tonic::Result<impl AsyncRead + use<>> {
-        let req = proto::GetBaseBackupRequest::from(req);
-        let chunks = self.inner.get_base_backup(req).await?.into_inner();
-        Ok(StreamReader::new(
-            chunks
-                .map_ok(|resp| resp.chunk)
-                .map_err(std::io::Error::other),
-        ))
-    }
-
-    /// Returns the total size of a database, as # of bytes.
-    pub async fn get_db_size(&mut self, req: GetDbSizeRequest) -> tonic::Result<GetDbSizeResponse> {
-        let req = proto::GetDbSizeRequest::from(req);
-        let resp = self.inner.get_db_size(req).await?.into_inner();
-        Ok(resp.into())
-    }
-
-    /// Fetches pages.
-    ///
-    /// This is implemented as a bidirectional streaming RPC for performance. Per-request errors are
-    /// typically returned as status_code instead of errors, to avoid tearing down the entire stream
-    /// via a tonic::Status error.
-    pub async fn get_pages(
-        &mut self,
-        reqs: impl Stream<Item = GetPageRequest> + Send + 'static,
-    ) -> tonic::Result<impl Stream<Item = tonic::Result<GetPageResponse>> + Send + 'static> {
-        let reqs = reqs.map(proto::GetPageRequest::from);
-        let resps = self.inner.get_pages(reqs).await?.into_inner();
-        Ok(resps.map_ok(GetPageResponse::from))
-    }
-
-    /// Returns the size of a relation, as # of blocks.
-    pub async fn get_rel_size(
-        &mut self,
-        req: GetRelSizeRequest,
-    ) -> tonic::Result<GetRelSizeResponse> {
-        let req = proto::GetRelSizeRequest::from(req);
-        let resp = self.inner.get_rel_size(req).await?.into_inner();
-        Ok(resp.into())
-    }
-
-    /// Fetches an SLRU segment.
-    pub async fn get_slru_segment(
-        &mut self,
-        req: GetSlruSegmentRequest,
-    ) -> tonic::Result<GetSlruSegmentResponse> {
-        let req = proto::GetSlruSegmentRequest::from(req);
-        let resp = self.inner.get_slru_segment(req).await?.into_inner();
-        Ok(resp.try_into()?)
-    }
-
-    /// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't
-    /// garbage collect the LSN until the lease expires. Must be acquired on all relevant shards.
-    ///
-    /// Returns the lease expiration time, or a FailedPrecondition status if the lease could not be
-    /// acquired because the LSN has already been garbage collected.
-    pub async fn lease_lsn(&mut self, req: LeaseLsnRequest) -> tonic::Result<LeaseLsnResponse> {
-        let req = proto::LeaseLsnRequest::from(req);
-        let resp = self.inner.lease_lsn(req).await?.into_inner();
-        Ok(resp.try_into()?)
-    }
-}
-
-/// Adds authentication metadata to gRPC requests.
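+/// Adds authentication and routing metadata to every outgoing gRPC request: the
+/// `neon-tenant-id`, `neon-timeline-id`, and `neon-shard-id` headers, plus an
+/// `authorization` bearer token when one was provided. These headers are
+/// required at the pageserver.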
 #[derive(Clone)]
 struct AuthInterceptor {
    tenant_id: AsciiMetadataValue,
@@ -160,29 +30,174 @@ impl AuthInterceptor {
    fn new(
        tenant_id: TenantId,
        timeline_id: TimelineId,
-        shard_id: ShardIndex,
        auth_token: Option<String>,
-    ) -> anyhow::Result<Self> {
+        shard_id: ShardIndex,
+    ) -> Result<Self, InvalidMetadataValue> {
+        let tenant_ascii: AsciiMetadataValue = tenant_id.to_string().try_into()?;
+        let timeline_ascii: AsciiMetadataValue = timeline_id.to_string().try_into()?;
+        let shard_ascii: AsciiMetadataValue = shard_id.to_string().try_into()?;
+
+        let auth_header: Option<AsciiMetadataValue> = match auth_token {
+            Some(token) => Some(format!("Bearer {token}").try_into()?),
+            None => None,
+        };
+
        Ok(Self {
-            tenant_id: tenant_id.to_string().try_into()?,
-            timeline_id: timeline_id.to_string().try_into()?,
-            shard_id: shard_id.to_string().try_into()?,
-            auth_header: auth_token
-                .map(|token| format!("Bearer {token}").try_into())
-                .transpose()?,
+            tenant_id: tenant_ascii,
+            shard_id: shard_ascii,
+            timeline_id: timeline_ascii,
+            auth_header,
        })
    }
 }

-impl Interceptor for AuthInterceptor {
-    fn call(&mut self, mut req: tonic::Request<()>) -> tonic::Result<tonic::Request<()>> {
-        let metadata = req.metadata_mut();
-        metadata.insert("neon-tenant-id", self.tenant_id.clone());
-        metadata.insert("neon-timeline-id", self.timeline_id.clone());
-        metadata.insert("neon-shard-id", self.shard_id.clone());
-        if let Some(ref auth_header) = self.auth_header {
-            metadata.insert("authorization", auth_header.clone());
+impl tonic::service::Interceptor for AuthInterceptor {
+    fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
+        req.metadata_mut()
+            .insert("neon-tenant-id", self.tenant_id.clone());
+        req.metadata_mut()
+            .insert("neon-shard-id", self.shard_id.clone());
+        req.metadata_mut()
+            .insert("neon-timeline-id", self.timeline_id.clone());
+        if let Some(auth_header) = &self.auth_header {
+            req.metadata_mut()
+                .insert("authorization", auth_header.clone());
        }
        Ok(req)
    }
 }
+
+#[derive(Clone)]
+pub struct Client {
+    client: proto::PageServiceClient<
+        tonic::service::interceptor::InterceptedService<Channel, AuthInterceptor>,
+    >,
+}
+
+impl Client {
+    pub async fn new<T: TryInto<tonic::transport::Endpoint> + Send + Sync + 'static>(
+        into_endpoint: T,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        shard_id: ShardIndex,
+        auth_header: Option<String>,
+        compression: Option<tonic::codec::CompressionEncoding>,
+    ) -> anyhow::Result<Self> {
+        let endpoint: tonic::transport::Endpoint = into_endpoint
+            .try_into()
+            .map_err(|_e| anyhow::anyhow!("failed to convert endpoint"))?;
+        let channel = endpoint.connect().await?;
+        let auth = AuthInterceptor::new(tenant_id, timeline_id, auth_header, shard_id)
+            .map_err(|e| anyhow::anyhow!(e.to_string()))?;
+        let mut client = proto::PageServiceClient::with_interceptor(channel, auth);
+
+        if let Some(compression) = compression {
+            // TODO: benchmark this (including network latency).
+            client = client
+                .accept_compressed(compression)
+                .send_compressed(compression);
+        }
+
+        Ok(Self { client })
+    }
+
+    /// Returns whether a relation exists.
+    pub async fn check_rel_exists(
+        &mut self,
+        req: model::CheckRelExistsRequest,
+    ) -> Result<model::CheckRelExistsResponse, tonic::Status> {
+        let proto_req = proto::CheckRelExistsRequest::from(req);
+
+        let response = self.client.check_rel_exists(proto_req).await?;
+
+        let proto_resp = response.into_inner();
+        Ok(proto_resp.into())
+    }
+
+    /// Fetches a base backup.
+    pub async fn get_base_backup(
+        &mut self,
+        req: model::GetBaseBackupRequest,
+    ) -> Result<impl AsyncRead + use<>, tonic::Status> {
+        let req = proto::GetBaseBackupRequest::from(req);
+        let chunks = self.client.get_base_backup(req).await?.into_inner();
+        let reader = StreamReader::new(
+            chunks
+                .map_ok(|resp| resp.chunk)
+                .map_err(std::io::Error::other),
+        );
+        Ok(reader)
+    }
+
+    /// Returns the total size of a database, as # of bytes.
+    pub async fn get_db_size(
+        &mut self,
+        req: model::GetDbSizeRequest,
+    ) -> Result<u64, tonic::Status> {
+        let proto_req = proto::GetDbSizeRequest::from(req);
+
+        let response = self.client.get_db_size(proto_req).await?;
+        Ok(response.into_inner().into())
+    }
+
+    /// Fetches pages.
+    ///
+    /// This is implemented as a bidirectional streaming RPC for performance.
+    /// Per-request failures are often reported in the response's `status_code`
+    /// rather than as a `tonic::Status` error, to avoid tearing down the entire stream.
+    pub async fn get_pages<ReqSt>(
+        &mut self,
+        inbound: ReqSt,
+    ) -> Result<
+        impl Stream<Item = Result<model::GetPageResponse, tonic::Status>> + Send + 'static,
+        tonic::Status,
+    >
+    where
+        ReqSt: Stream<Item = model::GetPageRequest> + Send + 'static,
+    {
+        let outbound_proto = inbound.map(|domain_req| domain_req.into());
+
+        let req_new = Request::new(outbound_proto);
+
+        let response_stream: Streaming<proto::GetPageResponse> =
+            self.client.get_pages(req_new).await?.into_inner();
+
+        let domain_stream = response_stream.map_ok(model::GetPageResponse::from);
+
+        Ok(domain_stream)
+    }
+
+    /// Returns the size of a relation, as # of blocks.
+    pub async fn get_rel_size(
+        &mut self,
+        req: model::GetRelSizeRequest,
+    ) -> Result<model::GetRelSizeResponse, tonic::Status> {
+        let proto_req = proto::GetRelSizeRequest::from(req);
+        let response = self.client.get_rel_size(proto_req).await?;
+        let proto_resp = response.into_inner();
+        Ok(proto_resp.into())
+    }
+
+    /// Fetches an SLRU segment.
+    pub async fn get_slru_segment(
+        &mut self,
+        req: model::GetSlruSegmentRequest,
+    ) -> Result<model::GetSlruSegmentResponse, tonic::Status> {
+        let proto_req = proto::GetSlruSegmentRequest::from(req);
+        let response = self.client.get_slru_segment(proto_req).await?;
+        Ok(response.into_inner().try_into()?)
+    }
+
+    /// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't
+    /// garbage collect the LSN until the lease expires. Must be acquired on all relevant shards.
+    ///
+    /// Returns the lease expiration time, or a FailedPrecondition status if the lease could not be
+    /// acquired because the LSN has already been garbage collected.
+    pub async fn lease_lsn(
+        &mut self,
+        req: model::LeaseLsnRequest,
+    ) -> Result<model::LeaseLsnResponse, tonic::Status> {
+        let req = proto::LeaseLsnRequest::from(req);
+        Ok(self.client.lease_lsn(req).await?.into_inner().try_into()?)
+    }
+}
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -33,8 +33,6 @@ pub enum ProtocolError {
    Invalid(&'static str, String),
    #[error("required field '{0}' is missing")]
    Missing(&'static str),
-    #[error("invalid combination of not_modified_lsn '{0}' and request_lsn '{1}'")]
-    InvalidLsns(Lsn, Lsn),
 }

 impl ProtocolError {
@@ -87,9 +85,9 @@ impl TryFrom<proto::ReadLsn> for ReadLsn {
            return Err(ProtocolError::invalid("request_lsn", pb.request_lsn));
        }
        if pb.not_modified_since_lsn > pb.request_lsn {
-            return Err(ProtocolError::InvalidLsns(
-                Lsn(pb.not_modified_since_lsn),
-                Lsn(pb.request_lsn),
+            return Err(ProtocolError::invalid(
+                "not_modified_since_lsn",
+                pb.not_modified_since_lsn,
            ));
        }
        Ok(Self {
@@ -386,7 +384,7 @@ impl From<GetPageRequest> for proto::GetPageRequest {
 pub type RequestID = u64;

 /// A GetPage request class.
-#[derive(Clone, Copy, Debug, strum_macros::Display)]
+#[derive(Clone, Copy, Debug)]
 pub enum GetPageClass {
    /// Unknown class. For backwards compatibility: used when an older client version sends a class
    /// that a newer server version has removed.
@@ -399,19 +397,6 @@ pub enum GetPageClass {
    Background,
 }

-impl GetPageClass {
-    /// Returns true if this is considered a bulk request (i.e. more throughput-oriented rather than
-    /// latency-sensitive).
-    pub fn is_bulk(&self) -> bool {
-        match self {
-            Self::Unknown => false,
-            Self::Normal => false,
-            Self::Prefetch => true,
-            Self::Background => true,
-        }
-    }
-}
-
 impl From<proto::GetPageClass> for GetPageClass {
    fn from(pb: proto::GetPageClass) -> Self {
        match pb {
@@ -617,21 +602,6 @@ impl TryFrom<tonic::Code> for GetPageStatusCode {
    }
 }

-impl From<GetPageStatusCode> for tonic::Code {
-    fn from(status_code: GetPageStatusCode) -> Self {
-        use tonic::Code;
-
-        match status_code {
-            GetPageStatusCode::Unknown => Code::Unknown,
-            GetPageStatusCode::Ok => Code::Ok,
-            GetPageStatusCode::NotFound => Code::NotFound,
-            GetPageStatusCode::InvalidRequest => Code::InvalidArgument,
-            GetPageStatusCode::InternalError => Code::Internal,
-            GetPageStatusCode::SlowDown => Code::ResourceExhausted,
-        }
-    }
-}
-
 // Fetches the size of a relation at a given LSN, as # of blocks. Only valid on shard 0, other
 // shards will error.
 #[derive(Clone, Copy, Debug)]
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -24,14 +24,10 @@ tracing.workspace = true
 tokio.workspace = true
 tokio-stream.workspace = true
 tokio-util.workspace = true
-axum.workspace = true
-http.workspace = true
-metrics.workspace = true
 tonic.workspace = true
 url.workspace = true

 pageserver_client.workspace = true
-pageserver_client_grpc.workspace = true
 pageserver_api.workspace = true
 pageserver_page_api.workspace = true
 utils = { path = "../../libs/utils/" }
--- a/pageserver/pagebench/src/cmd/basebackup.rs
+++ b/pageserver/pagebench/src/cmd/basebackup.rs
@@ -326,7 +326,7 @@ impl GrpcClient {
        ttid: TenantTimelineId,
        compression: bool,
    ) -> anyhow::Result<Self> {
-        let inner = page_api::Client::connect(
+        let inner = page_api::Client::new(
            connstring.to_string(),
            ttid.tenant_id,
            ttid.timeline_id,
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -32,10 +32,6 @@ use crate::util::{request_stats, tokio_thread_local_stats};
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
-    #[clap(long, default_value = "false")]
-    grpc: bool,
-    #[clap(long, default_value = "false")]
-    grpc_stream: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    /// Pageserver connection string. Supports postgresql:// and grpc:// protocols.
@@ -76,9 +72,6 @@ pub(crate) struct Args {
    #[clap(long)]
    set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,

-    #[clap(long)]
-    only_relnode: Option<u32>,
-
    /// Queue depth generated in each client.
    #[clap(long, default_value = "1")]
    queue_depth: NonZeroUsize,
@@ -93,31 +86,10 @@ pub(crate) struct Args {
    #[clap(long, default_value = "1")]
    batch_size: NonZeroUsize,

+    #[clap(long)]
+    only_relnode: Option<u32>,
+
    targets: Option<Vec<TenantTimelineId>>,
-
-    #[clap(long, default_value = "100")]
-    pool_max_consumers: NonZeroUsize,
-
-    #[clap(long, default_value = "5")]
-    pool_error_threshold: NonZeroUsize,
-
-    #[clap(long, default_value = "5000")]
-    pool_connect_timeout: NonZeroUsize,
-
-    #[clap(long, default_value = "1000")]
-    pool_connect_backoff: NonZeroUsize,
-
-    #[clap(long, default_value = "60000")]
-    pool_max_idle_duration: NonZeroUsize,
-
-    #[clap(long, default_value = "0")]
-    max_delay_ms: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_drops: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_hangs: usize,
 }

 /// State shared by all clients
@@ -174,6 +146,7 @@ pub(crate) fn main(args: Args) -> anyhow::Result<()> {
        main_impl(args, thread_local_stats)
    })
 }
+
 async fn main_impl(
    args: Args,
    all_thread_local_stats: AllThreadLocalStats<request_stats::Stats>,
@@ -338,7 +311,6 @@ async fn main_impl(
    let rps_period = args
        .per_client_rate
        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
-
    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
        let ss = shared_state.clone();
        let cancel = cancel.clone();
@@ -653,7 +625,7 @@ impl GrpcClient {
        ttid: TenantTimelineId,
        compression: bool,
    ) -> anyhow::Result<Self> {
-        let mut client = page_api::Client::connect(
+        let mut client = page_api::Client::new(
            connstring.to_string(),
            ttid.tenant_id,
            ttid.timeline_id,
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -28,7 +28,6 @@ use reqwest::Url;
 use storage_broker::Uri;
 use utils::id::{NodeId, TimelineId};
 use utils::logging::{LogFormat, SecretString};
-use utils::serde_percent::Percent;

 use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
 use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
@@ -460,16 +459,7 @@ impl PageServerConf {
            metric_collection_endpoint,
            metric_collection_bucket,
            synthetic_size_calculation_interval,
-            disk_usage_based_eviction: Some(disk_usage_based_eviction.unwrap_or(
-                DiskUsageEvictionTaskConfig {
-                    max_usage_pct: Percent::new(80).unwrap(),
-                    min_avail_bytes: 2_000_000_000,
-                    period: Duration::from_secs(60),
-                    #[cfg(feature = "testing")]
-                    mock_statvfs: None,
-                    eviction_order: Default::default(),
-                },
-            )),
+            disk_usage_based_eviction,
            test_remote_failures,
            ondemand_download_behavior_treat_error_as_warn,
            background_task_maximum_delay,
@@ -707,8 +697,6 @@ impl ConfigurableSemaphore {
 #[cfg(test)]
 mod tests {

-    use std::time::Duration;
-
    use camino::Utf8PathBuf;
    use rstest::rstest;
    use utils::id::NodeId;
@@ -810,20 +798,4 @@ mod tests {
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
            .expect("parse_and_validate");
    }
-
-    #[test]
-    fn test_config_disk_usage_based_eviction_is_valid() {
-        let input = r#"
-            control_plane_api = "http://localhost:6666"
-        "#;
-        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
-            .expect("disk_usage_based_eviction is valid");
-        let workdir = Utf8PathBuf::from("/nonexistent");
-        let config = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir).unwrap();
-        let disk_usage_based_eviction = config.disk_usage_based_eviction.unwrap();
-        assert_eq!(disk_usage_based_eviction.max_usage_pct.get(), 80);
-        assert_eq!(disk_usage_based_eviction.min_avail_bytes, 2_000_000_000);
-        assert_eq!(disk_usage_based_eviction.period, Duration::from_secs(60));
-        assert_eq!(disk_usage_based_eviction.eviction_order, Default::default());
-    }
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -3170,7 +3170,6 @@ where
 pub struct GrpcPageServiceHandler {
    tenant_manager: Arc<TenantManager>,
    ctx: RequestContext,
-    cancel: CancellationToken,
    gate_guard: GateGuard,
    get_vectored_concurrent_io: GetVectoredConcurrentIo,
 }
@@ -3223,7 +3222,6 @@ impl GrpcPageServiceHandler {
        let page_service_handler = GrpcPageServiceHandler {
            tenant_manager,
            ctx,
-            cancel: cancel.clone(),
            gate_guard: gate.enter().expect("gate was just created"),
            get_vectored_concurrent_io,
        };
@@ -3355,8 +3353,6 @@ impl GrpcPageServiceHandler {
    /// TODO: get_vectored() currently enforces a batch limit of 32. Postgres will typically send
    /// batches up to effective_io_concurrency = 100. Either we have to accept large batches, or
    /// split them up in the client or server.
-    ///
-    /// TODO: verify that the given keys belong to this shard.
    #[instrument(skip_all, fields(req_id, rel, blkno, blks, req_lsn, mod_lsn))]
    async fn get_page(
        ctx: &RequestContext,
@@ -3674,7 +3670,6 @@ impl proto::PageService for GrpcPageServiceHandler {
        // Spawn a task to handle the GetPageRequest stream.
        let span = Span::current();
        let ctx = self.ctx.attached_child();
-        let cancel = self.cancel.clone();
        let mut reqs = req.into_inner();

        let resps = async_stream::try_stream! {
@@ -3682,20 +3677,7 @@ impl proto::PageService for GrpcPageServiceHandler {
                .get(ttid.tenant_id, ttid.timeline_id, shard_selector)
                .await?
                .downgrade();
-
-            loop {
-                let req = tokio::select! {
-                    req = reqs.message() => req,
-                    _ = cancel.cancelled() => {
-                        tracing::info!("closing getpages stream due to shutdown");
-                        break;
-                    },
-                };
-                let req = if let Some(req) = req? {
-                    req
-                } else {
-                    break;
-                };
+            while let Some(req) = reqs.message().await? {
                let req_id = req.request_id;
                let result = Self::get_page(&ctx, &timeline, req, io_concurrency.clone())
                    .instrument(span.clone()) // propagate request span
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -78,7 +78,7 @@ use utils::rate_limit::RateLimit;
 use utils::seqwait::SeqWait;
 use utils::simple_rcu::{Rcu, RcuReadGuard};
 use utils::sync::gate::{Gate, GateGuard};
-use utils::{completion, critical_timeline, fs_ext, pausable_failpoint};
+use utils::{completion, critical, fs_ext, pausable_failpoint};
 #[cfg(test)]
 use wal_decoder::models::value::Value;
 use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
@@ -2144,31 +2144,14 @@ impl Timeline {
        debug_assert_current_span_has_tenant_and_timeline_id();

        // Regardless of whether we're going to try_freeze_and_flush
-        // cancel walreceiver to stop ingesting more data asap.
-        //
-        // Note that we're accepting a race condition here where we may
-        // do the final flush below, before walreceiver observes the
-        // cancellation and exits.
-        // This means we may open a new InMemoryLayer after the final flush below.
-        // Flush loop is also still running for a short while, so, in theory, it
-        // could also make its way into the upload queue.
-        //
-        // If we wait for the shutdown of the walreceiver before moving on to the
-        // flush, then that would be avoided. But we don't do it because the
-        // walreceiver entertains reads internally, which means that it possibly
-        // depends on the download of layers. Layer download is only sensitive to
-        // the cancellation of the entire timeline, so cancelling the walreceiver
-        // will have no effect on the individual get requests.
-        // This would cause problems when there is a lot of ongoing downloads or
-        // there is S3 unavailabilities, i.e. detach, deletion, etc would hang,
-        // and we can't deallocate resources of the timeline, etc.
+        // or not, stop ingesting any more data.
        let walreceiver = self.walreceiver.lock().unwrap().take();
        tracing::debug!(
            is_some = walreceiver.is_some(),
            "Waiting for WalReceiverManager..."
        );
        if let Some(walreceiver) = walreceiver {
-            walreceiver.cancel().await;
+            walreceiver.shutdown().await;
        }
        // ... and inform any waiters for newer LSNs that there won't be any.
        self.last_record_lsn.shutdown();
@@ -4746,7 +4729,7 @@ impl Timeline {
                }

                // Fetch the next layer to flush, if any.
-                let (layer, l0_count, frozen_count, frozen_size, open_layer_size) = {
+                let (layer, l0_count, frozen_count, frozen_size) = {
                    let layers = self.layers.read(LayerManagerLockHolder::FlushLoop).await;
                    let Ok(lm) = layers.layer_map() else {
                        info!("dropping out of flush loop for timeline shutdown");
@@ -4759,13 +4742,8 @@ impl Timeline {
                        .iter()
                        .map(|l| l.estimated_in_mem_size())
                        .sum();
-                    let open_layer_size: u64 = lm
-                        .open_layer
-                        .as_ref()
-                        .map(|l| l.estimated_in_mem_size())
-                        .unwrap_or(0);
                    let layer = lm.frozen_layers.front().cloned();
-                    (layer, l0_count, frozen_count, frozen_size, open_layer_size)
+                    (layer, l0_count, frozen_count, frozen_size)
                    // drop 'layers' lock
                };
                let Some(layer) = layer else {
@@ -4778,7 +4756,7 @@ impl Timeline {
                    if l0_count >= stall_threshold {
                        warn!(
                            "stalling layer flushes for compaction backpressure at {l0_count} \
-                            L0 layers ({frozen_count} frozen layers with {frozen_size} bytes, {open_layer_size} bytes in open layer)"
+                            L0 layers ({frozen_count} frozen layers with {frozen_size} bytes)"
                        );
                        let stall_timer = self
                            .metrics
@@ -4831,7 +4809,7 @@ impl Timeline {
                        let delay = flush_duration.as_secs_f64();
                        info!(
                            "delaying layer flush by {delay:.3}s for compaction backpressure at \
-                            {l0_count} L0 layers ({frozen_count} frozen layers with {frozen_size} bytes, {open_layer_size} bytes in open layer)"
+                            {l0_count} L0 layers ({frozen_count} frozen layers with {frozen_size} bytes)"
                        );
                        let _delay_timer = self
                            .metrics
@@ -6841,11 +6819,7 @@ impl Timeline {
                    Err(walredo::Error::Cancelled) => return Err(PageReconstructError::Cancelled),
                    Err(walredo::Error::Other(err)) => {
                        if fire_critical_error {
-                            critical_timeline!(
-                                self.tenant_shard_id,
-                                self.timeline_id,
-                                "walredo failure during page reconstruction: {err:?}"
-                            );
+                            critical!("walredo failure during page reconstruction: {err:?}");
                        }
                        return Err(PageReconstructError::WalRedo(
                            err.context("reconstruct a page image"),
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -36,7 +36,7 @@ use serde::Serialize;
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, debug, error, info, info_span, trace, warn};
-use utils::critical_timeline;
+use utils::critical;
 use utils::id::TimelineId;
 use utils::lsn::Lsn;
 use wal_decoder::models::record::NeonWalRecord;
@@ -1390,11 +1390,7 @@ impl Timeline {
                            GetVectoredError::MissingKey(_),
                        ) = err
                        {
-                            critical_timeline!(
-                                self.tenant_shard_id,
-                                self.timeline_id,
-                                "missing key during compaction: {err:?}"
-                            );
+                            critical!("missing key during compaction: {err:?}");
                        }
                    })?;

@@ -1422,11 +1418,7 @@ impl Timeline {

            // Alert on critical errors that indicate data corruption.
            Err(err) if err.is_critical() => {
-                critical_timeline!(
-                    self.tenant_shard_id,
-                    self.timeline_id,
-                    "could not compact, repartitioning keyspace failed: {err:?}"
-                );
+                critical!("could not compact, repartitioning keyspace failed: {err:?}");
            }

            // Log other errors. No partitioning? This is normal, if the timeline was just created
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -182,7 +182,6 @@ pub(crate) async fn generate_tombstone_image_layer(
    detached: &Arc<Timeline>,
    ancestor: &Arc<Timeline>,
    ancestor_lsn: Lsn,
-    historic_layers_to_copy: &Vec<Layer>,
    ctx: &RequestContext,
 ) -> Result<Option<ResidentLayer>, Error> {
    tracing::info!(
@@ -200,20 +199,6 @@ pub(crate) async fn generate_tombstone_image_layer(
    let image_lsn = ancestor_lsn;

    {
-        for layer in historic_layers_to_copy {
-            let desc = layer.layer_desc();
-            if !desc.is_delta
-                && desc.lsn_range.start == image_lsn
-                && overlaps_with(&key_range, &desc.key_range)
-            {
-                tracing::info!(
-                    layer=%layer, "will copy tombstone from ancestor instead of creating a new one"
-                );
-
-                return Ok(None);
-            }
-        }
-
        let layers = detached
            .layers
            .read(LayerManagerLockHolder::DetachAncestor)
@@ -465,8 +450,7 @@ pub(super) async fn prepare(
        Vec::with_capacity(straddling_branchpoint.len() + rest_of_historic.len() + 1);

    if let Some(tombstone_layer) =
-        generate_tombstone_image_layer(detached, &ancestor, ancestor_lsn, &rest_of_historic, ctx)
-            .await?
+        generate_tombstone_image_layer(detached, &ancestor, ancestor_lsn, ctx).await?
    {
        new_layers.push(tombstone_layer.into());
    }
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -63,6 +63,7 @@ pub struct WalReceiver {
    /// All task spawned by [`WalReceiver::start`] and its children are sensitive to this token.
    /// It's a child token of [`Timeline`] so that timeline shutdown can cancel WalReceiver tasks early for `freeze_and_flush=true`.
    cancel: CancellationToken,
+    task: tokio::task::JoinHandle<()>,
 }

 impl WalReceiver {
@@ -79,7 +80,7 @@ impl WalReceiver {
        let loop_status = Arc::new(std::sync::RwLock::new(None));
        let manager_status = Arc::clone(&loop_status);
        let cancel = timeline.cancel.child_token();
-        let _task = WALRECEIVER_RUNTIME.spawn({
+        let task = WALRECEIVER_RUNTIME.spawn({
            let cancel = cancel.clone();
            async move {
                debug_assert_current_span_has_tenant_and_timeline_id();
@@ -120,14 +121,25 @@ impl WalReceiver {
        Self {
            manager_status,
            cancel,
+            task,
        }
    }

    #[instrument(skip_all, level = tracing::Level::DEBUG)]
-    pub async fn cancel(self) {
+    pub async fn shutdown(self) {
        debug_assert_current_span_has_tenant_and_timeline_id();
        debug!("cancelling walreceiver tasks");
        self.cancel.cancel();
+        match self.task.await {
+            Ok(()) => debug!("Shutdown success"),
+            Err(je) if je.is_cancelled() => unreachable!("not used"),
+            Err(je) if je.is_panic() => {
+                // already logged by panic hook
+            }
+            Err(je) => {
+                error!("shutdown walreceiver task join error: {je}")
+            }
+        }
    }

    pub(crate) fn status(&self) -> Option<ConnectionManagerStatus> {
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -100,7 +100,6 @@ pub(super) async fn connection_manager_loop_step(
    // with other streams on this client (other connection managers). When
    // object goes out of scope, stream finishes in drop() automatically.
    let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
-    let mut broker_reset_interval = tokio::time::interval(tokio::time::Duration::from_secs(30));
    debug!("Subscribed for broker timeline updates");

    loop {
@@ -157,10 +156,7 @@ pub(super) async fn connection_manager_loop_step(
            // Got a new update from the broker
            broker_update = broker_subscription.message() /* TODO: review cancellation-safety */ => {
                match broker_update {
-                    Ok(Some(broker_update)) => {
-                        broker_reset_interval.reset();
-                        connection_manager_state.register_timeline_update(broker_update);
-                    },
+                    Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
                    Err(status) => {
                        match status.code() {
                            Code::Unknown if status.message().contains("stream closed because of a broken pipe") || status.message().contains("connection reset") || status.message().contains("error reading a body from connection") => {
@@ -182,14 +178,6 @@ pub(super) async fn connection_manager_loop_step(
                }
            },

-            _ = broker_reset_interval.tick() => {
-                if wait_lsn_status.borrow().is_some() {
-                    tracing::warn!("No broker updates received for a while, but waiting for WAL. Re-setting stream ...")
-                }
-
-                broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
-            },
-
            new_event = async {
                // Reminder: this match arm needs to be cancellation-safe.
                loop {
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -25,7 +25,7 @@ use tokio_postgres::replication::ReplicationStream;
 use tokio_postgres::{Client, SimpleQueryMessage, SimpleQueryRow};
 use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, debug, error, info, trace, warn};
-use utils::critical_timeline;
+use utils::critical;
 use utils::id::NodeId;
 use utils::lsn::Lsn;
 use utils::pageserver_feedback::PageserverFeedback;
@@ -275,12 +275,20 @@ pub(super) async fn handle_walreceiver_connection(
    let copy_stream = replication_client.copy_both_simple(&query).await?;
    let mut physical_stream = pin!(ReplicationStream::new(copy_stream));

-    let mut walingest = WalIngest::new(timeline.as_ref(), startpoint, &ctx)
-        .await
-        .map_err(|e| match e.kind {
-            crate::walingest::WalIngestErrorKind::Cancelled => WalReceiverError::Cancelled,
-            _ => WalReceiverError::Other(e.into()),
-        })?;
+    let walingest_future = WalIngest::new(timeline.as_ref(), startpoint, &ctx);
+    let walingest_res = select! {
+        walingest_res = walingest_future => walingest_res,
+        _ = cancellation.cancelled() => {
+            // We are doing reads in WalIngest::new, and those can hang as they come from the network.
+            // Timeline cancellation hits the walreceiver cancellation token before it hits the timeline global one.
+            debug!("Connection cancelled");
+            return Err(WalReceiverError::Cancelled);
+        },
+    };
+    let mut walingest = walingest_res.map_err(|e| match e.kind {
+        crate::walingest::WalIngestErrorKind::Cancelled => WalReceiverError::Cancelled,
+        _ => WalReceiverError::Other(e.into()),
+    })?;

    let (format, compression) = match protocol {
        PostgresClientProtocol::Interpreted {
@@ -360,13 +368,9 @@ pub(super) async fn handle_walreceiver_connection(
                        match raw_wal_start_lsn.cmp(&expected_wal_start) {
                            std::cmp::Ordering::Greater => {
                                let msg = format!(
-                                    "Gap in streamed WAL: [{expected_wal_start}, {raw_wal_start_lsn}"
-                                );
-                                critical_timeline!(
-                                    timeline.tenant_shard_id,
-                                    timeline.timeline_id,
-                                    "{msg}"
+                                    "Gap in streamed WAL: [{expected_wal_start}, {raw_wal_start_lsn})"
                                );
+                                critical!("{msg}");
                                return Err(WalReceiverError::Other(anyhow!(msg)));
                            }
                            std::cmp::Ordering::Less => {
@@ -379,11 +383,7 @@ pub(super) async fn handle_walreceiver_connection(
                                            "Received record with next_record_lsn multiple times ({} < {})",
                                            first_rec.next_record_lsn, expected_wal_start
                                        );
-                                        critical_timeline!(
-                                            timeline.tenant_shard_id,
-                                            timeline.timeline_id,
-                                            "{msg}"
-                                        );
+                                        critical!("{msg}");
                                        return Err(WalReceiverError::Other(anyhow!(msg)));
                                    }
                                }
@@ -452,11 +452,7 @@ pub(super) async fn handle_walreceiver_connection(
                            // TODO: we can't differentiate cancellation errors with
                            // anyhow::Error, so just ignore it if we're cancelled.
                            if !cancellation.is_cancelled() && !timeline.is_stopping() {
-                                critical_timeline!(
-                                    timeline.tenant_shard_id,
-                                    timeline.timeline_id,
-                                    "{err:?}"
-                                );
+                                critical!("{err:?}")
                            }
                        })?;

--- a/pageserver/src/tenant/upload_queue.rs
+++ b/pageserver/src/tenant/upload_queue.rs
@@ -550,7 +550,6 @@ impl UploadOp {
                    !i.references(dname, dmeta) && !index.references(dname, dmeta)
                })
            }
-
            // Indexes can never bypass each other. They can coalesce though, and
            // `UploadQueue::next_ready()` currently does this when possible.
            (UploadOp::UploadMetadata { .. }, UploadOp::UploadMetadata { .. }) => false,
@@ -1401,4 +1400,40 @@ mod tests {

        Ok(())
    }
+
+    /// Delete should be done after the index_part is uploaded.
+    #[test]
+    fn schedule_upload_index_bypass() -> anyhow::Result<()> {
+        let mut queue = UploadQueue::Uninitialized;
+        let mut index_part = IndexPart::example();
+
+        let tli = make_timeline();
+        let layer0 = make_layer(
+            &tli,
+            "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51",
+        );
+        index_part
+            .layer_metadata
+            .insert(layer0.layer_desc().layer_name(), layer0.metadata());
+        let queue = queue.initialize_with_current_remote_index_part(&index_part, 0)?;
+        let mut index_part_2 = index_part.clone();
+        index_part_2.layer_metadata.clear();
+
+        let ops = [
+            UploadOp::UploadMetadata {
+                uploaded: Box::new(index_part_2),
+            },
+            UploadOp::Delete(Delete {
+                layers: vec![(layer0.layer_desc().layer_name(), layer0.metadata())],
+            }),
+        ];
+
+        queue.queued_operations.extend(ops.clone());
+
+        let tasks = queue.schedule_ready();
+        assert_same_ops(tasks.iter().map(|t| &t.op), [&ops[0]]);
+        assert_eq!(queue.queued_operations.len(), 1);
+
+        Ok(())
+    }
 }
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -40,7 +40,7 @@ use tracing::*;
 use utils::bin_ser::{DeserializeError, SerializeError};
 use utils::lsn::Lsn;
 use utils::rate_limit::RateLimit;
-use utils::{critical_timeline, failpoint_support};
+use utils::{critical, failpoint_support};
 use wal_decoder::models::record::NeonWalRecord;
 use wal_decoder::models::*;

@@ -418,30 +418,18 @@ impl WalIngest {
        // as there has historically been cases where PostgreSQL has cleared spurious VM pages. See:
        // https://github.com/neondatabase/neon/pull/10634.
        let Some(vm_size) = get_relsize(modification, vm_rel, ctx).await? else {
-            critical_timeline!(
-                modification.tline.tenant_shard_id,
-                modification.tline.timeline_id,
-                "clear_vm_bits for unknown VM relation {vm_rel}"
-            );
+            critical!("clear_vm_bits for unknown VM relation {vm_rel}");
            return Ok(());
        };
        if let Some(blknum) = new_vm_blk {
            if blknum >= vm_size {
-                critical_timeline!(
-                    modification.tline.tenant_shard_id,
-                    modification.tline.timeline_id,
-                    "new_vm_blk {blknum} not in {vm_rel} of size {vm_size}"
-                );
+                critical!("new_vm_blk {blknum} not in {vm_rel} of size {vm_size}");
                new_vm_blk = None;
            }
        }
        if let Some(blknum) = old_vm_blk {
            if blknum >= vm_size {
-                critical_timeline!(
-                    modification.tline.tenant_shard_id,
-                    modification.tline.timeline_id,
-                    "old_vm_blk {blknum} not in {vm_rel} of size {vm_size}"
-                );
+                critical!("old_vm_blk {blknum} not in {vm_rel} of size {vm_size}");
                old_vm_blk = None;
            }
        }
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -5,7 +5,6 @@ MODULE_big = neon
 OBJS = \
 	$(WIN32RES) \
 	communicator.o \
-	communicator_new.o \
 	extension_server.o \
 	file_cache.o \
 	hll.o \
@@ -23,18 +22,12 @@ OBJS = \
 	walproposer.o \
 	walproposer_pg.o \
 	neon_ddl_handler.o \
-	walsender_hooks.o \
-	$(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a
+	walsender_hooks.o

 PG_CPPFLAGS = -I$(libpq_srcdir)
 SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl

-UNAME_S := $(shell uname -s)
-ifeq ($(UNAME_S), Darwin)
-    SHLIB_LINK += -framework Security -framework CoreFoundation -framework SystemConfiguration
-endif
-
 EXTENSION = neon
 DATA = \
 	neon--1.0.sql \
@@ -61,17 +54,6 @@ WALPROP_OBJS = \
 	neon_utils.o \
 	walproposer_compat.o

-# libcommunicator.a is built by cargo from the Rust sources under communicator/
-# subdirectory. `cargo build` also generates communicator_bindings.h.
-communicator_new.o: communicator/communicator_bindings.h
-
-$(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a communicator/communicator_bindings.h &:
-	(cd $(srcdir)/communicator && cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE))
-
-# Force `cargo build` every time. Some of the Rust sources might have
-# changed.
-.PHONY: $(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a communicator/communicator_bindings.h
-
 .PHONY: walproposer-lib
 walproposer-lib: CPPFLAGS += -DWALPROPOSER_LIB
 walproposer-lib: libwalproposer.a;
--- a/pgxn/neon/communicator/.gitignore
+++ b/pgxn/neon/communicator/.gitignore
@@ -1,2 +0,0 @@
-# generated file (with cbindgen, see build.rs)
-communicator_bindings.h
--- a/pgxn/neon/communicator/Cargo.lock
+++ b/pgxn/neon/communicator/Cargo.lock
@@ -1,372 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "addr2line"
-version = "0.24.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
-dependencies = [
- "gimli",
-]
-
-[[package]]
-name = "adler2"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
-
-[[package]]
-name = "backtrace"
-version = "0.3.74"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
-dependencies = [
- "addr2line",
- "cfg-if",
- "libc",
- "miniz_oxide",
- "object",
- "rustc-demangle",
- "windows-targets",
-]
-
-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
-[[package]]
-name = "bytes"
-version = "1.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "communicator"
-version = "0.1.0"
-dependencies = [
- "tonic",
-]
-
-[[package]]
-name = "fnv"
-version = "1.0.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
-
-[[package]]
-name = "futures-core"
-version = "0.3.31"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
-
-[[package]]
-name = "gimli"
-version = "0.31.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
-
-[[package]]
-name = "http"
-version = "1.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
-dependencies = [
- "bytes",
- "fnv",
- "itoa",
-]
-
-[[package]]
-name = "http-body"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
-dependencies = [
- "bytes",
- "http",
-]
-
-[[package]]
-name = "http-body-util"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
-dependencies = [
- "bytes",
- "futures-core",
- "http",
- "http-body",
- "pin-project-lite",
-]
-
-[[package]]
-name = "itoa"
-version = "1.0.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
-
-[[package]]
-name = "libc"
-version = "0.2.171"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
-
-[[package]]
-name = "memchr"
-version = "2.7.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
-
-[[package]]
-name = "miniz_oxide"
-version = "0.8.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff70ce3e48ae43fa075863cef62e8b43b71a4f2382229920e0df362592919430"
-dependencies = [
- "adler2",
-]
-
-[[package]]
-name = "object"
-version = "0.36.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
-dependencies = [
- "memchr",
-]
-
-[[package]]
-name = "once_cell"
-version = "1.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
-
-[[package]]
-name = "percent-encoding"
-version = "2.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
-
-[[package]]
-name = "pin-project"
-version = "1.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
-dependencies = [
- "pin-project-internal",
-]
-
-[[package]]
-name = "pin-project-internal"
-version = "1.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "pin-project-lite"
-version = "0.2.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.94"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.40"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "rustc-demangle"
-version = "0.1.24"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
-
-[[package]]
-name = "syn"
-version = "2.0.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "tokio"
-version = "1.44.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
-dependencies = [
- "backtrace",
- "pin-project-lite",
-]
-
-[[package]]
-name = "tokio-stream"
-version = "0.1.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047"
-dependencies = [
- "futures-core",
- "pin-project-lite",
- "tokio",
-]
-
-[[package]]
-name = "tonic"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85839f0b32fd242bb3209262371d07feda6d780d16ee9d2bc88581b89da1549b"
-dependencies = [
- "base64",
- "bytes",
- "http",
- "http-body",
- "http-body-util",
- "percent-encoding",
- "pin-project",
- "tokio-stream",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
-[[package]]
-name = "tower-layer"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
-
-[[package]]
-name = "tower-service"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
-
-[[package]]
-name = "tracing"
-version = "0.1.41"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
-dependencies = [
- "pin-project-lite",
- "tracing-attributes",
- "tracing-core",
-]
-
-[[package]]
-name = "tracing-attributes"
-version = "0.1.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "tracing-core"
-version = "0.1.33"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
-dependencies = [
- "once_cell",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
-
-[[package]]
-name = "windows-targets"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
-dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
-
-[[package]]
-name = "windows_i686_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
-
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
-
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
-
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
--- a/pgxn/neon/communicator/Cargo.toml
+++ b/pgxn/neon/communicator/Cargo.toml
@@ -1,43 +0,0 @@
-[package]
-name = "communicator"
-license.workspace = true
-edition.workspace = true
-
-[features]
-# 'testing' feature is currently unused in the communicator, but we accept it for convenience of
-# calling build scripts, so that you can pass the same feature to all packages.
-testing = []
-
-[lib]
-crate-type = ["staticlib"]
-
-[dependencies]
-axum.workspace = true
-bytes.workspace = true
-clashmap.workspace = true
-http.workspace = true
-libc.workspace = true
-nix.workspace = true
-atomic_enum = "0.3.0"
-prometheus.workspace = true
-prost.workspace = true
-tonic = { version = "0.12.0", default-features = false, features=["codegen", "prost", "transport"] }
-tokio = { version = "1.43.1", features = ["macros", "net", "io-util", "rt", "rt-multi-thread"] }
-tokio-pipe = { version = "0.2.12" }
-thiserror.workspace = true
-tracing.workspace = true
-tracing-subscriber.workspace = true
-
-metrics.workspace = true
-uring-common = { workspace = true, features = ["bytes"] }
-
-pageserver_client_grpc.workspace = true
-pageserver_api.workspace = true
-pageserver_page_api.workspace = true
-
-neon-shmem.workspace = true
-utils.workspace = true
-workspace_hack = { version = "0.1", path = "../../../workspace_hack" }
-
-[build-dependencies]
-cbindgen.workspace = true
--- a/pgxn/neon/communicator/README.md
+++ b/pgxn/neon/communicator/README.md
@@ -1,123 +0,0 @@
-# Communicator
-
-This package provides the so-called "compute-pageserver communicator",
-or just "communicator" in short. It runs in a PostgreSQL server, as
-part of the neon extension, and handles the communication with the
-pageservers. On the PostgreSQL side, the glue code in pgxn/neon/ uses
-the communicator to implement the PostgreSQL Storage Manager (SMGR)
-interface.
-
-## Design criteria
-
- Low latency
- Saturate a 10 Gbit / s network interface without becoming a bottleneck
-
-## Source code view
-
-pgxn/neon/communicator_new.c
-	Contains the glue that interact with PostgreSQL code and the Rust
-	communicator code.
-
-pgxn/neon/communicator/src/backend_interface.rs
-	The entry point for calls from each backend.
-
-pgxn/neon/communicator/src/init.rs
-	Initialization at server startup
-
-pgxn/neon/communicator/src/worker_process/
-    Worker process main loop and glue code
-
-At compilation time, pgxn/neon/communicator/ produces a static
-library, libcommunicator.a. It is linked to the neon.so extension
-library.
-
-The real networking code, which is independent of PostgreSQL, is in
-the pageserver/client_grpc crate.
-
-## Process view
-
-The communicator runs in a dedicated background worker process, the
-"communicator process". The communicator uses a multi-threaded Tokio
-runtime to execute the IO requests. So the communicator process has
-multiple threads running. That's unusual for Postgres processes and
-care must be taken to make that work.
-
-### Backend <-> worker communication
-
-Each backend has a number of I/O request slots in shared memory. The
-slots are statically allocated for each backend, and must not be
-accessed by other backends. The worker process reads requests from the
-shared memory slots, and writes responses back to the slots.
-
-To submit an IO request, first pick one of your backend's free slots,
-and write the details of the IO request in the slot. Finally, update
-the 'state' field of the slot to Submitted. That informs the worker
-process that it can start processing the request. Once the state has
-been set to Submitted, the backend *must not* access the slot anymore,
-until the worker process sets its state to 'Completed'. In other
-words, each slot is owned by either the backend or the worker process
-at all times, and the 'state' field indicates who has ownership at the
-moment.
-
-To inform the worker process that a request slot has a pending IO
-request, there's a pipe shared by the worker process and all backend
-processes. After you have changed the slot's state to Submitted, write
-the index of the request slot to the pipe. This wakes up the worker
-process.
-
-(Note that the pipe is just used for wakeups, but the worker process
-is free to pick up Submitted IO requests even without receiving the
-wakeup. As of this writing, it doesn't do that, but it might be useful
-in the future to reduce latency even further, for example.)
-
-When the worker process has completed processing the request, it
-writes the result back in the request slot. A GetPage request can also
-contain a pointer to buffer in the shared buffer cache. In that case,
-the worker process writes the resulting page contents directly to the
-buffer, and just a result code in the request slot. It then updates
-the 'state' field to Completed, which passes the owner ship back to
-the originating backend. Finally, it signals the process Latch of the
-originating backend, waking it up.
-
-### Differences between PostgreSQL v16, v17 and v18
-
-PostgreSQL v18 introduced the new AIO mechanism. The PostgreSQL AIO
-mechanism uses a very similar mechanism as described in the previous
-section, for the communication between AIO worker processes and
-backends. With our communicator, the AIO worker processes are not
-used, but we use the same PgAioHandle request slots as in upstream.
-For Neon-specific IO requests like GetDbSize, a neon request slot is
-used. But for the actual IO requests, the request slot merely contains
-a pointer to the PgAioHandle slot. The worker process updates the
-status of that, calls the IO callbacks upon completionetc, just like
-the upstream AIO worker processes do.
-
-## Sequence diagram
-
-                      neon
-    PostgreSQL     extension       backend_interface.rs  worker_process.rs    processor    tonic
-       |               .                    .                   .                 .
-	   | smgr_read()   .                    .                   .                 .
-	   +-------------> +                    .                   .                 .
-	   .               |                    .                   .                 .
-	   .               |  rcommunicator_    .                   .                 .
-	   .               | get_page_at_lsn    .                   .                 .
-	   .               +------------------> +                   .                 .
-                                            |                   .                 .
-                                            | write request to  .                 .                 .
-                                            | slot              .                 .
-                                            |                   .                 .
-                                            |                   .                 .
-											| submit_request()  .                 .
-											+-----------------> +                 .
-											|                   |                 .
-											|					| db_size_request .               .
-																+---------------->.
-																                  . TODO
-
-
-
-### Compute <-> pageserver protocol
-
-The protocol between Compute and the pageserver is based on gRPC. See `protos/`.
-
--- a/pgxn/neon/communicator/build.rs
+++ b/pgxn/neon/communicator/build.rs
@@ -1,20 +0,0 @@
-use std::env;
-
-/// Build script: run cbindgen over this crate and write the generated C header
-/// to `communicator_bindings.h`.
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
-
-    let bindings = match cbindgen::generate(manifest_dir) {
-        Ok(bindings) => bindings,
-        Err(cbindgen::Error::ParseSyntaxError { .. }) => {
-            // A syntax error in the Rust sources. Don't panic here: let the build
-            // carry on so that the Rust compiler reports the error itself, since
-            // its message is better than cbindgen's.
-            eprintln!("Generating C bindings failed because of a Rust syntax error");
-            return Ok(());
-        }
-        Err(err) => panic!("Unable to generate C bindings: {err:?}"),
-    };
-    bindings.write_to_file("communicator_bindings.h");
-
-    Ok(())
-}
--- a/pgxn/neon/communicator/cbindgen.toml
+++ b/pgxn/neon/communicator/cbindgen.toml
@@ -1,4 +0,0 @@
-language = "C"
-
-[enum]
-prefix_with_name = true
--- a/pgxn/neon/communicator/src/backend_comms.rs
+++ b/pgxn/neon/communicator/src/backend_comms.rs
@@ -1,207 +0,0 @@
-//! This module implements a request/response "slot" for submitting requests from backends
-//! to the communicator process.
-//!
-//! NB: The "backend" side of this code runs in Postgres backend processes,
-//! which means that it is not safe to use the 'tracing' crate for logging, nor
-//! to launch threads or use tokio tasks.
-use std::cell::UnsafeCell;
-use std::sync::atomic::fence;
-use std::sync::atomic::{AtomicI32, Ordering};
-
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-
-use atomic_enum::atomic_enum;
-
-/// One request/response slot. Each backend has its own set of slots that it uses.
-///
-/// This is the moral equivalent of PgAioHandle for Postgres AIO requests.
-/// Like PgAioHandle, try to keep this small.
-///
-/// There is an array of these in shared memory. Therefore, this must be Sized.
-///
-/// ## Lifecycle of a request
-///
-/// The slot is always owned by either the backend process or the communicator
-/// process, depending on the 'state'. Only the owning process is allowed to
-/// read or modify the slot, except for reading the 'state' itself to check who
-/// owns it.
-///
-/// A slot begins in the Idle state, where it is owned by the backend process.
-/// To submit a request, the backend process fills the slot with the request
-/// data, and changes it to the Submitted state. After changing the state, the
-/// slot is owned by the communicator process, and the backend is not allowed
-/// to access it until the communicator process marks it as Completed.
-///
-/// When the communicator process sees that the slot is in Submitted state, it
-/// starts to process the request. After processing the request, it stores the
-/// result in the slot, and changes the state to Completed. It is now owned by
-/// the backend process again, which may now read the result, and reuse the
-/// slot for a new request.
-///
-/// For correctness of the above protocol, we really only need two states:
-/// "owned by backend" and "owned by communicator process". But to help with
-/// debugging, there are a few more states. When the backend starts to fill in
-/// the request details in the slot, it first sets the state from Idle to
-/// Filling, and when it's done with that, from Filling to Submitted. In the
-/// Filling state, the slot is still owned by the backend. Similarly, when the
-/// communicator process starts to process a request, it sets it to Processing
-/// state first, but the slot is still owned by the communicator process.
-///
-/// This struct doesn't handle waking up the communicator process when a request
-/// has been submitted or when a response is ready. We only store the 'owner_procno'
-/// which can be used for waking up the backend on completion, but the wakeups are
-/// performed elsewhere.
-pub struct NeonIOHandle {
-    /// similar to PgAioHandleState
-    state: AtomicNeonIOHandleState,
-
-    /// The owning process's ProcNumber. The worker process uses this to set the process's
-    /// latch on completion.
-    ///
-    /// (This could be calculated from num_neon_request_slots_per_backend and the index of
-    /// this slot in the overall 'neon_request_slots' array.)
-    owner_procno: AtomicI32,
-
-    /// SAFETY: This is modified by fill_request(), after it has established ownership
-    /// of the slot by setting state from Idle to Filling
-    request: UnsafeCell<NeonIORequest>,
-
-    /// valid when state is Completed
-    ///
-    /// SAFETY: This is modified by RequestProcessingGuard::completed(). There can be
-    /// only one RequestProcessingGuard outstanding for a slot at a time, because
-    /// it is returned by start_processing_request() which checks the state, so
-    /// RequestProcessingGuard has exclusive access to the slot.
-    result: UnsafeCell<NeonIOResult>,
-}
-
-// The protocol described in the "Lifecycle of a request" section above ensures
-// the safe access to the fields
-unsafe impl Send for NeonIOHandle {}
-unsafe impl Sync for NeonIOHandle {}
-
-impl Default for NeonIOHandle {
-    /// A fresh slot: Idle, with no owner (procno -1) and empty request and result.
-    fn default() -> Self {
-        Self {
-            state: AtomicNeonIOHandleState::new(NeonIOHandleState::Idle),
-            owner_procno: AtomicI32::new(-1),
-            request: UnsafeCell::new(NeonIORequest::Empty),
-            result: UnsafeCell::new(NeonIOResult::Empty),
-        }
-    }
-}
-
-/// State of a [`NeonIOHandle`] request slot. See "Lifecycle of a request" in the
-/// `NeonIOHandle` docs for the transitions between these states.
-#[atomic_enum]
-#[derive(Eq, PartialEq)]
-pub enum NeonIOHandleState {
-    /// The slot is free, and owned by the backend.
-    Idle,
-
-    /// backend is filling in the request
-    Filling,
-
-    /// Backend has submitted the request to the communicator, but the
-    /// communicator process has not yet started processing it.
-    Submitted,
-
-    /// Communicator is processing the request
-    Processing,
-
-    /// Communicator has completed the request, and the 'result' field is now
-    /// valid, but the backend has not read the result yet.
-    Completed,
-}
-
-/// Handle to a slot whose request is being processed. It is handed out by
-/// `NeonIOHandle::start_processing_request()` and consumed by `completed()`.
-pub struct RequestProcessingGuard<'a>(&'a NeonIOHandle);
-
-unsafe impl<'a> Send for RequestProcessingGuard<'a> {}
-unsafe impl<'a> Sync for RequestProcessingGuard<'a> {}
-
-impl<'a> RequestProcessingGuard<'a> {
-    /// Borrow the request stored in the slot.
-    pub fn get_request(&self) -> &NeonIORequest {
-        unsafe { &*self.0.request.get() }
-    }
-
-    /// ProcNumber that was stored in the slot when the request was filled in.
-    pub fn get_owner_procno(&self) -> i32 {
-        self.0.owner_procno.load(Ordering::Relaxed)
-    }
-
-    /// Store `result` in the slot and mark the slot as Completed.
-    ///
-    /// Panics if the slot was not in the Processing state.
-    pub fn completed(self, result: NeonIOResult) {
-        unsafe {
-            *self.0.result.get() = result;
-        };
-
-        // Ok, we have completed the IO. Mark the request as completed. After that,
-        // we no longer have ownership of the slot, and must not modify it.
-        // The Release ordering publishes the 'result' write above together with
-        // the state change.
-        let old_state = self
-            .0
-            .state
-            .swap(NeonIOHandleState::Completed, Ordering::Release);
-        assert!(old_state == NeonIOHandleState::Processing);
-    }
-}
-
-impl NeonIOHandle {
-    /// Copy `request` into the slot, record `proc_number` as the owner, and mark
-    /// the slot as Submitted.
-    ///
-    /// Panics if the slot is not in the Idle state.
-    pub fn fill_request(&self, request: &NeonIORequest, proc_number: i32) {
-        // Verify that the slot is in Idle state previously, and start filling it.
-        //
-        // XXX: This step isn't strictly necessary. Assuming the caller didn't screw up
-        // and try to use a slot that's already in use, we could fill the slot and
-        // switch it directly from Idle to Submitted state.
-        if let Err(s) = self.state.compare_exchange(
-            NeonIOHandleState::Idle,
-            NeonIOHandleState::Filling,
-            Ordering::Relaxed,
-            Ordering::Relaxed,
-        ) {
-            panic!("unexpected state in request slot: {s:?}");
-        }
-
-        // This fence synchronizes-with store/swap in `communicator_process_main_loop`.
-        fence(Ordering::Acquire);
-
-        self.owner_procno.store(proc_number, Ordering::Relaxed);
-        unsafe { *self.request.get() = *request }
-        // The Release store publishes the writes above together with the state change.
-        self.state
-            .store(NeonIOHandleState::Submitted, Ordering::Release);
-    }
-
-    /// Read the current state of the slot, with Relaxed ordering.
-    pub fn get_state(&self) -> NeonIOHandleState {
-        self.state.load(Ordering::Relaxed)
-    }
-
-    /// If the slot is in the Completed state, copy out the result, reset the slot
-    /// to Idle, and return the result. In any other state, returns None and
-    /// leaves the slot untouched.
-    pub fn try_get_result(&self) -> Option<NeonIOResult> {
-        // FIXME: ordering?
-        let state = self.state.load(Ordering::Relaxed);
-        if state == NeonIOHandleState::Completed {
-            // This fence synchronizes-with store/swap in `communicator_process_main_loop`.
-            fence(Ordering::Acquire);
-            let result = unsafe { *self.result.get() };
-            self.state.store(NeonIOHandleState::Idle, Ordering::Relaxed);
-            Some(result)
-        } else {
-            None
-        }
-    }
-
-    /// Read the IO request from the slot indicated in the wakeup
-    ///
-    /// Moves the slot from Submitted to Processing. Any other state causes a
-    /// panic, so as written this never returns None.
-    pub fn start_processing_request<'a>(&'a self) -> Option<RequestProcessingGuard<'a>> {
-        // XXX: using compare_exchange for this is not strictly necessary, as long as
-        // the communicator process has _some_ means of tracking which requests it's
-        // already processing. That could be a flag somewhere in communicator's private
-        // memory, for example.
-        if let Err(s) = self.state.compare_exchange(
-            NeonIOHandleState::Submitted,
-            NeonIOHandleState::Processing,
-            Ordering::Relaxed,
-            Ordering::Relaxed,
-        ) {
-            // FIXME surprising state. This is unexpected at the moment, but if we
-            // started to process requests more aggressively, without waiting for the
-            // read from the pipe, then this could happen
-            panic!("unexpected state in request slot: {s:?}");
-        }
-        fence(Ordering::Acquire);
-
-        Some(RequestProcessingGuard(self))
-    }
-}
--- a/pgxn/neon/communicator/src/backend_interface.rs
+++ b/pgxn/neon/communicator/src/backend_interface.rs
@@ -1,234 +0,0 @@
-//! This code runs in each backend process. That means that launching Rust threads, panicking
-//! etc. is forbidden!
-
-use std::os::fd::OwnedFd;
-
-use crate::backend_comms::NeonIOHandle;
-use crate::init::CommunicatorInitStruct;
-use crate::integrated_cache::{BackendCacheReadOp, IntegratedCacheReadAccess};
-use crate::neon_request::{CCachedGetPageVResult, COid};
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-
-/// Per-backend state, created by rcommunicator_backend_init().
-pub struct CommunicatorBackendStruct<'t> {
-    // ProcNumber of this backend; recorded as the owner of each request it fills in
-    my_proc_number: i32,
-
-    // The array of request slots, indexed by slot number
-    neon_request_slots: &'t [NeonIOHandle],
-
-    // Slot indexes are written here to wake up the communicator
-    submission_pipe_write_fd: OwnedFd,
-
-    // Set when a GetPageV request was satisfied entirely from the cache; taken
-    // again by bcomm_finish_cache_read()
-    pending_cache_read_op: Option<BackendCacheReadOp<'t>>,
-
-    integrated_cache: &'t IntegratedCacheReadAccess<'t>,
-}
-
-/// Build the per-backend communicator state from the init struct.
-///
-/// Both the cache read-access object and the returned struct are leaked, so
-/// they live for the rest of the process. Panics if `my_proc_number` is negative.
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_backend_init(
-    cis: Box<CommunicatorInitStruct>,
-    my_proc_number: i32,
-) -> &'static mut CommunicatorBackendStruct<'static> {
-    if my_proc_number < 0 {
-        panic!("cannot attach to communicator shared memory with procnumber {my_proc_number}");
-    }
-
-    let integrated_cache = Box::leak(Box::new(cis.integrated_cache_init_struct.backend_init()));
-
-    let backend_struct = CommunicatorBackendStruct {
-        my_proc_number,
-        neon_request_slots: cis.neon_request_slots,
-        submission_pipe_write_fd: cis.submission_pipe_write_fd,
-        pending_cache_read_op: None,
-        integrated_cache,
-    };
-    Box::leak(Box::new(backend_struct))
-}
-
-/// Start a request. You can poll for its completion and get the result by
-/// calling bcomm_poll_request_completion(). The communicator will wake
-/// us up by setting our process latch, so to wait for the completion, wait on
-/// the latch and call bcomm_poll_request_completion() every time the
-/// latch is set.
-///
-/// Returns -1 if the request was satisfied immediately from the cache, in which
-/// case the result has been stored in `immediate_result_ptr`. Otherwise returns
-/// `slot_idx`, the slot to poll for the result.
-///
-/// Safety: The C caller must ensure that the references are valid.
-/// The requested slot must be free, or this panics. This also panics if a
-/// cache read operation is still pending.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_start_io_request(
-    bs: &'_ mut CommunicatorBackendStruct,
-    slot_idx: i32,
-    request: &NeonIORequest,
-    immediate_result_ptr: &mut NeonIOResult,
-) -> i32 {
-    assert!(bs.pending_cache_read_op.is_none());
-
-    // Check if the request can be satisfied from the cache first
-    if let NeonIORequest::RelSize(req) = request {
-        if let Some(nblocks) = bs.integrated_cache.get_rel_size(&req.reltag()) {
-            *immediate_result_ptr = NeonIOResult::RelSize(nblocks);
-            return -1;
-        }
-    }
-
-    // Create neon request and submit it
-    bs.start_neon_io_request(slot_idx, request);
-
-    // Tell the communicator about it
-    bs.submit_request(slot_idx);
-
-    slot_idx
-}
-
-/// Start a GetPageV request.
-///
-/// If every requested block is found in the cache, the cache block numbers are
-/// stored in `immediate_result_ptr`, the read operation is kept pending in `bs`
-/// (see bcomm_finish_cache_read()), and -1 is returned. Otherwise the request is
-/// submitted to the communicator in slot `slot_idx`, and `slot_idx` is returned.
-/// In that case `immediate_result_ptr` may have been partially written, for the
-/// blocks preceding the first cache miss.
-///
-/// Panics if `request` is not a GetPageV request, or if a cache read operation
-/// is still pending.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_start_get_page_v_request(
-    bs: &mut CommunicatorBackendStruct,
-    slot_idx: i32,
-    request: &NeonIORequest,
-    immediate_result_ptr: &mut CCachedGetPageVResult,
-) -> i32 {
-    // The let-else rejects anything that isn't a GetPageV request.
-    let NeonIORequest::GetPageV(get_pagev_request) = request else {
-        panic!("invalid request passed to bcomm_start_get_page_v_request()");
-    };
-    assert!(bs.pending_cache_read_op.is_none());
-
-    // Check if the request can be satisfied from the cache first. all() stops
-    // at the first block that is not found in the cache.
-    let mut read_op = bs.integrated_cache.start_read_op();
-    let all_cached = (0..get_pagev_request.nblocks).all(|i| {
-        let lookup = read_op.get_page(
-            &get_pagev_request.reltag(),
-            get_pagev_request.block_number + i as u32,
-        );
-        match lookup {
-            Some(cache_block) => {
-                immediate_result_ptr.cache_block_numbers[i as usize] = cache_block;
-                true
-            }
-            // not found in cache
-            None => false,
-        }
-    });
-    if all_cached {
-        bs.pending_cache_read_op = Some(read_op);
-        return -1;
-    }
-
-    // Create neon request and submit it
-    bs.start_neon_io_request(slot_idx, request);
-
-    // Tell the communicator about it
-    bs.submit_request(slot_idx);
-
-    slot_idx
-}
-
-/// Poll a request slot for completion. Returns:
-///
-/// -1 if the request is still being processed
-/// 0 on success, with the result stored in `result_p`
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_poll_request_completion(
-    bs: &mut CommunicatorBackendStruct,
-    request_slot_idx: u32,
-    result_p: &mut NeonIOResult,
-) -> i32 {
-    let slot = &bs.neon_request_slots[request_slot_idx as usize];
-
-    if let Some(result) = slot.try_get_result() {
-        *result_p = result;
-        0
-    } else {
-        // still processing
-        -1
-    }
-}
-
-/// Report whether a request slot is in use. Returns:
-///
-/// 'false' if the slot is Idle. The backend process has ownership.
-/// 'true' if the slot is busy, and should be polled for result.
-///
-/// Panics if the slot is found in the Filling state.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_get_request_slot_status(
-    bs: &mut CommunicatorBackendStruct,
-    request_slot_idx: u32,
-) -> bool {
-    use crate::backend_comms::NeonIOHandleState;
-
-    let state = bs.neon_request_slots[request_slot_idx as usize].get_state();
-    match state {
-        NeonIOHandleState::Idle => false,
-        NeonIOHandleState::Submitted
-        | NeonIOHandleState::Processing
-        | NeonIOHandleState::Completed => true,
-        NeonIOHandleState::Filling => {
-            // 'false' would be the right result here. However, this
-            // is a very transient state. The C code should never
-            // leave a slot in this state, so if it sees that,
-            // something's gone wrong and it's not clear what to do
-            // with it.
-            panic!("unexpected Filling state in request slot {}", request_slot_idx);
-        }
-    }
-}
-
-// LFC functions
-
-/// Finish the pending local file cache read, returning whatever the read
-/// operation's finish() reports.
-///
-/// Panics if there is no pending cache read.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_finish_cache_read(bs: &mut CommunicatorBackendStruct) -> bool {
-    let Some(pending_op) = bs.pending_cache_read_op.take() else {
-        panic!("bcomm_finish_cache_read() called with no cached read pending");
-    };
-    pending_op.finish()
-}
-
-/// Check if the local file cache contains the given block
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_cache_contains(
-    bs: &mut CommunicatorBackendStruct,
-    spc_oid: COid,
-    db_oid: COid,
-    rel_number: u32,
-    fork_number: u8,
-    block_number: u32,
-) -> bool {
-    // Assemble the relation tag from its C-side components
-    let rel_tag = pageserver_page_api::RelTag {
-        spcnode: spc_oid,
-        dbnode: db_oid,
-        relnode: rel_number,
-        forknum: fork_number,
-    };
-
-    bs.integrated_cache
-        .cache_contains_page(&rel_tag, block_number)
-}
-
-impl<'t> CommunicatorBackendStruct<'t> {
-    /// Wake up the communicator process, by writing the slot index to the
-    /// submission pipe.
-    fn submit_request(&self, request_slot_idx: i32) {
-        // The write can block, if the pipe is full. That should be very rare,
-        // because the communicator tries hard to drain the pipe to prevent
-        // that. Also, there's a natural upper bound on how many wakeups can be
-        // queued up: there is only a limited number of request slots for each
-        // backend.
-        //
-        // If it does block very briefly, that's not too serious.
-        let wakeup_msg = request_slot_idx.to_ne_bytes();
-
-        let _write_result = nix::unistd::write(&self.submission_pipe_write_fd, &wakeup_msg);
-        // FIXME: check result, return any errors
-    }
-
-    /// Fill in the given request slot with `request`, owned by this backend.
-    ///
-    /// Note: there's no guarantee on when the communicator might pick it up. You should ring
-    /// the doorbell. But it might pick it up immediately.
-    ///
-    /// The slot must be free, or this panics.
-    pub(crate) fn start_neon_io_request(&mut self, request_slot_idx: i32, request: &NeonIORequest) {
-        let slot = &self.neon_request_slots[request_slot_idx as usize];
-
-        slot.fill_request(request, self.my_proc_number);
-    }
-}
--- a/pgxn/neon/communicator/src/file_cache.rs
+++ b/pgxn/neon/communicator/src/file_cache.rs
@@ -1,160 +0,0 @@
-//! Implement the "low-level" parts of the file cache.
-//!
-//! This module just deals with reading and writing the file, and keeping track
-//! which blocks in the cache file are in use and which are free. The "high
-//! level" parts of tracking which block in the cache file corresponds to which
-//! relation block is handled in 'integrated_cache' instead.
-//!
-//! This module is only used to access the file from the communicator
-//! process. The backend processes *also* read the file (and sometimes also
-//! write it? ), but the backends use direct C library calls for that.
-use std::fs::File;
-use std::os::unix::fs::FileExt;
-use std::path::Path;
-use std::sync::Arc;
-use std::sync::Mutex;
-
-use crate::BLCKSZ;
-
-use tokio::task::spawn_blocking;
-
-/// Block number within the cache file. The byte offset of a block in the file
-/// is the block number multiplied by BLCKSZ.
-pub type CacheBlock = u64;
-
-// NOTE(review): presumably a sentinel for "no cache block"; confirm against the users
-pub const INVALID_CACHE_BLOCK: CacheBlock = u64::MAX;
-
-/// The cache file, plus the bookkeeping of which blocks in it are free.
-pub struct FileCache {
-    // Shared with the blocking tasks that perform the actual reads and writes
-    file: Arc<File>,
-
-    free_list: Mutex<FreeList>,
-
-    // metrics
-    max_blocks_gauge: metrics::IntGauge,
-    num_free_blocks_gauge: metrics::IntGauge,
-}
-
-// TODO: We keep track of all free blocks in this vec. That doesn't really scale.
-// Idea: when free_blocks fills up with more than 1024 entries, write them all to
-// one block on disk.
-struct FreeList {
-    // Lowest block number that has never been handed out by alloc_block()
-    next_free_block: CacheBlock,
-    // Limit for next_free_block: blocks at or beyond this are never handed out
-    max_blocks: u64,
-
-    // Blocks returned with dealloc_block(), available for reuse
-    free_blocks: Vec<CacheBlock>,
-}
-
-impl FileCache {
-    /// Open the cache file at `file_cache_path`, creating it if needed and
-    /// truncating any existing contents.
-    ///
-    /// `initial_size` is the cache size in blocks. Sizes below 100 blocks are
-    /// raised to 100, with a warning.
-    pub fn new(file_cache_path: &Path, mut initial_size: u64) -> Result<FileCache, std::io::Error> {
-        if initial_size < 100 {
-            tracing::warn!(
-                "min size for file cache is 100 blocks, {} requested",
-                initial_size
-            );
-            initial_size = 100;
-        }
-
-        let file = std::fs::OpenOptions::new()
-            .read(true)
-            .write(true)
-            .truncate(true)
-            .create(true)
-            .open(file_cache_path)?;
-
-        let max_blocks_gauge = metrics::IntGauge::new(
-            "file_cache_max_blocks",
-            "Local File Cache size in 8KiB blocks",
-        )
-        .unwrap();
-        let num_free_blocks_gauge = metrics::IntGauge::new(
-            "file_cache_num_free_blocks",
-            "Number of free 8KiB blocks in Local File Cache",
-        )
-        .unwrap();
-
-        tracing::info!("initialized file cache with {} blocks", initial_size);
-
-        Ok(FileCache {
-            file: Arc::new(file),
-            free_list: Mutex::new(FreeList {
-                next_free_block: 0,
-                max_blocks: initial_size,
-                free_blocks: Vec::new(),
-            }),
-            max_blocks_gauge,
-            num_free_blocks_gauge,
-        })
-    }
-
-    // File cache management
-
-    /// Read block `cache_block` of the cache file into `dst`.
-    ///
-    /// Panics if the total size of `dst` is not exactly BLCKSZ bytes.
-    pub async fn read_block(
-        &self,
-        cache_block: CacheBlock,
-        mut dst: impl uring_common::buf::IoBufMut + Send + Sync,
-    ) -> Result<(), std::io::Error> {
-        assert!(dst.bytes_total() == BLCKSZ);
-        let file = self.file.clone();
-
-        // The buffer is moved into the blocking task, so that it stays alive
-        // until the read has finished even if this future is dropped in the
-        // meantime. A blocking task cannot be cancelled once it has started.
-        spawn_blocking(move || {
-            // SAFETY: `dst` is owned by this closure for the whole duration of
-            // the read, and we checked above that it is BLCKSZ bytes long.
-            let dst_ref = unsafe { std::slice::from_raw_parts_mut(dst.stable_mut_ptr(), BLCKSZ) };
-
-            file.read_exact_at(dst_ref, cache_block * BLCKSZ as u64)
-        })
-        .await??;
-        Ok(())
-    }
-
-    /// Write the contents of `src` to block `cache_block` of the cache file.
-    ///
-    /// Panics if `src` does not contain exactly BLCKSZ initialized bytes.
-    pub async fn write_block(
-        &self,
-        cache_block: CacheBlock,
-        src: impl uring_common::buf::IoBuf + Send + Sync,
-    ) -> Result<(), std::io::Error> {
-        assert!(src.bytes_init() == BLCKSZ);
-        let file = self.file.clone();
-
-        // As in read_block(), the buffer is moved into the blocking task so
-        // that it outlives the write even if this future is dropped.
-        spawn_blocking(move || {
-            // SAFETY: `src` is owned by this closure for the whole duration of
-            // the write, and we checked above that it holds BLCKSZ bytes.
-            let src_ref = unsafe { std::slice::from_raw_parts(src.stable_ptr(), BLCKSZ) };
-
-            file.write_all_at(src_ref, cache_block * BLCKSZ as u64)
-        })
-        .await??;
-
-        Ok(())
-    }
-
-    /// Allocate a block in the cache file. Previously freed blocks are reused
-    /// first. Returns None if the cache is full.
-    pub fn alloc_block(&self) -> Option<CacheBlock> {
-        let mut free_list = self.free_list.lock().unwrap();
-        if let Some(x) = free_list.free_blocks.pop() {
-            return Some(x);
-        }
-        if free_list.next_free_block < free_list.max_blocks {
-            let result = free_list.next_free_block;
-            free_list.next_free_block += 1;
-            return Some(result);
-        }
-        None
-    }
-
-    /// Return `cache_block` to the free list, making it available for reuse.
-    pub fn dealloc_block(&self, cache_block: CacheBlock) {
-        let mut free_list = self.free_list.lock().unwrap();
-        free_list.free_blocks.push(cache_block);
-    }
-}
-
-impl metrics::core::Collector for FileCache {
-    /// Descriptors of the two file cache gauges.
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = self.max_blocks_gauge.desc();
-        descs.extend(self.num_free_blocks_gauge.desc());
-        descs
-    }
-
-    /// Refresh the gauges from the free list, and collect them.
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        // Update the gauges with fresh values first. The lock is released
-        // again before collecting.
-        {
-            let free_list = self.free_list.lock().unwrap();
-
-            // Free blocks are the ones on the free list, plus the ones that
-            // have never been handed out.
-            let recycled = free_list.free_blocks.len() as i64;
-            let never_used = free_list.max_blocks as i64 - free_list.next_free_block as i64;
-
-            self.max_blocks_gauge.set(free_list.max_blocks as i64);
-            self.num_free_blocks_gauge.set(recycled + never_used);
-        }
-
-        let mut families = self.max_blocks_gauge.collect();
-        families.extend(self.num_free_blocks_gauge.collect());
-        families
-    }
-}
--- a/pgxn/neon/communicator/src/global_allocator.rs
+++ b/pgxn/neon/communicator/src/global_allocator.rs
@@ -1,109 +0,0 @@
-//! Global allocator, for tracking memory usage of the Rust parts
-//!
-//! Postgres is designed to handle allocation failure (ie. malloc() returning NULL) gracefully.  It
-//! rolls back the transaction and gives the user an "ERROR: out of memory" error. Rust code
-//! however panics if an allocation fails. We don't want that to ever happen, because an unhandled
-//! panic leads to Postgres crash and restart. Our strategy is to pre-allocate a large enough chunk
-//! of memory for use by the Rust code, so that the allocations never fail.
-//!
-//! To pick the size for the pre-allocated chunk, we have a metric to track the high watermark
-//! memory usage of all the Rust allocations in total.
-//!
-//! TODO:
-//!
-//! - Currently we just export the metrics. Actual allocations are still just passed through to
-//!   the system allocator.
-//! - Take padding etc. overhead into account
-
-use std::alloc::{GlobalAlloc, Layout, System};
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use metrics::IntGauge;
-
-/// Allocator wrapper that keeps usage counters, and passes the actual
-/// allocations through to the system allocator.
-struct MyAllocator {
-    // Number of alloc() and dealloc() calls so far
-    allocations: AtomicU64,
-    deallocations: AtomicU64,
-
-    // Bytes currently allocated, and the highest value that has been seen
-    allocated: AtomicUsize,
-    high: AtomicUsize,
-}
-
-unsafe impl GlobalAlloc for MyAllocator {
-    /// Count the allocation, update the high watermark, and allocate from the
-    /// system allocator.
-    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-        let size = layout.size();
-
-        self.allocations.fetch_add(1, Ordering::Relaxed);
-        // fetch_add returns the previous value; add our size to get the new total
-        let now_allocated = self.allocated.fetch_add(size, Ordering::Relaxed) + size;
-        self.high.fetch_max(now_allocated, Ordering::Relaxed);
-
-        unsafe { System.alloc(layout) }
-    }
-
-    /// Count the deallocation, and release the memory to the system allocator.
-    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-        let size = layout.size();
-
-        self.deallocations.fetch_add(1, Ordering::Relaxed);
-        self.allocated.fetch_sub(size, Ordering::Relaxed);
-
-        unsafe { System.dealloc(ptr, layout) }
-    }
-}
-
-// The allocator instance registered as the global allocator. All counters
-// start at zero.
-#[global_allocator]
-static GLOBAL: MyAllocator = MyAllocator {
-    allocations: AtomicU64::new(0),
-    deallocations: AtomicU64::new(0),
-    allocated: AtomicUsize::new(0),
-    high: AtomicUsize::new(0),
-};
-
-/// Metrics collector for the global allocator's counters. There is one gauge
-/// for each counter in `MyAllocator`.
-pub struct MyAllocatorCollector {
-    allocations: IntGauge,
-    deallocations: IntGauge,
-    allocated: IntGauge,
-    high: IntGauge,
-}
-
-impl MyAllocatorCollector {
-    /// Create the collector with its four gauges. Panics if a gauge cannot
-    /// be created.
-    pub fn new() -> MyAllocatorCollector {
-        let gauge = |name: &str, help: &str| IntGauge::new(name, help).unwrap();
-
-        MyAllocatorCollector {
-            allocations: gauge("allocations_total", "Number of allocations in Rust code"),
-            deallocations: gauge(
-                "deallocations_total",
-                "Number of deallocations in Rust code",
-            ),
-            allocated: gauge("allocated_total", "Bytes currently allocated"),
-            high: gauge("allocated_high", "High watermark of allocated bytes"),
-        }
-    }
-}
-
-impl metrics::core::Collector for MyAllocatorCollector {
-    /// Descriptors of all four allocator gauges.
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-
-        descs.append(&mut self.allocations.desc());
-        descs.append(&mut self.deallocations.desc());
-        descs.append(&mut self.allocated.desc());
-        descs.append(&mut self.high.desc());
-
-        descs
-    }
-
-    /// Refresh the gauges from the global allocator's counters, and collect them.
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        let mut values = Vec::new();
-
-        // update the gauges, each one from its own counter
-        self.allocations
-            .set(GLOBAL.allocations.load(Ordering::Relaxed) as i64);
-        self.deallocations
-            .set(GLOBAL.deallocations.load(Ordering::Relaxed) as i64);
-        self.allocated
-            .set(GLOBAL.allocated.load(Ordering::Relaxed) as i64);
-        self.high.set(GLOBAL.high.load(Ordering::Relaxed) as i64);
-
-        values.append(&mut self.allocations.collect());
-        values.append(&mut self.deallocations.collect());
-        values.append(&mut self.allocated.collect());
-        values.append(&mut self.high.collect());
-
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/init.rs
+++ b/pgxn/neon/communicator/src/init.rs
@@ -1,171 +0,0 @@
-//! Initialization functions. These are executed in the postmaster process,
-//! at different stages of server startup.
-//!
-//!
-//! Communicator initialization steps:
-//!
-//! 1. At postmaster startup, before shared memory is allocated,
-//!    rcommunicator_shmem_size() is called to get the amount of
-//!    shared memory that this module needs.
-//!
-//! 2. Later, after the shared memory has been allocated,
-//!    rcommunicator_shmem_init() is called to initialize the shmem
-//!    area.
-//!
-//! Per process initialization:
-//!
-//! When a backend process starts up, it calls rcommunicator_backend_init().
-//! In the communicator worker process, other functions are called, see
-//! `worker_process` module.
-
-use std::ffi::c_int;
-use std::mem;
-use std::mem::MaybeUninit;
-use std::os::fd::OwnedFd;
-
-use crate::backend_comms::NeonIOHandle;
-use crate::integrated_cache::IntegratedCacheInitStruct;
-
-/// This struct is created in the postmaster process, and inherited to
-/// the communicator process and all backend processes through fork()
-#[repr(C)]
-pub struct CommunicatorInitStruct {
-    // The two ends of the submission pipe. Backends write request slot indexes
-    // to the write end, to wake up the communicator.
-    pub submission_pipe_read_fd: OwnedFd,
-    pub submission_pipe_write_fd: OwnedFd,
-
-    // Shared memory data structures
-    pub num_neon_request_slots: u32,
-
-    // The request slot array, carved out of the shared memory area
-    pub neon_request_slots: &'static [NeonIOHandle],
-
-    pub integrated_cache_init_struct: IntegratedCacheInitStruct<'static>,
-}
-
-impl std::fmt::Debug for CommunicatorInitStruct {
-    /// Print the pipe fds and the number of request slots. The slot array is
-    /// represented by its length only, and the integrated cache is left out.
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        let mut dbg = fmt.debug_struct("CommunicatorInitStruct");
-
-        dbg.field("submission_pipe_read_fd", &self.submission_pipe_read_fd);
-        dbg.field("submission_pipe_write_fd", &self.submission_pipe_write_fd);
-        dbg.field("num_neon_request_slots", &self.num_neon_request_slots);
-        dbg.field("neon_request_slots length", &self.neon_request_slots.len());
-
-        dbg.finish()
-    }
-}
-
-/// Calculate how much shared memory is needed, in bytes, for the given number
-/// of request slots plus the integrated cache.
-///
-/// NOTE(review): no allowance is made here for the alignment padding that
-/// alloc_array_from_slice() may insert in front of the slot array.
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_shmem_size(num_neon_request_slots: u32) -> u64 {
-    let request_slots_size = mem::size_of::<NeonIOHandle>() * num_neon_request_slots as usize;
-
-    // For integrated_cache's Allocator. TODO: make this adjustable
-    let integrated_cache_size = IntegratedCacheInitStruct::shmem_size();
-
-    (request_slots_size + integrated_cache_size) as u64
-}
-
-/// Initialize the shared memory segment. Returns a backend-private
-/// struct, which will be inherited by backend processes through fork
-///
-/// The request slot array is placed at the beginning of the shared memory
-/// area, and the remainder of the area is given to the integrated cache.
-/// This takes ownership of the two pipe file descriptors. The returned struct
-/// is leaked, so it lives for the rest of the process.
-///
-/// Panics with "out of memory" if the area is too small for the slot array.
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_shmem_init(
-    submission_pipe_read_fd: c_int,
-    submission_pipe_write_fd: c_int,
-    num_neon_request_slots: u32,
-    shmem_area_ptr: *mut MaybeUninit<u8>,
-    shmem_area_len: u64,
-    initial_file_cache_size: u64,
-    max_file_cache_size: u64,
-) -> &'static mut CommunicatorInitStruct {
-    // The caller must pass a pointer to an area of at least 'shmem_area_len'
-    // bytes that stays valid for the rest of the process's lifetime.
-    let shmem_area: &'static mut [MaybeUninit<u8>] =
-        unsafe { std::slice::from_raw_parts_mut(shmem_area_ptr, shmem_area_len as usize) };
-
-    let (neon_request_slots, remaining_area) =
-        alloc_array_from_slice::<NeonIOHandle>(shmem_area, num_neon_request_slots as usize);
-
-    for slot in neon_request_slots.iter_mut() {
-        slot.write(NeonIOHandle::default());
-    }
-
-    // 'neon_request_slots' is initialized now. (MaybeUninit::slice_assume_init_mut() is nightly-only
-    // as of this writing.)
-    let neon_request_slots = unsafe {
-        std::mem::transmute::<&mut [MaybeUninit<NeonIOHandle>], &mut [NeonIOHandle]>(
-            neon_request_slots,
-        )
-    };
-
-    // Give the rest of the area to the integrated cache
-    let integrated_cache_init_struct = IntegratedCacheInitStruct::shmem_init(
-        remaining_area,
-        initial_file_cache_size,
-        max_file_cache_size,
-    );
-
-    // From here on, the file descriptors are owned by the OwnedFds, and are
-    // closed when those are dropped.
-    let (submission_pipe_read_fd, submission_pipe_write_fd) = unsafe {
-        use std::os::fd::FromRawFd;
-        (
-            OwnedFd::from_raw_fd(submission_pipe_read_fd),
-            OwnedFd::from_raw_fd(submission_pipe_write_fd),
-        )
-    };
-
-    let cis: &'static mut CommunicatorInitStruct = Box::leak(Box::new(CommunicatorInitStruct {
-        submission_pipe_read_fd,
-        submission_pipe_write_fd,
-
-        num_neon_request_slots,
-        neon_request_slots,
-
-        integrated_cache_init_struct,
-    }));
-
-    cis
-}
-
-/// Carve space for one `T` off the front of `area`.
-///
-/// Returns the (uninitialized) `T` and the rest of the area. Panics with
-/// "out of memory" if the area is too small.
-// fixme: currently unused
-#[allow(dead_code)]
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let size = std::mem::size_of::<T>();
-    let align = std::mem::align_of::<T>();
-
-    // pad to satisfy alignment requirements
-    let padding = area.as_mut_ptr().align_offset(align);
-    if padding + size > area.len() {
-        panic!("out of memory");
-    }
-
-    let (result_area, remain) = area[padding..].split_at_mut(size);
-
-    // SAFETY: result_area is aligned for T and exactly size_of::<T>() bytes
-    // long, and MaybeUninit<T> does not require the memory to be initialized.
-    let result_ptr = result_area.as_mut_ptr().cast::<MaybeUninit<T>>();
-    let result = unsafe { &mut *result_ptr };
-
-    (result, remain)
-}
-
-/// Carve space for an array of `len` elements of type `T` off the front of `area`.
-///
-/// Returns the (uninitialized) array and the rest of the area. Panics with
-/// "out of memory" if the area is too small, including when the size of the
-/// array does not fit in a usize.
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-
-    // Use checked arithmetic: if the size calculation wrapped around, the
-    // bounds check could pass for an array that does not actually fit.
-    let array_size = match layout.size().checked_mul(len) {
-        Some(size)
-            if padding
-                .checked_add(size)
-                .is_some_and(|needed| needed <= area.len()) =>
-        {
-            size
-        }
-        _ => panic!("out of memory"),
-    };
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(array_size);
-
-    // SAFETY: result_area is aligned for T and exactly len * size_of::<T>()
-    // bytes long, and MaybeUninit<T> does not require the memory to be
-    // initialized. The slice is built from the pointer to the whole region,
-    // not from a reference to its first element.
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr, len) };
-
-    (result, remain)
-}
--- a/pgxn/neon/communicator/src/integrated_cache.rs
+++ b/pgxn/neon/communicator/src/integrated_cache.rs
@@ -1,794 +0,0 @@
-//! Integrated communicator cache
-//!
-//! It tracks:
-//! - Relation sizes and existence
-//! - Last-written LSN
-//! - Block cache (also known as LFC)
-//!
-//! TODO: limit the size
-//! TODO: concurrency
-//!
-//! Note: This deals with "relations" which is really just one "relation fork" in Postgres
-//! terms. RelFileLocator + ForkNumber is the key.
-
-//
-// TODO: Thoughts on eviction:
-//
-// There are two things we need to track, and evict if we run out of space:
-// - blocks in the file cache's file. If the file grows too large, need to evict something.
-//   Also if the cache is resized
-//
-// - entries in the cache map. If we run out of memory in the shmem area, need to evict
-//   something
-//
-
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
-
-use utils::lsn::{AtomicLsn, Lsn};
-
-use crate::file_cache::INVALID_CACHE_BLOCK;
-use crate::file_cache::{CacheBlock, FileCache};
-use pageserver_page_api::RelTag;
-
-use metrics::{IntCounter, IntGauge};
-
-use neon_shmem::hash::{HashMapInit, entry::Entry};
-use neon_shmem::shmem::ShmemHandle;
-
-// in # of entries
-const RELSIZE_CACHE_SIZE: u32 = 64 * 1024;
-
-/// This struct is initialized at postmaster startup, and passed to all the processes via fork().
-pub struct IntegratedCacheInitStruct<'t> {
-    relsize_cache_handle: HashMapInit<'t, RelKey, RelEntry>,
-    block_map_handle: HashMapInit<'t, BlockKey, BlockEntry>,
-}
-
-/// Represents write-access to the integrated cache. This is used by the communicator process.
-pub struct IntegratedCacheWriteAccess<'t> {
-    relsize_cache: neon_shmem::hash::HashMapAccess<'t, RelKey, RelEntry>,
-    block_map: neon_shmem::hash::HashMapAccess<'t, BlockKey, BlockEntry>,
-
-    global_lw_lsn: AtomicU64,
-
-    pub(crate) file_cache: Option<FileCache>,
-
-    // Fields for eviction
-    clock_hand: std::sync::Mutex<usize>,
-
-    // Metrics
-    page_evictions_counter: IntCounter,
-    clock_iterations_counter: IntCounter,
-
-    // metrics from the hash map
-    block_map_num_buckets: IntGauge,
-    block_map_num_buckets_in_use: IntGauge,
-
-    relsize_cache_num_buckets: IntGauge,
-    relsize_cache_num_buckets_in_use: IntGauge,
-}
-
-/// Represents read-only access to the integrated cache. Backend processes have this.
-pub struct IntegratedCacheReadAccess<'t> {
-    relsize_cache: neon_shmem::hash::HashMapAccess<'t, RelKey, RelEntry>,
-    block_map: neon_shmem::hash::HashMapAccess<'t, BlockKey, BlockEntry>,
-}
-
-impl<'t> IntegratedCacheInitStruct<'t> {
-    /// Return the desired size in bytes of the fixed-size shared memory area to reserve for the
-    /// integrated cache.
-    pub fn shmem_size() -> usize {
-        // The relsize cache is fixed-size. The block map is allocated in a separate resizable
-        // area.
-        HashMapInit::<RelKey, RelEntry>::estimate_size(RELSIZE_CACHE_SIZE)
-    }
-
-    /// Initialize the shared memory segment. This runs once in postmaster. Returns a struct which
-    /// will be inherited by all processes through fork.
-    pub fn shmem_init(
-        shmem_area: &'t mut [MaybeUninit<u8>],
-        initial_file_cache_size: u64,
-        max_file_cache_size: u64,
-    ) -> IntegratedCacheInitStruct<'t> {
-        // Initialize the relsize cache in the fixed-size area
-        let relsize_cache_handle =
-            neon_shmem::hash::HashMapInit::with_fixed(RELSIZE_CACHE_SIZE, shmem_area);
-
-        let max_bytes =
-            HashMapInit::<BlockKey, BlockEntry>::estimate_size(max_file_cache_size as u32);
-
-        // Initialize the block map in a separate resizable shared memory area
-        let shmem_handle = ShmemHandle::new("block mapping", 0, max_bytes).unwrap();
-
-        let block_map_handle =
-            neon_shmem::hash::HashMapInit::with_shmem(initial_file_cache_size as u32, shmem_handle);
-        IntegratedCacheInitStruct {
-            relsize_cache_handle,
-            block_map_handle,
-        }
-    }
-
-    /// Initialize access to the integrated cache for the communicator worker process
-    pub fn worker_process_init(
-        self,
-        lsn: Lsn,
-        file_cache: Option<FileCache>,
-    ) -> IntegratedCacheWriteAccess<'t> {
-        let IntegratedCacheInitStruct {
-            relsize_cache_handle,
-            block_map_handle,
-        } = self;
-        IntegratedCacheWriteAccess {
-            relsize_cache: relsize_cache_handle.attach_writer(),
-            block_map: block_map_handle.attach_writer(),
-            global_lw_lsn: AtomicU64::new(lsn.0),
-            file_cache,
-            clock_hand: std::sync::Mutex::new(0),
-
-            page_evictions_counter: metrics::IntCounter::new(
-                "integrated_cache_evictions",
-                "Page evictions from the Local File Cache",
-            )
-            .unwrap(),
-
-            clock_iterations_counter: metrics::IntCounter::new(
-                "clock_iterations",
-                "Number of times the clock hand has moved",
-            )
-            .unwrap(),
-
-            block_map_num_buckets: metrics::IntGauge::new(
-                "block_map_num_buckets",
-                "Allocated size of the block cache hash map",
-            )
-            .unwrap(),
-            block_map_num_buckets_in_use: metrics::IntGauge::new(
-                "block_map_num_buckets_in_use",
-                "Number of buckets in use in the block cache hash map",
-            )
-            .unwrap(),
-
-            relsize_cache_num_buckets: metrics::IntGauge::new(
-                "relsize_cache_num_buckets",
-                "Allocated size of the relsize cache hash map",
-            )
-            .unwrap(),
-            relsize_cache_num_buckets_in_use: metrics::IntGauge::new(
-                "relsize_cache_num_buckets_in_use",
-                "Number of buckets in use in the relsize cache hash map",
-            )
-            .unwrap(),
-        }
-    }
-
-    /// Initialize access to the integrated cache for a backend process
-    pub fn backend_init(self) -> IntegratedCacheReadAccess<'t> {
-        let IntegratedCacheInitStruct {
-            relsize_cache_handle,
-            block_map_handle,
-        } = self;
-
-        IntegratedCacheReadAccess {
-            relsize_cache: relsize_cache_handle.attach_reader(),
-            block_map: block_map_handle.attach_reader(),
-        }
-    }
-}
-
-/// Value stored in the cache mapping hash table.
-struct BlockEntry {
-    lw_lsn: AtomicLsn,
-    cache_block: AtomicU64,
-
-    pinned: AtomicU64,
-
-    // 'referenced' bit for the clock algorithm
-    referenced: AtomicBool,
-}
-
-/// Value stored in the relsize cache hash table.
-struct RelEntry {
-    /// cached size of the relation
-    /// u32::MAX means 'not known' (that's InvalidBlockNumber in Postgres)
-    nblocks: AtomicU32,
-}
-
-impl std::fmt::Debug for RelEntry {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.debug_struct("Rel")
-            .field("nblocks", &self.nblocks.load(Ordering::Relaxed))
-            .finish()
-    }
-}
-impl std::fmt::Debug for BlockEntry {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.debug_struct("Block")
-            .field("lw_lsn", &self.lw_lsn.load())
-            .field("cache_block", &self.cache_block.load(Ordering::Relaxed))
-            .field("pinned", &self.pinned.load(Ordering::Relaxed))
-            .field("referenced", &self.referenced.load(Ordering::Relaxed))
-            .finish()
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash, Ord)]
-struct RelKey(RelTag);
-
-impl From<&RelTag> for RelKey {
-    fn from(val: &RelTag) -> RelKey {
-        RelKey(*val)
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash, Ord)]
-struct BlockKey {
-    rel: RelTag,
-    block_number: u32,
-}
-
-impl From<(&RelTag, u32)> for BlockKey {
-    fn from(val: (&RelTag, u32)) -> BlockKey {
-        BlockKey {
-            rel: *val.0,
-            block_number: val.1,
-        }
-    }
-}
-
-/// Return type used in the cache's get_*() functions. 'Found' means that the page, or other
-/// information that was enqueried, exists in the cache. '
-pub enum CacheResult<V> {
-    /// The enqueried page or other information existed in the cache.
-    Found(V),
-
-    /// The cache doesn't contain the page (or other enqueried information, like relation size). The
-    /// Lsn is the 'not_modified_since' LSN that should be used in the request to the pageserver to
-    /// read the page.
-    NotFound(Lsn),
-}
-
-impl<'t> IntegratedCacheWriteAccess<'t> {
-    pub fn get_rel_size(&'t self, rel: &RelTag) -> CacheResult<u32> {
-        if let Some(nblocks) = get_rel_size(&self.relsize_cache, rel) {
-            CacheResult::Found(nblocks)
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            CacheResult::NotFound(lsn)
-        }
-    }
-
-    pub async fn get_page(
-        &'t self,
-        rel: &RelTag,
-        block_number: u32,
-        dst: impl uring_common::buf::IoBufMut + Send + Sync,
-    ) -> Result<CacheResult<()>, std::io::Error> {
-        let x = if let Some(block_entry) = self.block_map.get(&BlockKey::from((rel, block_number)))
-        {
-            block_entry.referenced.store(true, Ordering::Relaxed);
-
-            let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-            if cache_block != INVALID_CACHE_BLOCK {
-                // pin it and release lock
-                block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-
-                (cache_block, DeferredUnpin(block_entry.pinned.as_ptr()))
-            } else {
-                return Ok(CacheResult::NotFound(block_entry.lw_lsn.load()));
-            }
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            return Ok(CacheResult::NotFound(lsn));
-        };
-
-        let (cache_block, _deferred_pin) = x;
-        self.file_cache
-            .as_ref()
-            .unwrap()
-            .read_block(cache_block, dst)
-            .await?;
-
-        // unpin the entry (by implicitly dropping deferred_pin)
-        Ok(CacheResult::Found(()))
-    }
-
-    pub async fn page_is_cached(
-        &'t self,
-        rel: &RelTag,
-        block_number: u32,
-    ) -> Result<CacheResult<()>, std::io::Error> {
-        if let Some(block_entry) = self.block_map.get(&BlockKey::from((rel, block_number))) {
-            // This is used for prefetch requests. Treat the probe as an 'access', to keep it
-            // in cache.
-            block_entry.referenced.store(true, Ordering::Relaxed);
-
-            let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-
-            if cache_block != INVALID_CACHE_BLOCK {
-                Ok(CacheResult::Found(()))
-            } else {
-                Ok(CacheResult::NotFound(block_entry.lw_lsn.load()))
-            }
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            Ok(CacheResult::NotFound(lsn))
-        }
-    }
-
-    /// Does the relation exists? CacheResult::NotFound means that the cache doesn't contain that
-    /// information, i.e. we don't know if the relation exists or not.
-    pub fn get_rel_exists(&'t self, rel: &RelTag) -> CacheResult<bool> {
-        // we don't currently cache negative entries, so if the relation is in the cache, it exists
-        if let Some(_rel_entry) = self.relsize_cache.get(&RelKey::from(rel)) {
-            CacheResult::Found(true)
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            CacheResult::NotFound(lsn)
-        }
-    }
-
-    pub fn get_db_size(&'t self, _db_oid: u32) -> CacheResult<u64> {
-        // TODO: it would be nice to cache database sizes too. Getting the database size
-        // is not a very common operation, but when you do it, it's often interactive, with
-        // e.g. psql \l+ command, so the user will feel the latency.
-
-        // fixme: is this right lsn?
-        let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-        CacheResult::NotFound(lsn)
-    }
-
-    pub fn remember_rel_size(&'t self, rel: &RelTag, nblocks: u32) {
-        match self.relsize_cache.entry(RelKey::from(rel)) {
-            Entry::Vacant(e) => {
-                tracing::info!("inserting rel entry for {rel:?}, {nblocks} blocks");
-                // FIXME: what to do if we run out of memory? Evict other relation entries?
-                _ = e
-                    .insert(RelEntry {
-                        nblocks: AtomicU32::new(nblocks),
-                    })
-                    .expect("out of memory");
-            }
-            Entry::Occupied(e) => {
-                tracing::info!("updating rel entry for {rel:?}, {nblocks} blocks");
-                e.get().nblocks.store(nblocks, Ordering::Relaxed);
-            }
-        };
-    }
-
-    /// Remember the given page contents in the cache.
-    pub async fn remember_page(
-        &'t self,
-        rel: &RelTag,
-        block_number: u32,
-        src: impl uring_common::buf::IoBuf + Send + Sync,
-        lw_lsn: Lsn,
-        is_write: bool,
-    ) {
-        let key = BlockKey::from((rel, block_number));
-
-        // FIXME: make this work when file cache is disabled. Or make it mandatory
-        let file_cache = self.file_cache.as_ref().unwrap();
-
-        if is_write {
-            // there should be no concurrent IOs. If a backend tries to read the page
-            // at the same time, they may get a torn write. That's the same as with
-            // regular POSIX filesystem read() and write()
-
-            // First check if we have a block in cache already
-            let mut old_cache_block = None;
-            let mut found_existing = false;
-
-            // NOTE(quantumish): honoring original semantics here (used to be update_with_fn)
-            // but I don't see any reason why this has to take a write lock.
-            if let Entry::Occupied(e) = self.block_map.entry(key.clone()) {
-                let block_entry = e.get();
-                found_existing = true;
-
-                // Prevent this entry from being evicted
-                let pin_count = block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-                if pin_count > 0 {
-                    // this is unexpected, because the caller has obtained the io-in-progress lock,
-                    // so no one else should try to modify the page at the same time.
-                    // XXX: and I think a read should not be happening either, because the postgres
-                    // buffer is held locked. TODO: check these conditions and tidy this up a little. Seems fragile to just panic.
-                    panic!("block entry was unexpectedly pinned");
-                }
-
-                let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-                old_cache_block = if cache_block != INVALID_CACHE_BLOCK {
-                    Some(cache_block)
-                } else {
-                    None
-                };
-            }
-
-            // Allocate a new block if required
-            let cache_block = old_cache_block.unwrap_or_else(|| {
-                loop {
-                    if let Some(x) = file_cache.alloc_block() {
-                        break x;
-                    }
-                    if let Some(x) = self.try_evict_one_cache_block() {
-                        break x;
-                    }
-                }
-            });
-
-            // Write the page to the cache file
-            file_cache
-                .write_block(cache_block, src)
-                .await
-                .expect("error writing to cache");
-            // FIXME: handle errors gracefully.
-            // FIXME: unpin the block entry on error
-
-            // Update the block entry
-            let entry = self.block_map.entry(key);
-            assert_eq!(found_existing, matches!(entry, Entry::Occupied(_)));
-            match entry {
-                Entry::Occupied(e) => {
-                    let block_entry = e.get();
-                    // Update the cache block
-                    let old_blk = block_entry.cache_block.compare_exchange(
-                        INVALID_CACHE_BLOCK,
-                        cache_block,
-                        Ordering::Relaxed,
-                        Ordering::Relaxed,
-                    );
-                    assert!(old_blk == Ok(INVALID_CACHE_BLOCK) || old_blk == Err(cache_block));
-
-                    block_entry.lw_lsn.store(lw_lsn);
-
-                    block_entry.referenced.store(true, Ordering::Relaxed);
-
-                    let pin_count = block_entry.pinned.fetch_sub(1, Ordering::Relaxed);
-                    assert!(pin_count > 0);
-                }
-                Entry::Vacant(e) => {
-                    // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-                    // block entries first?
-                    _ = e
-                        .insert(BlockEntry {
-                            lw_lsn: AtomicLsn::new(lw_lsn.0),
-                            cache_block: AtomicU64::new(cache_block),
-                            pinned: AtomicU64::new(0),
-                            referenced: AtomicBool::new(true),
-                        })
-                        .expect("out of memory");
-                }
-            }
-        } else {
-            // !is_write
-            //
-            // We can assume that it doesn't already exist, because the
-            // caller is assumed to have already checked it, and holds
-            // the io-in-progress lock. (The BlockEntry might exist, but no cache block)
-
-            // Allocate a new block first
-            let cache_block = {
-                loop {
-                    if let Some(x) = file_cache.alloc_block() {
-                        break x;
-                    }
-                    if let Some(x) = self.try_evict_one_cache_block() {
-                        break x;
-                    }
-                }
-            };
-
-            // Write the page to the cache file
-            file_cache
-                .write_block(cache_block, src)
-                .await
-                .expect("error writing to cache");
-            // FIXME: handle errors gracefully.
-
-            match self.block_map.entry(key) {
-                Entry::Occupied(e) => {
-                    let block_entry = e.get();
-                    // FIXME: could there be concurrent readers?
-                    assert!(block_entry.pinned.load(Ordering::Relaxed) == 0);
-
-                    let old_cache_block =
-                        block_entry.cache_block.swap(cache_block, Ordering::Relaxed);
-                    if old_cache_block != INVALID_CACHE_BLOCK {
-                        panic!(
-                            "remember_page called in !is_write mode, but page is already cached at blk {old_cache_block}"
-                        );
-                    }
-                }
-                Entry::Vacant(e) => {
-                    // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-                    // block entries first?
-                    _ = e
-                        .insert(BlockEntry {
-                            lw_lsn: AtomicLsn::new(lw_lsn.0),
-                            cache_block: AtomicU64::new(cache_block),
-                            pinned: AtomicU64::new(0),
-                            referenced: AtomicBool::new(true),
-                        })
-                        .expect("out of memory");
-                }
-            }
-        }
-    }
-
-    /// Forget information about given relation in the cache. (For DROP TABLE and such)
-    pub fn forget_rel(&'t self, rel: &RelTag) {
-        tracing::info!("forgetting rel entry for {rel:?}");
-        self.relsize_cache.remove(&RelKey::from(rel));
-
-        // also forget all cached blocks for the relation
-        // FIXME
-        /*
-            let mut iter = MapIterator::new(&key_range_for_rel_blocks(rel));
-            let r = self.cache_tree.start_read();
-            while let Some((k, _v)) = iter.next(&r) {
-                let w = self.cache_tree.start_write();
-
-                let mut evicted_cache_block = None;
-
-                let res = w.update_with_fn(&k, |e| {
-                    if let Some(e) = e {
-                        let block_entry = if let MapEntry::Block(e) = e {
-                            e
-                        } else {
-                            panic!("unexpected map entry type for block key");
-                        };
-                        let cache_block = block_entry
-                            .cache_block
-                            .swap(INVALID_CACHE_BLOCK, Ordering::Relaxed);
-                        if cache_block != INVALID_CACHE_BLOCK {
-                            evicted_cache_block = Some(cache_block);
-                        }
-                        UpdateAction::Remove
-                    } else {
-                        UpdateAction::Nothing
-                    }
-                });
-
-                // FIXME: It's pretty surprising to run out of memory while removing. But
-                // maybe it can happen because of trying to shrink a node?
-                res.expect("out of memory");
-
-                if let Some(evicted_cache_block) = evicted_cache_block {
-                    self.file_cache
-                        .as_ref()
-                        .unwrap()
-                        .dealloc_block(evicted_cache_block);
-                }
-        }
-
-            */
-    }
-
-    // Maintenance routines
-
-    /// Evict one block from the file cache. This is used when the file cache fills up
-    /// Returns the evicted block. It's not put to the free list, so it's available for the
-    /// caller to use immediately.
-    pub fn try_evict_one_cache_block(&self) -> Option<CacheBlock> {
-        let mut clock_hand = self.clock_hand.lock().unwrap();
-        for _ in 0..100 {
-            self.clock_iterations_counter.inc();
-
-            (*clock_hand) += 1;
-
-            let mut evict_this = false;
-            let num_buckets = self.block_map.get_num_buckets();
-            match self
-                .block_map
-                .get_at_bucket((*clock_hand) % num_buckets)
-                .as_deref()
-            {
-                None => {
-                    // This bucket was unused
-                }
-                Some((_, blk_entry)) => {
-                    if !blk_entry.referenced.swap(false, Ordering::Relaxed) {
-                        // Evict this. Maybe.
-                        evict_this = true;
-                    }
-                }
-            };
-
-            if evict_this {
-                // grab the write lock
-                let mut evicted_cache_block = None;
-                if let Some(e) = self.block_map.entry_at_bucket(*clock_hand % num_buckets) {
-                    let old = e.get();
-                    // note: all the accesses to 'pinned' currently happen
-                    // within update_with_fn(), or while holding ValueReadGuard, which protects from concurrent
-                    // updates. Otherwise, another thread could set the 'pinned'
-                    // flag just after we have checked it here.
-                    if old.pinned.load(Ordering::Relaxed) == 0 {
-                        let _ = self
-                            .global_lw_lsn
-                            .fetch_max(old.lw_lsn.load().0, Ordering::Relaxed);
-                        let cache_block =
-                            old.cache_block.swap(INVALID_CACHE_BLOCK, Ordering::Relaxed);
-                        if cache_block != INVALID_CACHE_BLOCK {
-                            evicted_cache_block = Some(cache_block);
-                        }
-                        e.remove();
-                    }
-                }
-
-                if evicted_cache_block.is_some() {
-                    self.page_evictions_counter.inc();
-                    return evicted_cache_block;
-                }
-            }
-        }
-        // Give up if we didn't find anything
-        None
-    }
-
-    /// Resize the local file cache.
-    pub fn resize_file_cache(&self, num_blocks: u32) {
-        let old_num_blocks = self.block_map.get_num_buckets() as u32;
-
-        if old_num_blocks < num_blocks {
-            if let Err(err) = self.block_map.grow(num_blocks) {
-                tracing::warn!(
-                    "could not grow file cache to {} blocks (old size {}): {}",
-                    num_blocks,
-                    old_num_blocks,
-                    err
-                );
-            }
-        } else {
-            // TODO: Shrinking not implemented yet
-        }
-    }
-
-    pub fn dump_map(&self, _dst: &mut dyn std::io::Write) {
-        //FIXME self.cache_map.start_read().dump(dst);
-    }
-}
-
-impl metrics::core::Collector for IntegratedCacheWriteAccess<'_> {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-        descs.append(&mut self.page_evictions_counter.desc());
-        descs.append(&mut self.clock_iterations_counter.desc());
-
-        descs.append(&mut self.block_map_num_buckets.desc());
-        descs.append(&mut self.block_map_num_buckets_in_use.desc());
-
-        descs.append(&mut self.relsize_cache_num_buckets.desc());
-        descs.append(&mut self.relsize_cache_num_buckets_in_use.desc());
-
-        descs
-    }
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        // Update gauges
-        self.block_map_num_buckets
-            .set(self.block_map.get_num_buckets() as i64);
-        self.block_map_num_buckets_in_use
-            .set(self.block_map.get_num_buckets_in_use() as i64);
-        self.relsize_cache_num_buckets
-            .set(self.relsize_cache.get_num_buckets() as i64);
-        self.relsize_cache_num_buckets_in_use
-            .set(self.relsize_cache.get_num_buckets_in_use() as i64);
-
-        let mut values = Vec::new();
-        values.append(&mut self.page_evictions_counter.collect());
-        values.append(&mut self.clock_iterations_counter.collect());
-
-        values.append(&mut self.block_map_num_buckets.collect());
-        values.append(&mut self.block_map_num_buckets_in_use.collect());
-
-        values.append(&mut self.relsize_cache_num_buckets.collect());
-        values.append(&mut self.relsize_cache_num_buckets_in_use.collect());
-
-        values
-    }
-}
-
-/// Read relation size from the cache.
-///
-/// This is in a separate function so that it can be shared by
-/// IntegratedCacheReadAccess::get_rel_size() and IntegratedCacheWriteAccess::get_rel_size()
-fn get_rel_size(
-    r: &neon_shmem::hash::HashMapAccess<RelKey, RelEntry>,
-    rel: &RelTag,
-) -> Option<u32> {
-    if let Some(rel_entry) = r.get(&RelKey::from(rel)) {
-        let nblocks = rel_entry.nblocks.load(Ordering::Relaxed);
-        if nblocks != u32::MAX {
-            Some(nblocks)
-        } else {
-            None
-        }
-    } else {
-        None
-    }
-}
-
-/// Accessor for other backends
-///
-/// This allows backends to read pages from the cache directly, on their own, without making a
-/// request to the communicator process.
-impl<'t> IntegratedCacheReadAccess<'t> {
-    pub fn get_rel_size(&'t self, rel: &RelTag) -> Option<u32> {
-        get_rel_size(&self.relsize_cache, rel)
-    }
-
-    pub fn start_read_op(&'t self) -> BackendCacheReadOp<'t> {
-        BackendCacheReadOp {
-            read_guards: Vec::new(),
-            map_access: self,
-        }
-    }
-
-    /// Check if the given page is present in the cache
-    pub fn cache_contains_page(&'t self, rel: &RelTag, block_number: u32) -> bool {
-        self.block_map
-            .get(&BlockKey::from((rel, block_number)))
-            .is_some()
-    }
-}
-
-pub struct BackendCacheReadOp<'t> {
-    read_guards: Vec<DeferredUnpin>,
-    map_access: &'t IntegratedCacheReadAccess<'t>,
-}
-
-impl<'e> BackendCacheReadOp<'e> {
-    /// Initiate a read of the page from the cache.
-    ///
-    /// This returns the "cache block number", i.e. the block number within the cache file, where
-    /// the page's contents is stored. To get the page contents, the caller needs to read that block
-    /// from the cache file. This returns a guard object that you must hold while it performs the
-    /// read. It's possible that while you are performing the read, the cache block is invalidated.
-    /// After you have completed the read, call BackendCacheReadResult::finish() to check if the
-    /// read was in fact valid or not. If it was concurrently invalidated, you need to retry.
-    pub fn get_page(&mut self, rel: &RelTag, block_number: u32) -> Option<u64> {
-        if let Some(block_entry) = self
-            .map_access
-            .block_map
-            .get(&BlockKey::from((rel, block_number)))
-        {
-            block_entry.referenced.store(true, Ordering::Relaxed);
-
-            let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-            if cache_block != INVALID_CACHE_BLOCK {
-                block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-                self.read_guards
-                    .push(DeferredUnpin(block_entry.pinned.as_ptr()));
-                Some(cache_block)
-            } else {
-                None
-            }
-        } else {
-            None
-        }
-    }
-
-    pub fn finish(self) -> bool {
-        // TODO: currently, we hold a pin on the in-memory map, so concurrent invalidations are not
-        // possible. But if we switch to optimistic locking, this would return 'false' if the
-        // optimistic locking failed and you need to retry.
-        true
-    }
-}
-
-/// A hack to decrement an AtomicU64 on drop. This is used to decrement the pin count
-/// of a BlockEntry. The safety depends on the fact that the BlockEntry is not evicted
-/// or moved while it's pinned.
-struct DeferredUnpin(*mut u64);
-
-unsafe impl Sync for DeferredUnpin {}
-unsafe impl Send for DeferredUnpin {}
-
-impl Drop for DeferredUnpin {
-    fn drop(&mut self) {
-        // unpin it
-        unsafe {
-            let pin_ref = AtomicU64::from_ptr(self.0);
-            pin_ref.fetch_sub(1, Ordering::Relaxed);
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/lib.rs
+++ b/pgxn/neon/communicator/src/lib.rs
@@ -1,27 +0,0 @@
-//!
-//! Three main parts:
-//! - async tokio communicator core, which receives requests and processes them.
-//! - Main loop and requests queues, which routes requests from backends to the core
-//! - the per-backend glue code, which submits requests
-//!
-
-mod backend_comms;
-
-// mark this 'pub', because these functions are called from C code. Otherwise, the compiler
-// complains about a bunch of structs and enum variants being unused, because it thinkgs
-// the functions that use them are never called. There are some C-callable functions in
-// other modules too, but marking this as pub is currently enough to silence the warnings
-//
-// TODO: perhaps collect *all* the extern "C" functions to one module?
-pub mod backend_interface;
-
-mod file_cache;
-mod init;
-mod integrated_cache;
-mod neon_request;
-mod worker_process;
-
-mod global_allocator;
-
-// FIXME get this from postgres headers somehow
-pub const BLCKSZ: usize = 8192;
--- a/pgxn/neon/communicator/src/neon_request.rs
+++ b/pgxn/neon/communicator/src/neon_request.rs
@@ -1,377 +0,0 @@
-pub type CLsn = u64;
-pub type COid = u32;
-
-// This conveniently matches PG_IOV_MAX
-pub const MAX_GETPAGEV_PAGES: usize = 32;
-
-use pageserver_page_api as page_api;
-
-#[allow(clippy::large_enum_variant)]
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub enum NeonIORequest {
-    Empty,
-
-    // Read requests. These are C-friendly variants of the corresponding structs in
-    // pageserver_page_api.
-    RelExists(CRelExistsRequest),
-    RelSize(CRelSizeRequest),
-    GetPageV(CGetPageVRequest),
-    PrefetchV(CPrefetchVRequest),
-    DbSize(CDbSizeRequest),
-
-    // Write requests. These are needed to keep the relation size cache and LFC up-to-date.
-    // They are not sent to the pageserver.
-    WritePage(CWritePageRequest),
-    RelExtend(CRelExtendRequest),
-    RelZeroExtend(CRelZeroExtendRequest),
-    RelCreate(CRelCreateRequest),
-    RelTruncate(CRelTruncateRequest),
-    RelUnlink(CRelUnlinkRequest),
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub enum NeonIOResult {
-    Empty,
-    RelExists(bool),
-    RelSize(u32),
-
-    /// the result pages are written to the shared memory addresses given in the request
-    GetPageV,
-
-    /// A prefetch request returns as soon as the request has been received by the communicator.
-    /// It is processed in the background.
-    PrefetchVLaunched,
-
-    DbSize(u64),
-
-    // FIXME design compact error codes. Can't easily pass a string or other dynamic data.
-    // currently, this is 'errno'
-    Error(i32),
-
-    Aborted,
-
-    /// used for all write requests
-    WriteOK,
-}
-
-impl NeonIORequest {
-    pub fn request_id(&self) -> u64 {
-        use NeonIORequest::*;
-        match self {
-            Empty => 0,
-            RelExists(req) => req.request_id,
-            RelSize(req) => req.request_id,
-            GetPageV(req) => req.request_id,
-            PrefetchV(req) => req.request_id,
-            DbSize(req) => req.request_id,
-            WritePage(req) => req.request_id,
-            RelExtend(req) => req.request_id,
-            RelZeroExtend(req) => req.request_id,
-            RelCreate(req) => req.request_id,
-            RelTruncate(req) => req.request_id,
-            RelUnlink(req) => req.request_id,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CCachedGetPageVResult {
-    pub cache_block_numbers: [u64; MAX_GETPAGEV_PAGES],
-}
-
-/// ShmemBuf represents a buffer in shared memory.
-///
-/// SAFETY: The pointer must point to an area in shared memory. The functions allow you to liberally
-/// get a mutable pointer to the contents; it is the caller's responsibility to ensure that you
-/// don't access a buffer that's you're not allowed to. Inappropriate access to the buffer doesn't
-/// violate Rust's safety semantics, but it will mess up and crash Postgres.
-///
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct ShmemBuf {
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub ptr: *mut u8,
-}
-
-unsafe impl Send for ShmemBuf {}
-unsafe impl Sync for ShmemBuf {}
-
-unsafe impl uring_common::buf::IoBuf for ShmemBuf {
-    fn stable_ptr(&self) -> *const u8 {
-        self.ptr
-    }
-
-    fn bytes_init(&self) -> usize {
-        crate::BLCKSZ
-    }
-
-    fn bytes_total(&self) -> usize {
-        crate::BLCKSZ
-    }
-}
-
-unsafe impl uring_common::buf::IoBufMut for ShmemBuf {
-    fn stable_mut_ptr(&mut self) -> *mut u8 {
-        self.ptr
-    }
-
-    unsafe fn set_init(&mut self, pos: usize) {
-        if pos > crate::BLCKSZ {
-            panic!(
-                "set_init called past end of buffer, pos {}, buffer size {}",
-                pos,
-                crate::BLCKSZ
-            );
-        }
-    }
-}
-
-impl ShmemBuf {
-    pub fn as_mut_ptr(&self) -> *mut u8 {
-        self.ptr
-    }
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelExistsRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelSizeRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CGetPageVRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub dest: [ShmemBuf; MAX_GETPAGEV_PAGES],
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CPrefetchVRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CDbSizeRequest {
-    pub request_id: u64,
-    pub db_oid: COid,
-    pub request_lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CWritePageRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub lsn: CLsn,
-
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub src: ShmemBuf,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelExtendRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub lsn: CLsn,
-
-    // These fields define page contents. Must point into a buffer in shared memory!
-    pub src: ShmemBuf,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelZeroExtendRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u32,
-    pub lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelCreateRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelTruncateRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub nblocks: u32,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelUnlinkRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u32,
-}
-
-impl CRelExistsRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelSizeRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CGetPageVRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CPrefetchVRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CWritePageRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelExtendRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelZeroExtendRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelCreateRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelTruncateRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelUnlinkRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/callbacks.rs
+++ b/pgxn/neon/communicator/src/worker_process/callbacks.rs
@@ -1,28 +0,0 @@
-//! C callbacks to PostgreSQL facilities that the neon extension needs
-//! to provide. These are implemented in `neon/pgxn/communicator_new.c`.
-//! The function signatures better match!
-//!
-//! These are called from the communicator threads! Careful what you do, most
-//! Postgres functions are not safe to call in that context.
-
-use utils::lsn::Lsn;
-
-unsafe extern "C" {
-    pub fn notify_proc_unsafe(procno: std::ffi::c_int);
-    pub fn callback_set_my_latch_unsafe();
-    pub fn callback_get_request_lsn_unsafe() -> u64;
-}
-
-// safe wrappers
-
-pub(super) fn notify_proc(procno: std::ffi::c_int) {
-    unsafe { notify_proc_unsafe(procno) };
-}
-
-pub(super) fn callback_set_my_latch() {
-    unsafe { callback_set_my_latch_unsafe() };
-}
-
-pub(super) fn get_request_lsn() -> Lsn {
-    Lsn(unsafe { callback_get_request_lsn_unsafe() })
-}
--- a/pgxn/neon/communicator/src/worker_process/in_progress_ios.rs
+++ b/pgxn/neon/communicator/src/worker_process/in_progress_ios.rs
@@ -1,95 +0,0 @@
-//! Lock table to ensure that only one IO request is in flight for a given
-//! block (or relation or database metadata) at a time
-
-use std::cmp::Eq;
-use std::hash::Hash;
-use std::sync::Arc;
-
-use tokio::sync::{Mutex, OwnedMutexGuard};
-
-use clashmap::ClashMap;
-use clashmap::Entry;
-
-use pageserver_page_api::RelTag;
-
-#[derive(Clone, Eq, Hash, PartialEq)]
-pub enum RequestInProgressKey {
-    Db(u32),
-    Rel(RelTag),
-    Block(RelTag, u32),
-}
-
-type RequestId = u64;
-
-pub type RequestInProgressTable = MutexHashMap<RequestInProgressKey, RequestId>;
-
-// more primitive locking thingie:
-
-pub struct MutexHashMap<K, V>
-where
-    K: Clone + Eq + Hash,
-{
-    lock_table: ClashMap<K, (V, Arc<Mutex<()>>)>,
-}
-
-pub struct MutexHashMapGuard<'a, K, V>
-where
-    K: Clone + Eq + Hash,
-{
-    pub key: K,
-    map: &'a MutexHashMap<K, V>,
-    mutex: Arc<Mutex<()>>,
-    _guard: OwnedMutexGuard<()>,
-}
-
-impl<'a, K, V> Drop for MutexHashMapGuard<'a, K, V>
-where
-    K: Clone + Eq + Hash,
-{
-    fn drop(&mut self) {
-        let (_old_key, old_val) = self.map.lock_table.remove(&self.key).unwrap();
-        assert!(Arc::ptr_eq(&old_val.1, &self.mutex));
-
-        // the guard will be dropped as we return
-    }
-}
-
-impl<K, V> MutexHashMap<K, V>
-where
-    K: Clone + Eq + Hash,
-    V: std::fmt::Display + Copy,
-{
-    pub fn new() -> MutexHashMap<K, V> {
-        MutexHashMap {
-            lock_table: ClashMap::new(),
-        }
-    }
-
-    pub async fn lock<'a>(&'a self, key: K, val: V) -> MutexHashMapGuard<'a, K, V> {
-        let my_mutex = Arc::new(Mutex::new(()));
-        let my_guard = Arc::clone(&my_mutex).lock_owned().await;
-
-        loop {
-            let (request_id, lock) = match self.lock_table.entry(key.clone()) {
-                Entry::Occupied(e) => {
-                    let e = e.get();
-                    (e.0, Arc::clone(&e.1))
-                }
-                Entry::Vacant(e) => {
-                    e.insert((val, Arc::clone(&my_mutex)));
-                    break;
-                }
-            };
-            tracing::info!("waiting for conflicting IO {request_id} to complete");
-            let _ = lock.lock().await;
-            tracing::info!("conflicting IO {request_id} completed");
-        }
-
-        MutexHashMapGuard {
-            key,
-            map: self,
-            mutex: my_mutex,
-            _guard: my_guard,
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/logging.rs
+++ b/pgxn/neon/communicator/src/worker_process/logging.rs
@@ -1,231 +0,0 @@
-//! Glue code to hook up Rust logging with the `tracing` crate to the PostgreSQL log
-//!
-//! In the Rust threads, the log messages are written to a mpsc Channel, and the Postgres
-//! process latch is raised. That wakes up the loop in the main thread. It reads the
-//! message from the channel and ereport()s it. This ensures that only one thread, the main
-//! thread, calls the PostgreSQL logging routines at any time.
-
-use std::sync::mpsc::sync_channel;
-use std::sync::mpsc::{Receiver, SyncSender};
-use std::sync::mpsc::{TryRecvError, TrySendError};
-
-use tracing::info;
-use tracing::{Event, Level, Metadata, Subscriber};
-use tracing_subscriber::filter::LevelFilter;
-use tracing_subscriber::fmt::FmtContext;
-use tracing_subscriber::fmt::FormatEvent;
-use tracing_subscriber::fmt::FormatFields;
-use tracing_subscriber::fmt::FormattedFields;
-use tracing_subscriber::fmt::MakeWriter;
-use tracing_subscriber::fmt::format::Writer;
-use tracing_subscriber::registry::LookupSpan;
-
-use crate::worker_process::callbacks::callback_set_my_latch;
-
-pub struct LoggingState {
-    receiver: Receiver<FormattedEventWithMeta>,
-}
-
-/// Called once, at worker process startup. The returned LoggingState is passed back
-/// in the subsequent calls to `pump_logging`. It is opaque to the C code.
-#[unsafe(no_mangle)]
-pub extern "C" fn configure_logging() -> Box<LoggingState> {
-    let (sender, receiver) = sync_channel(1000);
-
-    let maker = Maker { channel: sender };
-
-    use tracing_subscriber::prelude::*;
-    let r = tracing_subscriber::registry();
-
-    let r = r.with(
-        tracing_subscriber::fmt::layer()
-            .with_ansi(false)
-            .event_format(SimpleFormatter::new())
-            .with_writer(maker)
-            // TODO: derive this from log_min_messages?
-            .with_filter(LevelFilter::from_level(Level::INFO)),
-    );
-    r.init();
-
-    info!("communicator process logging started");
-
-    let state = LoggingState { receiver };
-
-    Box::new(state)
-}
-
-/// Read one message from the logging queue. This is essentially a wrapper to Receiver,
-/// with a C-friendly signature.
-///
-/// The message is copied into *errbuf, which is a caller-supplied buffer of size `errbuf_len`.
-/// If the message doesn't fit in the buffer, it is truncated. It is always NULL-terminated.
-///
-/// The error level is returned *elevel_p. It's one of the PostgreSQL error levels, see elog.h
-#[unsafe(no_mangle)]
-pub extern "C" fn pump_logging(
-    state: &mut LoggingState,
-    errbuf: *mut u8,
-    errbuf_len: u32,
-    elevel_p: &mut i32,
-) -> i32 {
-    let msg = match state.receiver.try_recv() {
-        Err(TryRecvError::Empty) => return 0,
-        Err(TryRecvError::Disconnected) => return -1,
-        Ok(msg) => msg,
-    };
-
-    let src: &[u8] = &msg.message;
-    let dst = errbuf;
-    let len = std::cmp::min(src.len(), errbuf_len as usize - 1);
-    unsafe {
-        std::ptr::copy_nonoverlapping(src.as_ptr(), dst, len);
-        *(errbuf.add(len)) = b'\0'; // NULL terminator
-    }
-
-    // XXX: these levels are copied from PostgreSQL's elog.h. Introduce another enum
-    // to hide these?
-    *elevel_p = match msg.level {
-        Level::TRACE => 10, // DEBUG5
-        Level::DEBUG => 14, // DEBUG1
-        Level::INFO => 17,  // INFO
-        Level::WARN => 19,  // WARNING
-        Level::ERROR => 21, // ERROR
-    };
-
-    1
-}
-
-//---- The following functions can be called from any thread ----
-
-#[derive(Clone)]
-struct FormattedEventWithMeta {
-    message: Vec<u8>,
-    level: tracing::Level,
-}
-
-impl Default for FormattedEventWithMeta {
-    fn default() -> Self {
-        FormattedEventWithMeta {
-            message: Vec::new(),
-            level: tracing::Level::DEBUG,
-        }
-    }
-}
-
-struct EventBuilder<'a> {
-    event: FormattedEventWithMeta,
-
-    maker: &'a Maker,
-}
-
-impl std::io::Write for EventBuilder<'_> {
-    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        self.event.message.write(buf)
-    }
-    fn flush(&mut self) -> std::io::Result<()> {
-        self.maker.send_event(self.event.clone());
-        Ok(())
-    }
-}
-
-impl Drop for EventBuilder<'_> {
-    fn drop(&mut self) {
-        let maker = self.maker;
-        let event = std::mem::take(&mut self.event);
-
-        maker.send_event(event);
-    }
-}
-
-struct Maker {
-    channel: SyncSender<FormattedEventWithMeta>,
-}
-
-impl<'a> MakeWriter<'a> for Maker {
-    type Writer = EventBuilder<'a>;
-
-    fn make_writer(&'a self) -> Self::Writer {
-        panic!("not expected to be called when make_writer_for is implemented");
-    }
-
-    fn make_writer_for(&'a self, meta: &Metadata<'_>) -> Self::Writer {
-        EventBuilder {
-            event: FormattedEventWithMeta {
-                message: Vec::new(),
-                level: *meta.level(),
-            },
-            maker: self,
-        }
-    }
-}
-
-impl Maker {
-    fn send_event(&self, e: FormattedEventWithMeta) {
-        match self.channel.try_send(e) {
-            Ok(()) => {
-                // notify the main thread
-                callback_set_my_latch();
-            }
-            Err(TrySendError::Disconnected(_)) => {}
-            Err(TrySendError::Full(_)) => {
-                // TODO: record that some messages were lost
-            }
-        }
-    }
-}
-
-/// Simple formatter implementation for tracing_subscriber, which prints the log
-/// spans and message part like the default formatter, but no timestamp or error
-/// level. The error level is captured separately by `FormattedEventWithMeta',
-/// and when the error is printed by the main thread, with PostgreSQL ereport(),
-/// it gets a timestamp at that point. (The timestamp printed will therefore lag
-/// behind the timestamp on the event here, if the main thread doesn't process
-/// the log message promptly)
-struct SimpleFormatter;
-
-impl<S, N> FormatEvent<S, N> for SimpleFormatter
-where
-    S: Subscriber + for<'a> LookupSpan<'a>,
-    N: for<'a> FormatFields<'a> + 'static,
-{
-    fn format_event(
-        &self,
-        ctx: &FmtContext<'_, S, N>,
-        mut writer: Writer<'_>,
-        event: &Event<'_>,
-    ) -> std::fmt::Result {
-        // Format all the spans in the event's span context.
-        if let Some(scope) = ctx.event_scope() {
-            for span in scope.from_root() {
-                write!(writer, "{}", span.name())?;
-
-                // `FormattedFields` is a formatted representation of the span's
-                // fields, which is stored in its extensions by the `fmt` layer's
-                // `new_span` method. The fields will have been formatted
-                // by the same field formatter that's provided to the event
-                // formatter in the `FmtContext`.
-                let ext = span.extensions();
-                let fields = &ext
-                    .get::<FormattedFields<N>>()
-                    .expect("will never be `None`");
-
-                // Skip formatting the fields if the span had no fields.
-                if !fields.is_empty() {
-                    write!(writer, "{{{fields}}}")?;
-                }
-                write!(writer, ": ")?;
-            }
-        }
-
-        // Write fields on the event
-        ctx.field_format().format_fields(writer.by_ref(), event)?;
-
-        writeln!(writer)
-    }
-}
-
-impl SimpleFormatter {
-    fn new() -> Self {
-        SimpleFormatter {}
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/main_loop.rs
+++ b/pgxn/neon/communicator/src/worker_process/main_loop.rs
@@ -1,731 +0,0 @@
-use std::collections::HashMap;
-use std::os::fd::AsRawFd;
-use std::os::fd::OwnedFd;
-use std::path::PathBuf;
-use std::str::FromStr as _;
-
-use crate::backend_comms::NeonIOHandle;
-use crate::file_cache::FileCache;
-use crate::global_allocator::MyAllocatorCollector;
-use crate::init::CommunicatorInitStruct;
-use crate::integrated_cache::{CacheResult, IntegratedCacheWriteAccess};
-use crate::neon_request::{CGetPageVRequest, CPrefetchVRequest};
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-use crate::worker_process::in_progress_ios::{RequestInProgressKey, RequestInProgressTable};
-use pageserver_client_grpc::{PageserverClient, ShardSpec};
-use pageserver_page_api as page_api;
-
-use metrics::{IntCounter, IntCounterVec};
-
-use tokio::io::AsyncReadExt;
-use tokio_pipe::PipeRead;
-use uring_common::buf::IoBuf;
-use utils::id::{TenantId, TimelineId};
-
-use super::callbacks::{get_request_lsn, notify_proc};
-
-use tracing::{error, info, info_span, trace};
-
-use utils::lsn::Lsn;
-
-pub struct CommunicatorWorkerProcessStruct<'a> {
-    neon_request_slots: &'a [NeonIOHandle],
-
-    client: PageserverClient,
-
-    pub(crate) cache: IntegratedCacheWriteAccess<'a>,
-
-    submission_pipe_read_fd: OwnedFd,
-
-    in_progress_table: RequestInProgressTable,
-
-    // Metrics
-    request_counters: IntCounterVec,
-    request_rel_exists_counter: IntCounter,
-    request_rel_size_counter: IntCounter,
-    request_get_pagev_counter: IntCounter,
-    request_prefetchv_counter: IntCounter,
-    request_db_size_counter: IntCounter,
-    request_write_page_counter: IntCounter,
-    request_rel_extend_counter: IntCounter,
-    request_rel_zero_extend_counter: IntCounter,
-    request_rel_create_counter: IntCounter,
-    request_rel_truncate_counter: IntCounter,
-    request_rel_unlink_counter: IntCounter,
-
-    getpage_cache_misses_counter: IntCounter,
-    getpage_cache_hits_counter: IntCounter,
-
-    request_nblocks_counters: IntCounterVec,
-    request_get_pagev_nblocks_counter: IntCounter,
-    request_prefetchv_nblocks_counter: IntCounter,
-    request_rel_zero_extend_nblocks_counter: IntCounter,
-
-    allocator_metrics: MyAllocatorCollector,
-}
-
-pub(super) async fn init(
-    cis: Box<CommunicatorInitStruct>,
-    tenant_id: String,
-    timeline_id: String,
-    auth_token: Option<String>,
-    shard_map: HashMap<utils::shard::ShardIndex, String>,
-    initial_file_cache_size: u64,
-    file_cache_path: Option<PathBuf>,
-) -> CommunicatorWorkerProcessStruct<'static> {
-    info!("Test log message");
-    let last_lsn = get_request_lsn();
-
-    let file_cache = if let Some(path) = file_cache_path {
-        Some(FileCache::new(&path, initial_file_cache_size).expect("could not create cache file"))
-    } else {
-        // FIXME: temporarily for testing, use LFC even if disabled
-        Some(
-            FileCache::new(&PathBuf::from("new_filecache"), 1000)
-                .expect("could not create cache file"),
-        )
-    };
-
-    // Initialize subsystems
-    let cache = cis
-        .integrated_cache_init_struct
-        .worker_process_init(last_lsn, file_cache);
-
-    // TODO: plumb through the stripe size.
-    let tenant_id = TenantId::from_str(&tenant_id).expect("invalid tenant ID");
-    let timeline_id = TimelineId::from_str(&timeline_id).expect("invalid timeline ID");
-    let shard_spec = ShardSpec::new(shard_map, None).expect("invalid shard spec");
-    let client = PageserverClient::new(tenant_id, timeline_id, shard_spec, auth_token)
-        .expect("could not create client");
-
-    let request_counters = IntCounterVec::new(
-        metrics::core::Opts::new(
-            "backend_requests_total",
-            "Number of requests from backends.",
-        ),
-        &["request_kind"],
-    )
-    .unwrap();
-    let request_rel_exists_counter = request_counters.with_label_values(&["rel_exists"]);
-    let request_rel_size_counter = request_counters.with_label_values(&["rel_size"]);
-    let request_get_pagev_counter = request_counters.with_label_values(&["get_pagev"]);
-    let request_prefetchv_counter = request_counters.with_label_values(&["prefetchv"]);
-    let request_db_size_counter = request_counters.with_label_values(&["db_size"]);
-    let request_write_page_counter = request_counters.with_label_values(&["write_page"]);
-    let request_rel_extend_counter = request_counters.with_label_values(&["rel_extend"]);
-    let request_rel_zero_extend_counter = request_counters.with_label_values(&["rel_zero_extend"]);
-    let request_rel_create_counter = request_counters.with_label_values(&["rel_create"]);
-    let request_rel_truncate_counter = request_counters.with_label_values(&["rel_truncate"]);
-    let request_rel_unlink_counter = request_counters.with_label_values(&["rel_unlink"]);
-
-    let getpage_cache_misses_counter = IntCounter::new(
-        "getpage_cache_misses",
-        "Number of file cache misses in get_pagev requests.",
-    )
-    .unwrap();
-    let getpage_cache_hits_counter = IntCounter::new(
-        "getpage_cache_hits",
-        "Number of file cache hits in get_pagev requests.",
-    )
-    .unwrap();
-
-    // For the requests that affect multiple blocks, have separate counters for the # of blocks affected
-    let request_nblocks_counters = IntCounterVec::new(
-        metrics::core::Opts::new(
-            "request_nblocks_total",
-            "Number of blocks in backend requests.",
-        ),
-        &["request_kind"],
-    )
-    .unwrap();
-    let request_get_pagev_nblocks_counter =
-        request_nblocks_counters.with_label_values(&["get_pagev"]);
-    let request_prefetchv_nblocks_counter =
-        request_nblocks_counters.with_label_values(&["prefetchv"]);
-    let request_rel_zero_extend_nblocks_counter =
-        request_nblocks_counters.with_label_values(&["rel_zero_extend"]);
-
-    CommunicatorWorkerProcessStruct {
-        neon_request_slots: cis.neon_request_slots,
-        client,
-        cache,
-        submission_pipe_read_fd: cis.submission_pipe_read_fd,
-        in_progress_table: RequestInProgressTable::new(),
-
-        // metrics
-        request_counters,
-        request_rel_exists_counter,
-        request_rel_size_counter,
-        request_get_pagev_counter,
-        request_prefetchv_counter,
-        request_db_size_counter,
-        request_write_page_counter,
-        request_rel_extend_counter,
-        request_rel_zero_extend_counter,
-        request_rel_create_counter,
-        request_rel_truncate_counter,
-        request_rel_unlink_counter,
-
-        getpage_cache_misses_counter,
-        getpage_cache_hits_counter,
-
-        request_nblocks_counters,
-        request_get_pagev_nblocks_counter,
-        request_prefetchv_nblocks_counter,
-        request_rel_zero_extend_nblocks_counter,
-
-        allocator_metrics: MyAllocatorCollector::new(),
-    }
-}
-
-impl<'t> CommunicatorWorkerProcessStruct<'t> {
-    /// Main loop of the worker process. Receive requests from the backends and process them.
-    pub(super) async fn run(&'static self) {
-        let mut idxbuf: [u8; 4] = [0; 4];
-
-        let mut submission_pipe_read =
-            PipeRead::try_from(self.submission_pipe_read_fd.as_raw_fd()).expect("invalid pipe fd");
-
-        loop {
-            // Wait for a backend to ring the doorbell
-            match submission_pipe_read.read(&mut idxbuf).await {
-                Ok(4) => {}
-                Ok(nbytes) => panic!("short read ({nbytes} bytes) on communicator pipe"),
-                Err(e) => panic!("error reading from communicator pipe: {e}"),
-            }
-            let slot_idx = u32::from_ne_bytes(idxbuf) as usize;
-
-            // Read the IO request from the slot indicated in the wakeup
-            let Some(slot) = self.neon_request_slots[slot_idx].start_processing_request() else {
-                // This currently should not happen. But if we had multiple threads picking up
-                // requests, and without waiting for the notifications, it could.
-                panic!("no request in slot");
-            };
-
-            // Ok, we have ownership of this request now. We must process it now, there's no going
-            // back.
-            //
-            // Spawn a separate task for every request. That's a little excessive for requests that
-            // can be quickly satisfied from the cache, but we expect that to be rare, because the
-            // requesting backend would have already checked the cache.
-            tokio::spawn(async move {
-                use tracing::Instrument;
-
-                let request_id = slot.get_request().request_id();
-                let owner_procno = slot.get_owner_procno();
-
-                let span = info_span!(
-                    "processing",
-                    request_id = request_id,
-                    slot_idx = slot_idx,
-                    procno = owner_procno,
-                );
-                async {
-                    // FIXME: as a temporary hack, abort the request if we don't get a response
-                    // promptly.
-                    //
-                    // Lots of regression tests are getting stuck and failing at the moment,
-                    // this makes them fail a little faster, which it faster to iterate.
-                    // This needs to be removed once more regression tests are passing.
-                    // See also similar hack in the backend code, in wait_request_completion()
-                    let result = tokio::time::timeout(
-                        tokio::time::Duration::from_secs(30),
-                        self.handle_request(slot.get_request()),
-                    )
-                    .await
-                    .unwrap_or_else(|_elapsed| {
-                        info!("request {request_id} timed out");
-                        NeonIOResult::Error(libc::ETIMEDOUT)
-                    });
-                    trace!("request {request_id} at slot {slot_idx} completed");
-
-                    // Ok, we have completed the IO. Mark the request as completed. After that,
-                    // we no longer have ownership of the slot, and must not modify it.
-                    slot.completed(result);
-
-                    // Notify the backend about the completion. (Note that the backend might see
-                    // the completed status even before this; this is just a wakeup)
-                    notify_proc(owner_procno);
-                }
-                .instrument(span)
-                .await
-            });
-        }
-    }
-
-    /// Compute the 'request_lsn' to use for a pageserver request
-    fn request_lsns(&self, not_modified_since_lsn: Lsn) -> page_api::ReadLsn {
-        let mut request_lsn = get_request_lsn();
-
-        // Is it possible that the last-written LSN is ahead of last flush LSN? Generally not, we
-        // shouldn't evict a page from the buffer cache before all its modifications have been
-        // safely flushed. That's the "WAL before data" rule. However, such case does exist at index
-        // building: _bt_blwritepage logs the full page without flushing WAL before smgrextend
-        // (files are fsynced before build ends).
-        //
-        // XXX: If we make a request LSN greater than the current WAL flush LSN, the pageserver would
-        // block waiting for the WAL arrive, until we flush it and it propagates through the
-        // safekeepers to the pageserver. If there's nothing that forces the WAL to be flushed,
-        // the pageserver would get stuck waiting forever. To avoid that, all the write-
-        // functions in communicator_new.c call XLogSetAsyncXactLSN(). That nudges the WAL writer to
-        // perform the flush relatively soon.
-        //
-        // It would perhaps be nicer to do the WAL flush here, but it's tricky to call back into
-        // Postgres code to do that from here. That's why we rely on communicator_new.c to do the
-        // calls "pre-emptively".
-        //
-        // FIXME: Because of the above, it can still happen that the flush LSN is ahead of
-        // not_modified_since, if the WAL writer hasn't done the flush yet. It would be nice to know
-        // if there are other cases like that that we have mised, but unfortunately we cannot turn
-        // this into an assertion because of that legit case.
-        //
-        // See also the old logic in neon_get_request_lsns() C function
-        if not_modified_since_lsn > request_lsn {
-            tracing::info!(
-                "not_modified_since_lsn {} is ahead of last flushed LSN {}",
-                not_modified_since_lsn,
-                request_lsn
-            );
-            request_lsn = not_modified_since_lsn;
-        }
-
-        page_api::ReadLsn {
-            request_lsn,
-            not_modified_since_lsn: Some(not_modified_since_lsn),
-        }
-    }
-
-    /// Handle one IO request
-    async fn handle_request(&'static self, req: &'_ NeonIORequest) -> NeonIOResult {
-        match req {
-            NeonIORequest::Empty => {
-                error!("unexpected Empty IO request");
-                NeonIOResult::Error(0)
-            }
-            NeonIORequest::RelExists(req) => {
-                self.request_rel_exists_counter.inc();
-                let rel = req.reltag();
-
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Rel(rel), req.request_id)
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_rel_exists(&rel) {
-                    CacheResult::Found(exists) => return NeonIOResult::RelExists(exists),
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                match self
-                    .client
-                    .check_rel_exists(page_api::CheckRelExistsRequest {
-                        read_lsn: self.request_lsns(not_modified_since),
-                        rel,
-                    })
-                    .await
-                {
-                    Ok(exists) => NeonIOResult::RelExists(exists),
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-
-            NeonIORequest::RelSize(req) => {
-                self.request_rel_size_counter.inc();
-                let rel = req.reltag();
-
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Rel(rel), req.request_id)
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_rel_size(&rel) {
-                    CacheResult::Found(nblocks) => {
-                        tracing::trace!("found relsize for {:?} in cache: {}", rel, nblocks);
-                        return NeonIOResult::RelSize(nblocks);
-                    }
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                let read_lsn = self.request_lsns(not_modified_since);
-                match self
-                    .client
-                    .get_rel_size(page_api::GetRelSizeRequest { read_lsn, rel })
-                    .await
-                {
-                    Ok(nblocks) => {
-                        // update the cache
-                        tracing::info!("updated relsize for {:?} in cache: {}", rel, nblocks);
-                        self.cache.remember_rel_size(&rel, nblocks);
-
-                        NeonIOResult::RelSize(nblocks)
-                    }
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-            NeonIORequest::GetPageV(req) => {
-                self.request_get_pagev_counter.inc();
-                self.request_get_pagev_nblocks_counter
-                    .inc_by(req.nblocks as u64);
-                match self.handle_get_pagev_request(req).await {
-                    Ok(()) => NeonIOResult::GetPageV,
-                    Err(errno) => NeonIOResult::Error(errno),
-                }
-            }
-            NeonIORequest::PrefetchV(req) => {
-                self.request_prefetchv_counter.inc();
-                self.request_prefetchv_nblocks_counter
-                    .inc_by(req.nblocks as u64);
-                let req = *req;
-                tokio::spawn(async move { self.handle_prefetchv_request(&req).await });
-                NeonIOResult::PrefetchVLaunched
-            }
-            NeonIORequest::DbSize(req) => {
-                self.request_db_size_counter.inc();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Db(req.db_oid), req.request_id)
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_db_size(req.db_oid) {
-                    CacheResult::Found(db_size) => {
-                        // get_page already copied the block content to the destination
-                        return NeonIOResult::DbSize(db_size);
-                    }
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                match self
-                    .client
-                    .get_db_size(page_api::GetDbSizeRequest {
-                        read_lsn: self.request_lsns(not_modified_since),
-                        db_oid: req.db_oid,
-                    })
-                    .await
-                {
-                    Ok(db_size) => NeonIOResult::DbSize(db_size),
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-
-            // Write requests
-            NeonIORequest::WritePage(req) => {
-                self.request_write_page_counter.inc();
-
-                let rel = req.reltag();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(
-                        RequestInProgressKey::Block(rel, req.block_number),
-                        req.request_id,
-                    )
-                    .await;
-
-                // We must at least update the last-written LSN on the page, but also store the page
-                // image in the LFC while we still have it
-                self.cache
-                    .remember_page(&rel, req.block_number, req.src, Lsn(req.lsn), true)
-                    .await;
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelExtend(req) => {
-                self.request_rel_extend_counter.inc();
-
-                let rel = req.reltag();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(
-                        RequestInProgressKey::Block(rel, req.block_number),
-                        req.request_id,
-                    )
-                    .await;
-
-                // We must at least update the last-written LSN on the page and the relation size,
-                // but also store the page image in the LFC while we still have it
-                self.cache
-                    .remember_page(&rel, req.block_number, req.src, Lsn(req.lsn), true)
-                    .await;
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.block_number + 1);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelZeroExtend(req) => {
-                self.request_rel_zero_extend_counter.inc();
-                self.request_rel_zero_extend_nblocks_counter
-                    .inc_by(req.nblocks as u64);
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                // TODO: I think we should put the empty pages to the cache, or at least
-                // update the last-written LSN.
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.block_number + req.nblocks);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelCreate(req) => {
-                self.request_rel_create_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.remember_rel_size(&req.reltag(), 0);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelTruncate(req) => {
-                self.request_rel_truncate_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.remember_rel_size(&req.reltag(), req.nblocks);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelUnlink(req) => {
-                self.request_rel_unlink_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.forget_rel(&req.reltag());
-                NeonIOResult::WriteOK
-            }
-        }
-    }
-
-    /// Subroutine to handle a GetPageV request, since it's a little more complicated than
-    /// others.
-    async fn handle_get_pagev_request(&'t self, req: &CGetPageVRequest) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Check the cache first
-        //
-        // Note: Because the backends perform a direct lookup in the cache before sending
-        // the request to the communicator process, we expect the pages to almost never
-        // be already in cache. It could happen if:
-        // 1. two backends try to read the same page at the same time, but that should never
-        //    happen because there's higher level locking in the Postgres buffer manager, or
-        // 2. a prefetch request finished at the same time as a backend requested the
-        //    page. That's much more likely.
-        let mut cache_misses = Vec::with_capacity(req.nblocks as usize);
-        for i in 0..req.nblocks {
-            let blkno = req.block_number + i as u32;
-
-            // note: this is deadlock-safe even though we hold multiple locks at the same time,
-            // because they're always acquired in the same order.
-            let in_progress_guard = self
-                .in_progress_table
-                .lock(RequestInProgressKey::Block(rel, blkno), req.request_id)
-                .await;
-
-            let dest = req.dest[i as usize];
-            let not_modified_since = match self.cache.get_page(&rel, blkno, dest).await {
-                Ok(CacheResult::Found(_)) => {
-                    // get_page already copied the block content to the destination
-                    trace!("found blk {} in rel {:?} in LFC", blkno, rel);
-                    continue;
-                }
-                Ok(CacheResult::NotFound(lsn)) => lsn,
-                Err(_io_error) => return Err(-1), // FIXME errno?
-            };
-            cache_misses.push((blkno, not_modified_since, dest, in_progress_guard));
-        }
-        self.getpage_cache_misses_counter
-            .inc_by(cache_misses.len() as u64);
-        self.getpage_cache_hits_counter
-            .inc_by(req.nblocks as u64 - cache_misses.len() as u64);
-
-        if cache_misses.is_empty() {
-            return Ok(());
-        }
-        let not_modified_since = cache_misses
-            .iter()
-            .map(|(_blkno, lsn, _dest, _guard)| *lsn)
-            .max()
-            .unwrap();
-
-        // Construct a pageserver request for the cache misses
-        let block_numbers: Vec<u32> = cache_misses
-            .iter()
-            .map(|(blkno, _lsn, _dest, _guard)| *blkno)
-            .collect();
-        let read_lsn = self.request_lsns(not_modified_since);
-        info!(
-            "sending getpage request for blocks {:?} in rel {:?} lsns {}",
-            block_numbers, rel, read_lsn
-        );
-        match self
-            .client
-            .get_page(page_api::GetPageRequest {
-                request_id: req.request_id,
-                request_class: page_api::GetPageClass::Normal,
-                read_lsn,
-                rel,
-                block_numbers: block_numbers.clone(),
-            })
-            .await
-        {
-            Ok(resp) => {
-                // Write the received page images directly to the shared memory location
-                // that the backend requested.
-                if resp.page_images.len() != block_numbers.len() {
-                    error!(
-                        "received unexpected response with {} page images from pageserver for a request for {} pages",
-                        resp.page_images.len(),
-                        block_numbers.len(),
-                    );
-                    return Err(-1);
-                }
-                for (page_image, (blkno, _lsn, dest, _guard)) in
-                    resp.page_images.into_iter().zip(cache_misses)
-                {
-                    let src: &[u8] = page_image.as_ref();
-                    let len = std::cmp::min(src.len(), dest.bytes_total());
-                    unsafe {
-                        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr(), len);
-                    };
-
-                    // Also store it in the LFC while we have it
-                    self.cache
-                        .remember_page(
-                            &rel,
-                            blkno,
-                            page_image,
-                            read_lsn.not_modified_since_lsn.unwrap(),
-                            false,
-                        )
-                        .await;
-                }
-            }
-            Err(err) => {
-                info!("tonic error: {err:?}");
-                return Err(-1);
-            }
-        }
-        Ok(())
-    }
-
-    /// Subroutine to handle a PrefetchV request, since it's a little more complicated than
-    /// others.
-    ///
-    /// This is very similar to a GetPageV request, but the results are only stored in the cache.
-    async fn handle_prefetchv_request(&'static self, req: &CPrefetchVRequest) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Check the cache first
-        let mut cache_misses = Vec::with_capacity(req.nblocks as usize);
-        for i in 0..req.nblocks {
-            let blkno = req.block_number + i as u32;
-
-            // note: this is deadlock-safe even though we hold multiple locks at the same time,
-            // because they're always acquired in the same order.
-            let in_progress_guard = self
-                .in_progress_table
-                .lock(RequestInProgressKey::Block(rel, blkno), req.request_id)
-                .await;
-
-            let not_modified_since = match self.cache.page_is_cached(&rel, blkno).await {
-                Ok(CacheResult::Found(_)) => {
-                    trace!("found blk {} in rel {:?} in LFC", blkno, rel);
-                    continue;
-                }
-                Ok(CacheResult::NotFound(lsn)) => lsn,
-                Err(_io_error) => return Err(-1), // FIXME errno?
-            };
-            cache_misses.push((blkno, not_modified_since, in_progress_guard));
-        }
-        if cache_misses.is_empty() {
-            return Ok(());
-        }
-        let not_modified_since = cache_misses
-            .iter()
-            .map(|(_blkno, lsn, _guard)| *lsn)
-            .max()
-            .unwrap();
-        let block_numbers: Vec<u32> = cache_misses
-            .iter()
-            .map(|(blkno, _lsn, _guard)| *blkno)
-            .collect();
-
-        // TODO: spawn separate tasks for these. Use the integrated cache to keep track of the
-        // in-flight requests
-
-        match self
-            .client
-            .get_page(page_api::GetPageRequest {
-                request_id: req.request_id,
-                request_class: page_api::GetPageClass::Prefetch,
-                read_lsn: self.request_lsns(not_modified_since),
-                rel,
-                block_numbers: block_numbers.clone(),
-            })
-            .await
-        {
-            Ok(resp) => {
-                trace!(
-                    "prefetch completed, remembering blocks {:?} in rel {:?} in LFC",
-                    block_numbers, rel
-                );
-                if resp.page_images.len() != block_numbers.len() {
-                    error!(
-                        "received unexpected response with {} page images from pageserver for a request for {} pages",
-                        resp.page_images.len(),
-                        block_numbers.len(),
-                    );
-                    return Err(-1);
-                }
-
-                for (page_image, (blkno, _lsn, _guard)) in
-                    resp.page_images.into_iter().zip(cache_misses)
-                {
-                    self.cache
-                        .remember_page(&rel, blkno, page_image, not_modified_since, false)
-                        .await;
-                }
-            }
-            Err(err) => {
-                info!("tonic error: {err:?}");
-                return Err(-1);
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'t> metrics::core::Collector for CommunicatorWorkerProcessStruct<'t> {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-
-        descs.append(&mut self.request_counters.desc());
-        descs.append(&mut self.getpage_cache_misses_counter.desc());
-        descs.append(&mut self.getpage_cache_hits_counter.desc());
-        descs.append(&mut self.request_nblocks_counters.desc());
-
-        if let Some(file_cache) = &self.cache.file_cache {
-            descs.append(&mut file_cache.desc());
-        }
-        descs.append(&mut self.cache.desc());
-        descs.append(&mut self.allocator_metrics.desc());
-
-        descs
-    }
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        let mut values = Vec::new();
-
-        values.append(&mut self.request_counters.collect());
-        values.append(&mut self.getpage_cache_misses_counter.collect());
-        values.append(&mut self.getpage_cache_hits_counter.collect());
-        values.append(&mut self.request_nblocks_counters.collect());
-
-        if let Some(file_cache) = &self.cache.file_cache {
-            values.append(&mut file_cache.collect());
-        }
-        values.append(&mut self.cache.collect());
-        values.append(&mut self.allocator_metrics.collect());
-
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/metrics_exporter.rs
+++ b/pgxn/neon/communicator/src/worker_process/metrics_exporter.rs
@@ -1,82 +0,0 @@
-//! Export information about Postgres, the communicator process, file cache etc. as
-//! prometheus metrics.
-
-use axum::Router;
-use axum::body::Body;
-use axum::extract::State;
-use axum::response::Response;
-use http::StatusCode;
-use http::header::CONTENT_TYPE;
-
-use metrics::proto::MetricFamily;
-use metrics::{Encoder, TextEncoder};
-
-use std::path::PathBuf;
-
-use tokio::net::UnixListener;
-
-use crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;
-
-impl<'a> CommunicatorWorkerProcessStruct<'a> {
-    pub(crate) async fn launch_exporter_task(&'static self) {
-        use axum::routing::get;
-        let app = Router::new()
-            .route("/metrics", get(get_metrics))
-            .route("/dump_cache_map", get(dump_cache_map))
-            .with_state(self);
-
-        // Listen on unix domain socket, in the data directory. That should be unique.
-        let path = PathBuf::from(".metrics.socket");
-
-        let listener = UnixListener::bind(path.clone()).unwrap();
-
-        tokio::spawn(async {
-            tracing::info!("metrics listener spawned");
-            axum::serve(listener, app).await.unwrap()
-        });
-    }
-}
-
-async fn dump_cache_map(
-    State(state): State<&CommunicatorWorkerProcessStruct<'static>>,
-) -> Response {
-    let mut buf: Vec<u8> = Vec::new();
-    state.cache.dump_map(&mut buf);
-
-    Response::builder()
-        .status(StatusCode::OK)
-        .header(CONTENT_TYPE, "application/text")
-        .body(Body::from(buf))
-        .unwrap()
-}
-
-/// Expose Prometheus metrics.
-async fn get_metrics(State(state): State<&CommunicatorWorkerProcessStruct<'static>>) -> Response {
-    use metrics::core::Collector;
-    let metrics = state.collect();
-
-    // When we call TextEncoder::encode() below, it will immediately return an
-    // error if a metric family has no metrics, so we need to preemptively
-    // filter out metric families with no metrics.
-    let metrics = metrics
-        .into_iter()
-        .filter(|m| !m.get_metric().is_empty())
-        .collect::<Vec<MetricFamily>>();
-
-    let encoder = TextEncoder::new();
-    let mut buffer = vec![];
-
-    if let Err(e) = encoder.encode(&metrics, &mut buffer) {
-        Response::builder()
-            .status(StatusCode::INTERNAL_SERVER_ERROR)
-            .header(CONTENT_TYPE, "application/text")
-            .body(Body::from(e.to_string()))
-            .unwrap()
-    } else {
-        Response::builder()
-            .status(StatusCode::OK)
-            .header(CONTENT_TYPE, encoder.format_type())
-            .body(Body::from(buffer))
-            .unwrap()
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/mod.rs
+++ b/pgxn/neon/communicator/src/worker_process/mod.rs
@@ -1,14 +0,0 @@
-//! This code runs in the communicator worker process. This provides
-//! the glue code to:
-//!
-//! - launch the 'processor',
-//! - receive IO requests from backends and pass them to the processor,
-//! - write results back to backends.
-
-mod callbacks;
-mod logging;
-mod main_loop;
-mod metrics_exporter;
-mod worker_interface;
-
-mod in_progress_ios;
--- a/pgxn/neon/communicator/src/worker_process/worker_interface.rs
+++ b/pgxn/neon/communicator/src/worker_process/worker_interface.rs
@@ -1,121 +0,0 @@
-//! Functions called from the C code in the worker process
-
-use std::collections::HashMap;
-use std::ffi::{CStr, c_char};
-use std::path::PathBuf;
-
-use tracing::error;
-
-use crate::init::CommunicatorInitStruct;
-use crate::worker_process::main_loop;
-use crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;
-
-/// Launch the communicator's tokio tasks, which do most of the work.
-///
-/// The caller has initialized the process as a regular PostgreSQL
-/// background worker process. The shared memory segment used to
-/// communicate with the backends has been allocated and initialized
-/// earlier, at postmaster startup, in rcommunicator_shmem_init().
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_process_launch(
-    cis: Box<CommunicatorInitStruct>,
-    tenant_id: *const c_char,
-    timeline_id: *const c_char,
-    auth_token: *const c_char,
-    shard_map: *mut *mut c_char,
-    nshards: u32,
-    file_cache_path: *const c_char,
-    initial_file_cache_size: u64,
-) -> &'static CommunicatorWorkerProcessStruct<'static> {
-    // Convert the arguments into more convenient Rust types
-    let tenant_id = unsafe { CStr::from_ptr(tenant_id) }.to_str().unwrap();
-    let timeline_id = unsafe { CStr::from_ptr(timeline_id) }.to_str().unwrap();
-    let auth_token = if auth_token.is_null() {
-        None
-    } else {
-        Some(
-            unsafe { CStr::from_ptr(auth_token) }
-                .to_str()
-                .unwrap()
-                .to_string(),
-        )
-    };
-    let file_cache_path = {
-        if file_cache_path.is_null() {
-            None
-        } else {
-            let c_str = unsafe { CStr::from_ptr(file_cache_path) };
-            Some(PathBuf::from(c_str.to_str().unwrap()))
-        }
-    };
-    let shard_map = parse_shard_map(nshards, shard_map);
-
-    // start main loop
-    let runtime = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .thread_name("communicator thread")
-        .build()
-        .unwrap();
-
-    let worker_struct = runtime.block_on(main_loop::init(
-        cis,
-        tenant_id.to_string(),
-        timeline_id.to_string(),
-        auth_token,
-        shard_map,
-        initial_file_cache_size,
-        file_cache_path,
-    ));
-    let worker_struct = Box::leak(Box::new(worker_struct));
-
-    let main_loop_handle = runtime.spawn(worker_struct.run());
-
-    runtime.spawn(async {
-        let err = main_loop_handle.await.unwrap_err();
-        error!("error: {err:?}");
-    });
-
-    runtime.block_on(worker_struct.launch_exporter_task());
-
-    // keep the runtime running after we exit this function
-    Box::leak(Box::new(runtime));
-
-    worker_struct
-}
-
-/// Convert the "shard map" from an array of C strings, indexed by shard no to a rust HashMap
-fn parse_shard_map(
-    nshards: u32,
-    shard_map: *mut *mut c_char,
-) -> HashMap<utils::shard::ShardIndex, String> {
-    use utils::shard::*;
-
-    assert!(nshards <= u8::MAX as u32);
-
-    let mut result: HashMap<ShardIndex, String> = HashMap::new();
-    let mut p = shard_map;
-
-    for i in 0..nshards {
-        let c_str = unsafe { CStr::from_ptr(*p) };
-
-        p = unsafe { p.add(1) };
-
-        let s = c_str.to_str().unwrap();
-        let k = if nshards > 1 {
-            ShardIndex::new(ShardNumber(i as u8), ShardCount(nshards as u8))
-        } else {
-            ShardIndex::unsharded()
-        };
-        result.insert(k, s.into());
-    }
-    result
-}
-
-/// Inform the rust code about a configuration change
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_config_reload(
-    proc_handle: &'static CommunicatorWorkerProcessStruct<'static>,
-    file_cache_size: u64,
-) {
-    proc_handle.cache.resize_file_cache(file_cache_size as u32);
-}
--- a/pgxn/neon/communicator_new.c
+++ b/pgxn/neon/communicator_new.c
--- a/pgxn/neon/communicator_new.h
+++ b/pgxn/neon/communicator_new.h
@@ -1,56 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * communicator_new.h
- *	  new implementation
- *
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *-------------------------------------------------------------------------
- */
-#ifndef COMMUNICATOR_NEW_H
-#define COMMUNICATOR_NEW_H
-
-#include "neon_pgversioncompat.h"
-
-#include "storage/buf_internals.h"
-
-#include "pagestore_client.h"
-
-/* initialization at postmaster startup */
-extern void pg_init_communicator_new(void);
-extern void communicator_new_shmem_request(void);
-extern void communicator_new_shmem_startup(void);
-
-/* initialization at backend startup */
-extern void communicator_new_init(void);
-
-/* Read requests */
-extern bool communicator_new_rel_exists(NRelFileInfo rinfo, ForkNumber forkNum);
-extern BlockNumber communicator_new_rel_nblocks(NRelFileInfo rinfo, ForkNumber forknum);
-extern int64 communicator_new_dbsize(Oid dbNode);
-extern void communicator_new_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum,
-										  BlockNumber base_blockno,
-										  void **buffers, BlockNumber nblocks);
-extern void communicator_new_prefetch_register_bufferv(NRelFileInfo rinfo, ForkNumber forkNum,
-													   BlockNumber blockno,
-													   BlockNumber nblocks);
-extern bool communicator_new_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,
-											BlockNumber blockno);
-extern int	communicator_new_read_slru_segment(SlruKind kind, int64 segno,
-											   void *buffer);
-
-/* Write requests, to keep the caches up-to-date */
-extern void communicator_new_write_page(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-										const void *buffer, XLogRecPtr lsn);
-extern void communicator_new_rel_extend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-										const void *buffer, XLogRecPtr lsn);
-extern void communicator_new_rel_zeroextend(NRelFileInfo rinfo, ForkNumber forkNum,
-											BlockNumber blockno, BlockNumber nblocks,
-											XLogRecPtr lsn);
-extern void communicator_new_rel_create(NRelFileInfo rinfo, ForkNumber forkNum);
-extern void communicator_new_rel_truncate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
-extern void communicator_new_rel_unlink(NRelFileInfo rinfo, ForkNumber forkNum);
-
-#endif							/* COMMUNICATOR_NEW_H */
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -183,13 +183,13 @@ typedef struct FileCacheControl
 static HTAB *lfc_hash;
 static int	lfc_desc = -1;
 static LWLockId lfc_lock;
-int	lfc_max_size;
-int	lfc_size_limit;
+static int	lfc_max_size;
+static int	lfc_size_limit;
 static int	lfc_prewarm_limit;
 static int	lfc_prewarm_batch;
 static int	lfc_chunk_size_log = MAX_BLOCKS_PER_CHUNK_LOG;
 static int	lfc_blocks_per_chunk = MAX_BLOCKS_PER_CHUNK;
-char *lfc_path;
+static char *lfc_path;
 static uint64 lfc_generation;
 static FileCacheControl *lfc_ctl;
 static bool lfc_do_prewarm;
@@ -230,8 +230,6 @@ lfc_switch_off(void)
 {
 	int			fd;

-	Assert(!neon_enable_new_communicator);
-
 	if (LFC_ENABLED())
 	{
 		HASH_SEQ_STATUS status;
@@ -297,8 +295,6 @@ lfc_maybe_disabled(void)
 static bool
 lfc_ensure_opened(void)
 {
-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_generation != lfc_ctl->generation)
 	{
 		lfc_close_file();
@@ -324,8 +320,6 @@ lfc_shmem_startup(void)
 	bool		found;
 	static HASHCTL info;

-	Assert(!neon_enable_new_communicator);
-
 	if (prev_shmem_startup_hook)
 	{
 		prev_shmem_startup_hook();
@@ -624,9 +618,6 @@ lfc_init(void)
 	if (lfc_max_size == 0)
 		return;

-	if (neon_enable_new_communicator)
-		return;
-
 	prev_shmem_startup_hook = shmem_startup_hook;
 	shmem_startup_hook = lfc_shmem_startup;
 #if PG_VERSION_NUM>=150000
@@ -702,7 +693,6 @@ lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
 	dsm_segment *seg;
 	BackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];

-	Assert(!neon_enable_new_communicator);

 	if (!lfc_ensure_opened())
 		return;
@@ -857,8 +847,6 @@ lfc_prewarm_main(Datum main_arg)
 	PrewarmWorkerState* ws;
 	uint32 worker_id = DatumGetInt32(main_arg);

-	Assert(!neon_enable_new_communicator);
-
 	AmPrewarmWorker = true;

 	pqsignal(SIGTERM, die);
@@ -959,8 +947,6 @@ lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
 	FileCacheEntry *entry;
 	uint32		hash;

-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;

@@ -1006,8 +992,6 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	bool		found = false;
 	uint32		hash;

-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return false;

@@ -1043,8 +1027,6 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	uint32		hash;
 	int			i = 0;

-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return 0;

@@ -1152,8 +1134,6 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	int			blocks_read = 0;
 	int			buf_offset = 0;

-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return -1;

@@ -1520,8 +1500,6 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,

 	int		chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);

-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return false;

@@ -1667,8 +1645,6 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	uint32		entry_offset;
 	int			buf_offset = 0;

-	Assert(!neon_enable_new_communicator);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;

@@ -2164,9 +2140,6 @@ PG_FUNCTION_INFO_V1(approximate_working_set_size_seconds);
 Datum
 approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
 {
-	if (neon_enable_new_communicator)
-		elog(ERROR, "TODO: not implemented");
-
 	if (lfc_size_limit != 0)
 	{
 		int32 dc;
@@ -2184,9 +2157,6 @@ PG_FUNCTION_INFO_V1(approximate_working_set_size);
 Datum
 approximate_working_set_size(PG_FUNCTION_ARGS)
 {
-	if (neon_enable_new_communicator)
-		elog(ERROR, "TODO: not implemented");
-
 	if (lfc_size_limit != 0)
 	{
 		int32 dc;
@@ -2207,13 +2177,7 @@ Datum
 get_local_cache_state(PG_FUNCTION_ARGS)
 {
 	size_t max_entries = PG_ARGISNULL(0) ? lfc_prewarm_limit : PG_GETARG_INT32(0);
-	FileCacheState* fcs;
-
-	if (neon_enable_new_communicator)
-		elog(ERROR, "TODO: not implemented");
-
-	fcs = lfc_get_state(max_entries);
-
+	FileCacheState* fcs = lfc_get_state(max_entries);
 	if (fcs != NULL)
 		PG_RETURN_BYTEA_P((bytea*)fcs);
 	else
@@ -2227,12 +2191,8 @@ prewarm_local_cache(PG_FUNCTION_ARGS)
 {
 	bytea* state = PG_GETARG_BYTEA_PP(0);
 	uint32 n_workers =  PG_GETARG_INT32(1);
-	FileCacheState* fcs;
+	FileCacheState* fcs = (FileCacheState*)state;

-	if (neon_enable_new_communicator)
-		elog(ERROR, "TODO: not implemented");
-
-	fcs = (FileCacheState*)state;
 	lfc_prewarm(fcs, n_workers);

 	PG_RETURN_NULL();
@@ -2252,9 +2212,6 @@ get_prewarm_info(PG_FUNCTION_ARGS)
 	uint32 total_pages;
 	size_t n_workers;

-	if (neon_enable_new_communicator)
-		elog(ERROR, "TODO: not implemented");
-
 	if (lfc_size_limit == 0)
 		PG_RETURN_NULL();

--- a/pgxn/neon/file_cache.h
+++ b/pgxn/neon/file_cache.h
@@ -26,9 +26,6 @@ typedef struct FileCacheState

 /* GUCs */
 extern bool lfc_store_prefetch_result;
-extern int	lfc_max_size;
-extern int	lfc_size_limit;
-extern char *lfc_path;

 /* functions for local file cache */
 extern void lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -69,8 +69,7 @@ char	   *neon_project_id;
 char	   *neon_branch_id;
 char	   *neon_endpoint_id;
 int32		max_cluster_size;
-char	   *pageserver_connstring;
-char	   *pageserver_grpc_urls;
+char	   *page_server_connstring;
 char	   *neon_auth_token;

 int			readahead_buffer_size = 128;
@@ -178,8 +177,6 @@ static bool pageserver_flush(shardno_t shard_no);
 static void pageserver_disconnect(shardno_t shard_no);
 static void pageserver_disconnect_shard(shardno_t shard_no);

-static void AssignShardMap(const char *newval);
-
 static bool
 PagestoreShmemIsValid(void)
 {
@@ -242,7 +239,6 @@ ParseShardMap(const char *connstr, ShardMap *result)
 	return true;
 }

-/* GUC hooks for neon.pageserver_connstring */
 static bool
 CheckPageserverConnstring(char **newval, void **extra, GucSource source)
 {
@@ -253,45 +249,6 @@ CheckPageserverConnstring(char **newval, void **extra, GucSource source)

 static void
 AssignPageserverConnstring(const char *newval, void *extra)
-{
-	/*
-	 * 'neon.pageserver_connstring' is ignored if the new communicator is used.
-	 * In that case, the shard map is loaded from 'neon.pageserver_grpc_urls'
-	 * instead.
-	 */
-	if (neon_enable_new_communicator)
-		return;
-
-	AssignShardMap(newval);
-}
-
-
-/* GUC hooks for neon.pageserver_connstring */
-static bool
-CheckPageserverGrpcUrls(char **newval, void **extra, GucSource source)
-{
-	char	   *p = *newval;
-
-	return ParseShardMap(p, NULL);
-}
-
-static void
-AssignPageserverGrpcUrls(const char *newval, void *extra)
-{
-	/*
-	 * 'neon.pageserver_grpc-urls' is ignored if the new communicator is not
-	 * used.  In that case, the shard map is loaded from 'neon.pageserver_connstring'
-	  instead.
-	 */
-	if (!neon_enable_new_communicator)
-		return;
-
-	AssignShardMap(newval);
-}
-
-
-static void
-AssignShardMap(const char *newval)
 {
 	ShardMap	shard_map;

@@ -305,7 +262,7 @@ AssignShardMap(const char *newval)
 	{
 		/*
 		 * shouldn't happen, because we already checked the value in
-		 * CheckPageserverConnstring/CheckPageserverGrpcUrls
+		 * CheckPageserverConnstring
 		 */
 		elog(ERROR, "could not parse shard map");
 	}
@@ -324,54 +281,6 @@ AssignShardMap(const char *newval)
 	}
 }

-/* Return a copy of the whole shard map from shared memory */
-void
-get_shard_map(char ***connstrs_p, shardno_t *num_shards_p)
-{
-	uint64		begin_update_counter;
-	uint64		end_update_counter;
-	ShardMap   *shard_map = &pagestore_shared->shard_map;
-	shardno_t	num_shards;
-	char	   *buf;
-	char	  **connstrs;
-
-	buf = palloc(MAX_SHARDS*MAX_PAGESERVER_CONNSTRING_SIZE);
-	connstrs = palloc(sizeof(char *) * MAX_SHARDS);
-
-	/*
-	 * Postmaster can update the shared memory values concurrently, in which
-	 * case we would copy a garbled mix of the old and new values. We will
-	 * detect it because the counter's won't match, and retry. But it's
-	 * important that we don't do anything within the retry-loop that would
-	 * depend on the string having valid contents.
-	 */
-	do
-	{
-		char		*p;
-
-		begin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
-		end_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);
-
-		num_shards = shard_map->num_shards;
-
-		p = buf;
-		for (int i = 0; i < Min(num_shards, MAX_SHARDS); i++)
-		{
-			strlcpy(p, shard_map->connstring[i], MAX_PAGESERVER_CONNSTRING_SIZE);
-			connstrs[i] = p;
-			p += MAX_PAGESERVER_CONNSTRING_SIZE;
-		}
-
-		pg_memory_barrier();
-	}
-	while (begin_update_counter != end_update_counter
-		   || begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)
-		   || end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));
-
-	*connstrs_p = connstrs;
-	*num_shards_p = num_shards;
-}
-
 /*
 * Get the current number of shards, and/or the connection string for a
 * particular shard from the shard map in shared memory.
@@ -1395,8 +1304,7 @@ PagestoreShmemInit(void)
 		pg_atomic_init_u64(&pagestore_shared->begin_update_counter, 0);
 		pg_atomic_init_u64(&pagestore_shared->end_update_counter, 0);
 		memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
-		AssignPageserverConnstring(pageserver_connstring, NULL);
-		AssignPageserverGrpcUrls(pageserver_grpc_urls, NULL);
+		AssignPageserverConnstring(page_server_connstring, NULL);
 	}

 	NeonPerfCountersShmemInit();
@@ -1449,21 +1357,12 @@ pg_init_libpagestore(void)
 	DefineCustomStringVariable("neon.pageserver_connstring",
 							   "connection string to the page server",
 							   NULL,
-							   &pageserver_connstring,
+							   &page_server_connstring,
 							   "",
 							   PGC_SIGHUP,
 							   0,	/* no flags required */
 							   CheckPageserverConnstring, AssignPageserverConnstring, NULL);

-	DefineCustomStringVariable("neon.pageserver_grpc_urls",
-							   "list of gRPC URLs for the page servers",
-							   NULL,
-							   &pageserver_grpc_urls,
-							   "",
-							   PGC_SIGHUP,
-							   0,	/* no flags required */
-							   CheckPageserverGrpcUrls, AssignPageserverGrpcUrls, NULL);
-
 	DefineCustomStringVariable("neon.timeline_id",
 							   "Neon timeline_id the server is running on",
 							   NULL,
@@ -1621,7 +1520,7 @@ pg_init_libpagestore(void)
 	if (neon_auth_token)
 		neon_log(LOG, "using storage auth token from NEON_AUTH_TOKEN environment variable");

-	if (pageserver_connstring[0] || pageserver_grpc_urls[0])
+	if (page_server_connstring && page_server_connstring[0])
 	{
 		neon_log(PageStoreTrace, "set neon_smgr hook");
 		smgr_hook = smgr_neon;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Alex Chi Z	293687de5f	revert + add tests Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-07-02 14:38:40 -07:00
Alex Chi Z	6c81cf3892	fix(pageserver): do not allow delete to bypass upload metadata Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-07-02 13:55:39 -07:00