git add missing file

Use a custom Rust implementation to replace the LFC hash table
The new implementation lives in a separately allocated shared memory area, which makes it possible to resize it later. Resizing isn't actually implemented yet, though; it would require some cooperation from the LFC code.
2026-05-21 07:00:38 +00:00 · 2025-06-12 02:37:59 +03:00 · 2025-06-05 18:31:29 +03:00 · 2025-06-05 18:13:03 +03:00 · 2025-06-05 18:08:40 +03:00 · 2025-06-05 18:08:35 +03:00
90 changed files with 1185 additions and 12713 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,6 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
-pgxn/neon/communicator/communicator_bindings.h

 # Coverage
 *.profraw
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -253,17 +253,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"

-[[package]]
-name = "atomic_enum"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.100",
-]
-
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -698,40 +687,13 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum"
-version = "0.7.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
-dependencies = [
- "async-trait",
- "axum-core 0.4.5",
- "bytes",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "itoa",
- "matchit 0.7.3",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper 1.0.1",
- "tower 0.5.2",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum"
 version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
 dependencies = [
- "axum-core 0.5.0",
+ "axum-core",
 "base64 0.22.1",
 "bytes",
 "form_urlencoded",
@@ -739,10 +701,10 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "itoa",
- "matchit 0.8.4",
+ "matchit",
 "memchr",
 "mime",
 "percent-encoding",
@@ -762,26 +724,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum-core"
-version = "0.4.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
-dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "mime",
- "pin-project-lite",
- "rustversion",
- "sync_wrapper 1.0.1",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum-core"
 version = "0.5.0"
@@ -808,8 +750,8 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
 dependencies = [
- "axum 0.8.1",
- "axum-core 0.5.0",
+ "axum",
+ "axum-core",
 "bytes",
 "futures-util",
 "headers",
@@ -1351,28 +1293,8 @@ dependencies = [
 name = "communicator"
 version = "0.1.0"
 dependencies = [
- "atomic_enum",
- "axum 0.8.1",
- "bytes",
 "cbindgen",
- "clashmap",
- "http 1.1.0",
- "libc",
- "metrics",
 "neon-shmem",
- "nix 0.30.1",
- "pageserver_client_grpc",
- "pageserver_page_api",
- "prometheus",
- "prost 0.13.5",
- "thiserror 1.0.69",
- "tokio",
- "tokio-pipe",
- "tonic 0.12.3",
- "tracing",
- "tracing-subscriber",
- "uring-common",
- "utils",
 ]

 [[package]]
@@ -1400,7 +1322,7 @@ dependencies = [
 "aws-sdk-kms",
 "aws-sdk-s3",
 "aws-smithy-types",
- "axum 0.8.1",
+ "axum",
 "axum-extra",
 "base64 0.13.1",
 "bytes",
@@ -1424,7 +1346,6 @@ dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",
 "p256 0.13.2",
- "pageserver_page_api",
 "postgres",
 "postgres_initdb",
 "regex",
@@ -1443,7 +1364,6 @@ dependencies = [
 "tokio-postgres",
 "tokio-stream",
 "tokio-util",
- "tonic 0.13.1",
 "tower 0.5.2",
 "tower-http",
 "tower-otel",
@@ -1701,9 +1621,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.21"
+version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"

 [[package]]
 name = "crossterm"
@@ -2157,7 +2077,7 @@ name = "endpoint_storage"
 version = "0.0.1"
 dependencies = [
 "anyhow",
- "axum 0.8.1",
+ "axum",
 "axum-extra",
 "camino",
 "camino-tempfile",
@@ -2437,7 +2357,7 @@ dependencies = [
 "futures-core",
 "futures-sink",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "pin-project",
 "rand 0.8.5",
@@ -2609,9 +2529,9 @@ dependencies = [

 [[package]]
 name = "getrandom"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
 dependencies = [
 "cfg-if",
 "libc",
@@ -3008,9 +2928,9 @@ dependencies = [

 [[package]]
 name = "httparse"
-version = "1.10.1"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"

 [[package]]
 name = "httpdate"
@@ -3060,9 +2980,9 @@ dependencies = [

 [[package]]
 name = "hyper"
-version = "1.6.0"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
+checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
 dependencies = [
 "bytes",
 "futures-channel",
@@ -3102,7 +3022,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
 dependencies = [
 "futures-util",
 "http 1.1.0",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "rustls 0.22.4",
 "rustls-pki-types",
@@ -3117,7 +3037,7 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
 dependencies = [
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "pin-project-lite",
 "tokio",
@@ -3126,20 +3046,20 @@ dependencies = [

 [[package]]
 name = "hyper-util"
-version = "0.1.12"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf9f1e950e0d9d1d3c47184416723cf29c0d1f93bd8cccf37e4beb6b44f31710"
+checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
 dependencies = [
 "bytes",
 "futures-channel",
 "futures-util",
 "http 1.1.0",
 "http-body 1.0.0",
- "hyper 1.6.0",
- "libc",
+ "hyper 1.4.1",
 "pin-project-lite",
 "socket2",
 "tokio",
+ "tower 0.4.13",
 "tower-service",
 "tracing",
 ]
@@ -3728,12 +3648,6 @@ dependencies = [
 "regex-automata 0.1.10",
 ]

-[[package]]
-name = "matchit"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
-
 [[package]]
 name = "matchit"
 version = "0.8.4"
@@ -3932,23 +3846,11 @@ dependencies = [
 "nix 0.30.1",
 "rand 0.9.1",
 "rand_distr 0.5.1",
- "spin",
 "tempfile",
 "thiserror 1.0.69",
 "workspace_hack",
 ]

-[[package]]
-name = "neonart"
-version = "0.1.0"
-dependencies = [
- "crossbeam-utils",
- "rand 0.9.1",
- "rand_distr 0.5.1",
- "spin",
- "tracing",
-]
-
 [[package]]
 name = "never-say-never"
 version = "6.6.666"
@@ -4382,19 +4284,15 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-trait",
- "axum 0.8.1",
 "bytes",
 "camino",
 "clap",
 "futures",
 "hdrhistogram",
- "http 1.1.0",
 "humantime",
 "humantime-serde",
- "metrics",
 "pageserver_api",
 "pageserver_client",
- "pageserver_client_grpc",
 "pageserver_page_api",
 "rand 0.8.5",
 "reqwest",
@@ -4477,7 +4375,6 @@ dependencies = [
 "pageserver_client",
 "pageserver_compaction",
 "pageserver_page_api",
- "peekable",
 "pem",
 "pin-project-lite",
 "postgres-protocol",
@@ -4490,7 +4387,6 @@ dependencies = [
 "pprof",
 "pq_proto",
 "procfs",
- "prost 0.13.5",
 "rand 0.8.5",
 "range-set-blaze",
 "regex",
@@ -4590,34 +4486,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "pageserver_client_grpc"
-version = "0.1.0"
-dependencies = [
- "async-trait",
- "bytes",
- "chrono",
- "dashmap 5.5.0",
- "futures",
- "http 1.1.0",
- "hyper 1.6.0",
- "hyper-util",
- "metrics",
- "pageserver_api",
- "pageserver_page_api",
- "priority-queue",
- "rand 0.8.5",
- "thiserror 1.0.69",
- "tokio",
- "tokio-stream",
- "tokio-util",
- "tonic 0.13.1",
- "tower 0.4.13",
- "tracing",
- "utils",
- "uuid",
-]
-
 [[package]]
 name = "pageserver_compaction"
 version = "0.1.0"
@@ -4782,15 +4650,6 @@ dependencies = [
 "sha2",
 ]

-[[package]]
-name = "peekable"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
-dependencies = [
- "smallvec",
-]
-
 [[package]]
 name = "pem"
 version = "3.0.3"
@@ -5202,17 +5061,6 @@ dependencies = [
 "elliptic-curve 0.13.8",
 ]

-[[package]]
-name = "priority-queue"
-version = "2.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef08705fa1589a1a59aa924ad77d14722cb0cd97b67dd5004ed5f4a4873fce8d"
-dependencies = [
- "autocfg",
- "equivalent",
- "indexmap 2.9.0",
-]
-
 [[package]]
 name = "proc-macro2"
 version = "1.0.94"
@@ -5413,7 +5261,7 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "hyper 0.14.30",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "indexmap 2.9.0",
 "ipnet",
@@ -5635,7 +5483,7 @@ version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
 dependencies = [
- "getrandom 0.3.2",
+ "getrandom 0.3.3",
 ]

 [[package]]
@@ -5854,7 +5702,7 @@ dependencies = [
 "http-body-util",
 "http-types",
 "humantime-serde",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "itertools 0.10.5",
 "metrics",
 "once_cell",
@@ -5894,7 +5742,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-rustls 0.26.0",
 "hyper-util",
 "ipnet",
@@ -5951,7 +5799,7 @@ dependencies = [
 "futures",
 "getrandom 0.2.11",
 "http 1.1.0",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "parking_lot 0.11.2",
 "reqwest",
 "reqwest-middleware",
@@ -5972,7 +5820,7 @@ dependencies = [
 "async-trait",
 "getrandom 0.2.11",
 "http 1.1.0",
- "matchit 0.8.4",
+ "matchit",
 "opentelemetry",
 "reqwest",
 "reqwest-middleware",
@@ -6892,12 +6740,12 @@ dependencies = [

 [[package]]
 name = "socket2"
-version = "0.5.9"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef"
+checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
 dependencies = [
 "libc",
- "windows-sys 0.52.0",
+ "windows-sys 0.48.0",
 ]

 [[package]]
@@ -6905,9 +6753,6 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]

 [[package]]
 name = "spinning_top"
@@ -6966,7 +6811,7 @@ dependencies = [
 "http-body-util",
 "http-utils",
 "humantime",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "metrics",
 "once_cell",
@@ -7572,16 +7417,6 @@ dependencies = [
 "syn 2.0.100",
 ]

-[[package]]
-name = "tokio-pipe"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
-dependencies = [
- "libc",
- "tokio",
-]
-
 [[package]]
 name = "tokio-postgres"
 version = "0.7.10"
@@ -7776,25 +7611,16 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
- "async-stream",
 "async-trait",
- "axum 0.7.9",
 "base64 0.22.1",
 "bytes",
- "h2 0.4.4",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
- "hyper-timeout",
- "hyper-util",
 "percent-encoding",
 "pin-project",
 "prost 0.13.5",
- "socket2",
- "tokio",
 "tokio-stream",
- "tower 0.4.13",
 "tower-layer",
 "tower-service",
 "tracing",
@@ -7807,15 +7633,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
 dependencies = [
 "async-trait",
- "axum 0.8.1",
+ "axum",
 "base64 0.22.1",
 "bytes",
- "flate2",
 "h2 0.4.4",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-timeout",
 "hyper-util",
 "percent-encoding",
@@ -7867,16 +7692,11 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
 dependencies = [
 "futures-core",
 "futures-util",
- "indexmap 1.9.3",
 "pin-project",
 "pin-project-lite",
- "rand 0.8.5",
- "slab",
 "tokio",
- "tokio-util",
 "tower-layer",
 "tower-service",
- "tracing",
 ]

 [[package]]
@@ -8360,7 +8180,7 @@ name = "vm_monitor"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "axum 0.8.1",
+ "axum",
 "cgroups-rs",
 "clap",
 "futures",
@@ -8853,8 +8673,8 @@ dependencies = [
 "ahash",
 "anstream",
 "anyhow",
- "axum 0.8.1",
- "axum-core 0.5.0",
+ "axum",
+ "axum-core",
 "base64 0.13.1",
 "base64 0.21.7",
 "base64ct",
@@ -8887,7 +8707,7 @@ dependencies = [
 "hex",
 "hmac",
 "hyper 0.14.30",
- "hyper 1.6.0",
+ "hyper 1.4.1",
 "hyper-util",
 "indexmap 2.9.0",
 "itertools 0.12.1",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,6 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
-    "pageserver/client_grpc",
    "pageserver/pagebench",
    "pageserver/page_api",
    "proxy",
@@ -33,7 +32,6 @@ members = [
    "libs/pq_proto",
    "libs/tenant_size_model",
    "libs/metrics",
-    "libs/neonart",
    "libs/postgres_connection",
    "libs/remote_storage",
    "libs/tracing-utils",
@@ -90,7 +88,6 @@ clap = { version = "4.0", features = ["derive", "env"] }
 clashmap = { version = "1.0", features = ["raw-api"] }
 comfy-table = "7.1"
 const_format = "0.2"
-crossbeam-utils = "0.8.21"
 crc32c = "0.6"
 diatomic-waker = { version = "0.2.3" }
 either = "1.8"
@@ -149,7 +146,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pem = "3.0.3"
-peekable = "0.3.0"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
@@ -184,7 +180,6 @@ smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
 spki = "0.7.3"
-spin = "0.9.8"
 strum = "0.26"
 strum_macros = "0.26"
 "subtle"  = "2.5.0"
@@ -196,15 +191,16 @@ thiserror = "1.0"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
+tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
 tokio-stream = "0.1"
 tokio-tar = "0.3"
-tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
+tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
-tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "gzip", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
+tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
 tonic-reflection = { version = "0.13.1", features = ["server"] }
 tower = { version = "0.5.2", default-features = false }
 tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
@@ -237,9 +233,6 @@ x509-cert = { version = "0.2.5" }
 env_logger = "0.11"
 log = "0.4"

-tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
@@ -259,12 +252,10 @@ desim = { version = "0.1", path = "./libs/desim" }
 endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
-neonart = { version = "0.1", path = "./libs/neonart/" }
 neon-shmem = { version = "0.1", path = "./libs/neon-shmem/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
-pageserver_client_grpc = { path = "./pageserver/client_grpc" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 pageserver_page_api = { path = "./pageserver/page_api" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -38,7 +38,6 @@ once_cell.workspace = true
 opentelemetry.workspace = true
 opentelemetry_sdk.workspace = true
 p256 = { version = "0.13", features = ["pem"] }
-pageserver_page_api.workspace = true
 postgres.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["json"] }
@@ -54,7 +53,6 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
-tonic.workspace = true
 tower-otel.workspace = true
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,4 +1,4 @@
-use anyhow::{Context, Result, anyhow};
+use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
 use compute_api::responses::{
@@ -15,7 +15,6 @@ use itertools::Itertools;
 use nix::sys::signal::{Signal, kill};
 use nix::unistd::Pid;
 use once_cell::sync::Lazy;
-use pageserver_page_api as page_api;
 use postgres;
 use postgres::NoTls;
 use postgres::error::SqlState;
@@ -30,9 +29,7 @@ use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::io::AsyncReadExt;
 use tokio::spawn;
-use tokio_util::io::StreamReader;
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
@@ -372,7 +369,7 @@ impl ComputeNode {

        let mut new_state = ComputeState::new();
        if let Some(spec) = config.spec {
-            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow!(msg))?;
+            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
            new_state.pspec = Some(pspec);
        }

@@ -944,77 +941,6 @@ impl ComputeNode {
    #[instrument(skip_all, fields(%lsn))]
    fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
-        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
-
-        match Url::parse(shard0_connstr)?.scheme() {
-            "postgres" | "postgresql" => self.try_get_basebackup_libpq(spec, lsn),
-            "grpc" => self.try_get_basebackup_grpc(spec, lsn),
-            scheme => return Err(anyhow!("unknown URL scheme {scheme}")),
-        }
-    }
-
-    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<()> {
-        let start_time = Instant::now();
-
-        let shard0_connstr = spec
-            .pageserver_connstr
-            .split(',')
-            .next()
-            .unwrap()
-            .to_string();
-
-        let chunks = tokio::runtime::Handle::current().block_on(async move {
-            let mut client = page_api::proto::PageServiceClient::connect(shard0_connstr).await?;
-
-            let req = page_api::proto::GetBaseBackupRequest {
-                read_lsn: Some(page_api::proto::ReadLsn {
-                    request_lsn: lsn.0,
-                    not_modified_since_lsn: 0,
-                }),
-                replica: false, // TODO: handle replicas, with LSN 0
-            };
-            let mut req = tonic::Request::new(req);
-            let metadata = req.metadata_mut();
-            metadata.insert("neon-tenant-id", spec.tenant_id.to_string().parse()?);
-            metadata.insert("neon-timeline-id", spec.timeline_id.to_string().parse()?);
-            metadata.insert("neon-shard-id", "0000".to_string().parse()?); // TODO: shard count
-            if let Some(auth) = spec.storage_auth_token.as_ref() {
-                metadata.insert("authorization", format!("Bearer {auth}").parse()?);
-            }
-
-            let chunks = client.get_base_backup(req).await?.into_inner();
-            anyhow::Ok(chunks)
-        })?;
-        let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
-
-        // Convert the chunks stream into an AsyncRead
-        let stream_reader = StreamReader::new(
-            chunks.map(|chunk| chunk.map(|c| c.chunk).map_err(std::io::Error::other)),
-        );
-
-        // Wrap the AsyncRead into a blocking reader for compatibility with tar::Archive
-        let reader = tokio_util::io::SyncIoBridge::new(stream_reader);
-        let mut measured_reader = MeasuredReader::new(reader);
-        let mut bufreader = std::io::BufReader::new(&mut measured_reader);
-
-        // Read the archive directly from the `CopyOutReader`
-        //
-        // Set `ignore_zeros` so that unpack() reads all the Copy data and
-        // doesn't stop at the end-of-archive marker. Otherwise, if the server
-        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(&mut bufreader);
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.params.pgdata)?;
-
-        // Report metrics
-        let mut state = self.state.lock().unwrap();
-        state.metrics.pageserver_connect_micros = pageserver_connect_micros;
-        state.metrics.basebackup_bytes = measured_reader.get_byte_count() as u64;
-        state.metrics.basebackup_ms = start_time.elapsed().as_millis() as u64;
-        Ok(())
-    }
-
-    fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<()> {
        let start_time = Instant::now();

        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
@@ -1030,10 +956,12 @@ impl ComputeNode {
        }

        config.application_name("compute_ctl");
-        config.options(&format!(
-            "-c neon.compute_mode={}",
-            spec.spec.mode.to_type_str()
-        ));
+        if let Some(spec) = &compute_state.pspec {
+            config.options(&format!(
+                "-c neon.compute_mode={}",
+                spec.spec.mode.to_type_str()
+            ));
+        }

        // Connect to pageserver
        let mut client = config.connect(NoTls)?;
@@ -1107,7 +1035,10 @@ impl ComputeNode {
                    return result;
                }
                Err(ref e) if attempts < max_attempts => {
-                    warn!("Failed to get basebackup: {e:?} (attempt {attempts}/{max_attempts})");
+                    warn!(
+                        "Failed to get basebackup: {} (attempt {}/{})",
+                        e, attempts, max_attempts
+                    );
                    std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));
                    retry_period_ms *= 1.5;
                }
@@ -1985,7 +1916,7 @@ LIMIT 100",
            self.params
                .remote_ext_base_url
                .as_ref()
-                .ok_or(DownloadError::BadInput(anyhow!(
+                .ok_or(DownloadError::BadInput(anyhow::anyhow!(
                    "Remote extensions storage is not configured",
                )))?;

@@ -2181,7 +2112,7 @@ LIMIT 100",
        let remote_extensions = spec
            .remote_extensions
            .as_ref()
-            .ok_or(anyhow!("Remote extensions are not configured"))?;
+            .ok_or(anyhow::anyhow!("Remote extensions are not configured"))?;

        info!("parse shared_preload_libraries from spec.cluster.settings");
        let mut libs_vec = Vec::new();
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -18,7 +18,7 @@ use clap::Parser;
 use compute_api::requests::ComputeClaimsScope;
 use compute_api::spec::ComputeMode;
 use control_plane::broker::StorageBroker;
-use control_plane::endpoint::{ComputeControlPlane, PageserverProtocol};
+use control_plane::endpoint::ComputeControlPlane;
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
 use control_plane::local_env;
 use control_plane::local_env::{
@@ -664,10 +664,6 @@ struct EndpointStartCmdArgs {
    #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
    #[arg(default_value = "90s")]
    start_timeout: Duration,
-
-    /// If enabled, use gRPC (and the communicator) to talk to Pageservers.
-    #[clap(long)]
-    grpc: bool,
 }

 #[derive(clap::Args)]
@@ -686,10 +682,6 @@ struct EndpointReconfigureCmdArgs {

    #[clap(long)]
    safekeepers: Option<String>,
-
-    /// If enabled, use gRPC (and communicator) to talk to Pageservers.
-    #[clap(long)]
-    grpc: bool,
 }

 #[derive(clap::Args)]
@@ -1460,18 +1452,13 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res

            let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
                let conf = env.get_pageserver_conf(pageserver_id).unwrap();
-                // Use gRPC if requested.
-                let (protocol, host, port) = if args.grpc {
-                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
-                    let (host, port) = parse_host_port(grpc_addr).expect("bad config");
-                    (PageserverProtocol::Grpc, host, port.unwrap_or(51051))
-                } else {
-                    let (host, port) = parse_host_port(&conf.listen_pg_addr).expect("bad config");
-                    (PageserverProtocol::Libpq, host, port.unwrap_or(5432))
-                };
-                // If caller is telling us what pageserver to use, this is not a tenant which is
-                // fully managed by storage controller, therefore not sharded.
-                (vec![(protocol, host, port)], DEFAULT_STRIPE_SIZE)
+                let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
+                (
+                    vec![(parsed.0, parsed.1.unwrap_or(5432))],
+                    // If caller is telling us what pageserver to use, this is not a tenant which is
+                    // fully managed by storage controller, therefore not sharded.
+                    DEFAULT_STRIPE_SIZE,
+                )
            } else {
                // Look up the currently attached location of the tenant, and its striping metadata,
                // to pass these on to postgres.
@@ -1490,22 +1477,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                                .await?;
                        }

-                        let pageserver = if args.grpc {
-                            (
-                                PageserverProtocol::Grpc,
-                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC addr"))
-                                    .expect("bad hostname"),
-                                shard.listen_grpc_port.expect("no gRPC port"),
-                            )
-                        } else {
-                            (
-                                PageserverProtocol::Libpq,
-                                Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
-                                shard.listen_pg_port,
-                            )
-                        };
-
-                        anyhow::Ok(pageserver)
+                        anyhow::Ok((
+                            Host::parse(&shard.listen_pg_addr)
+                                .expect("Storage controller reported bad hostname"),
+                            shard.listen_pg_port,
+                        ))
                    }),
                )
                .await?;
@@ -1560,17 +1536,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                .get(endpoint_id.as_str())
                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
            let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
-                let conf = env.get_pageserver_conf(ps_id)?;
-                // Use gRPC if requested.
-                let (protocol, host, port) = if args.grpc {
-                    let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
-                    let (host, port) = parse_host_port(grpc_addr).expect("bad config");
-                    (PageserverProtocol::Grpc, host, port.unwrap_or(51051))
-                } else {
-                    let (host, port) = parse_host_port(&conf.listen_pg_addr).expect("bad config");
-                    (PageserverProtocol::Libpq, host, port.unwrap_or(5432))
-                };
-                vec![(protocol, host, port)]
+                let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
+                vec![(
+                    pageserver.pg_connection_config.host().clone(),
+                    pageserver.pg_connection_config.port(),
+                )]
            } else {
                let storage_controller = StorageController::from_env(env);
                storage_controller
@@ -1579,20 +1549,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                    .shards
                    .into_iter()
                    .map(|shard| {
-                        if args.grpc {
-                            (
-                                PageserverProtocol::Grpc,
-                                Host::parse(&shard.listen_grpc_addr.expect("no gRPC addr"))
-                                    .expect("bad hostname"),
-                                shard.listen_grpc_port.expect("no gRPC port"),
-                            )
-                        } else {
-                            (
-                                PageserverProtocol::Libpq,
-                                Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
-                                shard.listen_pg_port,
-                            )
-                        }
+                        (
+                            Host::parse(&shard.listen_pg_addr)
+                                .expect("Storage controller reported malformed host"),
+                            shard.listen_pg_port,
+                        )
                    })
                    .collect::<Vec<_>>()
            };
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -37,7 +37,6 @@
 //! ```
 //!
 use std::collections::BTreeMap;
-use std::fmt::Display;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
 use std::path::PathBuf;
 use std::process::Command;
@@ -75,6 +74,7 @@ use utils::id::{NodeId, TenantId, TimelineId};

 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
+use crate::storage_controller::StorageController;

 // contents of a endpoint.json file
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -331,7 +331,7 @@ pub enum EndpointStatus {
    RunningNoPidfile,
 }

-impl Display for EndpointStatus {
+impl std::fmt::Display for EndpointStatus {
    fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {
        let s = match self {
            Self::Running => "running",
@@ -343,28 +343,6 @@ impl Display for EndpointStatus {
    }
 }

-#[derive(Clone, Copy, Debug)]
-pub enum PageserverProtocol {
-    Libpq,
-    Grpc,
-}
-
-impl PageserverProtocol {
-    /// Returns the URL scheme for the protocol, used in connstrings.
-    pub fn scheme(&self) -> &'static str {
-        match self {
-            Self::Libpq => "postgresql",
-            Self::Grpc => "grpc",
-        }
-    }
-}
-
-impl Display for PageserverProtocol {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.scheme())
-    }
-}
-
 impl Endpoint {
    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
        if !entry.file_type()?.is_dir() {
@@ -628,10 +606,10 @@ impl Endpoint {
        }
    }

-    fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
+    fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
        pageservers
            .iter()
-            .map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
+            .map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
            .collect::<Vec<_>>()
            .join(",")
    }
@@ -676,7 +654,7 @@ impl Endpoint {
        endpoint_storage_addr: String,
        safekeepers_generation: Option<SafekeeperGeneration>,
        safekeepers: Vec<NodeId>,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        pageservers: Vec<(Host, u16)>,
        remote_ext_base_url: Option<&String>,
        shard_stripe_size: usize,
        create_test_user: bool,
@@ -961,12 +939,10 @@ impl Endpoint {

    pub async fn reconfigure(
        &self,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        mut pageservers: Vec<(Host, u16)>,
        stripe_size: Option<ShardStripeSize>,
        safekeepers: Option<Vec<NodeId>>,
    ) -> Result<()> {
-        anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
-
        let (mut spec, compute_ctl_config) = {
            let config_path = self.endpoint_path().join("config.json");
            let file = std::fs::File::open(config_path)?;
@@ -978,7 +954,25 @@ impl Endpoint {
        let postgresql_conf = self.read_postgresql_conf()?;
        spec.cluster.postgresql_conf = Some(postgresql_conf);

+        // If we weren't given explicit pageservers, query the storage controller
+        if pageservers.is_empty() {
+            let storage_controller = StorageController::from_env(&self.env);
+            let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
+            pageservers = locate_result
+                .shards
+                .into_iter()
+                .map(|shard| {
+                    (
+                        Host::parse(&shard.listen_pg_addr)
+                            .expect("Storage controller reported bad hostname"),
+                        shard.listen_pg_port,
+                    )
+                })
+                .collect::<Vec<_>>();
+        }
+
        let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
+        assert!(!pageserver_connstr.is_empty());
        spec.pageserver_connstring = Some(pageserver_connstr);
        if stripe_size.is_some() {
            spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -265,14 +265,6 @@ impl PageServerNode {
            None => None,
        };

-        let mut grpc_host = None;
-        let mut grpc_port = None;
-        if let Some(grpc_addr) = &self.conf.listen_grpc_addr {
-            let (_, port) = parse_host_port(grpc_addr).expect("Unable to parse listen_grpc_addr");
-            grpc_host = Some("localhost".to_string());
-            grpc_port = Some(port.unwrap_or(51051));
-        }
-
        // Intentionally hand-craft JSON: this acts as an implicit format compat test
        // in case the pageserver-side structure is edited, and reflects the real life
        // situation: the metadata is written by some other script.
@@ -281,8 +273,6 @@ impl PageServerNode {
            serde_json::to_vec(&pageserver_api::config::NodeMetadata {
                postgres_host: "localhost".to_string(),
                postgres_port: self.pg_connection_config.port(),
-                grpc_host,
-                grpc_port,
                http_host: "localhost".to_string(),
                http_port,
                https_port,
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -37,11 +37,6 @@ enum Command {
        #[arg(long)]
        listen_pg_port: u16,

-        #[arg(long)]
-        listen_grpc_addr: Option<String>,
-        #[arg(long)]
-        listen_grpc_port: Option<u16>,
-
        #[arg(long)]
        listen_http_addr: String,
        #[arg(long)]
@@ -415,8 +410,6 @@ async fn main() -> anyhow::Result<()> {
            node_id,
            listen_pg_addr,
            listen_pg_port,
-            listen_grpc_addr,
-            listen_grpc_port,
            listen_http_addr,
            listen_http_port,
            listen_https_port,
@@ -430,8 +423,6 @@ async fn main() -> anyhow::Result<()> {
                        node_id,
                        listen_pg_addr,
                        listen_pg_port,
-                        listen_grpc_addr,
-                        listen_grpc_port,
                        listen_http_addr,
                        listen_http_port,
                        listen_https_port,
--- a/docker-compose/ext-src/postgis-src/neon-test.sh
+++ b/docker-compose/ext-src/postgis-src/neon-test.sh
@@ -1,9 +1,6 @@
-#!/bin/bash
+#!/bin/sh
 set -ex
 cd "$(dirname "$0")"
-if [[ ${PG_VERSION} = v17 ]]; then
-  sed -i '/computed_columns/d' regress/core/tests.mk
-fi
-patch -p1 <postgis-no-upgrade-test.patch
-trap 'echo Cleaning up; patch -R -p1 <postgis-no-upgrade-test.patch' EXIT
+patch -p1 <"postgis-common-${PG_VERSION}.patch"
+trap 'echo Cleaning up; patch -R -p1 <postgis-common-${PG_VERSION}.patch' EXIT
 make installcheck-base
--- a/docker-compose/ext-src/postgis-src/postgis-no-upgrade-test.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-no-upgrade-test.patch
@@ -1,3 +1,19 @@
+diff --git a/regress/core/tests.mk b/regress/core/tests.mk
+index 3abd7bc..64a9254 100644
+--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
+@@ -144,11 +144,6 @@ TESTS_SLOW = \
+ 	$(top_srcdir)/regress/core/concave_hull_hard \
+ 	$(top_srcdir)/regress/core/knn_recheck
+ 
+-ifeq ($(shell expr "$(POSTGIS_PGSQL_VERSION)" ">=" 120),1)
+-	TESTS += \
+-		$(top_srcdir)/regress/core/computed_columns
+-endif
+-
+ ifeq ($(shell expr "$(POSTGIS_GEOS_VERSION)" ">=" 30700),1)
+ 	# GEOS-3.7 adds:
+ 	# ST_FrechetDistance
 diff --git a/regress/runtest.mk b/regress/runtest.mk
 index c051f03..010e493 100644
 --- a/regress/runtest.mk
--- a/docker-compose/ext-src/postgis-src/postgis-common-v17.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-common-v17.patch
@@ -0,0 +1,35 @@
+diff --git a/regress/core/tests.mk b/regress/core/tests.mk
+index 9e05244..90987df 100644
+--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
+@@ -143,8 +143,7 @@ TESTS += \
+ 	$(top_srcdir)/regress/core/oriented_envelope \
+ 	$(top_srcdir)/regress/core/point_coordinates \
+ 	$(top_srcdir)/regress/core/out_geojson \
+-  $(top_srcdir)/regress/core/wrapx \
+-	$(top_srcdir)/regress/core/computed_columns
++  $(top_srcdir)/regress/core/wrapx
+ 
+ # Slow slow tests
+ TESTS_SLOW = \
+diff --git a/regress/runtest.mk b/regress/runtest.mk
+index 4b95b7e..449d5a2 100644
+--- a/regress/runtest.mk
+++ b/regress/runtest.mk
+@@ -24,16 +24,6 @@ check-regress:
+ 
+ 	@POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)
+ 
+-	@if echo "$(RUNTESTFLAGS)" | grep -vq -- --upgrade; then \
+-		echo "Running upgrade test as RUNTESTFLAGS did not contain that"; \
+-		POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \
+-      --upgrade \
+-      $(RUNTESTFLAGS) \
+-      $(RUNTESTFLAGS_INTERNAL) \
+-      $(TESTS); \
+-	else \
+-		echo "Skipping upgrade test as RUNTESTFLAGS already requested upgrades"; \
+-	fi
+ 
+ check-long:
+ 	$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)
--- a/docker-compose/ext-src/postgis-src/postgis-regular-v16.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-regular-v16.patch
@@ -125,7 +125,7 @@ index 7a36b65..ad78fc7 100644
 DROP SCHEMA tm CASCADE;
 +
 diff --git a/regress/core/tests.mk b/regress/core/tests.mk
-index 3abd7bc..94903c3 100644
+index 64a9254..94903c3 100644
 --- a/regress/core/tests.mk
 +++ b/regress/core/tests.mk
@@ -23,7 +23,6 @@ current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
@@ -160,18 +160,6 @@ index 3abd7bc..94903c3 100644
 	$(top_srcdir)/regress/core/wkb \
 	$(top_srcdir)/regress/core/wkt \
 	$(top_srcdir)/regress/core/wmsservers \
-@@ -144,11 +140,6 @@ TESTS_SLOW = \
- 	$(top_srcdir)/regress/core/concave_hull_hard \
- 	$(top_srcdir)/regress/core/knn_recheck
- 
-ifeq ($(shell expr "$(POSTGIS_PGSQL_VERSION)" ">=" 120),1)
-	TESTS += \
-		$(top_srcdir)/regress/core/computed_columns
-endif
-
- ifeq ($(shell expr "$(POSTGIS_GEOS_VERSION)" ">=" 30700),1)
- 	# GEOS-3.7 adds:
- 	# ST_FrechetDistance
 diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
 index 1fc77ac..c3cb9de 100644
 --- a/regress/loader/tests.mk
--- a/docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
+++ b/docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
@@ -125,7 +125,7 @@ index 7a36b65..ad78fc7 100644
 DROP SCHEMA tm CASCADE;
 +
 diff --git a/regress/core/tests.mk b/regress/core/tests.mk
-index 9e05244..a63a3e1 100644
+index 90987df..74fe3f1 100644
 --- a/regress/core/tests.mk
 +++ b/regress/core/tests.mk
@@ -16,14 +16,13 @@ POSTGIS_PGSQL_VERSION=170
@@ -168,16 +168,6 @@ index 9e05244..a63a3e1 100644
 	$(top_srcdir)/regress/core/wkb \
 	$(top_srcdir)/regress/core/wkt \
 	$(top_srcdir)/regress/core/wmsservers \
-@@ -143,8 +139,7 @@ TESTS += \
- 	$(top_srcdir)/regress/core/oriented_envelope \
- 	$(top_srcdir)/regress/core/point_coordinates \
- 	$(top_srcdir)/regress/core/out_geojson \
-  $(top_srcdir)/regress/core/wrapx \
-	$(top_srcdir)/regress/core/computed_columns
-+  $(top_srcdir)/regress/core/wrapx 
- 
- # Slow slow tests
- TESTS_SLOW = \
 diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
 index ac4f8ad..4bad4fc 100644
 --- a/regress/loader/tests.mk
--- a/docker-compose/ext-src/postgis-src/regular-test.sh
+++ b/docker-compose/ext-src/postgis-src/regular-test.sh
@@ -10,8 +10,8 @@ psql -d contrib_regression -c "ALTER DATABASE contrib_regression SET TimeZone='U
     -c "CREATE EXTENSION postgis_tiger_geocoder CASCADE" \
     -c "CREATE EXTENSION postgis_raster SCHEMA public" \
     -c "CREATE EXTENSION postgis_sfcgal SCHEMA public"
-patch -p1 <postgis-no-upgrade-test.patch
+patch -p1 <"postgis-common-${PG_VERSION}.patch"
 patch -p1 <"postgis-regular-${PG_VERSION}.patch"
 psql -d contrib_regression -f raster_outdb_template.sql
-trap 'patch -R -p1 <postgis-no-upgrade-test.patch && patch -R -p1 <"postgis-regular-${PG_VERSION}.patch"' EXIT
+trap 'patch -R -p1 <postgis-regular-${PG_VERSION}.patch && patch -R -p1 <"postgis-common-${PG_VERSION}.patch"' EXIT
 POSTGIS_REGRESS_DB=contrib_regression RUNTESTFLAGS=--nocreate make installcheck-base
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -4,7 +4,6 @@
 //! provide it by calling the compute_ctl's `/compute_ctl` endpoint, or
 //! compute_ctl can fetch it by calling the control plane's API.
 use std::collections::HashMap;
-use std::fmt::Display;

 use indexmap::IndexMap;
 use regex::Regex;
@@ -320,12 +319,6 @@ impl ComputeMode {
    }
 }

-impl Display for ComputeMode {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.to_type_str())
-    }
-}
-
 /// Log level for audit logging
 #[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
 pub enum ComputeAudit {
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -7,7 +7,6 @@ license.workspace = true
 [dependencies]
 thiserror.workspace = true
 nix.workspace = true
-spin.workspace = true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }

 [dev-dependencies]
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -4,30 +4,23 @@
 //!
 //! [X] Accessible from both Postgres processes and rust threads in the communicator process
 //! [X] Low latency
-//! [ ] Scalable to lots of concurrent accesses (currently uses a single spinlock)
+//! [ ] Scalable to lots of concurrent accesses (currently relies on caller for locking)
 //! [ ] Resizable

 use std::fmt::Debug;
-use std::hash::Hash;
+use std::hash::{DefaultHasher, Hash, Hasher};
 use std::mem::MaybeUninit;
-use std::ops::Deref;

 use crate::shmem::ShmemHandle;

-use spin;
-
 mod core;
+pub mod entry;

 #[cfg(test)]
 mod tests;

 use core::CoreHashMap;
-
-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
-}
+use entry::{Entry, OccupiedEntry};

 #[derive(Debug)]
 pub struct OutOfMemoryError();
@@ -60,9 +53,19 @@ impl<'a, K, V> HashMapInit<'a, K, V> {
    }
 }

-// This is stored in the shared memory area
+/// This is stored in the shared memory area
+///
+/// NOTE: We carve out the parts from a contiguous chunk. Growing and shrinking the hash table
+/// relies on the memory layout! The data structures are laid out in the contiguous shared memory
+/// area as follows:
+///
+/// HashMapShared
+/// [buckets]
+/// [dictionary]
+///
+/// In between the above parts, there can be padding bytes to align the parts correctly.
 struct HashMapShared<'a, K, V> {
-    inner: spin::RwLock<CoreHashMap<'a, K, V>>,
+    inner: CoreHashMap<'a, K, V>,
 }

 impl<'a, K, V> HashMapInit<'a, K, V>
@@ -98,26 +101,33 @@ where
        area_ptr: *mut u8,
        area_len: usize,
    ) -> HashMapInit<'a, K, V> {
-        // carve out HashMapShared from the area. This does not include the hashmap's dictionary
-        // and buckets.
+        // carve out the HashMapShared struct from the area.
        let mut ptr: *mut u8 = area_ptr;
+        let end_ptr: *mut u8 = unsafe { area_ptr.add(area_len) };
        ptr = unsafe { ptr.add(ptr.align_offset(align_of::<HashMapShared<K, V>>())) };
        let shared_ptr: *mut HashMapShared<K, V> = ptr.cast();
        ptr = unsafe { ptr.add(size_of::<HashMapShared<K, V>>()) };

-        // the rest of the space is given to the hash map's dictionary and buckets
-        let remaining_area = unsafe {
-            std::slice::from_raw_parts_mut(ptr, area_len - ptr.offset_from(area_ptr) as usize)
-        };
+        // carve out the buckets
+        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<core::Bucket<K, V>>())) };
+        let buckets_ptr = ptr;
+        ptr = unsafe { ptr.add(size_of::<core::Bucket<K, V>>() * num_buckets as usize) };

-        let hashmap = CoreHashMap::new(num_buckets, remaining_area);
+        // use remaining space for the dictionary
+        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
+        assert!(ptr.addr() < end_ptr.addr());
+        let dictionary_ptr = ptr;
+        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
+        assert!(dictionary_size > 0);
+
+        let buckets =
+            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) };
+        let dictionary = unsafe {
+            std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
+        };
+        let hashmap = CoreHashMap::new(buckets, dictionary);
        unsafe {
-            std::ptr::write(
-                shared_ptr,
-                HashMapShared {
-                    inner: spin::RwLock::new(hashmap),
-                },
-            );
+            std::ptr::write(shared_ptr, HashMapShared { inner: hashmap });
        }

        HashMapInit {
@@ -131,132 +141,73 @@ impl<'a, K, V> HashMapAccess<'a, K, V>
 where
    K: Clone + Hash + Eq,
 {
-    pub fn get<'e>(&'e self, key: &K) -> Option<ValueReadGuard<'e, K, V>> {
+    pub fn get_hash_value(&self, key: &K) -> u64 {
+        let mut hasher = DefaultHasher::new();
+        key.hash(&mut hasher);
+        hasher.finish()
+    }
+
+    pub fn get_with_hash<'e>(&'e self, key: &K, hash: u64) -> Option<&'e V> {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let lock_guard = map.inner.read();

-        match lock_guard.get(key) {
-            None => None,
-            Some(val_ref) => {
-                let val_ptr = std::ptr::from_ref(val_ref);
-                Some(ValueReadGuard {
-                    _lock_guard: lock_guard,
-                    value: val_ptr,
-                })
-            }
-        }
+        map.inner.get_with_hash(key, hash)
    }

-    /// Insert a value
-    pub fn insert(&self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
+    pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();

-        self.update_with_fn(key, |existing| {
-            if let Some(_) = existing {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
+        map.inner.entry_with_hash(key, hash)
    }

-    /// Remove value. Returns true if it existed
-    pub fn remove(&self, key: &K) -> bool {
-        let mut result = false;
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
+    pub fn remove_with_hash(&mut self, key: &K, hash: u64) {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+
+        match map.inner.entry_with_hash(key.clone(), hash) {
+            Entry::Occupied(e) => {
+                e.remove();
            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
+            Entry::Vacant(_) => {}
+        };
    }

-    /// Update key using the given function. All the other modifying operations are based on this.
-    pub fn update_with_fn<F>(&self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-
-        let old_val = lock_guard.get(key);
-        let action = value_fn(old_val);
-        match (old_val, action) {
-            (_, UpdateAction::Nothing) => {}
-            (_, UpdateAction::Insert(new_val)) => {
-                let _ = lock_guard.insert(key, new_val);
-            }
-            (None, UpdateAction::Remove) => panic!("Remove action with no old value"),
-            (Some(_), UpdateAction::Remove) => {
-                let _ = lock_guard.remove(key);
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    pub fn update_with_fn_at_bucket<F>(
-        &self,
-        pos: usize,
-        value_fn: F,
-    ) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-
-        let old_val = lock_guard.get_bucket(pos);
-        let action = value_fn(old_val.map(|(_k, v)| v));
-        match (old_val, action) {
-            (_, UpdateAction::Nothing) => {}
-            (_, UpdateAction::Insert(_new_val)) => panic!("cannot insert without key"),
-            (None, UpdateAction::Remove) => panic!("Remove action with no old value"),
-            (Some((key, _value)), UpdateAction::Remove) => {
-                let key = key.clone();
-                let _ = lock_guard.remove(&key);
-            }
-        }
-
-        Ok(())
+    pub fn entry_at_bucket(&mut self, pos: usize) -> Option<OccupiedEntry<'a, '_, K, V>> {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        map.inner.entry_at_bucket(pos)
    }

    pub fn get_num_buckets(&self) -> usize {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.inner.read().get_num_buckets()
+        map.inner.get_num_buckets()
    }

    /// Return the key and value stored in bucket with given index. This can be used to
    /// iterate through the hash map. (An Iterator might be nicer. The communicator's
    /// clock algorithm needs to _slowly_ iterate through all buckets with its clock hand,
    /// without holding a lock. If we switch to an Iterator, it must not hold the lock.)
-    pub fn get_bucket<'e>(&'e self, pos: usize) -> Option<ValueReadGuard<'e, K, V>> {
+    pub fn get_at_bucket(&self, pos: usize) -> Option<&(K, V)> {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let lock_guard = map.inner.read();

-        match lock_guard.get_bucket(pos) {
-            None => None,
-            Some((_key, val_ref)) => {
-                let val_ptr = std::ptr::from_ref(val_ref);
-                Some(ValueReadGuard {
-                    _lock_guard: lock_guard,
-                    value: val_ptr,
-                })
-            }
+        if pos >= map.inner.buckets.len() {
+            return None;
        }
+        let bucket = &map.inner.buckets[pos];
+        bucket.inner.as_ref()
+    }
+
+    pub fn get_bucket_for_value(&self, val_ptr: *const V) -> usize {
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
+        // `val_ptr` is expected to point at a value stored inside one of this map's buckets.
+        let origin = map.inner.buckets.as_ptr();
+        let idx = (val_ptr as usize - origin as usize) / size_of::<core::Bucket<K, V>>();
+        assert!(idx < map.inner.buckets.len());
+        // Buckets are contiguous: byte offset / bucket size (not value size) is the index.
+        idx
+    }

    // for metrics
    pub fn get_num_buckets_in_use(&self) -> usize {
        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        map.inner.read().buckets_in_use as usize
+        map.inner.buckets_in_use as usize
    }

    /// Grow
@@ -264,10 +215,9 @@ where
    /// 1. grow the underlying shared memory area
    /// 2. Initialize new buckets. This overwrites the current dictionary
    /// 3. Recalculate the dictionary
-    pub fn grow(&self, num_buckets: u32) -> Result<(), crate::shmem::Error> {
-        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
-        let mut lock_guard = map.inner.write();
-        let inner = &mut *lock_guard;
+    pub fn grow(&mut self, num_buckets: u32) -> Result<(), crate::shmem::Error> {
+        let map = unsafe { self.shared_ptr.as_mut() }.unwrap();
+        let inner = &mut map.inner;
        let old_num_buckets = inner.buckets.len() as u32;

        if num_buckets < old_num_buckets {
@@ -292,7 +242,6 @@ where
            for i in old_num_buckets..num_buckets {
                let bucket_ptr = buckets_ptr.add(i as usize);
                bucket_ptr.write(core::Bucket {
-                    hash: 0,
                    next: if i < num_buckets {
                        i as u32 + 1
                    } else {
@@ -325,7 +274,12 @@ where
            if buckets[i].inner.is_none() {
                continue;
            }
-            let pos: usize = (buckets[i].hash % dictionary.len() as u64) as usize;
+
+            let mut hasher = DefaultHasher::new();
+            buckets[i].inner.as_ref().unwrap().0.hash(&mut hasher);
+            let hash = hasher.finish();
+
+            let pos: usize = (hash % dictionary.len() as u64) as usize;
            buckets[i].next = dictionary[pos];
            dictionary[pos] = i as u32;
        }
@@ -348,19 +302,3 @@ where
    // 3. Finally, call finish_shrink(). This recomputes the dictionary and shrinks the underlying
    //    shmem area
 }
-
-pub struct ValueReadGuard<'a, K, V> {
-    _lock_guard: spin::RwLockReadGuard<'a, CoreHashMap<'a, K, V>>,
-    value: *const V,
-}
-
-impl<'a, K, V> Deref for ValueReadGuard<'a, K, V> {
-    type Target = V;
-
-    fn deref(&self) -> &Self::Target {
-        // SAFETY: The `lock_guard` ensures that the underlying map (and thus the value pointed to
-        // by `value`) remains valid for the lifetime `'a`. The `value` has been obtained from a
-        // valid reference within the map.
-        unsafe { &*self.value }
-    }
-}
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -3,14 +3,15 @@
 //! # Resizing
 //!

-use std::hash::{DefaultHasher, Hash, Hasher};
+use std::hash::Hash;
 use std::mem::MaybeUninit;

+use crate::hash::entry::{Entry, OccupiedEntry, PrevPos, VacantEntry};
+
 pub(crate) const INVALID_POS: u32 = u32::MAX;

 // Bucket
 pub(crate) struct Bucket<K, V> {
-    pub(crate) hash: u64,
    pub(crate) next: u32,
    pub(crate) inner: Option<(K, V)>,
 }
@@ -20,13 +21,16 @@ pub(crate) struct CoreHashMap<'a, K, V> {
    pub(crate) buckets: &'a mut [Bucket<K, V>],
    pub(crate) free_head: u32,

+    pub(crate) _user_list_head: u32,
+
    // metrics
    pub(crate) buckets_in_use: u32,
 }

+#[derive(Debug)]
 pub struct FullError();

-impl<'a, K, V> CoreHashMap<'a, K, V>
+impl<'a, K: Hash + Eq, V> CoreHashMap<'a, K, V>
 where
    K: Clone + Hash + Eq,
 {
@@ -45,58 +49,32 @@ where
        size
    }

-    pub fn new(num_buckets: u32, area: &'a mut [u8]) -> CoreHashMap<'a, K, V> {
-        let len = area.len();
-
-        let mut ptr: *mut u8 = area.as_mut_ptr();
-        let end_ptr: *mut u8 = unsafe { area.as_mut_ptr().add(len) };
-
-        // carve out the buckets
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<Bucket<K, V>>())) };
-        let buckets_ptr = ptr;
-        ptr = unsafe { ptr.add(size_of::<Bucket<K, V>>() * num_buckets as usize) };
-
-        // use remaining space for the dictionary
-        ptr = unsafe { ptr.byte_add(ptr.align_offset(align_of::<u32>())) };
-        let dictionary_ptr = ptr;
-
-        assert!(ptr.addr() < end_ptr.addr());
-        let dictionary_size = unsafe { end_ptr.byte_offset_from(ptr) / size_of::<u32>() as isize };
-        assert!(dictionary_size > 0);
-
+    pub fn new(
+        buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
+        dictionary: &'a mut [MaybeUninit<u32>],
+    ) -> CoreHashMap<'a, K, V> {
        // Initialize the buckets
-        let buckets = {
-            let buckets_ptr: *mut MaybeUninit<Bucket<K, V>> = buckets_ptr.cast();
-            let buckets =
-                unsafe { std::slice::from_raw_parts_mut(buckets_ptr, num_buckets as usize) };
-            for i in 0..buckets.len() {
-                buckets[i].write(Bucket {
-                    hash: 0,
-                    next: if i < buckets.len() - 1 {
-                        i as u32 + 1
-                    } else {
-                        INVALID_POS
-                    },
-                    inner: None,
-                });
-            }
-            // TODO: use std::slice::assume_init_mut() once it stabilizes
-            unsafe { std::slice::from_raw_parts_mut(buckets_ptr.cast(), num_buckets as usize) }
-        };
+        for i in 0..buckets.len() {
+            buckets[i].write(Bucket {
+                next: if i < buckets.len() - 1 {
+                    i as u32 + 1
+                } else {
+                    INVALID_POS
+                },
+                inner: None,
+            });
+        }

        // Initialize the dictionary
-        let dictionary = {
-            let dictionary_ptr: *mut MaybeUninit<u32> = dictionary_ptr.cast();
-            let dictionary =
-                unsafe { std::slice::from_raw_parts_mut(dictionary_ptr, dictionary_size as usize) };
+        for i in 0..dictionary.len() {
+            dictionary[i].write(INVALID_POS);
+        }

-            for i in 0..dictionary.len() {
-                dictionary[i].write(INVALID_POS);
-            }
-            // TODO: use std::slice::assume_init_mut() once it stabilizes
-            unsafe {
-                std::slice::from_raw_parts_mut(dictionary_ptr.cast(), dictionary_size as usize)
-            }
+        // TODO: use std::slice::assume_init_mut() once it stabilizes
+        let buckets =
+            unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
+        let dictionary = unsafe {
+            std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
        };

        CoreHashMap {
@@ -104,14 +82,11 @@ where
            buckets,
            free_head: 0,
            buckets_in_use: 0,
+            _user_list_head: INVALID_POS,
        }
    }

-    pub fn get(&self, key: &K) -> Option<&V> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
+    pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
        loop {
            if next == INVALID_POS {
@@ -127,77 +102,43 @@ where
        }
    }

-    pub fn insert(&mut self, key: &K, value: V) -> Result<(), FullError> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let first = self.dictionary[hash as usize % self.dictionary.len()];
+    // Looks up `key`. All updates to the map are then made through the returned Entry.
+    pub fn entry_with_hash(&mut self, key: K, hash: u64) -> Entry<'a, '_, K, V> {
+        let dict_pos = hash as usize % self.dictionary.len();
+        let first = self.dictionary[dict_pos];
        if first == INVALID_POS {
            // no existing entry
-            let pos = self.alloc_bucket(key.clone(), value, hash)?;
-            if pos == INVALID_POS {
-                return Err(FullError());
-            }
-            self.dictionary[hash as usize % self.dictionary.len()] = pos;
-            return Ok(());
+            return Entry::Vacant(VacantEntry {
+                map: self,
+                key,
+                dict_pos: dict_pos as u32,
+            });
        }

+        let mut prev_pos = PrevPos::First(dict_pos as u32);
        let mut next = first;
        loop {
            let bucket = &mut self.buckets[next as usize];
-            let (bucket_key, bucket_value) = bucket.inner.as_mut().expect("entry is in use");
-            if bucket_key == key {
-                // found existing entry, update its value
-                *bucket_value = value;
-                return Ok(());
+            let (bucket_key, _bucket_value) = bucket.inner.as_mut().expect("entry is in use");
+            if *bucket_key == key {
+                // found existing entry
+                return Entry::Occupied(OccupiedEntry {
+                    map: self,
+                    _key: key,
+                    prev_pos,
+                    bucket_pos: next,
+                });
            }

            if bucket.next == INVALID_POS {
-                // No existing entry found. Append to the chain
-                let pos = self.alloc_bucket(key.clone(), value, hash)?;
-                if pos == INVALID_POS {
-                    return Err(FullError());
-                }
-                self.buckets[next as usize].next = pos;
-                return Ok(());
+                // No existing entry
+                return Entry::Vacant(VacantEntry {
+                    map: self,
+                    key,
+                    dict_pos: dict_pos as u32,
+                });
            }
-            next = bucket.next;
-        }
-    }
-
-    pub fn remove(&mut self, key: &K) -> Result<(), FullError> {
-        let mut hasher = DefaultHasher::new();
-        key.hash(&mut hasher);
-        let hash = hasher.finish();
-
-        let mut next = self.dictionary[hash as usize % self.dictionary.len()];
-        let mut prev_pos: u32 = INVALID_POS;
-        loop {
-            if next == INVALID_POS {
-                // no existing entry
-                return Ok(());
-            }
-            let bucket = &mut self.buckets[next as usize];
-            let (bucket_key, _) = bucket.inner.as_mut().expect("entry is in use");
-            if bucket_key == key {
-                // found existing entry, unlink it from the chain
-                if prev_pos == INVALID_POS {
-                    self.dictionary[hash as usize % self.dictionary.len()] = bucket.next;
-                } else {
-                    self.buckets[prev_pos as usize].next = bucket.next;
-                }
-
-                // and add it to the freelist
-                let bucket = &mut self.buckets[next as usize];
-                bucket.hash = 0;
-                bucket.inner = None;
-                bucket.next = self.free_head;
-                self.free_head = next;
-                self.buckets_in_use -= 1;
-                return Ok(());
-            }
-            prev_pos = next;
+            prev_pos = PrevPos::Chained(next);
            next = bucket.next;
        }
    }
@@ -206,15 +147,16 @@ where
        self.buckets.len()
    }

-    pub fn get_bucket(&self, pos: usize) -> Option<&(K, V)> {
+    pub fn entry_at_bucket(&mut self, pos: usize) -> Option<OccupiedEntry<K, V>> {
        if pos >= self.buckets.len() {
            return None;
        }

-        self.buckets[pos].inner.as_ref()
+        todo!()
+        // TODO: build an OccupiedEntry for bucket `pos`. Was: self.buckets[pos].inner.as_ref()
    }

-    fn alloc_bucket(&mut self, key: K, value: V, hash: u64) -> Result<u32, FullError> {
+    pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
        let pos = self.free_head;
        if pos == INVALID_POS {
            return Err(FullError());
@@ -224,7 +166,6 @@ where
        self.free_head = bucket.next;
        self.buckets_in_use += 1;

-        bucket.hash = hash;
        bucket.next = INVALID_POS;
        bucket.inner = Some((key, value));

--- a/libs/neon-shmem/src/hash/entry.rs
+++ b/libs/neon-shmem/src/hash/entry.rs
@@ -0,0 +1,91 @@
+//! Entry API for [`CoreHashMap`], modeled after [`std::collections::hash_map::Entry`].
+
+use crate::hash::core::{CoreHashMap, FullError, INVALID_POS};
+
+use std::hash::Hash;
+use std::mem;
+
+pub enum Entry<'a, 'b, K, V> {
+    Occupied(OccupiedEntry<'a, 'b, K, V>), // the key already has a bucket in the map
+    Vacant(VacantEntry<'a, 'b, K, V>), // the key was not found in its hash chain
+}
+
+pub(crate) enum PrevPos {
+    First(u32), // head of the chain: index of the dictionary slot that points at the bucket
+    Chained(u32), // later in the chain: index of the preceding bucket
+}
+
+pub struct OccupiedEntry<'a, 'b, K, V> {
+    pub(crate) map: &'b mut CoreHashMap<'a, K, V>, // The map this entry points into
+    pub(crate) _key: K, // The key of the occupied entry
+    pub(crate) prev_pos: PrevPos, // Predecessor link; `remove` rewrites it to unlink the bucket
+    pub(crate) bucket_pos: u32, // The position of the bucket in the CoreHashMap's buckets array
+}
+
+impl<'a, 'b, K, V> OccupiedEntry<'a, 'b, K, V> {
+    /// Returns a shared reference to the entry's value.
+    pub fn get(&self) -> &V {
+        let bucket = &self.map.buckets[self.bucket_pos as usize];
+        &bucket.inner.as_ref().expect("entry is in use").1
+    }
+
+    /// Returns a mutable reference to the entry's value.
+    pub fn get_mut(&mut self) -> &mut V {
+        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
+        &mut bucket.inner.as_mut().expect("entry is in use").1
+    }
+
+    /// Replaces the entry's value with `value` and returns the previous value.
+    pub fn insert(&mut self, value: V) -> V {
+        let bucket = &mut self.map.buckets[self.bucket_pos as usize];
+        // `inner` is always Some for an OccupiedEntry
+        mem::replace(&mut bucket.inner.as_mut().expect("entry is in use").1, value)
+    }
+
+    /// Removes the entry from the map and returns its value. The bucket is unlinked from its
+    /// hash chain and pushed onto the head of the map's free list.
+    pub fn remove(self) -> V {
+        let pos = self.bucket_pos as usize;
+        // Copy the successor out first, so that no borrow of the bucket is held while the
+        // predecessor is updated.
+        let next = self.map.buckets[pos].next;
+
+        // unlink it from the chain
+        match self.prev_pos {
+            PrevPos::First(dict_pos) => self.map.dictionary[dict_pos as usize] = next,
+            PrevPos::Chained(prev_bucket) => {
+                self.map.buckets[prev_bucket as usize].next = next
+            }
+        }
+
+        // and add it to the freelist
+        let bucket = &mut self.map.buckets[pos];
+        let old_inner = bucket.inner.take();
+        bucket.next = self.map.free_head;
+        self.map.free_head = self.bucket_pos;
+        self.map.buckets_in_use -= 1;
+
+        old_inner.expect("entry is in use").1
+    }
+}
+
+pub struct VacantEntry<'a, 'b, K, V> {
+    pub(crate) map: &'b mut CoreHashMap<'a, K, V>, // The map to insert into
+    pub(crate) key: K, // The key to insert
+    pub(crate) dict_pos: u32, // Index of the dictionary slot that heads the key's chain
+}
+
+impl<'a, 'b, K: Clone + Hash + Eq, V> VacantEntry<'a, 'b, K, V> {
+    /// Stores `value` under the entry's key, or fails with FullError if no bucket is free.
+    pub fn insert(self, value: V) -> Result<&'b mut V, FullError> {
+        let slot = self.dict_pos as usize;
+        let new_pos = self.map.alloc_bucket(self.key, value)?;
+        if new_pos == INVALID_POS {
+            return Err(FullError());
+        }
+        // Link the new bucket in as the head of its chain.
+        let bucket = &mut self.map.buckets[new_pos as usize];
+        bucket.next = mem::replace(&mut self.map.dictionary[slot], new_pos);
+        Ok(&mut bucket.inner.as_mut().unwrap().1)
+    }
+}
--- a/libs/neonart/Cargo.toml
+++ b/libs/neonart/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "neonart"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-crossbeam-utils.workspace = true
-spin.workspace = true
-tracing.workspace = true
-
-[dev-dependencies]
-rand = "0.9.1"
-rand_distr = "0.5.1"
--- a/libs/neonart/src/algorithm.rs
+++ b/libs/neonart/src/algorithm.rs
@@ -1,594 +0,0 @@
-mod lock_and_version;
-pub(crate) mod node_ptr;
-mod node_ref;
-
-use std::vec::Vec;
-
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::algorithm::node_ptr::MAX_PREFIX_LEN;
-use crate::algorithm::node_ref::{NewNodeRef, NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
-use crate::allocator::OutOfMemoryError;
-
-use crate::TreeWriteGuard;
-use crate::UpdateAction;
-use crate::allocator::ArtAllocator;
-use crate::epoch::EpochPin;
-use crate::{Key, Value};
-
-pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
-
-#[derive(Debug)]
-pub enum ArtError {
-    ConcurrentUpdate, // need to retry
-    OutOfMemory,
-}
-
-impl From<ConcurrentUpdateError> for ArtError {
-    fn from(_: ConcurrentUpdateError) -> ArtError {
-        ArtError::ConcurrentUpdate
-    }
-}
-
-impl From<OutOfMemoryError> for ArtError {
-    fn from(_: OutOfMemoryError) -> ArtError {
-        ArtError::OutOfMemory
-    }
-}
-
-pub fn new_root<V: Value>(
-    allocator: &impl ArtAllocator<V>,
-) -> Result<RootPtr<V>, OutOfMemoryError> {
-    node_ptr::new_root(allocator)
-}
-
-pub(crate) fn search<'e, K: Key, V: Value>(
-    key: &K,
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<&'e V> {
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        if let Ok(result) = lookup_recurse(key.as_bytes(), root_ref, None, epoch_pin) {
-            break result;
-        }
-        // retry
-    }
-}
-
-pub(crate) fn iter_next<'e, V: Value>(
-    key: &[u8],
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-) -> Option<(Vec<u8>, &'e V)> {
-    loop {
-        let mut path = Vec::new();
-        let root_ref = NodeRef::from_root_ptr(root);
-
-        match next_recurse(key, &mut path, root_ref, epoch_pin) {
-            Ok(Some(v)) => {
-                assert_eq!(path.len(), key.len());
-                break Some((path, v));
-            }
-            Ok(None) => break None,
-            Err(ConcurrentUpdateError()) => {
-                // retry
-                continue;
-            }
-        }
-    }
-}
-
-pub(crate) fn update_fn<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &K,
-    value_fn: F,
-    root: RootPtr<V>,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), OutOfMemoryError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let value_fn_cell = std::cell::Cell::new(Some(value_fn));
-    loop {
-        let root_ref = NodeRef::from_root_ptr(root);
-        let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg);
-        let key_bytes = key.as_bytes();
-
-        match update_recurse(
-            key_bytes,
-            this_value_fn,
-            root_ref,
-            None,
-            None,
-            guard,
-            0,
-            key_bytes,
-        ) {
-            Ok(()) => break Ok(()),
-            Err(ArtError::ConcurrentUpdate) => {
-                continue; // retry
-            }
-            Err(ArtError::OutOfMemory) => break Err(OutOfMemoryError()),
-        }
-    }
-}
-
-// Error means you must retry.
-//
-// This corresponds to the 'lookupOpt' function in the paper
-fn lookup_recurse<'e, V: Value>(
-    key: &[u8],
-    node: NodeRef<'e, V>,
-    parent: Option<ReadLockedNodeRef<V>>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    if let Some(parent) = parent {
-        parent.read_unlock_or_restart()?;
-    }
-
-    // check if the prefix matches, may increment level
-    let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) {
-        prefix_len
-    } else {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    };
-
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), prefix_len);
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let key = &key[prefix_len..];
-
-    // find child (or leaf value)
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    match next_node {
-        None => Ok(None), // key not found
-        Some(child) => lookup_recurse(&key[1..], child, Some(rnode), epoch_pin),
-    }
-}
-
-fn next_recurse<'e, V: Value>(
-    min_key: &[u8],
-    path: &mut Vec<u8>,
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-) -> Result<Option<&'e V>, ConcurrentUpdateError> {
-    let rnode = node.read_lock_or_restart()?;
-    let prefix = rnode.get_prefix();
-    if prefix.len() != 0 {
-        path.extend_from_slice(prefix);
-    }
-
-    use std::cmp::Ordering;
-    let comparison = path.as_slice().cmp(&min_key[0..path.len()]);
-    if comparison == Ordering::Less {
-        rnode.read_unlock_or_restart()?;
-        return Ok(None);
-    }
-
-    if rnode.is_leaf() {
-        assert_eq!(path.len(), min_key.len());
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let v = unsafe { vptr.as_ref().unwrap() };
-        return Ok(Some(v));
-    }
-
-    let mut min_key_byte = match comparison {
-        Ordering::Less => unreachable!(), // checked this above already
-        Ordering::Equal => min_key[path.len()],
-        Ordering::Greater => 0,
-    };
-
-    loop {
-        match rnode.find_next_child_or_restart(min_key_byte)? {
-            None => {
-                return Ok(None);
-            }
-            Some((key_byte, child_ref)) => {
-                let path_len = path.len();
-                path.push(key_byte);
-                let result = next_recurse(min_key, path, child_ref, epoch_pin)?;
-                if result.is_some() {
-                    return Ok(result);
-                }
-                if key_byte == u8::MAX {
-                    return Ok(None);
-                }
-                path.truncate(path_len);
-                min_key_byte = key_byte + 1;
-            }
-        }
-    }
-}
-
-// This corresponds to the 'insertOpt' function in the paper
-pub(crate) fn update_recurse<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>, F>(
-    key: &[u8],
-    value_fn: F,
-    node: NodeRef<'e, V>,
-    rparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    rgrandparent: Option<(ReadLockedNodeRef<V>, u8)>,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-    level: usize,
-    orig_key: &[u8],
-) -> Result<(), ArtError>
-where
-    F: FnOnce(Option<&V>) -> UpdateAction<V>,
-{
-    let rnode = node.read_lock_or_restart()?;
-
-    let prefix_match_len = rnode.prefix_matches(key);
-    if prefix_match_len.is_none() {
-        let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        match value_fn(None) {
-            UpdateAction::Nothing => {}
-            UpdateAction::Insert(new_value) => {
-                insert_split_prefix(key, new_value, &mut wnode, &mut wparent, parent_key, guard)?;
-            }
-            UpdateAction::Remove => {
-                panic!("unexpected Remove action on insertion");
-            }
-        }
-        wnode.write_unlock();
-        wparent.write_unlock();
-        return Ok(());
-    }
-    let prefix_match_len = prefix_match_len.unwrap();
-    let key = &key[prefix_match_len as usize..];
-    let level = level + prefix_match_len as usize;
-
-    if rnode.is_leaf() {
-        assert_eq!(key.len(), 0);
-        let (rparent, parent_key) = rparent.expect("root cannot be leaf");
-        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-        // safety: Now that we have acquired the write lock, we have exclusive access to the
-        // value. XXX: There might be concurrent reads though?
-        let value_mut = wnode.get_leaf_value_mut();
-
-        match value_fn(Some(value_mut)) {
-            UpdateAction::Nothing => {
-                wparent.write_unlock();
-                wnode.write_unlock();
-            }
-            UpdateAction::Insert(_) => panic!("cannot insert over existing value"),
-            UpdateAction::Remove => {
-                guard.remember_obsolete_node(wnode.as_ptr());
-                wparent.delete_child(parent_key);
-                wnode.write_unlock_obsolete();
-
-                if let Some(rgrandparent) = rgrandparent {
-                    // FIXME: Ignore concurrency error. It doesn't lead to
-                    // corruption, but it means we might leak something. Until
-                    // another update cleans it up.
-                    let _ = cleanup_parent(wparent, rgrandparent, guard);
-                }
-            }
-        }
-
-        return Ok(());
-    }
-
-    let next_node = rnode.find_child_or_restart(key[0])?;
-
-    if next_node.is_none() {
-        if rnode.is_full() {
-            let (rparent, parent_key) = rparent.expect("root node cannot become full");
-            let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
-            let wnode = rnode.upgrade_to_write_lock_or_restart()?;
-
-            match value_fn(None) {
-                UpdateAction::Nothing => {
-                    wnode.write_unlock();
-                    wparent.write_unlock();
-                }
-                UpdateAction::Insert(new_value) => {
-                    insert_and_grow(key, new_value, wnode, &mut wparent, parent_key, guard)?;
-                    wparent.write_unlock();
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-        } else {
-            let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
-            if let Some((rparent, _)) = rparent {
-                rparent.read_unlock_or_restart()?;
-            }
-            match value_fn(None) {
-                UpdateAction::Nothing => {}
-                UpdateAction::Insert(new_value) => {
-                    insert_to_node(&mut wnode, key, new_value, guard)?;
-                }
-                UpdateAction::Remove => {
-                    panic!("unexpected Remove action on insertion");
-                }
-            };
-            wnode.write_unlock();
-        }
-        return Ok(());
-    } else {
-        let next_child = next_node.unwrap(); // checked above it's not None
-        if let Some((ref rparent, _)) = rparent {
-            rparent.check_or_restart()?;
-        }
-
-        // recurse to next level
-        update_recurse(
-            &key[1..],
-            value_fn,
-            next_child,
-            Some((rnode, key[0])),
-            rparent,
-            guard,
-            level + 1,
-            orig_key,
-        )
-    }
-}
-
-#[derive(Clone)]
-enum PathElement {
-    Prefix(Vec<u8>),
-    KeyByte(u8),
-}
-
-impl std::fmt::Debug for PathElement {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        match self {
-            PathElement::Prefix(prefix) => write!(fmt, "{:?}", prefix),
-            PathElement::KeyByte(key_byte) => write!(fmt, "{}", key_byte),
-        }
-    }
-}
-
-pub(crate) fn dump_tree<'e, V: Value + std::fmt::Debug>(
-    root: RootPtr<V>,
-    epoch_pin: &'e EpochPin,
-    dst: &mut dyn std::io::Write,
-) {
-    let root_ref = NodeRef::from_root_ptr(root);
-
-    let _ = dump_recurse(&[], root_ref, &epoch_pin, 0, dst);
-}
-
-// TODO: return an Err if writeln!() returns error, instead of unwrapping
-fn dump_recurse<'e, V: Value + std::fmt::Debug>(
-    path: &[PathElement],
-    node: NodeRef<'e, V>,
-    epoch_pin: &'e EpochPin,
-    level: usize,
-    dst: &mut dyn std::io::Write,
-) -> Result<(), ConcurrentUpdateError> {
-    let indent = str::repeat(" ", level);
-
-    let rnode = node.read_lock_or_restart()?;
-    let mut path = Vec::from(path);
-    let prefix = rnode.get_prefix();
-    if prefix.len() != 0 {
-        path.push(PathElement::Prefix(Vec::from(prefix)));
-    }
-
-    if rnode.is_leaf() {
-        let vptr = rnode.get_leaf_value_ptr()?;
-        // safety: It's OK to return a ref of the pointer because we checked the version
-        // and the lifetime of 'epoch_pin' enforces that the reference is only accessible
-        // as long as the epoch is pinned.
-        let val = unsafe { vptr.as_ref().unwrap() };
-        writeln!(dst, "{} {:?}: {:?}", indent, path, val).unwrap();
-        return Ok(());
-    }
-
-    for key_byte in 0..=u8::MAX {
-        match rnode.find_child_or_restart(key_byte)? {
-            None => continue,
-            Some(child_ref) => {
-                let rchild = child_ref.read_lock_or_restart()?;
-                writeln!(
-                    dst,
-                    "{} {:?}, {}: prefix {:?}",
-                    indent,
-                    &path,
-                    key_byte,
-                    rchild.get_prefix()
-                )
-                .unwrap();
-
-                let mut child_path = path.clone();
-                child_path.push(PathElement::KeyByte(key_byte));
-
-                dump_recurse(&child_path, child_ref, epoch_pin, level + 1, dst)?;
-            }
-        }
-    }
-
-    Ok(())
-}
-
-///```text
-///        [fooba]r -> value
-///
-/// [foo]b -> [a]r  -> value
-///      e -> [ls]e -> value
-///```
-fn insert_split_prefix<'e, K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    node: &mut WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key: u8,
-    guard: &'e TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let old_node = node;
-    let old_prefix = old_node.get_prefix();
-    let common_prefix_len = common_prefix(key, old_prefix);
-
-    // Allocate a node for the new value.
-    let new_value_node = allocate_node_for_value(
-        &key[common_prefix_len + 1..],
-        value,
-        guard.tree_writer.allocator,
-    )?;
-
-    // Allocate a new internal node with the common prefix
-    // FIXME: deallocate 'new_value_node' on OOM
-    let mut prefix_node =
-        node_ref::new_internal(&key[..common_prefix_len], guard.tree_writer.allocator)?;
-
-    // Add the old node and the new nodes to the new internal node
-    prefix_node.insert_old_child(old_prefix[common_prefix_len], old_node);
-    prefix_node.insert_new_child(key[common_prefix_len], new_value_node);
-
-    // Modify the prefix of the old child in place
-    old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1);
-
-    // replace the pointer in the parent
-    parent.replace_child(parent_key, prefix_node.into_ptr());
-
-    Ok(())
-}
-
-fn insert_to_node<'e, K: Key, V: Value, A: ArtAllocator<V>>(
-    wnode: &mut WriteLockedNodeRef<V>,
-    key: &[u8],
-    value: V,
-    guard: &'e TreeWriteGuard<K, V, A>,
-) -> Result<(), OutOfMemoryError> {
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    wnode.insert_child(key[0], value_child.into_ptr());
-    Ok(())
-}
-
-// On entry: 'parent' and 'node' are locked
-fn insert_and_grow<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    wnode: WriteLockedNodeRef<V>,
-    parent: &mut WriteLockedNodeRef<V>,
-    parent_key_byte: u8,
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let mut bigger_node = wnode.grow(guard.tree_writer.allocator)?;
-
-    // FIXME: deallocate 'bigger_node' on OOM
-    let value_child = allocate_node_for_value(&key[1..], value, guard.tree_writer.allocator)?;
-    bigger_node.insert_new_child(key[0], value_child);
-
-    // Replace the pointer in the parent
-    parent.replace_child(parent_key_byte, bigger_node.into_ptr());
-
-    guard.remember_obsolete_node(wnode.as_ptr());
-    wnode.write_unlock_obsolete();
-
-    Ok(())
-}
-
-fn cleanup_parent<'e, 'g, K: Key, V: Value, A: ArtAllocator<V>>(
-    wparent: WriteLockedNodeRef<V>,
-    rgrandparent: (ReadLockedNodeRef<V>, u8),
-    guard: &'g mut TreeWriteGuard<'e, K, V, A>,
-) -> Result<(), ArtError> {
-    let (rgrandparent, grandparent_key_byte) = rgrandparent;
-
-    // If the parent becomes completely empty after the deletion, remove the parent from the
-    // grandparent. (This case is possible because we reserve only 8 bytes for the prefix.)
-    // TODO: not implemented.
-
-    // If the parent has only one child, replace the parent with the remaining child. (This is not
-    // possible if the child's prefix field cannot absorb the parent's)
-    if wparent.num_children() == 1 {
-        // Try to lock the remaining child. This can fail if the child is updated
-        // concurrently.
-        let (key_byte, remaining_child) = wparent.find_remaining_child();
-
-        let mut wremaining_child = remaining_child.write_lock_or_restart()?;
-
-        if 1 + wremaining_child.get_prefix().len() + wparent.get_prefix().len() <= MAX_PREFIX_LEN {
-            let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-
-            // Ok, we have locked the leaf, the parent, the grandparent, and the parent's only
-            // remaining leaf. Proceed with the updates.
-
-            // Update the prefix on the remaining leaf
-            wremaining_child.prepend_prefix(wparent.get_prefix(), key_byte);
-
-            // Replace the pointer in the grandparent to point directly to the remaining leaf
-            wgrandparent.replace_child(grandparent_key_byte, wremaining_child.as_ptr());
-
-            // Mark the parent as deleted.
-            guard.remember_obsolete_node(wparent.as_ptr());
-            wparent.write_unlock_obsolete();
-            return Ok(());
-        }
-    }
-
-    // If the parent's children would fit on a smaller node type after the deletion, replace it with
-    // a smaller node.
-    if wparent.can_shrink() {
-        let mut wgrandparent = rgrandparent.upgrade_to_write_lock_or_restart()?;
-        let smaller_node = wparent.shrink(guard.tree_writer.allocator)?;
-
-        // Replace the pointer in the grandparent
-        wgrandparent.replace_child(grandparent_key_byte, smaller_node.into_ptr());
-
-        guard.remember_obsolete_node(wparent.as_ptr());
-        wparent.write_unlock_obsolete();
-        return Ok(());
-    }
-
-    // nothing to do
-    wparent.write_unlock();
-    Ok(())
-}
-
-// Allocate a new leaf node to hold 'value'. If the key is long, we
-// may need to allocate new internal nodes to hold it too
-fn allocate_node_for_value<'a, V: Value, A: ArtAllocator<V>>(
-    key: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError> {
-    let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN);
-
-    let leaf_node = node_ref::new_leaf(&key[prefix_off..key.len()], value, allocator)?;
-
-    let mut node = leaf_node;
-    while prefix_off > 0 {
-        // Need another internal node
-        let remain_prefix = &key[0..prefix_off];
-
-        prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1);
-        let mut internal_node = node_ref::new_internal(
-            &remain_prefix[prefix_off..remain_prefix.len() - 1],
-            allocator,
-        )?;
-        internal_node.insert_new_child(*remain_prefix.last().unwrap(), node);
-        node = internal_node;
-    }
-
-    Ok(node)
-}
-
-fn common_prefix(a: &[u8], b: &[u8]) -> usize {
-    for i in 0..MAX_PREFIX_LEN {
-        if a[i] != b[i] {
-            return i;
-        }
-    }
-    panic!("prefixes are equal");
-}
--- a/libs/neonart/src/algorithm/lock_and_version.rs
+++ b/libs/neonart/src/algorithm/lock_and_version.rs
@@ -1,117 +0,0 @@
-//! Each node in the tree has contains one atomic word that stores three things:
-//!
-//! Bit 0: set if the node is "obsolete". An obsolete node has been removed from the tree,
-//!        but might still be accessed by concurrent readers until the epoch expires.
-//! Bit 1: set if the node is currently write-locked. Used as a spinlock.
-//! Bits 2-63: Version number, incremented every time the node is modified.
-//!
-//! AtomicLockAndVersion represents that.
-
-use std::sync::atomic::{AtomicU64, Ordering};
-
-pub(crate) struct ConcurrentUpdateError();
-
-pub(crate) struct AtomicLockAndVersion {
-    inner: AtomicU64,
-}
-
-impl AtomicLockAndVersion {
-    pub(crate) fn new() -> AtomicLockAndVersion {
-        AtomicLockAndVersion {
-            inner: AtomicU64::new(0),
-        }
-    }
-}
-
-impl AtomicLockAndVersion {
-    pub(crate) fn read_lock_or_restart(&self) -> Result<u64, ConcurrentUpdateError> {
-        let version = self.await_node_unlocked();
-        if is_obsolete(version) {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(version)
-    }
-
-    pub(crate) fn check_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        self.read_unlock_or_restart(version)
-    }
-
-    pub(crate) fn read_unlock_or_restart(&self, version: u64) -> Result<(), ConcurrentUpdateError> {
-        if self.inner.load(Ordering::Acquire) != version {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        &self,
-        version: u64,
-    ) -> Result<(), ConcurrentUpdateError> {
-        if self
-            .inner
-            .compare_exchange(
-                version,
-                set_locked_bit(version),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .is_err()
-        {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn write_lock_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        let old = self.inner.load(Ordering::Relaxed);
-        if is_obsolete(old) || is_locked(old) {
-            return Err(ConcurrentUpdateError());
-        }
-        if self
-            .inner
-            .compare_exchange(
-                old,
-                set_locked_bit(old),
-                Ordering::Acquire,
-                Ordering::Relaxed,
-            )
-            .is_err()
-        {
-            return Err(ConcurrentUpdateError());
-        }
-        Ok(())
-    }
-
-    pub(crate) fn write_unlock(&self) {
-        // reset locked bit and overflow into version
-        self.inner.fetch_add(2, Ordering::Release);
-    }
-
-    pub(crate) fn write_unlock_obsolete(&self) {
-        // set obsolete, reset locked, overflow into version
-        self.inner.fetch_add(3, Ordering::Release);
-    }
-
-    // Helper functions
-    fn await_node_unlocked(&self) -> u64 {
-        let mut version = self.inner.load(Ordering::Acquire);
-        while is_locked(version) {
-            // spinlock
-            std::thread::yield_now();
-            version = self.inner.load(Ordering::Acquire)
-        }
-        version
-    }
-}
-
-fn set_locked_bit(version: u64) -> u64 {
-    return version + 2;
-}
-
-fn is_obsolete(version: u64) -> bool {
-    return (version & 1) == 1;
-}
-
-fn is_locked(version: u64) -> bool {
-    return (version & 2) == 2;
-}
--- a/libs/neonart/src/algorithm/node_ptr.rs
+++ b/libs/neonart/src/algorithm/node_ptr.rs
--- a/libs/neonart/src/algorithm/node_ref.rs
+++ b/libs/neonart/src/algorithm/node_ref.rs
@@ -1,349 +0,0 @@
-use std::fmt::Debug;
-use std::marker::PhantomData;
-
-use super::node_ptr;
-use super::node_ptr::NodePtr;
-use crate::EpochPin;
-use crate::Value;
-use crate::algorithm::lock_and_version::AtomicLockAndVersion;
-use crate::algorithm::lock_and_version::ConcurrentUpdateError;
-use crate::allocator::ArtAllocator;
-use crate::allocator::OutOfMemoryError;
-
-pub struct NodeRef<'e, V> { // handle to a node on which no lock has been taken yet
-    ptr: NodePtr<V>, // the node this handle designates
-
-    phantom: PhantomData<&'e EpochPin<'e>>, // zero-sized marker carrying the `'e` lifetime of an `EpochPin`
-}
-
-// Debug output is whatever the wrapped node pointer prints with `{:?}`.
-impl<'e, V> Debug for NodeRef<'e, V> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_fmt(format_args!("{:?}", self.ptr))
-    }
-}
-
-impl<'e, V: Value> NodeRef<'e, V> {
-    /// Wraps the tree's root pointer in an unlocked reference.
-    pub(crate) fn from_root_ptr(root_ptr: NodePtr<V>) -> NodeRef<'e, V> {
-        Self {
-            ptr: root_ptr,
-            phantom: PhantomData,
-        }
-    }
-
-    /// Takes an optimistic read lock on the node and remembers the version returned by
-    /// the lock word. Errors from the lock word are passed on to the caller.
-    pub(crate) fn read_lock_or_restart(
-        &self,
-    ) -> Result<ReadLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        let version = self.lockword().read_lock_or_restart()?;
-        let locked = ReadLockedNodeRef {
-            ptr: self.ptr,
-            version,
-            phantom: self.phantom,
-        };
-        Ok(locked)
-    }
-
-    /// Takes the write lock on the node. Errors from the lock word are passed on to the
-    /// caller.
-    pub(crate) fn write_lock_or_restart(
-        &self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        self.lockword().write_lock_or_restart()?;
-        let locked = WriteLockedNodeRef {
-            ptr: self.ptr,
-            phantom: self.phantom,
-        };
-        Ok(locked)
-    }
-
-    /// The node's combined lock-and-version word.
-    fn lockword(&self) -> &AtomicLockAndVersion {
-        self.ptr.lockword()
-    }
-}
-
-/// A reference to a node that has been optimistically read-locked. The functions that
-/// return a child or a value re-check the lock word against `version` after the read.
-pub struct ReadLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>, // the node that was read-locked
-    version: u64, // value returned by the lock word when the read lock was taken
-
-    phantom: PhantomData<&'e EpochPin<'e>>, // zero-sized marker carrying the `'e` lifetime of an `EpochPin`
-}
-
-impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
-    /// Whether the node is a leaf. The version is not re-checked here.
-    pub(crate) fn is_leaf(&self) -> bool {
-        self.ptr.is_leaf()
-    }
-
-    /// Whether the node is full. The version is not re-checked here.
-    pub(crate) fn is_full(&self) -> bool {
-        self.ptr.is_full()
-    }
-
-    /// The node's prefix bytes. The version is not re-checked here.
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    /// Compares the node's prefix with `key`.
-    ///
-    /// Only a read lock is held, so the prefix may be changing underneath us. The
-    /// caller must be ready to restart if read_unlock() reports an error afterwards.
-    ///
-    /// Returns the length of the prefix, or None if it's not a match
-    pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
-        self.ptr.prefix_matches(key)
-    }
-
-    /// Looks up the child stored under `key_byte`, then validates the version.
-    ///
-    /// `Ok(None)` means the node has no such child; an error means the lookup raced
-    /// with an update and must be retried.
-    pub(crate) fn find_child_or_restart(
-        &self,
-        key_byte: u8,
-    ) -> Result<Option<NodeRef<'e, V>>, ConcurrentUpdateError> {
-        let found = self.ptr.find_child(key_byte);
-        // Validate only after the read; the result is discarded if the check fails.
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        Ok(found.map(|child_ptr| NodeRef {
-            ptr: child_ptr,
-            phantom: self.phantom,
-        }))
-    }
-
-    /// Asks the node for its next child starting from `min_key_byte`, then validates
-    /// the version. On success the child is returned together with its key byte.
-    pub(crate) fn find_next_child_or_restart(
-        &self,
-        min_key_byte: u8,
-    ) -> Result<Option<(u8, NodeRef<'e, V>)>, ConcurrentUpdateError> {
-        let found = self.ptr.find_next_child(min_key_byte);
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        Ok(found.map(|(key_byte, child_ptr)| {
-            let child = NodeRef {
-                ptr: child_ptr,
-                phantom: self.phantom,
-            };
-            (key_byte, child)
-        }))
-    }
-
-    /// Returns a raw pointer to the leaf's value after validating the version.
-    pub(crate) fn get_leaf_value_ptr(&self) -> Result<*const V, ConcurrentUpdateError> {
-        let value_ref = self.ptr.get_leaf_value();
-        self.ptr.lockword().check_or_restart(self.version)?;
-
-        // A raw pointer is returned so that the result is not tied to the borrow of `self`.
-        Ok(std::ptr::from_ref(value_ref))
-    }
-
-    /// Tries to turn the read lock into a write lock, consuming this reference. Fails
-    /// if the lock word rejects the upgrade for the remembered version.
-    pub(crate) fn upgrade_to_write_lock_or_restart(
-        self,
-    ) -> Result<WriteLockedNodeRef<'e, V>, ConcurrentUpdateError> {
-        let lockword = self.ptr.lockword();
-        lockword.upgrade_to_write_lock_or_restart(self.version)?;
-
-        Ok(WriteLockedNodeRef {
-            ptr: self.ptr,
-            phantom: self.phantom,
-        })
-    }
-
-    /// Ends the optimistic read: consumes the reference and performs a final version
-    /// check.
-    pub(crate) fn read_unlock_or_restart(self) -> Result<(), ConcurrentUpdateError> {
-        self.check_or_restart()
-    }
-
-    /// Checks the lock word against the remembered version without giving up the
-    /// reference.
-    pub(crate) fn check_or_restart(&self) -> Result<(), ConcurrentUpdateError> {
-        self.ptr.lockword().check_or_restart(self.version)?;
-        Ok(())
-    }
-}
-
-/// A reference to a node on which the write lock is held. The lock is released by
-/// write_unlock(), by write_unlock_obsolete(), or when the reference is dropped.
-pub struct WriteLockedNodeRef<'e, V> {
-    ptr: NodePtr<V>, // the locked node; set to null once the lock has been released explicitly
-    phantom: PhantomData<&'e EpochPin<'e>>, // zero-sized marker carrying the `'e` lifetime of an `EpochPin`
-}
-
-impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
-    /// Whether the node reports that it can be shrunk.
-    pub(crate) fn can_shrink(&self) -> bool {
-        self.ptr.can_shrink()
-    }
-
-    /// Number of children currently stored in the node.
-    pub(crate) fn num_children(&self) -> usize {
-        self.ptr.num_children()
-    }
-
-    /// Releases the write lock and consumes the reference.
-    pub(crate) fn write_unlock(mut self) {
-        self.ptr.lockword().write_unlock();
-        // Null pointer tells the drop implementation that the lock is already released.
-        self.ptr = NodePtr::null();
-    }
-
-    /// Releases the write lock, marking the node obsolete, and consumes the reference.
-    pub(crate) fn write_unlock_obsolete(mut self) {
-        self.ptr.lockword().write_unlock_obsolete();
-        // Null pointer tells the drop implementation that the lock is already released.
-        self.ptr = NodePtr::null();
-    }
-
-    /// The node's prefix bytes.
-    pub(crate) fn get_prefix(&self) -> &[u8] {
-        self.ptr.get_prefix()
-    }
-
-    /// Forwards to the node's `truncate_prefix` with `new_prefix_len`.
-    pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
-        self.ptr.truncate_prefix(new_prefix_len)
-    }
-
-    /// Forwards to the node's `prepend_prefix` with `prefix` and `prefix_byte`.
-    pub(crate) fn prepend_prefix(&mut self, prefix: &[u8], prefix_byte: u8) {
-        self.ptr.prepend_prefix(prefix, prefix_byte)
-    }
-
-    /// Adds `child` to the node under `key_byte`.
-    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
-        self.ptr.insert_child(key_byte, child)
-    }
-
-    /// Mutable access to the value stored in a leaf node.
-    pub(crate) fn get_leaf_value_mut(&mut self) -> &mut V {
-        self.ptr.get_leaf_value_mut()
-    }
-
-    /// Calls the node's `grow` with `allocator` and wraps the node it returns in a
-    /// `NewNodeRef`. Fails if the allocation fails.
-    pub(crate) fn grow<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        let ptr = self.ptr.grow(allocator)?;
-        Ok(NewNodeRef {
-            ptr,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    /// Calls the node's `shrink` with `allocator` and wraps the node it returns in a
-    /// `NewNodeRef`. Fails if the allocation fails.
-    pub(crate) fn shrink<'a, A>(
-        &self,
-        allocator: &'a A,
-    ) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-    where
-        A: ArtAllocator<V>,
-    {
-        let ptr = self.ptr.shrink(allocator)?;
-        Ok(NewNodeRef {
-            ptr,
-            allocator,
-            extra_nodes: Vec::new(),
-        })
-    }
-
-    /// The raw pointer to the locked node.
-    pub(crate) fn as_ptr(&self) -> NodePtr<V> {
-        self.ptr
-    }
-
-    /// Points the slot for `key_byte` at `replacement`.
-    pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
-        self.ptr.replace_child(key_byte, replacement);
-    }
-
-    /// Removes the child stored under `key_byte`.
-    pub(crate) fn delete_child(&mut self, key_byte: u8) {
-        self.ptr.delete_child(key_byte);
-    }
-
-    /// Returns the single child of a node that has exactly one, with its key byte.
-    ///
-    /// Panics if the node does not have exactly one child, or if that child cannot be
-    /// found.
-    pub(crate) fn find_remaining_child(&self) -> (u8, NodeRef<'e, V>) {
-        assert_eq!(self.num_children(), 1);
-
-        let Some((key_byte, child_ptr)) = self.ptr.find_next_child(0) else {
-            panic!("could not find only child in node");
-        };
-        let child = NodeRef {
-            ptr: child_ptr,
-            phantom: self.phantom,
-        };
-        (key_byte, child)
-    }
-}
-
-impl<'e, V> Drop for WriteLockedNodeRef<'e, V> {
-    /// Releases the write lock unless it has already been released explicitly.
-    fn drop(&mut self) {
-        // write_unlock() and write_unlock_obsolete() leave a null pointer behind after
-        // releasing the lock themselves.
-        if self.ptr.is_null() {
-            return;
-        }
-        self.ptr.lockword().write_unlock();
-    }
-}
-
-pub(crate) struct NewNodeRef<'a, V, A> // owning handle to a newly allocated node; frees it on drop
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    ptr: NodePtr<V>, // the new node; null after into_ptr() has given it away
-    allocator: &'a A, // allocator the node(s) are returned to if the handle is dropped
-
-    extra_nodes: Vec<NodePtr<V>>, // nodes added with insert_new_child(); freed together with `ptr`
-}
-
-impl<'a, V, A> NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    /// Links an existing, write-locked node into this new node under `key_byte`.
-    ///
-    /// The child is not recorded in `extra_nodes`, so it is not freed if this handle is
-    /// dropped.
-    pub(crate) fn insert_old_child(&mut self, key_byte: u8, child: &WriteLockedNodeRef<V>) {
-        self.ptr.insert_child(key_byte, child.as_ptr())
-    }
-
-    /// Gives up ownership of the node and returns its pointer.
-    ///
-    /// After this call the drop implementation deallocates neither the node nor the
-    /// nodes recorded in `extra_nodes`.
-    pub(crate) fn into_ptr(mut self) -> NodePtr<V> {
-        // The null pointer left behind is what tells `drop` to free nothing.
-        std::mem::replace(&mut self.ptr, NodePtr::null())
-    }
-
-    /// Links another newly allocated node into this one under `key_byte` and takes over
-    /// the responsibility for freeing it.
-    ///
-    /// Nodes that `child` was itself responsible for are adopted too. Previously they
-    /// were forgotten here, so dropping `self` without calling into_ptr() leaked them.
-    pub(crate) fn insert_new_child(&mut self, key_byte: u8, mut child: NewNodeRef<'a, V, A>) {
-        // Must be taken before into_ptr(): once the child's pointer is null its drop
-        // skips `extra_nodes` without deallocating them.
-        let adopted = std::mem::take(&mut child.extra_nodes);
-        let child_ptr = child.into_ptr();
-        self.ptr.insert_child(key_byte, child_ptr);
-        self.extra_nodes.push(child_ptr);
-        self.extra_nodes.extend(adopted);
-    }
-}
-
-impl<'a, V, A> Drop for NewNodeRef<'a, V, A>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    /// Returns the new node and every node in `extra_nodes` to the allocator, unless
-    /// into_ptr() was called (which leaves a null pointer behind).
-    fn drop(&mut self) {
-        if self.ptr.is_null() {
-            return;
-        }
-        self.ptr.deallocate(self.allocator);
-        for extra in &self.extra_nodes {
-            extra.deallocate(self.allocator);
-        }
-    }
-}
-
-/// Allocates an internal node via `node_ptr::new_internal` with the given prefix and
-/// wraps it in a `NewNodeRef`, so that it is freed again if the handle is dropped
-/// before into_ptr() is called. Fails if the allocation fails.
-pub(crate) fn new_internal<'a, V, A>(
-    prefix: &[u8],
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    let ptr = node_ptr::new_internal(prefix, allocator)?;
-    Ok(NewNodeRef {
-        ptr,
-        allocator,
-        extra_nodes: Vec::new(),
-    })
-}
-
-/// Allocates a leaf node via `node_ptr::new_leaf` with the given prefix and value and
-/// wraps it in a `NewNodeRef`, so that it is freed again if the handle is dropped
-/// before into_ptr() is called. Fails if the allocation fails.
-pub(crate) fn new_leaf<'a, V, A>(
-    prefix: &[u8],
-    value: V,
-    allocator: &'a A,
-) -> Result<NewNodeRef<'a, V, A>, OutOfMemoryError>
-where
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    let ptr = node_ptr::new_leaf(prefix, value, allocator)?;
-    Ok(NewNodeRef {
-        ptr,
-        allocator,
-        extra_nodes: Vec::new(),
-    })
-}
--- a/libs/neonart/src/allocator.rs
+++ b/libs/neonart/src/allocator.rs
@@ -1,158 +0,0 @@
-pub mod block;
-mod multislab;
-mod slab;
-pub mod r#static;
-
-use std::alloc::Layout;
-use std::marker::PhantomData;
-use std::mem::MaybeUninit;
-use std::sync::atomic::Ordering;
-
-use crate::allocator::multislab::MultiSlabAllocator;
-use crate::allocator::r#static::alloc_from_slice;
-
-use spin;
-
-use crate::Tree;
-pub use crate::algorithm::node_ptr::{
-    NodeInternal4, NodeInternal16, NodeInternal48, NodeInternal256, NodeLeaf,
-};
-
-#[derive(Debug)]
-pub struct OutOfMemoryError(); // error type of the fallible node constructors (e.g. grow, new_leaf)
-
-pub trait ArtAllocator<V: crate::Value> { // source of raw storage for the tree struct and for each node type
-    fn alloc_tree(&self) -> *mut Tree<V>; // storage for the `Tree` itself
-
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V>; // alloc_node_*: storage for one node of that type
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V>;
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V>;
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V>;
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V>;
-
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>); // dealloc_node_*: hands a node's storage back
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>);
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>);
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>);
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>);
-}
-
-pub struct ArtMultiSlabAllocator<'t, V> // `ArtAllocator` built on a `MultiSlabAllocator` with one slab per node type
-where
-    V: crate::Value,
-{
-    tree_area: spin::Mutex<Option<&'t mut MaybeUninit<Tree<V>>>>, // space for the single tree; taken by alloc_tree()
-
-    pub(crate) inner: MultiSlabAllocator<'t, 5>, // the five slabs, in the order of `LAYOUTS`
-
-    phantom_val: PhantomData<V>,
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    /// One layout per slab. The position in this array is the slab index that the
-    /// alloc_node_* and dealloc_node_* functions pass to the inner allocator.
-    const LAYOUTS: [Layout; 5] = [
-        Layout::new::<NodeInternal4<V>>(),
-        Layout::new::<NodeInternal16<V>>(),
-        Layout::new::<NodeInternal48<V>>(),
-        Layout::new::<NodeInternal256<V>>(),
-        Layout::new::<NodeLeaf<V>>(),
-    ];
-
-    /// Builds the allocator inside `area` and returns a reference to it.
-    ///
-    /// Space is carved out of `area` in this order: the allocator struct itself, room
-    /// for one `Tree`, and then everything that remains for the slabs.
-    pub fn new(area: &'t mut [MaybeUninit<u8>]) -> &'t mut ArtMultiSlabAllocator<'t, V> {
-        let (self_slot, rest) = alloc_from_slice::<ArtMultiSlabAllocator<V>>(area);
-        let (tree_slot, rest) = alloc_from_slice::<Tree<V>>(rest);
-
-        self_slot.write(ArtMultiSlabAllocator {
-            tree_area: spin::Mutex::new(Some(tree_slot)),
-            inner: MultiSlabAllocator::new(rest, &Self::LAYOUTS),
-            phantom_val: PhantomData,
-        })
-    }
-}
-
-// Each node type is served from its own slab; the slab index is the position of the
-// type's layout in `LAYOUTS`.
-impl<'t, V: crate::Value> ArtAllocator<V> for ArtMultiSlabAllocator<'t, V> {
-    /// Hands out the space reserved for the tree. Panics on a second call, because the
-    /// space can be taken only once.
-    fn alloc_tree(&self) -> *mut Tree<V> {
-        match self.tree_area.lock().take() {
-            Some(tree_area) => tree_area.as_mut_ptr().cast(),
-            None => panic!("cannot allocate more than one tree"),
-        }
-    }
-
-    // Slab 0: NodeInternal4
-    fn alloc_node_internal4(&self) -> *mut NodeInternal4<V> {
-        self.inner.alloc_slab(0).cast()
-    }
-
-    // Slab 1: NodeInternal16
-    fn alloc_node_internal16(&self) -> *mut NodeInternal16<V> {
-        self.inner.alloc_slab(1).cast()
-    }
-
-    // Slab 2: NodeInternal48
-    fn alloc_node_internal48(&self) -> *mut NodeInternal48<V> {
-        self.inner.alloc_slab(2).cast()
-    }
-
-    // Slab 3: NodeInternal256
-    fn alloc_node_internal256(&self) -> *mut NodeInternal256<V> {
-        self.inner.alloc_slab(3).cast()
-    }
-
-    // Slab 4: NodeLeaf
-    fn alloc_node_leaf(&self) -> *mut NodeLeaf<V> {
-        self.inner.alloc_slab(4).cast()
-    }
-
-    // The dealloc functions return the chunk to the slab it was allocated from.
-    fn dealloc_node_internal4(&self, ptr: *mut NodeInternal4<V>) {
-        self.inner.dealloc_slab(0, ptr.cast())
-    }
-
-    fn dealloc_node_internal16(&self, ptr: *mut NodeInternal16<V>) {
-        self.inner.dealloc_slab(1, ptr.cast())
-    }
-
-    fn dealloc_node_internal48(&self, ptr: *mut NodeInternal48<V>) {
-        self.inner.dealloc_slab(2, ptr.cast())
-    }
-
-    fn dealloc_node_internal256(&self, ptr: *mut NodeInternal256<V>) {
-        self.inner.dealloc_slab(3, ptr.cast())
-    }
-
-    fn dealloc_node_leaf(&self, ptr: *mut NodeLeaf<V>) {
-        self.inner.dealloc_slab(4, ptr.cast())
-    }
-}
-
-impl<'t, V: crate::Value> ArtMultiSlabAllocator<'t, V> {
-    /// Copies the per-slab counters into an `ArtMultiSlabStats`.
-    ///
-    /// Every counter is read with a separate relaxed load, so the result is not a
-    /// consistent snapshot if allocations happen concurrently.
-    pub(crate) fn get_statistics(&self) -> ArtMultiSlabStats {
-        let slabs = &self.inner.slab_descs;
-        let allocated = |slab_idx: usize| slabs[slab_idx].num_allocated.load(Ordering::Relaxed);
-        let blocks = |slab_idx: usize| slabs[slab_idx].num_blocks.load(Ordering::Relaxed);
-
-        ArtMultiSlabStats {
-            num_internal4: allocated(0),
-            num_internal16: allocated(1),
-            num_internal48: allocated(2),
-            num_internal256: allocated(3),
-            num_leaf: allocated(4),
-
-            num_blocks_internal4: blocks(0),
-            num_blocks_internal16: blocks(1),
-            num_blocks_internal48: blocks(2),
-            num_blocks_internal256: blocks(3),
-            num_blocks_leaf: blocks(4),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtMultiSlabStats { // counters copied out of the five slabs by get_statistics()
-    pub num_internal4: u64, // num_<type>: the slab's `num_allocated` counter (chunks currently allocated)
-    pub num_internal16: u64,
-    pub num_internal48: u64,
-    pub num_internal256: u64,
-    pub num_leaf: u64,
-
-    pub num_blocks_internal4: u64, // num_blocks_<type>: the slab's `num_blocks` counter (blocks it has obtained)
-    pub num_blocks_internal16: u64,
-    pub num_blocks_internal48: u64,
-    pub num_blocks_internal256: u64,
-    pub num_blocks_leaf: u64,
-}
--- a/libs/neonart/src/allocator/block.rs
+++ b/libs/neonart/src/allocator/block.rs
@@ -1,191 +0,0 @@
-//! Simple allocator of fixed-size blocks
-
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use spin;
-
-pub const BLOCK_SIZE: usize = 16 * 1024; // bytes per block (16 KiB); blocks are aligned to this size
-
-const INVALID_BLOCK: u64 = u64::MAX; // sentinel block number: "no block" / end of the free list
-
-pub(crate) struct BlockAllocator<'t> { // hands out BLOCK_SIZE-sized blocks from one memory area
-    blocks_ptr: &'t [MaybeUninit<u8>], // the area, starting at its first BLOCK_SIZE-aligned byte
-    num_blocks: u64, // number of whole blocks that fit in `blocks_ptr`
-    num_initialized: AtomicU64, // block numbers below this have been handed out at least once
-
-    freelist_head: spin::Mutex<u64>, // block number of the first freelist block, or INVALID_BLOCK
-}
-
-struct FreeListBlock { // view of a released block that is being used as one page of the free list
-    inner: spin::Mutex<FreeListBlockInner>, // the page's contents, behind its own spinlock
-}
-
-struct FreeListBlockInner {
-    next: u64, // block number of the next freelist block, or INVALID_BLOCK
-
-    num_free_blocks: u64, // number of valid entries at the start of `free_blocks`
-    free_blocks: [u64; 100], // FIXME: fill the rest of the block
-}
-
-impl<'t> BlockAllocator<'t> {
-    pub(crate) fn new(area: &'t mut [MaybeUninit<u8>]) -> Self { // builds an allocator over `area`; the area itself is not written here
-        // Use all the space for the blocks, starting at the first BLOCK_SIZE-aligned address
-        let padding = area.as_ptr().align_offset(BLOCK_SIZE);
-        let remain = &mut area[padding..]; // panics if the area ends before the first aligned address
-
-        let num_blocks = (remain.len() / BLOCK_SIZE) as u64; // a trailing partial block is not used
-
-        BlockAllocator {
-            blocks_ptr: remain,
-            num_blocks,
-            num_initialized: AtomicU64::new(0),
-            freelist_head: spin::Mutex::new(INVALID_BLOCK), // the free list starts out empty
-        }
-    }
-
-    /// safety: you must hold a lock on the pointer to this block, otherwise it might get
-    /// reused for another kind of block
-    fn read_freelist_block(&self, blkno: u64) -> &FreeListBlock { // reinterprets block `blkno` as a freelist block
-        let ptr: *const FreeListBlock = self.get_block_ptr(blkno).cast();
-        unsafe { ptr.as_ref().unwrap() }
-    }
-
-    fn get_block_ptr(&self, blkno: u64) -> *mut u8 { // address of block `blkno`; panics if it is out of range
-        assert!(blkno < self.num_blocks);
-        unsafe {
-            self.blocks_ptr
-                .as_ptr()
-                .byte_offset(blkno as isize * BLOCK_SIZE as isize)
-        }
-        .cast_mut()
-        .cast()
-    }
-
-    #[allow(clippy::mut_from_ref)]
-    pub(crate) fn alloc_block(&self) -> &mut [MaybeUninit<u8>] { // returns one block as a BLOCK_SIZE-byte slice
-        // FIXME: handle OOM (currently panics when no block is left)
-        let blkno = self.alloc_block_internal();
-        if blkno == INVALID_BLOCK {
-            panic!("out of memory");
-        }
-
-        let ptr: *mut MaybeUninit<u8> = self.get_block_ptr(blkno).cast();
-        unsafe { std::slice::from_raw_parts_mut(ptr, BLOCK_SIZE) }
-    }
-
-    fn alloc_block_internal(&self) -> u64 { // returns a block number, or INVALID_BLOCK if none is left
-        // First check the free list.
-        {
-            let mut freelist_head = self.freelist_head.lock();
-            if *freelist_head != INVALID_BLOCK {
-                let freelist_block = self.read_freelist_block(*freelist_head);
-
-                // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-                let mut g = freelist_block.inner.lock();
-
-                if g.num_free_blocks > 0 {
-                    g.num_free_blocks -= 1; // pop the last recorded entry
-                    let result = g.free_blocks[g.num_free_blocks as usize];
-                    return result;
-                } else {
-                    // no entries left: consume the freelist block itself
-                    let result = *freelist_head;
-                    *freelist_head = g.next;
-                    // This freelist block is now unlinked and can be repurposed
-                    drop(g);
-                    return result;
-                }
-            }
-        }
-
-        // If there are some blocks left that we've never used, pick next such block
-        let mut next_uninitialized = self.num_initialized.load(Ordering::Relaxed);
-        while next_uninitialized < self.num_blocks {
-            match self.num_initialized.compare_exchange(
-                next_uninitialized,
-                next_uninitialized + 1,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => {
-                    return next_uninitialized; // we claimed this block number
-                }
-                Err(old) => {
-                    next_uninitialized = old; // lost the race; retry with the value that was actually there
-                    continue;
-                }
-            }
-        }
-
-        // out of blocks
-        return INVALID_BLOCK;
-    }
-
-    // TODO: this is currently unused. The slab allocator never releases blocks
-    #[allow(dead_code)]
-    pub(crate) fn release_block(&self, block_ptr: *mut u8) { // converts the block's address to a block number and frees it
-        let blockno = unsafe { block_ptr.byte_offset_from(self.blocks_ptr) / BLOCK_SIZE as isize };
-        self.release_block_internal(blockno as u64);
-    }
-
-    fn release_block_internal(&self, blockno: u64) { // records `blockno` as free
-        let mut freelist_head = self.freelist_head.lock();
-        if *freelist_head != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(*freelist_head);
-
-            // acquire lock on the freelist block before releasing the lock on the parent (i.e. lock coupling)
-            let mut g = freelist_block.inner.lock();
-
-            let num_free_blocks = g.num_free_blocks;
-            if num_free_blocks < g.free_blocks.len() as u64 {
-                g.free_blocks[num_free_blocks as usize] = blockno; // room left in the head block: append there
-                g.num_free_blocks += 1;
-                return;
-            }
-        }
-
-        // No freelist block yet, or the head one is full: convert the block into a new freelist block
-        let block_ptr: *mut FreeListBlock = self.get_block_ptr(blockno).cast();
-        let init = FreeListBlock {
-            inner: spin::Mutex::new(FreeListBlockInner {
-                next: *freelist_head, // chain in front of the previous head
-                num_free_blocks: 0,
-                free_blocks: [INVALID_BLOCK; 100],
-            }),
-        };
-        unsafe { (*block_ptr) = init };
-        *freelist_head = blockno;
-    }
-
-    // for debugging; walks the whole free list, locking each freelist block in turn
-    pub(crate) fn get_statistics(&self) -> BlockAllocatorStats {
-        let mut num_free_blocks = 0; // sums the recorded entries; the freelist blocks themselves are not counted
-
-        let mut _prev_lock = None;
-        let head_lock = self.freelist_head.lock();
-        let mut next_blk = *head_lock;
-        let mut _head_lock = Some(head_lock); // held until the first freelist block has been locked
-        while next_blk != INVALID_BLOCK {
-            let freelist_block = self.read_freelist_block(next_blk);
-            let lock = freelist_block.inner.lock();
-            num_free_blocks += lock.num_free_blocks;
-            next_blk = lock.next;
-            _prev_lock = Some(lock); // hold the lock until we've read the next block
-            _head_lock = None;
-        }
-
-        BlockAllocatorStats {
-            num_blocks: self.num_blocks,
-            num_initialized: self.num_initialized.load(Ordering::Relaxed),
-            num_free_blocks,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct BlockAllocatorStats { // values reported by BlockAllocator::get_statistics()
-    pub num_blocks: u64, // total number of blocks in the area
-    pub num_initialized: u64, // value of the allocator's `num_initialized` counter
-    pub num_free_blocks: u64, // sum of `num_free_blocks` over all freelist blocks
-}
--- a/libs/neonart/src/allocator/multislab.rs
+++ b/libs/neonart/src/allocator/multislab.rs
@@ -1,33 +0,0 @@
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-
-use crate::allocator::block::BlockAllocator;
-use crate::allocator::slab::SlabDesc;
-
-pub struct MultiSlabAllocator<'t, const N: usize> { // N slabs that share one block allocator
-    pub(crate) block_allocator: BlockAllocator<'t>, // where every slab gets its blocks from
-
-    pub(crate) slab_descs: [SlabDesc; N], // one descriptor per layout passed to new(), in the same order
-}
-
-impl<'t, const N: usize> MultiSlabAllocator<'t, N> {
-    /// Creates an allocator with one slab per entry of `layouts`. All slabs take their
-    /// blocks from a single block allocator built over `area`.
-    pub(crate) fn new(
-        area: &'t mut [MaybeUninit<u8>],
-        layouts: &[Layout; N],
-    ) -> MultiSlabAllocator<'t, N> {
-        Self {
-            block_allocator: BlockAllocator::new(area),
-            slab_descs: std::array::from_fn(|slab_idx| SlabDesc::new(&layouts[slab_idx])),
-        }
-    }
-
-    /// Allocates one chunk from slab number `slab_idx`. Panics if the index is out of
-    /// range.
-    pub(crate) fn alloc_slab(&self, slab_idx: usize) -> *mut u8 {
-        let slab = &self.slab_descs[slab_idx];
-        slab.alloc_chunk(&self.block_allocator)
-    }
-
-    /// Returns the chunk at `ptr` to slab number `slab_idx`. Panics if the index is out
-    /// of range.
-    pub(crate) fn dealloc_slab(&self, slab_idx: usize, ptr: *mut u8) {
-        let slab = &self.slab_descs[slab_idx];
-        slab.dealloc_chunk(ptr, &self.block_allocator)
-    }
-}
--- a/libs/neonart/src/allocator/slab.rs
+++ b/libs/neonart/src/allocator/slab.rs
@@ -1,432 +0,0 @@
-//! A slab allocator that carves out fixed-size chunks from larger blocks.
-//!
-//!
-
-use std::alloc::Layout;
-use std::mem::MaybeUninit;
-use std::ops::Deref;
-use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
-
-use spin;
-
-use super::alloc_from_slice;
-use super::block::BlockAllocator;
-
-use crate::allocator::block::BLOCK_SIZE;
-
-pub(crate) struct SlabDesc { // one slab: allocates chunks of a single layout out of blocks
-    pub(crate) layout: Layout, // size and alignment of every chunk in this slab
-
-    block_lists: spin::RwLock<BlockLists>, // the slab's blocks, split into full and non-full lists
-
-    pub(crate) num_blocks: AtomicU64, // incremented each time alloc_chunk() obtains a new block
-    pub(crate) num_allocated: AtomicU64, // incremented by alloc_chunk(), decremented by dealloc_chunk()
-}
-
-// FIXME: Not sure if SlabDesc is really Sync or Send. It probably is when it's empty, but
-// 'block_lists' contains pointers when it's not empty. In the current use as part of
-// the art tree, SlabDescs are only moved during initialization.
-unsafe impl Sync for SlabDesc {}
-unsafe impl Send for SlabDesc {}
-
-#[derive(Default, Debug)]
-struct BlockLists { // the two block lists of one slab
-    full_blocks: BlockList, // blocks that alloc_chunk() found to have no free chunk
-    nonfull_blocks: BlockList, // blocks that alloc_chunk() tries to allocate from
-}
-
-impl BlockLists {
-    // Unlink a node. It must be in either one of the two lists; which one is worked out here.
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        let list = unsafe {
-            if (*elem).next.is_null() { // elem is the tail of its list: find the list whose tail it is
-                if self.full_blocks.tail == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else if (*elem).prev.is_null() { // elem is the head of its list: find the list whose head it is
-                if self.full_blocks.head == elem {
-                    Some(&mut self.full_blocks)
-                } else {
-                    Some(&mut self.nonfull_blocks)
-                }
-            } else {
-                None // interior element: only its neighbours need updating, not a list head or tail
-            }
-        };
-        unsafe { unlink_slab_block(list, elem) };
-    }
-}
-
-unsafe fn unlink_slab_block(mut list: Option<&mut BlockList>, elem: *mut SlabBlockHeader) { // `list` may be None only for an interior element
-    unsafe {
-        if (*elem).next.is_null() {
-            assert_eq!(list.as_ref().unwrap().tail, elem); // elem is the tail, so the list is needed
-            list.as_mut().unwrap().tail = (*elem).prev;
-        } else {
-            assert_eq!((*(*elem).next).prev, elem);
-            (*(*elem).next).prev = (*elem).prev; // successor now points back past elem
-        }
-        if (*elem).prev.is_null() {
-            assert_eq!(list.as_ref().unwrap().head, elem); // elem is the head, so the list is needed
-            list.as_mut().unwrap().head = (*elem).next;
-        } else {
-            assert_eq!((*(*elem).prev).next, elem);
-            (*(*elem).prev).next = (*elem).next; // predecessor now points forward past elem
-        }
-    } // elem's own prev/next fields are left as they were
-}
-
-#[derive(Debug)]
-struct BlockList { // doubly-linked list of slab blocks, linked through the block headers
-    head: *mut SlabBlockHeader, // first block, or null when the list is empty
-    tail: *mut SlabBlockHeader, // last block, or null when the list is empty
-}
-
-impl Default for BlockList {
-    /// An empty list: both ends are null.
-    fn default() -> Self {
-        let null = std::ptr::null_mut();
-        Self {
-            head: null,
-            tail: null,
-        }
-    }
-}
-
-impl BlockList {
-    /// Inserts `elem` at the front of the list.
-    ///
-    /// `elem` is dereferenced, and so is the current head, so both must point to valid
-    /// block headers.
-    unsafe fn push_head(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe {
-            let old_head = self.head;
-            // For an empty list `old_head` is null, which is also the right `next`.
-            (*elem).next = old_head;
-            if old_head.is_null() {
-                self.tail = elem;
-            } else {
-                (*old_head).prev = elem;
-            }
-            (*elem).prev = std::ptr::null_mut();
-            self.head = elem;
-        }
-    }
-
-    /// True when the list has no elements.
-    fn is_empty(&self) -> bool {
-        self.head.is_null()
-    }
-
-    /// Removes `elem`, which must currently be linked into this list.
-    unsafe fn unlink(&mut self, elem: *mut SlabBlockHeader) {
-        unsafe { unlink_slab_block(Some(self), elem) }
-    }
-
-    /// Prints one line per block to stderr, from head to tail (tests only).
-    #[cfg(test)]
-    fn dump(&self) {
-        let mut cursor = self.head;
-
-        while let Some(block) = unsafe { cursor.as_ref() } {
-            eprintln!(
-                "  blk {:?} (free {}/{})",
-                cursor,
-                block.num_free_chunks.load(Ordering::Relaxed),
-                block.num_chunks
-            );
-            cursor = block.next;
-        }
-    }
-}
-
-impl SlabDesc {
-    /// Creates a slab for chunks of `layout`. It starts with empty block lists and both
-    /// counters at zero.
-    pub(crate) fn new(layout: &Layout) -> SlabDesc {
-        let layout = *layout;
-        Self {
-            layout,
-            block_lists: spin::RwLock::new(BlockLists::default()),
-            num_blocks: AtomicU64::new(0),
-            num_allocated: AtomicU64::new(0),
-        }
-    }
-}
-
-#[derive(Debug)]
-struct SlabBlockHeader { // bookkeeping stored at the start of every block that belongs to a slab
-    free_chunks_head: spin::Mutex<*mut FreeChunk>, // first free chunk in this block, or null if there is none
-    num_free_chunks: AtomicU32, // number of chunks currently on the free-chunk list
-    num_chunks: u32, // this is really a constant for a given Layout
-
-    // these fields are protected by the lock on the BlockLists
-    prev: *mut SlabBlockHeader,
-    next: *mut SlabBlockHeader,
-}
-
-struct FreeChunk { // link stored inside a chunk while the chunk is free
-    next: *mut FreeChunk, // next free chunk in the same block, or null at the end of the list
-}
-
-enum ReadOrWriteGuard<'a, T> { // lets one variable hold whichever kind of RwLock guard was taken
-    Read(spin::RwLockReadGuard<'a, T>),
-    Write(spin::RwLockWriteGuard<'a, T>),
-}
-
-// Shared access to the protected value works the same for both kinds of guard.
-impl<'a, T> Deref for ReadOrWriteGuard<'a, T> {
-    type Target = T;
-
-    fn deref(&self) -> &T {
-        match self {
-            Self::Read(guard) => &**guard,
-            Self::Write(guard) => &**guard,
-        }
-    }
-}
-
-impl SlabDesc {
-    pub fn alloc_chunk(&self, block_allocator: &BlockAllocator) -> *mut u8 { // returns a pointer to one free chunk
-        // Are there any free chunks in the blocks we already have?
-        let mut acquire_write = false; // the first pass takes the list lock in read mode only
-        'outer: loop {
-            let mut block_lists_guard = if acquire_write {
-                ReadOrWriteGuard::Write(self.block_lists.write())
-            } else {
-                ReadOrWriteGuard::Read(self.block_lists.read())
-            };
-            'inner: loop {
-                let block_ptr = block_lists_guard.nonfull_blocks.head;
-                if block_ptr.is_null() {
-                    break 'outer; // no non-full block left: fall through to allocating a new block
-                }
-                unsafe {
-                    let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-                    if !(*free_chunks_head).is_null() {
-                        let result = *free_chunks_head; // pop the first chunk off the block's free list
-                        (*free_chunks_head) = (*result).next;
-                        let _old = (*block_ptr).num_free_chunks.fetch_sub(1, Ordering::Relaxed);
-
-                        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-                        return result.cast();
-                    }
-                }
-
-                // The block at the head of the list was full. Grab write lock and retry
-                match block_lists_guard {
-                    ReadOrWriteGuard::Read(_) => {
-                        acquire_write = true; // start over; the next pass takes the write lock
-                        continue 'outer;
-                    }
-                    ReadOrWriteGuard::Write(ref mut g) => {
-                        // move the node to the list of full blocks
-                        unsafe {
-                            g.nonfull_blocks.unlink(block_ptr);
-                            g.full_blocks.push_head(block_ptr);
-                        };
-                        continue 'inner; // look at the new head of the non-full list
-                    }
-                }
-            }
-        }
-
-        // no free chunks. Allocate a new block (and the chunk from that)
-        let (new_block, new_chunk) = self.alloc_block_and_chunk(block_allocator);
-        self.num_blocks.fetch_add(1, Ordering::Relaxed);
-
-        // Add the block to the list in the SlabDesc
-        unsafe {
-            let mut block_lists_guard = self.block_lists.write();
-            block_lists_guard.nonfull_blocks.push_head(new_block);
-        }
-        self.num_allocated.fetch_add(1, Ordering::Relaxed);
-        new_chunk
-    }
-
-    pub fn dealloc_chunk(&self, chunk_ptr: *mut u8, _block_allocator: &BlockAllocator) { // puts the chunk back on its block's free list
-        // Find the block it belongs to. You can find the block from the address. (And knowing the
-        // layout, you could calculate the chunk number too.)
-        let block_ptr: *mut SlabBlockHeader = {
-            let block_addr = (chunk_ptr.addr() / BLOCK_SIZE) * BLOCK_SIZE; // round down to a multiple of BLOCK_SIZE
-            chunk_ptr.with_addr(block_addr).cast()
-        };
-        let chunk_ptr: *mut FreeChunk = chunk_ptr.cast(); // the freed chunk's memory now holds the free-list link
-
-        // Mark the chunk as free in 'freechunks' list
-        let num_chunks;
-        let num_free_chunks;
-        unsafe {
-            let mut free_chunks_head = (*block_ptr).free_chunks_head.lock();
-            (*chunk_ptr).next = *free_chunks_head; // push onto the front of the block's free list
-            *free_chunks_head = chunk_ptr;
-
-            num_free_chunks = (*block_ptr).num_free_chunks.fetch_add(1, Ordering::Relaxed) + 1; // count after this free
-            num_chunks = (*block_ptr).num_chunks;
-        }
-
-        if num_free_chunks == 1 {
-            // If the block was full previously, add it to the nonfull blocks list. Note that
-            // we're not holding the lock anymore, so it can immediately become full again.
-            // That's harmless, it will be moved back to the full list again when a call
-            // to alloc_chunk() sees it.
-            let mut block_lists = self.block_lists.write();
-            unsafe {
-                block_lists.unlink(block_ptr); // works whichever of the two lists the block is on
-                block_lists.nonfull_blocks.push_head(block_ptr);
-            };
-        } else if num_free_chunks == num_chunks {
-            // If the block became completely empty, move it to the free list
-            // TODO
-            // FIXME: we're still holding the spinlock. It's not exactly safe to return it to
-            // the free blocks list, is it? Defer it as garbage to wait out concurrent updates?
-            //block_allocator.release_block()
-        }
-
-        // update stats
-        self.num_allocated.fetch_sub(1, Ordering::Relaxed);
-    }
-
-    /// Carve a freshly allocated block into chunks. Returns the initialized block header and
-    /// the first chunk, which goes to the caller; the other chunks form the block's free list.
-    fn alloc_block_and_chunk(
-        &self,
-        block_allocator: &BlockAllocator,
-    ) -> (*mut SlabBlockHeader, *mut u8) {
-        // fixme: handle OOM
-        let block_slice: &mut [MaybeUninit<u8>] = block_allocator.alloc_block();
-        let (block_header, remain) = alloc_from_slice::<SlabBlockHeader>(block_slice);
-        let padding = remain.as_ptr().align_offset(self.layout.align());
-        let num_chunks = remain.len().saturating_sub(padding) / self.layout.size();
-        // Without this, `num_chunks - 1` below underflows when not even one chunk fits.
-        assert!(num_chunks > 0, "slab chunk does not fit in a block");
-        let first_chunk_ptr: *mut FreeChunk = remain[padding..].as_mut_ptr().cast();
-
-        unsafe {
-            let mut chunk_ptr = first_chunk_ptr;
-            for _ in 0..num_chunks - 1 {
-                let next_chunk_ptr = chunk_ptr.byte_add(self.layout.size());
-                (*chunk_ptr).next = next_chunk_ptr;
-                chunk_ptr = next_chunk_ptr;
-            }
-            (*chunk_ptr).next = std::ptr::null_mut();
-
-            let result_chunk = first_chunk_ptr;
-            let block_header = block_header.write(SlabBlockHeader {
-                free_chunks_head: spin::Mutex::new((*first_chunk_ptr).next),
-                prev: std::ptr::null_mut(),
-                next: std::ptr::null_mut(),
-                num_chunks: num_chunks as u32,
-                num_free_chunks: AtomicU32::new(num_chunks as u32 - 1),
-            });
-
-            (block_header, result_chunk.cast())
-        }
-    }
-
-    #[cfg(test)]
-    fn dump(&self) {
-        eprintln!(
-            "slab dump ({} blocks, {} allocated chunks)",
-            self.num_blocks.load(Ordering::Relaxed),
-            self.num_allocated.load(Ordering::Relaxed)
-        );
-        let lists = self.block_lists.read();
-
-        eprintln!("nonfull blocks:");
-        lists.nonfull_blocks.dump();
-        eprintln!("full blocks:");
-        lists.full_blocks.dump();
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    use rand::Rng;
-    use rand_distr::Zipf;
-
-    struct TestObject {
-        val: usize,
-        _dummy: [u8; BLOCK_SIZE / 4],
-    }
-
-    struct TestObjectSlab<'a>(SlabDesc, BlockAllocator<'a>);
-    impl<'a> TestObjectSlab<'a> {
-        fn new(block_allocator: BlockAllocator) -> TestObjectSlab {
-            TestObjectSlab(SlabDesc::new(&Layout::new::<TestObject>()), block_allocator)
-        }
-
-        fn alloc(&self, val: usize) -> *mut TestObject {
-            let obj: *mut TestObject = self.0.alloc_chunk(&self.1).cast();
-            unsafe { (*obj).val = val };
-            obj
-        }
-
-        fn dealloc(&self, obj: *mut TestObject) {
-            self.0.dealloc_chunk(obj.cast(), &self.1)
-        }
-    }
-
-    #[test]
-    fn test_slab_alloc() {
-        const MEM_SIZE: usize = 100000000;
-        let mut area = Box::new_uninit_slice(MEM_SIZE);
-        let block_allocator = BlockAllocator::new(&mut area);
-
-        let slab = TestObjectSlab::new(block_allocator);
-
-        let mut all: Vec<*mut TestObject> = Vec::new();
-        for i in 0..11 {
-            all.push(slab.alloc(i));
-        }
-        for i in 0..11 {
-            assert!(unsafe { (*all[i]).val == i });
-        }
-
-        let distribution = Zipf::new(10 as f64, 1.1).unwrap();
-        let mut rng = rand::rng();
-        for _ in 0..100000 {
-            slab.0.dump();
-            let idx = (rng.sample(distribution) as usize).into();
-            let ptr: *mut TestObject = all[idx];
-            if !ptr.is_null() {
-                assert_eq!(unsafe { (*ptr).val }, idx);
-                slab.dealloc(ptr);
-                all[idx] = std::ptr::null_mut();
-            } else {
-                all[idx] = slab.alloc(idx);
-            }
-        }
-    }
-
-    fn new_test_blk(i: u32) -> *mut SlabBlockHeader {
-        Box::into_raw(Box::new(SlabBlockHeader {
-            free_chunks_head: spin::Mutex::new(std::ptr::null_mut()),
-            num_free_chunks: AtomicU32::new(0),
-            num_chunks: i,
-            prev: std::ptr::null_mut(),
-            next: std::ptr::null_mut(),
-        }))
-    }
-
-    #[test]
-    fn test_block_linked_list() {
-        // note: these are leaked, but that's OK for tests
-        let a = new_test_blk(0);
-        let b = new_test_blk(1);
-
-        let mut list = BlockList::default();
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(a);
-            assert!(!list.is_empty());
-            list.unlink(a);
-        }
-        assert!(list.is_empty());
-
-        unsafe {
-            list.push_head(b);
-            list.push_head(a);
-            assert_eq!(list.head, a);
-            assert_eq!((*a).next, b);
-            assert_eq!((*b).prev, a);
-            assert_eq!(list.tail, b);
-
-            list.unlink(a);
-            list.unlink(b);
-            assert!(list.is_empty());
-        }
-    }
-}
--- a/libs/neonart/src/allocator/static.rs
+++ b/libs/neonart/src/allocator/static.rs
@@ -1,44 +0,0 @@
-use std::mem::MaybeUninit;
-
-/// Carve one uninitialized `T` off the front of `area`, skipping any padding needed for
-/// alignment. Returns the slot and the unused rest of `area`; panics if it doesn't fit.
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    // pad to satisfy alignment requirements
-    let padding = area.as_mut_ptr().align_offset(layout.align());
-    // Saturating: `align_offset` may return `usize::MAX`, which must not wrap past the check.
-    if padding.saturating_add(layout.size()) > area.len() {
-        panic!("out of memory");
-    }
-    let (result_area, remain) = area[padding..].split_at_mut(layout.size());
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { result_ptr.as_mut().unwrap() };
-
-    (result, remain)
-}
-
-/// Carve an array of `len` uninitialized `T`s off the front of `area`, skipping any padding
-/// needed for alignment. Returns the array and the unused rest; panics if it doesn't fit.
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-    // Checked: a wrapped product would pass the bounds check and yield an oversized slice.
-    let nbytes = layout.size().checked_mul(len).expect("out of memory");
-    // pad to satisfy alignment requirements
-    let padding = area.as_mut_ptr().align_offset(layout.align());
-    if padding.saturating_add(nbytes) > area.len() {
-        panic!("out of memory");
-    }
-    let (result_area, remain) = area[padding..].split_at_mut(nbytes);
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) };
-
-    (result, remain)
-}
--- a/libs/neonart/src/epoch.rs
+++ b/libs/neonart/src/epoch.rs
@@ -1,147 +0,0 @@
-//! This is similar to crossbeam_epoch crate, but works in shared memory
-
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use crossbeam_utils::CachePadded;
-use spin;
-
-const NUM_SLOTS: usize = 1000;
-
-/// This is the struct that is stored in shmem
-///
-/// bit 0: is it pinned or not?
-/// rest of the bits are the epoch counter.
-pub struct EpochShared {
-    global_epoch: AtomicU64,
-    participants: [CachePadded<AtomicU64>; NUM_SLOTS],
-
-    broadcast_lock: spin::Mutex<()>,
-}
-
-impl EpochShared {
-    pub fn new() -> EpochShared {
-        EpochShared {
-            global_epoch: AtomicU64::new(2),
-            participants: [const { CachePadded::new(AtomicU64::new(2)) }; NUM_SLOTS],
-            broadcast_lock: spin::Mutex::new(()),
-        }
-    }
-
-    pub fn register(&self) -> LocalHandle {
-        LocalHandle {
-            global: self,
-            last_slot: AtomicUsize::new(0), // todo: choose more intelligently
-        }
-    }
-
-    fn release_pin(&self, slot: usize, _epoch: u64) {
-        let global_epoch = self.global_epoch.load(Ordering::Relaxed);
-        self.participants[slot].store(global_epoch, Ordering::Relaxed);
-    }
-
-    fn pin_internal(&self, slot_hint: usize) -> (usize, u64) {
-        // pick a slot
-        let mut slot = slot_hint;
-        let epoch = loop {
-            let old = self.participants[slot].fetch_or(1, Ordering::Relaxed);
-            if old & 1 == 0 {
-                // Got this slot
-                break old;
-            }
-
-            // the slot was busy by another thread / process. try a different slot
-            slot += 1;
-            if slot == NUM_SLOTS {
-                slot = 0;
-            }
-            continue;
-        };
-        (slot, epoch)
-    }
-
-    pub(crate) fn advance(&self) -> u64 {
-        // Advance the global epoch. The step is 2 because bit 0 is the "pinned" flag.
-        let old_epoch = self.global_epoch.fetch_add(2, Ordering::Relaxed);
-        let new_epoch = old_epoch + 2;
-
-        // Anyone that releases their pin after this will store the new epoch in their slot.
-        new_epoch
-    }
-
-    pub(crate) fn broadcast(&self) {
-        let Some(_guard) = self.broadcast_lock.try_lock() else {
-            return;
-        };
-
-        let epoch = self.global_epoch.load(Ordering::Relaxed);
-        let old_epoch = epoch.wrapping_sub(2);
-
-        // Update all free slots.
-        for i in 0..NUM_SLOTS {
-            // TODO: check result, as a sanity check. It should either be the old epoch, or pinned
-            let _ = self.participants[i].compare_exchange(
-                old_epoch,
-                epoch,
-                Ordering::Relaxed,
-                Ordering::Relaxed,
-            );
-        }
-
-        // FIXME: memory fence here, since we used Relaxed?
-    }
-
-    pub(crate) fn get_oldest(&self) -> u64 {
-        // Read all slots.
-        let now = self.global_epoch.load(Ordering::Relaxed);
-        let mut oldest = now;
-        for i in 0..NUM_SLOTS {
-            let this_epoch = self.participants[i].load(Ordering::Relaxed);
-            let delta = now.wrapping_sub(this_epoch);
-            if delta > u64::MAX / 2 {
-                // this is very recent
-            } else {
-                if delta > now.wrapping_sub(oldest) {
-                    oldest = this_epoch;
-                }
-            }
-        }
-        oldest
-    }
-
-    pub(crate) fn get_current(&self) -> u64 {
-        self.global_epoch.load(Ordering::Relaxed)
-    }
-}
-
-pub(crate) struct EpochPin<'e> {
-    slot: usize,
-    pub(crate) epoch: u64,
-
-    handle: &'e LocalHandle<'e>,
-}
-
-impl<'e> Drop for EpochPin<'e> {
-    fn drop(&mut self) {
-        self.handle.global.release_pin(self.slot, self.epoch);
-    }
-}
-
-pub struct LocalHandle<'g> {
-    global: &'g EpochShared,
-
-    last_slot: AtomicUsize,
-}
-
-impl<'g> LocalHandle<'g> {
-    pub fn pin(&self) -> EpochPin {
-        let (slot, epoch) = self
-            .global
-            .pin_internal(self.last_slot.load(Ordering::Relaxed));
-        self.last_slot.store(slot, Ordering::Relaxed);
-        EpochPin {
-            handle: self,
-            epoch,
-            slot,
-        }
-    }
-}
--- a/libs/neonart/src/lib.rs
+++ b/libs/neonart/src/lib.rs
@@ -1,587 +0,0 @@
-//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling.
-//!
-//! The data structure is described in these two papers:
-//!
-//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013).
-//!     The adaptive radix tree: ARTful indexing for main-memory databases.
-//!     Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812.
-//!     https://db.in.tum.de/~leis/papers/ART.pdf
-//!
-//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016).
-//!     The ART of practical synchronization.
-//!     1-8. 10.1145/2933349.2933352.
-//!     https://db.in.tum.de/~leis/papers/artsync.pdf
-//!
-//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we
-//! use.
-//!
-//! The papers mention a few different variants. We have made the following choices in this
-//! implementation:
-//!
-//! - All keys have the same length
-//!
-//! - Single-value leaves.
-//!
-//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a
-//!   variable length "prefix", which stores the keys of all the one-way nodes which have been
-//!   removed. However, similar to the "hybrid" approach described in the paper, each node only has
-//!   space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we
-//!   create one-way nodes to store them. (There was no particular reason for this choice,
-//!   the "hybrid" approach described in the paper might be better.)
-//!
-//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method,
-//!   ROWEX, which generally performs better when there is contention, but that is not important
-//!   for us, and Optimistic Lock Coupling is simpler to implement.
-//!
-//! ## Requirements
-//!
-//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache
-//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique
-//! requirements, which is why we had to write our own. Namely:
-//!
-//! - The data structure has to live in a fixed-size shared memory segment. That rules out any
-//!   built-in Rust collections and most crates. (Except possibly with the 'allocator_api' rust
-//!   feature, which is still nightly-only and experimental as of this writing).
-//!
-//! - The data structure is accessed from multiple processes. Only one process updates the data
-//!   structure, but other processes perform reads. That rules out using built-in Rust locking
-//!   primitives like Mutex and RwLock, and most crates too.
-//!
-//! - Within the one process with write-access, multiple threads can perform updates concurrently.
-//!   That rules out using PostgreSQL LWLocks for the locking.
-//!
-//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been
-//! written with that usage and the above constraints in mind. Some noteworthy assumptions:
-//!
-//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level
-//!   locking in the PostgreSQL buffer manager, which ensures that two backends should not try to
-//!   read / write the same page at the same time. (Prefetching can conflict with actual reads,
-//!   however.)
-//!
-//!  - The keys in the integrated cache are 17 bytes long.
-//!
-//! ## Usage
-//!
-//! Because this is designed to be used as a Postgres shared memory data structure, initialization
-//! happens in three stages:
-//!
-//! 0. A fixed area of shared memory is allocated at postmaster startup.
-//!
-//! 1. TreeInitStruct::new() is called to initialize it, still in Postmaster process, before any
-//!    other process or thread is running. It returns a TreeInitStruct, which is inherited by all
-//!    the processes through fork().
-//!
-//! 2. One process may have write-access to the struct, by calling
-//!    [TreeInitStruct::attach_writer]. (That process is the communicator process.)
-//!
-//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader]
-//!
-//! "Write access" means that you can insert / update / delete values in the tree.
-//!
-//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new
-//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data
-//! structure stays consistent, but if the Value has interior mutability, like atomic fields,
-//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a
-//! problem, the version check could be passed up to the caller, so that the caller could detect the
-//! lost updates and retry the operation.
-//!
-//! ## Implementation
-//!
-//! node_ptr: Provides low-level implementations of the four different node types (eight actually,
-//! since there is an Internal and Leaf variant of each)
-//!
-//! lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each
-//! node.
-//!
-//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe
-//!   abstractions on top.
-//!
-//! algorithm.rs: Contains the functions to implement lookups and updates in the tree
-//!
-//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our
-//!   own abstraction for that because we need the data structure to live in a pre-allocated shared
-//!   memory segment).
-//!
-//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not
-//!   immediately deallocated, but stays around for as long as concurrent readers might still have
-//!   pointers to them. This is enforced by an epoch system. This is similar to
-//!   e.g. crossbeam_epoch, but we couldn't use that either because it has to work across processes
-//!   communicating over the shared memory segment.
-//!
-//! ## See also
-//!
-//! There are some existing Rust ART implementations out there, but none of them filled all
-//! the requirements:
-//!
-//! - https://github.com/XiangpengHao/congee
-//! - https://github.com/declanvk/blart
-//!
-//! ## TODO
-//!
-//! - Removing values has not been implemented
-
-mod algorithm;
-pub mod allocator;
-mod epoch;
-
-use algorithm::RootPtr;
-use algorithm::node_ptr::NodePtr;
-
-use std::collections::VecDeque;
-use std::fmt::Debug;
-use std::marker::PhantomData;
-use std::ptr::NonNull;
-use std::sync::atomic::{AtomicBool, Ordering};
-
-use crate::epoch::EpochPin;
-
-#[cfg(test)]
-mod tests;
-
-use allocator::ArtAllocator;
-pub use allocator::ArtMultiSlabAllocator;
-pub use allocator::OutOfMemoryError;
-
-/// Fixed-length key type.
-///
-pub trait Key: Debug {
-    const KEY_LEN: usize;
-
-    fn as_bytes(&self) -> &[u8];
-}
-
-/// Values stored in the tree
-///
-/// This is a marker trait with no required methods or bounds. Note that values can be moved to a
-/// new node when a node "grows"; see the NOTE about interior mutability in the module docs.
-// (An earlier version required `Clone`; that is no longer the case.)
-pub trait Value {}
-
-const MAX_GARBAGE: usize = 1024;
-
-/// The root of the tree, plus other tree-wide data. This is stored in the shared memory.
-pub struct Tree<V: Value> {
-    /// For simplicity, so that we never need to grow or shrink the root, the root node is always an
-    /// Internal256 node. Also, it never has a prefix (that's actually a bit wasteful, incurring one
-    /// indirection to every lookup)
-    root: RootPtr<V>,
-
-    writer_attached: AtomicBool,
-
-    epoch: epoch::EpochShared,
-}
-
-unsafe impl<V: Value + Sync> Sync for Tree<V> {}
-unsafe impl<V: Value + Send> Send for Tree<V> {}
-
-struct GarbageQueue<V>(VecDeque<(NodePtr<V>, u64)>);
-
-unsafe impl<V: Value + Sync> Sync for GarbageQueue<V> {}
-unsafe impl<V: Value + Send> Send for GarbageQueue<V> {}
-
-impl<V> GarbageQueue<V> {
-    fn new() -> GarbageQueue<V> {
-        GarbageQueue(VecDeque::with_capacity(MAX_GARBAGE))
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>, epoch: u64) {
-        self.0.push_front((ptr, epoch));
-    }
-
-    fn next_obsolete(&mut self, cutoff_epoch: u64) -> Option<NodePtr<V>> {
-        if let Some(back) = self.0.back() {
-            if back.1 < cutoff_epoch {
-                return Some(self.0.pop_back().unwrap().0);
-            }
-        }
-        None
-    }
-}
-
-/// Struct created at postmaster startup
-pub struct TreeInitStruct<'t, K: Key, V: Value, A: ArtAllocator<V>> {
-    tree: &'t Tree<V>,
-
-    allocator: &'t A,
-
-    phantom_key: PhantomData<K>,
-}
-
-/// The worker process has a reference to this. The write operations are only safe
-/// from the worker process
-pub struct TreeWriteAccess<'t, K: Key, V: Value, A: ArtAllocator<V>>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    pub allocator: &'t A,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-
-    /// Obsolete nodes that cannot be recycled until their epoch expires.
-    garbage: spin::Mutex<GarbageQueue<V>>,
-}
-
-/// The backends have a reference to this. It cannot be used to modify the tree
-pub struct TreeReadAccess<'t, K: Key, V: Value>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'t Tree<V>,
-
-    epoch_handle: epoch::LocalHandle<'t>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<'a, 't: 'a, K: Key, V: Value, A: ArtAllocator<V>> TreeInitStruct<'t, K, V, A> {
-    pub fn new(allocator: &'t A) -> TreeInitStruct<'t, K, V, A> {
-        let tree_ptr = allocator.alloc_tree();
-        let tree_ptr = NonNull::new(tree_ptr).expect("out of memory");
-        let init = Tree {
-            root: algorithm::new_root(allocator).expect("out of memory"),
-            writer_attached: AtomicBool::new(false),
-            epoch: epoch::EpochShared::new(),
-        };
-        unsafe { tree_ptr.write(init) };
-
-        TreeInitStruct {
-            tree: unsafe { tree_ptr.as_ref() },
-            allocator,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V, A> {
-        let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
-        if previously_attached {
-            panic!("writer already attached");
-        }
-        TreeWriteAccess {
-            tree: self.tree,
-            allocator: self.allocator,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-            garbage: spin::Mutex::new(GarbageQueue::new()),
-        }
-    }
-
-    pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
-        TreeReadAccess {
-            tree: self.tree,
-            phantom_key: PhantomData,
-            epoch_handle: self.tree.epoch.register(),
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteAccess<'t, K, V, A> {
-    pub fn start_write<'g>(&'t self) -> TreeWriteGuard<'g, K, V, A>
-    where
-        't: 'g,
-    {
-        TreeWriteGuard {
-            tree_writer: self,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-            created_garbage: false,
-        }
-    }
-
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: &self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-impl<'t, K: Key, V: Value> TreeReadAccess<'t, K, V> {
-    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
-        TreeReadGuard {
-            tree: &self.tree,
-            epoch_pin: self.epoch_handle.pin(),
-            phantom_key: PhantomData,
-        }
-    }
-}
-
-pub struct TreeReadGuard<'e, K, V>
-where
-    K: Key,
-    V: Value,
-{
-    tree: &'e Tree<V>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-}
-
-impl<'e, K: Key, V: Value> TreeReadGuard<'e, K, V> {
-    pub fn get(&'e self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree.root, &self.epoch_pin)
-    }
-}
-
-pub struct TreeWriteGuard<'e, K, V, A>
-where
-    K: Key,
-    V: Value,
-    A: ArtAllocator<V>,
-{
-    tree_writer: &'e TreeWriteAccess<'e, K, V, A>,
-
-    epoch_pin: EpochPin<'e>,
-    phantom_key: PhantomData<K>,
-
-    created_garbage: bool,
-}
-
-pub enum UpdateAction<V> {
-    Nothing,
-    Insert(V),
-    Remove,
-}
-
-impl<'e, K: Key, V: Value, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    /// Get a value
-    pub fn get(&'e mut self, key: &K) -> Option<&'e V> {
-        algorithm::search(key, self.tree_writer.tree.root, &self.epoch_pin)
-    }
-
-    /// Insert a value
-    pub fn insert(self, key: &K, value: V) -> Result<bool, OutOfMemoryError> {
-        let mut success = None;
-
-        self.update_with_fn(key, |existing| {
-            if let Some(_) = existing {
-                success = Some(false);
-                UpdateAction::Nothing
-            } else {
-                success = Some(true);
-                UpdateAction::Insert(value)
-            }
-        })?;
-        Ok(success.expect("value_fn not called"))
-    }
-
-    /// Remove value. Returns true if it existed
-    pub fn remove(self, key: &K) -> bool {
-        let mut result = false;
-        // FIXME: It's not clear if OOM is expected while removing. It seems
-        // not nice, but shrinking a node can OOM. Then again, we could opt
-        // to not shrink a node if we cannot allocate, to live a little longer.
-        self.update_with_fn(key, |existing| match existing {
-            Some(_) => {
-                result = true;
-                UpdateAction::Remove
-            }
-            None => UpdateAction::Nothing,
-        })
-        .expect("out of memory while removing");
-        result
-    }
-
-    /// Remove `key`, returning a clone of its value; like `remove`, a missing key is a no-op.
-    pub fn remove_and_return(self, key: &K) -> Option<V>
-    where
-        V: Clone,
-    {
-        let mut old = None;
-        self.update_with_fn(key, |existing| {
-            old = existing.cloned();
-            existing.map_or(UpdateAction::Nothing, |_| UpdateAction::Remove)
-        })
-        .expect("out of memory while removing");
-        old
-    }
-
-    /// Update key using the given function. All the other modifying operations are based on this.
-    ///
-    /// The function is passed a reference to the existing value, if any, and returns an
-    /// [`UpdateAction`] telling what to do with the key: `Remove` removes the value, `Insert`
-    /// stores a new value, and `Nothing` presumably leaves the entry untouched.
-    /// NOTE(review): the actions are applied by `algorithm::update_fn`; confirm details there.
-    pub fn update_with_fn<F>(mut self, key: &K, value_fn: F) -> Result<(), OutOfMemoryError>
-    where
-        F: FnOnce(Option<&V>) -> UpdateAction<V>,
-    {
-        algorithm::update_fn(key, value_fn, self.tree_writer.tree.root, &mut self)?;
-
-        if self.created_garbage {
-            let _ = self.collect_garbage();
-        }
-        Ok(())
-    }
-
-    fn remember_obsolete_node(&mut self, ptr: NodePtr<V>) {
-        self.tree_writer
-            .garbage
-            .lock()
-            .remember_obsolete_node(ptr, self.epoch_pin.epoch);
-        self.created_garbage = true;
-    }
-
-    // returns number of nodes recycled
-    fn collect_garbage(&self) -> usize {
-        self.tree_writer.tree.epoch.advance();
-        self.tree_writer.tree.epoch.broadcast();
-
-        let cutoff_epoch = self.tree_writer.tree.epoch.get_oldest();
-
-        let mut result = 0;
-        let mut garbage_queue = self.tree_writer.garbage.lock();
-        while let Some(ptr) = garbage_queue.next_obsolete(cutoff_epoch) {
-            ptr.deallocate(self.tree_writer.allocator);
-            result += 1;
-        }
-        result
-    }
-}
-
-pub struct TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    done: bool,
-    pub next_key: Vec<u8>,
-    max_key: Option<Vec<u8>>,
-
-    phantom_key: PhantomData<K>,
-}
-
-impl<K> TreeIterator<K>
-where
-    K: Key + for<'a> From<&'a [u8]>,
-{
-    pub fn new_wrapping() -> TreeIterator<K> {
-        let mut next_key = Vec::new();
-        next_key.resize(K::KEY_LEN, 0);
-        TreeIterator {
-            done: false,
-            next_key,
-            max_key: None,
-            phantom_key: PhantomData,
-        }
-    }
-
-    pub fn new(range: &std::ops::Range<K>) -> TreeIterator<K> {
-        let result = TreeIterator {
-            done: false,
-            next_key: Vec::from(range.start.as_bytes()),
-            max_key: Some(Vec::from(range.end.as_bytes())),
-            phantom_key: PhantomData,
-        };
-        assert_eq!(result.next_key.len(), K::KEY_LEN);
-        assert_eq!(result.max_key.as_ref().unwrap().len(), K::KEY_LEN);
-
-        result
-    }
-
-    pub fn next<'g, V>(&mut self, read_guard: &'g TreeReadGuard<'g, K, V>) -> Option<(K, &'g V)>
-    where
-        V: Value,
-    {
-        if self.done {
-            return None;
-        }
-
-        let mut wrapped_around = false;
-        loop {
-            assert_eq!(self.next_key.len(), K::KEY_LEN);
-            if let Some((k, v)) = algorithm::iter_next(
-                &mut self.next_key,
-                read_guard.tree.root,
-                &read_guard.epoch_pin,
-            ) {
-                assert_eq!(k.len(), K::KEY_LEN);
-                assert_eq!(self.next_key.len(), K::KEY_LEN);
-
-                // Check if we reached the end of the range
-                if let Some(max_key) = &self.max_key {
-                    if k.as_slice() >= max_key.as_slice() {
-                        self.done = true;
-                        break None;
-                    }
-                }
-
-                // increment the key
-                self.next_key = k.clone();
-                increment_key(self.next_key.as_mut_slice());
-                let k = k.as_slice().into();
-
-                break Some((k, v));
-            } else {
-                if self.max_key.is_some() {
-                    self.done = true;
-                } else {
-                    // Start from beginning
-                    if !wrapped_around {
-                        for i in 0..K::KEY_LEN {
-                            self.next_key[i] = 0;
-                        }
-                        wrapped_around = true;
-                        continue;
-                    } else {
-                        // The tree is completely empty
-                        // FIXME: perhaps we should remember the starting point instead.
-                        // Currently this will scan some ranges twice.
-                        break None;
-                    }
-                }
-                break None;
-            }
-        }
-    }
-}
-
-fn increment_key(key: &mut [u8]) -> bool {
-    for i in (0..key.len()).rev() {
-        let (byte, overflow) = key[i].overflowing_add(1);
-        key[i] = byte;
-        if !overflow {
-            return false;
-        }
-    }
-    true
-}
-
-// Debugging functions
-impl<'e, K: Key, V: Value + Debug, A: ArtAllocator<V>> TreeWriteGuard<'e, K, V, A> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree_writer.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value + Debug> TreeReadGuard<'e, K, V> {
-    pub fn dump(&mut self, dst: &mut dyn std::io::Write) {
-        algorithm::dump_tree(self.tree.root, &self.epoch_pin, dst)
-    }
-}
-impl<'e, K: Key, V: Value> TreeWriteAccess<'e, K, V, ArtMultiSlabAllocator<'e, V>> {
-    /// Snapshot of the allocator, epoch and garbage-queue counters, for monitoring.
-    pub fn get_statistics(&self) -> ArtTreeStatistics {
-        ArtTreeStatistics {
-            blocks: self.allocator.inner.block_allocator.get_statistics(),
-            slabs: self.allocator.get_statistics(),
-            epoch: self.tree.epoch.get_current(),
-            oldest_epoch: self.tree.epoch.get_oldest(),
-            num_garbage: self.garbage.lock().0.len() as u64,
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct ArtTreeStatistics {
-    pub blocks: allocator::block::BlockAllocatorStats,
-    pub slabs: allocator::ArtMultiSlabStats,
-
-    pub epoch: u64,
-    pub oldest_epoch: u64,
-    pub num_garbage: u64,
-}
--- a/libs/neonart/src/tests.rs
+++ b/libs/neonart/src/tests.rs
@@ -1,243 +0,0 @@
-use std::collections::BTreeMap;
-use std::collections::HashSet;
-use std::fmt::{Debug, Formatter};
-use std::sync::atomic::{AtomicUsize, Ordering};
-
-use crate::ArtAllocator;
-use crate::ArtMultiSlabAllocator;
-use crate::TreeInitStruct;
-use crate::TreeIterator;
-use crate::TreeWriteAccess;
-use crate::UpdateAction;
-
-use crate::{Key, Value};
-
-use rand::Rng;
-use rand::seq::SliceRandom;
-use rand_distr::Zipf;
-
-const TEST_KEY_LEN: usize = 16;
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-struct TestKey([u8; TEST_KEY_LEN]);
-
-impl TestKey {
-    const MIN: TestKey = TestKey([0; TEST_KEY_LEN]);
-    const MAX: TestKey = TestKey([u8::MAX; TEST_KEY_LEN]);
-}
-
-impl Key for TestKey {
-    const KEY_LEN: usize = TEST_KEY_LEN;
-    fn as_bytes(&self) -> &[u8] {
-        &self.0
-    }
-}
-
-impl From<&TestKey> for u128 {
-    fn from(val: &TestKey) -> u128 {
-        u128::from_be_bytes(val.0)
-    }
-}
-
-impl From<u128> for TestKey {
-    fn from(val: u128) -> TestKey {
-        TestKey(val.to_be_bytes())
-    }
-}
-
-impl<'a> From<&'a [u8]> for TestKey {
-    fn from(bytes: &'a [u8]) -> TestKey {
-        TestKey(bytes.try_into().unwrap())
-    }
-}
-
-impl Value for usize {}
-
-fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, usize, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    for (idx, k) in keys.iter().enumerate() {
-        let w = tree_writer.start_write();
-        let res = w.insert(&(*k).into(), idx);
-        assert!(res.is_ok());
-    }
-
-    for (idx, k) in keys.iter().enumerate() {
-        let r = tree_writer.start_read();
-        let value = r.get(&(*k).into());
-        assert_eq!(value, Some(idx).as_ref());
-    }
-
-    eprintln!("stats: {:?}", tree_writer.get_statistics());
-}
-
-#[test]
-fn dense() {
-    // This exercises splitting a node with prefix
-    let keys: &[u128] = &[0, 1, 2, 3, 256];
-    test_inserts(keys);
-
-    // Dense keys
-    let mut keys: Vec<u128> = (0..10000).collect();
-    test_inserts(&keys);
-
-    // Do the same in random orders
-    for _ in 1..10 {
-        keys.shuffle(&mut rand::rng());
-        test_inserts(&keys);
-    }
-}
-
-#[test]
-fn sparse() {
-    // sparse keys
-    let mut keys: Vec<TestKey> = Vec::new();
-    let mut used_keys = HashSet::new();
-    for _ in 0..10000 {
-        loop {
-            let key = rand::random::<u128>();
-            if used_keys.get(&key).is_some() {
-                continue;
-            }
-            used_keys.insert(key);
-            keys.push(key.into());
-            break;
-        }
-    }
-    test_inserts(&keys);
-}
-
-struct TestValue(AtomicUsize);
-
-impl TestValue {
-    fn new(val: usize) -> TestValue {
-        TestValue(AtomicUsize::new(val))
-    }
-
-    fn load(&self) -> usize {
-        self.0.load(Ordering::Relaxed)
-    }
-}
-
-impl Value for TestValue {}
-
-impl Clone for TestValue {
-    fn clone(&self) -> TestValue {
-        TestValue::new(self.load())
-    }
-}
-
-impl Debug for TestValue {
-    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", self.load())
-    }
-}
-
-#[derive(Clone, Debug)]
-struct TestOp(TestKey, Option<usize>);
-
-fn apply_op<A: ArtAllocator<TestValue>>(
-    op: &TestOp,
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &mut BTreeMap<TestKey, usize>,
-) {
-    eprintln!("applying op: {op:?}");
-
-    // apply the change to the shadow tree first
-    let shadow_existing = if let Some(v) = op.1 {
-        shadow.insert(op.0, v)
-    } else {
-        shadow.remove(&op.0)
-    };
-
-    // apply to Art tree
-    let w = tree.start_write();
-    w.update_with_fn(&op.0, |existing| {
-        assert_eq!(existing.map(TestValue::load), shadow_existing);
-
-        match (existing, op.1) {
-            (None, None) => UpdateAction::Nothing,
-            (None, Some(new_val)) => UpdateAction::Insert(TestValue::new(new_val)),
-            (Some(_old_val), None) => UpdateAction::Remove,
-            (Some(old_val), Some(new_val)) => {
-                old_val.0.store(new_val, Ordering::Relaxed);
-                UpdateAction::Nothing
-            }
-        }
-    })
-    .expect("out of memory");
-}
-
-fn test_iter<A: ArtAllocator<TestValue>>(
-    tree: &TreeWriteAccess<TestKey, TestValue, A>,
-    shadow: &BTreeMap<TestKey, usize>,
-) {
-    let mut shadow_iter = shadow.iter();
-    let mut iter = TreeIterator::new(&(TestKey::MIN..TestKey::MAX));
-
-    loop {
-        let shadow_item = shadow_iter.next().map(|(k, v)| (k.clone(), v.clone()));
-        let r = tree.start_read();
-        let item = iter.next(&r);
-
-        if shadow_item != item.map(|(k, v)| (k, v.load())) {
-            eprintln!(
-                "FAIL: iterator returned {:?}, expected {:?}",
-                item, shadow_item
-            );
-            tree.start_read().dump(&mut std::io::stderr());
-
-            eprintln!("SHADOW:");
-            let mut si = shadow.iter();
-            while let Some(si) = si.next() {
-                eprintln!("key: {:?}, val: {}", si.0, si.1);
-            }
-            panic!(
-                "FAIL: iterator returned {:?}, expected {:?}",
-                item, shadow_item
-            );
-        }
-        if item.is_none() {
-            break;
-        }
-    }
-}
-
-#[test]
-fn random_ops() {
-    const MEM_SIZE: usize = 10000000;
-    let mut area = Box::new_uninit_slice(MEM_SIZE);
-
-    let allocator = ArtMultiSlabAllocator::new(&mut area);
-
-    let init_struct = TreeInitStruct::<TestKey, TestValue, _>::new(allocator);
-    let tree_writer = init_struct.attach_writer();
-
-    let mut shadow: std::collections::BTreeMap<TestKey, usize> = BTreeMap::new();
-
-    let distribution = Zipf::new(u128::MAX as f64, 1.1).unwrap();
-    let mut rng = rand::rng();
-    for i in 0..100000 {
-        let mut key: TestKey = (rng.sample(distribution) as u128).into();
-
-        if rng.random_bool(0.10) {
-            key = TestKey::from(u128::from(&key) | 0xffffffff);
-        }
-
-        let op = TestOp(key, if rng.random_bool(0.75) { Some(i) } else { None });
-
-        apply_op(&op, &tree_writer, &mut shadow);
-
-        if i % 1000 == 0 {
-            eprintln!("{i} ops processed");
-            eprintln!("stats: {:?}", tree_writer.get_statistics());
-            test_iter(&tree_writer, &shadow);
-        }
-    }
-}
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -34,8 +34,6 @@ pub struct NodeMetadata {
    pub postgres_host: String,
    #[serde(rename = "port")]
    pub postgres_port: u16,
-    pub grpc_host: Option<String>,
-    pub grpc_port: Option<u16>,
    pub http_host: String,
    pub http_port: u16,
    pub https_port: Option<u16>,
--- a/libs/pageserver_api/src/config/tests.rs
+++ b/libs/pageserver_api/src/config/tests.rs
@@ -14,8 +14,6 @@ fn test_node_metadata_v1_backward_compatibilty() {
        NodeMetadata {
            postgres_host: "localhost".to_string(),
            postgres_port: 23,
-            grpc_host: None,
-            grpc_port: None,
            http_host: "localhost".to_string(),
            http_port: 42,
            https_port: None,
@@ -39,35 +37,6 @@ fn test_node_metadata_v2_backward_compatibilty() {
        NodeMetadata {
            postgres_host: "localhost".to_string(),
            postgres_port: 23,
-            grpc_host: None,
-            grpc_port: None,
-            http_host: "localhost".to_string(),
-            http_port: 42,
-            https_port: Some(123),
-            other: HashMap::new(),
-        }
-    )
-}
-
-#[test]
-fn test_node_metadata_v3_backward_compatibilty() {
-    let v3 = serde_json::to_vec(&serde_json::json!({
-        "host": "localhost",
-        "port": 23,
-        "grpc_host": "localhost",
-        "grpc_port": 51,
-        "http_host": "localhost",
-        "http_port": 42,
-        "https_port": 123,
-    }));
-
-    assert_eq!(
-        serde_json::from_slice::<NodeMetadata>(&v3.unwrap()).unwrap(),
-        NodeMetadata {
-            postgres_host: "localhost".to_string(),
-            postgres_port: 23,
-            grpc_host: Some("localhost".to_string()),
-            grpc_port: Some(51),
            http_host: "localhost".to_string(),
            http_port: 42,
            https_port: Some(123),
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -53,9 +53,6 @@ pub struct NodeRegisterRequest {
    pub listen_pg_addr: String,
    pub listen_pg_port: u16,

-    pub listen_grpc_addr: Option<String>,
-    pub listen_grpc_port: Option<u16>,
-
    pub listen_http_addr: String,
    pub listen_http_port: u16,
    pub listen_https_port: Option<u16>,
@@ -105,9 +102,6 @@ pub struct TenantLocateResponseShard {
    pub listen_pg_addr: String,
    pub listen_pg_port: u16,

-    pub listen_grpc_addr: Option<String>,
-    pub listen_grpc_port: Option<u16>,
-
    pub listen_http_addr: String,
    pub listen_http_port: u16,
    pub listen_https_port: Option<u16>,
@@ -158,8 +152,6 @@ pub struct NodeDescribeResponse {

    pub listen_pg_addr: String,
    pub listen_pg_port: u16,
-    pub listen_grpc_addr: Option<String>,
-    pub listen_grpc_port: Option<u16>,
 }

 #[derive(Serialize, Deserialize, Debug)]
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -51,7 +51,6 @@ pageserver_api.workspace = true
 pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
 pageserver_page_api.workspace = true
-peekable.workspace = true
 pem.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
@@ -63,7 +62,6 @@ postgres-types.workspace = true
 posthog_client_lite.workspace = true
 pprof.workspace = true
 pq_proto.workspace = true
-prost.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
--- a/pageserver/client_grpc/Cargo.toml
+++ b/pageserver/client_grpc/Cargo.toml
@@ -1,30 +0,0 @@
-[package]
-name = "pageserver_client_grpc"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-bytes.workspace = true
-futures.workspace = true
-http.workspace = true
-thiserror.workspace = true
-tonic.workspace = true
-tracing.workspace = true
-tokio = { version = "1.43.1", features = ["full", "macros", "net", "io-util", "rt", "rt-multi-thread"] }
-uuid = { version = "1", features = ["v4"] }
-tower = {  version = "0.4", features = ["timeout", "util"] }
-rand = "0.8"
-tokio-util = { version = "0.7", features = ["compat"] }
-hyper-util = "0.1.9"
-hyper = "1.6.0"
-metrics.workspace = true
-priority-queue = "2.3.1"
-async-trait = { version = "0.1" }
-tokio-stream = "0.1"
-dashmap = "5"
-chrono = { version = "0.4", features = ["serde"] }
-
-
-pageserver_page_api.workspace = true
-pageserver_api.workspace = true
-utils.workspace = true
--- a/pageserver/client_grpc/examples/load_test.rs
+++ b/pageserver/client_grpc/examples/load_test.rs
@@ -1,296 +0,0 @@
-// examples/load_test.rs, generated by AI
-
-use std::collections::{HashMap, HashSet};
-use std::sync::{
-    Arc,
-    Mutex,
-    atomic::{AtomicU64, AtomicUsize, Ordering},
-};
-use std::time::{Duration, Instant};
-
-use tokio::task;
-use tokio::time::sleep;
-use rand::Rng;
-use tonic::Status;
-use uuid::Uuid;
-
-// Pull in your ConnectionPool and PooledItemFactory from the pageserver_client_grpc crate.
-// Adjust these paths if necessary.
-use pageserver_client_grpc::client_cache::ConnectionPool;
-use pageserver_client_grpc::client_cache::PooledItemFactory;
-
-// --------------------------------------
-// GLOBAL COUNTERS FOR “CREATED” / “DROPPED” MockConnections
-// --------------------------------------
-static CREATED: AtomicU64 = AtomicU64::new(0);
-static DROPPED: AtomicU64 = AtomicU64::new(0);
-
-// --------------------------------------
-// MockConnection + Factory
-// --------------------------------------
-
-#[derive(Debug)]
-pub struct MockConnection {
-    pub id: u64,
-}
-
-impl Clone for MockConnection {
-    fn clone(&self) -> Self {
-        // Every clone is a separate value whose Drop bumps DROPPED, so CREATED is
-        // bumped here too; otherwise the final CREATED == DROPPED leak check cannot balance.
-        CREATED.fetch_add(1, Ordering::Relaxed);
-        MockConnection { id: self.id }
-    }
-}
-
-impl Drop for MockConnection {
-    fn drop(&mut self) {
-        // When a MockConnection actually gets dropped, bump the counter.
-        DROPPED.fetch_add(1, Ordering::SeqCst);
-    }
-}
-
-pub struct MockConnectionFactory {
-    counter: AtomicU64,
-}
-
-impl MockConnectionFactory {
-    pub fn new() -> Self {
-        MockConnectionFactory {
-            counter: AtomicU64::new(1),
-        }
-    }
-}
-
-#[async_trait::async_trait]
-impl PooledItemFactory<MockConnection> for MockConnectionFactory {
-    /// The trait on ConnectionPool expects:
-    ///   async fn create(&self, timeout: Duration)
-    ///       -> Result<Result<MockConnection, Status>, tokio::time::error::Elapsed>;
-    ///
-    /// On success: Ok(Ok(MockConnection))
-    /// On a simulated “gRPC” failure: Ok(Err(Status::…))
-    /// If creation times out: Err(tokio::time::error::Elapsed)
-    async fn create(
-        &self,
-        _timeout: Duration,
-    ) -> Result<Result<MockConnection, Status>, tokio::time::error::Elapsed> {
-        // Simulate connection creation immediately succeeding.
-        CREATED.fetch_add(1, Ordering::SeqCst);
-        let next_id = self.counter.fetch_add(1, Ordering::Relaxed);
-        Ok(Ok(MockConnection { id: next_id }))
-    }
-}
-
-// --------------------------------------
-// CLIENT WORKER
-// --------------------------------------
-//
-// Each worker repeatedly calls `pool.get_client().await`. When it succeeds, we:
-//  1. Lock the shared Mutex<HashMap<u64, Arc<AtomicUsize>>> to fetch/insert an Arc<AtomicUsize> for this conn_id.
-//  2. Lock the shared Mutex<HashSet<u64>> to record this conn_id as “seen.”
-//  3. Drop both locks, then atomically increment that counter and assert it ≤ max_consumers.
-//  4. Sleep 10–100 ms to simulate “work.”
-//  5. Atomically decrement the counter.
-//  6. Call `pooled.finish(Ok(()))` to return to the pool.
-
-async fn client_worker(
-    pool: Arc<ConnectionPool<MockConnection>>,
-    usage_map: Arc<Mutex<HashMap<u64, Arc<AtomicUsize>>>>,
-    seen_set: Arc<Mutex<HashSet<u64>>>,
-    max_consumers: usize,
-    worker_id: usize,
-) {
-    for iteration in 0..10 {
-        match pool.clone().get_client().await {
-            Ok(pooled) => {
-                let conn: MockConnection = pooled.channel();
-                let conn_id = conn.id;
-
-                // 1. Fetch or insert the Arc<AtomicUsize> for this conn_id:
-                let counter_arc: Arc<AtomicUsize> = {
-                    let mut guard = usage_map.lock().unwrap();
-                    guard
-                        .entry(conn_id)
-                        .or_insert_with(|| Arc::new(AtomicUsize::new(0)))
-                        .clone()
-                    // MutexGuard is dropped here
-                };
-
-                // 2. Record this conn_id in the shared HashSet of “seen” IDs:
-                {
-                    let mut seen_guard = seen_set.lock().unwrap();
-                    seen_guard.insert(conn_id);
-                    // MutexGuard is dropped immediately
-                }
-
-                // 3. Atomically bump the count for this connection ID
-                let prev = counter_arc.fetch_add(1, Ordering::SeqCst);
-                let current = prev + 1;
-                assert!(
-                    current <= max_consumers,
-                    "Connection {} exceeded max_consumers (got {})",
-                    conn_id,
-                    current
-                );
-
-                println!(
-                    "[worker {}][iter {}] got MockConnection id={} ({} concurrent)",
-                    worker_id, iteration, conn_id, current
-                );
-
-                // 4. Simulate some work (10–100 ms)
-                let delay_ms = rand::thread_rng().gen_range(10..100);
-                sleep(Duration::from_millis(delay_ms)).await;
-
-                // 5. Decrement the usage counter
-                let prev2 = counter_arc.fetch_sub(1, Ordering::SeqCst);
-                let after = prev2 - 1;
-                println!(
-                    "[worker {}][iter {}] returning MockConnection id={} (now {} remain)",
-                    worker_id, iteration, conn_id, after
-                );
-
-                // 6. Return to the pool (mark success)
-                pooled.finish(Ok(())).await;
-            }
-            Err(status) => {
-                eprintln!(
-                    "[worker {}][iter {}] failed to get client: {:?}",
-                    worker_id, iteration, status
-                );
-            }
-        }
-
-        // Small random pause before next iteration to spread out load
-        let pause = rand::thread_rng().gen_range(0..20);
-        sleep(Duration::from_millis(pause)).await;
-    }
-}
-
-#[tokio::main(flavor = "multi_thread", worker_threads = 8)]
-async fn main() {
-    // --------------------------------------
-    // 1. Create factory and shared instrumentation
-    // --------------------------------------
-    let factory = Arc::new(MockConnectionFactory::new());
-
-    // Shared map: connection ID → Arc<AtomicUsize>
-    let usage_map: Arc<Mutex<HashMap<u64, Arc<AtomicUsize>>>> =
-        Arc::new(Mutex::new(HashMap::new()));
-
-    // Shared set: record each unique connection ID we actually saw
-    let seen_set: Arc<Mutex<HashSet<u64>>> = Arc::new(Mutex::new(HashSet::new()));
-
-    // --------------------------------------
-    // 2. Pool parameters
-    // --------------------------------------
-    let connect_timeout    = Duration::from_millis(500);
-    let connect_backoff    = Duration::from_millis(100);
-    let max_consumers      = 100;                 // test limit
-    let error_threshold    = 2;                 // mock never fails
-    let max_idle_duration  = Duration::from_secs(2);
-    let max_total_connections  = 3;
-    let aggregate_metrics  = None;
-
-    let pool: Arc<ConnectionPool<MockConnection>> = ConnectionPool::new(
-        factory,
-        connect_timeout,
-        connect_backoff,
-        max_consumers,
-        error_threshold,
-        max_idle_duration,
-        max_total_connections,
-        aggregate_metrics,
-    );
-
-    // --------------------------------------
-    // 3. Spawn worker tasks
-    // --------------------------------------
-    let num_workers = 10000;
-    let mut handles = Vec::with_capacity(num_workers);
-    let start_time = Instant::now();
-
-    for worker_id in 0..num_workers {
-        let pool_clone   = Arc::clone(&pool);
-        let usage_clone  = Arc::clone(&usage_map);
-        let seen_clone   = Arc::clone(&seen_set);
-        let mc           = max_consumers;
-
-        let handle = task::spawn(async move {
-            client_worker(pool_clone, usage_clone, seen_clone, mc, worker_id).await;
-        });
-        handles.push(handle);
-    }
-
-    // --------------------------------------
-    // 4. Wait for workers to finish
-    // --------------------------------------
-    for handle in handles {
-        let _ = handle.await;
-    }
-    let elapsed = Instant::now().duration_since(start_time);
-    println!(
-        "All {} workers completed in {:?}",
-        num_workers, elapsed
-    );
-
-    // --------------------------------------
-    // 5. Print the total number of unique connections seen so far
-    // --------------------------------------
-    let unique_count = {
-        let seen_guard = seen_set.lock().unwrap();
-        seen_guard.len()
-    };
-    println!("Total unique connections used by workers: {}", unique_count);
-
-    // --------------------------------------
-    // 6. Sleep so the background sweeper can run (max_idle_duration = 2 s)
-    // --------------------------------------
-    sleep(Duration::from_secs(3)).await;
-
-    // --------------------------------------
-    // 7. Shutdown the pool
-    // --------------------------------------
-    let shutdown_pool = Arc::clone(&pool);
-    shutdown_pool.shutdown().await;
-    println!("Pool.shutdown() returned.");
-
-    // --------------------------------------
-    // 8. Verify that no background task still holds an Arc clone of `pool`.
-    //    If any task is still alive (sweeper/create_connection), strong_count > 1.
-    // --------------------------------------
-    sleep(Duration::from_secs(1)).await; // give tasks time to exit
-    let sc = Arc::strong_count(&pool);
-    assert!(
-        sc == 1,
-        "Pool tasks did not all terminate: Arc::strong_count = {} (expected 1)",
-        sc
-    );
-    println!("Verified: all pool tasks have terminated (strong_count == 1).");
-
-    // --------------------------------------
-    // 9. Verify no MockConnection was leaked:
-    //    CREATED must equal DROPPED.
-    // --------------------------------------
-    let created = CREATED.load(Ordering::SeqCst);
-    let dropped = DROPPED.load(Ordering::SeqCst);
-    assert!(
-        created == dropped,
-        "Leaked connections: created={} but dropped={}",
-        created,
-        dropped
-    );
-    println!(
-        "Verified: no connections leaked (created = {}, dropped = {}).",
-        created, dropped
-    );
-
-    // --------------------------------------
-    // 10. Because `client_worker` asserted inside that no connection
-    //     ever exceeded `max_consumers`, reaching this point means that check passed.
-    // --------------------------------------
-    println!("All per-connection usage stayed within max_consumers = {}.", max_consumers);
-
-    println!("Load test complete; exiting cleanly.");
-}
--- a/pageserver/client_grpc/examples/request_tracker_load_test.rs
+++ b/pageserver/client_grpc/examples/request_tracker_load_test.rs
@@ -1,160 +0,0 @@
-// examples/request_tracker_load_test.rs
-
-use std::{sync::Arc, time::Duration};
-use tokio;
-use pageserver_client_grpc::request_tracker::RequestTracker;
-use pageserver_client_grpc::request_tracker::MockStreamFactory;
-use pageserver_client_grpc::request_tracker::StreamReturner;
-use pageserver_client_grpc::client_cache::ConnectionPool;
-use pageserver_client_grpc::client_cache::PooledItemFactory;
-use pageserver_client_grpc::ClientCacheOptions;
-use pageserver_client_grpc::PageserverClientAggregateMetrics;
-use pageserver_client_grpc::AuthInterceptor;
-
-use pageserver_client_grpc::client_cache::ChannelFactory;
-
-use tonic::{transport::{Channel}, Request};
-
-use rand::prelude::*;
-
-use pageserver_api::key::Key;
-
-use utils::lsn::Lsn;
-use utils::id::TenantTimelineId;
-
-use futures::stream::FuturesOrdered;
-use futures::StreamExt;
-// use chrono
-use chrono::Utc;
-
-use pageserver_page_api::{GetPageClass, GetPageResponse};
-use pageserver_page_api::proto;
-#[derive(Clone)]
-struct KeyRange {
-    timeline: TenantTimelineId,
-    timeline_lsn: Lsn,
-    start: i128,
-    end: i128,
-}
-
-impl KeyRange {
-    fn len(&self) -> i128 {
-        self.end - self.start
-    }
-}
-
-#[tokio::main]
-async fn main() {
-    // 1) configure the client‐pool behavior
-    let client_cache_options = ClientCacheOptions {
-        max_delay_ms:       0,
-        drop_rate:          0.0,
-        hang_rate:          0.0,
-        connect_timeout:    Duration::from_secs(10),
-        connect_backoff:    Duration::from_millis(200),
-        max_consumers:      64,
-        error_threshold:    10,
-        max_idle_duration:  Duration::from_secs(60),
-        max_total_connections: 12,
-    };
-
-    // 2) metrics collector, shared by both connection pools created below
-    let metrics = Arc::new(PageserverClientAggregateMetrics::new());
-    let pool = ConnectionPool::<StreamReturner>::new(
-        Arc::new(MockStreamFactory::new(
-        )),
-        client_cache_options.connect_timeout,
-        client_cache_options.connect_backoff,
-        client_cache_options.max_consumers,
-        client_cache_options.error_threshold,
-        client_cache_options.max_idle_duration,
-        client_cache_options.max_total_connections,
-        Some(Arc::clone(&metrics)),
-    );
-
-    // -----------
-    // There is no mock for the unary connection pool, so for now just
-    // don't use this pool
-    //
-    let channel_fact : Arc<dyn PooledItemFactory<Channel> + Send + Sync> = Arc::new(ChannelFactory::new(
-        "".to_string(),
-        client_cache_options.max_delay_ms,
-        client_cache_options.drop_rate,
-        client_cache_options.hang_rate,
-    ));
-    let unary_pool: Arc<ConnectionPool<Channel>> = ConnectionPool::new(
-        Arc::clone(&channel_fact),
-        client_cache_options.connect_timeout,
-        client_cache_options.connect_backoff,
-        client_cache_options.max_consumers,
-        client_cache_options.error_threshold,
-        client_cache_options.max_idle_duration,
-        client_cache_options.max_total_connections,
-        Some(Arc::clone(&metrics)),
-    );
-
-    // -----------
-    // Dummy auth interceptor. This is not used in this test.
-    let auth_interceptor = AuthInterceptor::new("dummy_tenant_id",
-                                                "dummy_timeline_id",
-                                                None);
-    let mut tracker = RequestTracker::new(
-        pool,
-        unary_pool,
-        auth_interceptor,
-    );
-
-    // 4) queue up 500 000 getpage requests (one future per request)
-    let mut handles = FuturesOrdered::new();
-    for i in 0..500000 {
-
-            let mut rng = rand::thread_rng();
-            let r = 0..=1000000i128;
-            let key: i128 = rng.gen_range(r.clone());
-            let key = Key::from_i128(key);
-            let (rel_tag, block_no) = key
-                .to_rel_block()
-                .expect("we filter non-rel-block keys out above");
-
-            let req2 = proto::GetPageRequest {
-                request_id: 0,
-                request_class: proto::GetPageClass::Normal as i32,
-                read_lsn: Some(proto::ReadLsn {
-                    request_lsn: if rng.gen_bool(0.5) {
-                        u64::from(Lsn::MAX)
-                    } else {
-                        10000
-                    },
-                    not_modified_since_lsn: 10000,
-                }),
-                rel: Some(rel_tag.into()),
-                block_number: vec![block_no],
-            };
-        let req_model = pageserver_page_api::GetPageRequest::try_from(req2.clone());
-
-        // RequestTracker is Clone, so we can share it
-        let mut tr = tracker.clone();
-        let fut = async move {
-            let resp = tr.send_getpage_request(req_model.unwrap()).await.unwrap();
-            // sanity-check: the response must carry a non-zero request_id
-            assert!(resp.request_id > 0);
-        };
-        handles.push_back(fut);
-
-        // empty future
-        let fut = async move {};
-        fut.await;
-    }
-
-    // print timestamp
-    println!("Starting 5000000 requests at: {}", chrono::Utc::now());
-    // 5) wait for them all
-    for i in 0..500000 {
-        handles.next().await.expect("Failed to get next handle");
-    }
-
-    // print timestamp
-    println!("Finished 5000000 requests at: {}", chrono::Utc::now());
-
-    println!("✅ All 100000 requests completed successfully");
-}
--- a/pageserver/client_grpc/src/client_cache.rs
+++ b/pageserver/client_grpc/src/client_cache.rs
@@ -1,741 +0,0 @@
-use std::{
-    collections::HashMap,
-    io::{self, Error, ErrorKind},
-    sync::Arc,
-    time::{Duration, Instant},
-};
-
-use priority_queue::PriorityQueue;
-
-use tokio::{
-    io::{AsyncRead, AsyncWrite, ReadBuf},
-    net::TcpStream,
-    sync::{Mutex, OwnedSemaphorePermit, Semaphore},
-    time::sleep,
-};
-use tonic::transport::{Channel, Endpoint};
-
-use uuid;
-
-use std::{
-    pin::Pin,
-    task::{Context, Poll},
-};
-
-use futures::future;
-use rand::{Rng, SeedableRng, rngs::StdRng};
-
-use bytes::BytesMut;
-use http::Uri;
-use hyper_util::rt::TokioIo;
-use tower::service_fn;
-
-use tokio_util::sync::CancellationToken;
-use async_trait::async_trait;
-
-//
-// "TokioTcp" is a flaky TCP stream used for testing purposes, in order
-// to simulate network errors and delays.
-//
-
-/// Wraps a `TcpStream`, buffers incoming data, and injects a random delay per fresh read/write.
-///
-/// Intended for tests only: it makes an otherwise healthy connection slow.
-pub struct TokioTcp {
-    /// The underlying stream that all reads and writes are forwarded to.
-    tcp: TcpStream,
-    /// Maximum randomized delay in milliseconds (inclusive). Zero disables the delay.
-    delay_ms: u64,
-
-    /// Reads and writes return `Poll::Pending` until this instant has passed.
-    deadline: Instant,
-    /// Bytes already read from `tcp` that did not fit into the caller's buffer.
-    buffer: BytesMut,
-}
-
-impl TokioTcp {
-    /// Create a new wrapper with the given maximum delay (ms).
-    ///
-    /// The first deadline is drawn from `0..=delay_ms`, the same inclusive range
-    /// that is used for every later deadline, so `delay_ms` really is the maximum.
-    pub fn new(stream: TcpStream, delay_ms: u64) -> Self {
-        // Skip the RNG entirely when delays are disabled.
-        let initial_ms = if delay_ms > 0 {
-            rand::thread_rng().gen_range(0..=delay_ms)
-        } else {
-            0
-        };
-        Self {
-            tcp: stream,
-            delay_ms,
-            deadline: Instant::now() + Duration::from_millis(initial_ms),
-            buffer: BytesMut::new(),
-        }
-    }
-}
-
-impl AsyncRead for TokioTcp {
-    /// Read from the wrapped stream, holding back each fresh read until the
-    /// current randomized deadline has passed.
-    ///
-    /// Bytes left over from an earlier read are handed out immediately,
-    /// without any delay. Returning `Ready(Ok(()))` without filling `buf`
-    /// happens when the inner stream reported a zero-length read.
-    fn poll_read(
-        self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-        buf: &mut ReadBuf<'_>,
-    ) -> Poll<io::Result<()>> {
-        // `get_mut` requires `Self: Unpin`, which holds for this type.
-        let me = self.get_mut();
-
-        // Serve leftovers first.
-        if !me.buffer.is_empty() {
-            let n = buf.remaining().min(me.buffer.len());
-            let chunk = me.buffer.split_to(n);
-            buf.put_slice(&chunk);
-            return Poll::Ready(Ok(()));
-        }
-
-        if me.delay_ms > 0 {
-            let now = Instant::now();
-            if now < me.deadline {
-                // Still inside the delay window: arrange to be polled again
-                // once the deadline has passed.
-                let remaining = me.deadline - now;
-                let waker = cx.waker().clone();
-                tokio::spawn(async move {
-                    sleep(remaining).await;
-                    waker.wake();
-                });
-                return Poll::Pending;
-            }
-
-            // The delay has expired; pick the deadline for the next fresh read.
-            let next_ms = rand::thread_rng().gen_range(0..=me.delay_ms);
-            me.deadline = Instant::now() + Duration::from_millis(next_ms);
-        }
-
-        // Read into a scratch buffer so that anything that does not fit into
-        // `buf` can be kept for the next call.
-        let mut scratch = [0u8; 4096];
-        let mut scratch_buf = ReadBuf::new(&mut scratch);
-        match Pin::new(&mut me.tcp).poll_read(cx, &mut scratch_buf) {
-            Poll::Ready(Ok(())) => {}
-            not_ready_or_failed => return not_ready_or_failed,
-        }
-
-        let received = scratch_buf.filled();
-        if !received.is_empty() {
-            me.buffer.extend_from_slice(received);
-            let n = buf.remaining().min(me.buffer.len());
-            let chunk = me.buffer.split_to(n);
-            buf.put_slice(&chunk);
-        }
-        // An empty `received` means EOF or a zero-byte read; report it as-is.
-        Poll::Ready(Ok(()))
-    }
-}
-
-impl AsyncWrite for TokioTcp {
-    /// Forward a write to the wrapped stream once the current randomized
-    /// deadline has passed; before that, return `Poll::Pending`.
-    fn poll_write(
-        self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-        data: &[u8],
-    ) -> Poll<io::Result<usize>> {
-        let me = self.get_mut();
-
-        if me.delay_ms > 0 {
-            let now = Instant::now();
-            if now < me.deadline {
-                // Still inside the delay window: wake the caller when it ends.
-                let remaining = me.deadline - now;
-                let waker = cx.waker().clone();
-                tokio::spawn(async move {
-                    sleep(remaining).await;
-                    waker.wake();
-                });
-                return Poll::Pending;
-            }
-
-            // The delay has expired; pick the deadline for the next operation.
-            let next_ms = rand::thread_rng().gen_range(0..=me.delay_ms);
-            me.deadline = Instant::now() + Duration::from_millis(next_ms);
-        }
-
-        Pin::new(&mut me.tcp).poll_write(cx, data)
-    }
-
-    /// Flush the wrapped stream; no delay is injected here.
-    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
-        Pin::new(&mut self.get_mut().tcp).poll_flush(cx)
-    }
-
-    /// Shut down the wrapped stream; no delay is injected here.
-    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
-        Pin::new(&mut self.get_mut().tcp).poll_shutdown(cx)
-    }
-}
-
-/// Creates the items (for example connections) that a `ConnectionPool` hands out.
-#[async_trait]
-pub trait PooledItemFactory<T>: Send + Sync + 'static {
-    /// Create a new pooled item.
-    ///
-    /// The result is nested: the outer `Err` carries a timeout
-    /// (`tokio::time::error::Elapsed`), the inner `Err` carries a
-    /// `tonic::Status` describing any other failure.
-    async fn create(
-        &self,
-        connect_timeout: Duration,
-    ) -> Result<Result<T, tonic::Status>, tokio::time::error::Elapsed>;
-}
-
-/// Builds tonic `Channel`s to a single endpoint, with optional fault
-/// injection (delays, dropped connects, hung connects) for testing.
-pub struct ChannelFactory {
-    /// Endpoint URI that every created channel connects to.
-    endpoint: String,
-    /// Maximum per-operation delay, in milliseconds, passed to `TokioTcp`.
-    max_delay_ms: u64,
-    /// Probability that a connect attempt fails with a simulated error.
-    drop_rate: f64,
-    /// Probability that a connect attempt never completes.
-    hang_rate: f64,
-}
-
-impl ChannelFactory {
-    /// Create a factory for `endpoint` with the given fault-injection settings.
-    ///
-    /// A rate of `0.0` turns the corresponding fault off; a `max_delay_ms` of
-    /// `0` turns the injected delays off.
-    pub fn new(endpoint: String, max_delay_ms: u64, drop_rate: f64, hang_rate: f64) -> Self {
-        Self {
-            endpoint,
-            max_delay_ms,
-            drop_rate,
-            hang_rate,
-        }
-    }
-}
-
-#[async_trait]
-impl PooledItemFactory<Channel> for ChannelFactory {
-    /// Connect to `self.endpoint`, giving up after `connect_timeout`.
-    ///
-    /// Returns `Err(Elapsed)` when the attempt did not finish within
-    /// `connect_timeout`, `Ok(Err(status))` when the endpoint string is
-    /// invalid or the connection failed, and `Ok(Ok(channel))` on success.
-    ///
-    /// `connect_timeout` is also installed as the per-request timeout of the
-    /// resulting channel.
-    async fn create(
-        &self,
-        connect_timeout: Duration,
-    ) -> Result<Result<Channel, tonic::Status>, tokio::time::error::Elapsed> {
-        let max_delay_ms = self.max_delay_ms;
-        let drop_rate = self.drop_rate;
-        let hang_rate = self.hang_rate;
-
-        // Report a malformed endpoint as an error instead of panicking inside
-        // whichever task is driving the connection attempt.
-        let endpoint = match Endpoint::from_shared(self.endpoint.clone()) {
-            Ok(endpoint) => endpoint.timeout(connect_timeout),
-            Err(e) => {
-                return Ok(Err(tonic::Status::invalid_argument(format!(
-                    "invalid endpoint {:?}: {}",
-                    self.endpoint, e
-                ))));
-            }
-        };
-
-        // This is a custom connector that inserts delays and errors, for
-        // testing purposes. It would normally be disabled by the config.
-        let connector = service_fn(move |uri: Uri| async move {
-            let mut rng = StdRng::from_entropy();
-
-            // Simulate an indefinite hang: never completes, to test the timeout.
-            if hang_rate > 0.0 && rng.gen_bool(hang_rate) {
-                return future::pending::<Result<TokioIo<TokioTcp>, Error>>().await;
-            }
-
-            // Random drop (connect error).
-            if drop_rate > 0.0 && rng.gen_bool(drop_rate) {
-                return Err(Error::new(ErrorKind::Other, "simulated connect drop"));
-            }
-
-            // Otherwise perform a real TCP connect. `TcpStream::connect` needs
-            // a port, so fall back to the scheme's default when the URI does
-            // not carry one.
-            let host = uri
-                .host()
-                .ok_or_else(|| Error::new(ErrorKind::InvalidInput, "no host or port"))?;
-            let port = uri.port_u16().unwrap_or_else(|| {
-                if uri.scheme_str() == Some("https") {
-                    443
-                } else {
-                    80
-                }
-            });
-            let addr = format!("{}:{}", host, port);
-
-            let tcp = TcpStream::connect(addr).await?;
-            Ok(TokioIo::new(TokioTcp::new(tcp, max_delay_ms)))
-        });
-
-        // Outer `Err` = timed out; inner `Err` = connect failure, reported as
-        // `Unavailable`.
-        tokio::time::timeout(connect_timeout, endpoint.connect_with_connector(connector))
-            .await
-            .map(|connected| {
-                connected.map_err(|e| {
-                    tonic::Status::new(
-                        tonic::Code::Unavailable,
-                        format!("Failed to connect: {}", e),
-                    )
-                })
-            })
-    }
-}
-
-
-/// A pool of shared connections with capacity tracking and error handling.
-///
-/// Each connection may be used by up to `max_consumers` callers at once; the
-/// semaphore holds one permit per free consumer slot.
-pub struct ConnectionPool<T> {
-    /// Mutable pool state (connections, queue, counters).
-    inner: Mutex<Inner<T>>,
-
-    /// Factory used to open new connections.
-    fact: Arc<dyn PooledItemFactory<T> + Send + Sync>,
-
-    /// Timeout handed to the factory for each connection attempt.
-    connect_timeout: Duration,
-    /// Upper bound for the random initial backoff between connection attempts.
-    connect_backoff: Duration,
-    /// The maximum number of consumers that can use a single connection.
-    max_consumers: usize,
-    /// The number of consecutive errors before a connection is removed from the pool.
-    error_threshold: usize,
-    /// The maximum duration a connection can be idle before being removed.
-    max_idle_duration: Duration,
-    /// Limit on established connections plus connection attempts in progress.
-    max_total_connections: usize,
-
-    /// Starts with zero permits; `max_consumers` permits are added for every
-    /// connection that is successfully created.
-    channel_semaphore: Arc<Semaphore>,
-
-    /// Cancelled by `shutdown`; stops the idle sweeper and new `get_client` calls.
-    shutdown_token: CancellationToken,
-    /// Optional metrics; when present, `retry_counters` is incremented on pool events.
-    aggregate_metrics: Option<Arc<crate::PageserverClientAggregateMetrics>>,
-}
-
-/// Mutable state of a `ConnectionPool`, protected by its mutex.
-struct Inner<T> {
-    /// All connections currently in the pool, keyed by their id.
-    entries: HashMap<uuid::Uuid, ConnectionEntry<T>>,
-    /// Connections that can still take another consumer, prioritized by their
-    /// number of active consumers.
-    pq: PriorityQueue<uuid::Uuid, usize>,
-    /// This is updated when a connection is dropped, or we fail
-    /// to create a new connection.
-    last_connect_failure: Option<Instant>,
-    /// Number of `get_client` callers currently waiting for a semaphore permit.
-    waiters: usize,
-    /// Number of connection attempts that were started and have not finished.
-    in_progress: usize,
-}
-/// Bookkeeping for a single pooled connection.
-struct ConnectionEntry<T> {
-    /// The pooled item itself; cloned for every consumer.
-    channel: T,
-    /// Number of consumers currently holding a clone of `channel`.
-    active_consumers: usize,
-    /// Errors reported since the last success.
-    consecutive_errors: usize,
-    /// When the connection was last handed out or returned.
-    last_used: Instant,
-}
-
-/// A client borrowed from the pool.
-///
-/// Hand it back with `finish` so the pool can update its bookkeeping.
-pub struct PooledClient<T> {
-    /// Clone of the pooled connection.
-    pub channel: T,
-    /// The pool this client was borrowed from.
-    pool: Arc<ConnectionPool<T>>,
-    /// Outcome recorded by `finish`; starts out as `true`.
-    is_ok: bool,
-    /// Id of the pool entry that `channel` was cloned from.
-    id: uuid::Uuid,
-    /// Semaphore permit representing this consumer's slot on the connection.
-    permit: OwnedSemaphorePermit,
-}
-
-impl<T: Clone + Send + 'static> ConnectionPool<T> {
-    /// Create an empty pool and start its background idle-connection sweeper.
-    ///
-    /// No connection is opened here; connections are created on demand by
-    /// `get_client`. The sweeper runs every five seconds until the pool's
-    /// shutdown token is cancelled. Must be called from within a Tokio
-    /// runtime, because it spawns a task.
-    pub fn new(
-        fact: Arc<dyn PooledItemFactory<T> + Send + Sync>,
-        connect_timeout: Duration,
-        connect_backoff: Duration,
-        max_consumers: usize,
-        error_threshold: usize,
-        max_idle_duration: Duration,
-        max_total_connections: usize,
-        aggregate_metrics: Option<Arc<crate::PageserverClientAggregateMetrics>>,
-    ) -> Arc<Self> {
-        let shutdown_token = CancellationToken::new();
-
-        let empty_state = Inner::<T> {
-            entries: HashMap::new(),
-            pq: PriorityQueue::new(),
-            last_connect_failure: None,
-            waiters: 0,
-            in_progress: 0,
-        };
-
-        let pool = Arc::new(Self {
-            inner: Mutex::new(empty_state),
-            fact,
-            connect_timeout,
-            connect_backoff,
-            max_consumers,
-            error_threshold,
-            max_idle_duration,
-            max_total_connections,
-            // No permits until the first connection has been created.
-            channel_semaphore: Arc::new(Semaphore::new(0)),
-            shutdown_token: shutdown_token.clone(),
-            aggregate_metrics,
-        });
-
-        // Cancelable background task to sweep idle connections.
-        let sweeper_pool = Arc::clone(&pool);
-        tokio::spawn(async move {
-            loop {
-                let sweep_then_pause = async {
-                    sweeper_pool.sweep_idle_connections().await;
-                    sleep(Duration::from_secs(5)).await;
-                };
-                tokio::select! {
-                    _ = shutdown_token.cancelled() => break,
-                    _ = sweep_then_pause => {}
-                }
-            }
-        });
-
-        pool
-    }
-
-    /// Shut the pool down: stop accepting new clients, wait for every
-    /// borrowed client to be returned, then drop all connections.
-    ///
-    /// Cancelling the shutdown token also stops the background sweeper.
-    /// This polls every 100 ms and does not return while any connection
-    /// still has active consumers.
-    pub async fn shutdown(self: Arc<Self>) {
-        self.shutdown_token.cancel();
-
-        loop {
-            {
-                let mut inner = self.inner.lock().await;
-                if inner.entries.values().all(|e| e.active_consumers == 0) {
-                    // Clear both structures under the same lock acquisition
-                    // that observed the pool idle. Leaving ids in `pq` after
-                    // emptying `entries` would make a later
-                    // `get_conn_with_permit` pop an id that has no entry.
-                    inner.entries.clear();
-                    inner.pq.clear();
-                    return;
-                }
-            }
-            // Release the lock while waiting so clients can be returned.
-            sleep(Duration::from_millis(100)).await;
-        }
-    }
-
-    /// Remove every connection that has no active consumers and has been
-    /// unused for longer than `max_idle_duration`.
-    ///
-    /// Swept connections are dropped from both the entry map and the priority
-    /// queue, and counted under the `connection_swept` metric label. Their
-    /// semaphore permits are not touched here; `get_conn_with_permit` forgets
-    /// a permit whenever it finds no connection to hand out.
-    async fn sweep_idle_connections(self: &Arc<Self>) {
-        let now = Instant::now();
-        let mut swept = Vec::new();
-
-        let mut inner = self.inner.lock().await;
-        inner.entries.retain(|id, entry| {
-            let is_idle = entry.active_consumers == 0
-                && now.duration_since(entry.last_used) > self.max_idle_duration;
-            if is_idle {
-                if let Some(metrics) = &self.aggregate_metrics {
-                    metrics
-                        .retry_counters
-                        .with_label_values(&["connection_swept"])
-                        .inc();
-                }
-                swept.push(*id);
-            }
-            // Keep everything that is not idle.
-            !is_idle
-        });
-
-        // Keep the priority queue in sync with the entry map.
-        for id in swept {
-            inner.pq.remove(&id);
-        }
-    }
-
-    /// Given an already-acquired permit, take a connection out of the queue
-    /// and wrap it in a `PooledClient`.
-    ///
-    /// Returns `None`, after forgetting the permit, when the queue is empty.
-    /// That happens when permits outlive their connection, e.g. after a
-    /// connection was removed for having too many errors; the caller is
-    /// expected to retry.
-    async fn get_conn_with_permit(
-        self: Arc<Self>,
-        permit: OwnedSemaphorePermit,
-    ) -> Option<PooledClient<T>> {
-        let mut inner = self.inner.lock().await;
-
-        // The queue only ever holds connections with fewer than
-        // `max_consumers` active consumers.
-        let id = match inner.pq.pop() {
-            Some((id, _priority)) => id,
-            None => {
-                // A stale permit: burn it so it cannot be acquired again.
-                permit.forget();
-                return None;
-            }
-        };
-
-        let entry = inner
-            .entries
-            .get_mut(&id)
-            .expect("pq and entries got out of sync");
-        entry.active_consumers += 1;
-        entry.last_used = Instant::now();
-        let consumers_now = entry.active_consumers;
-        let channel = entry.channel.clone();
-
-        // Put the connection back with its new priority, unless it is full.
-        if consumers_now < self.max_consumers {
-            inner.pq.push(id, consumers_now);
-        }
-
-        Some(PooledClient::<T> {
-            channel,
-            pool: Arc::clone(&self),
-            is_ok: true,
-            id,
-            permit,
-        })
-    }
-
-    /// Borrow a client from the pool, waiting for a free slot if necessary.
-    ///
-    /// When no permit is immediately available, the caller is counted as a
-    /// waiter and, if the connection attempts already in progress cannot
-    /// serve all waiters and the connection limit allows it, a new
-    /// connection attempt is spawned in the background.
-    ///
-    /// Returns `Status::unavailable` if the pool was already shut down when
-    /// the call started.
-    pub async fn get_client(self: Arc<Self>) -> Result<PooledClient<T>, tonic::Status> {
-        // The pool is shutting down. Don't accept new connections.
-        if self.shutdown_token.is_cancelled() {
-            return Err(tonic::Status::unavailable("Pool is shutting down"));
-        }
-
-        // A permit does not guarantee a connection (permits of removed
-        // connections drain lazily), hence the retry loop.
-        loop {
-            let permit = match Arc::clone(&self.channel_semaphore).try_acquire_owned() {
-                Ok(permit) => permit,
-                Err(_) => {
-                    if let Some(metrics) = &self.aggregate_metrics {
-                        metrics
-                            .retry_counters
-                            .with_label_values(&["sema_acquire_failed"])
-                            .inc();
-                    }
-
-                    {
-                        // This generates enough connections to handle a
-                        // burst, but may generate up to twice the number
-                        // needed in the worst case. Extra connections go idle
-                        // and are cleaned up by the sweeper.
-                        let mut inner = self.inner.lock().await;
-                        inner.waiters += 1;
-                        let underprovisioned =
-                            inner.waiters > (inner.in_progress * self.max_consumers);
-                        if underprovisioned
-                            && (inner.entries.len() + inner.in_progress)
-                                < self.max_total_connections
-                        {
-                            let connector = Arc::clone(&self);
-                            tokio::task::spawn(async move {
-                                connector.create_connection().await;
-                            });
-                            inner.in_progress += 1;
-                        }
-                    }
-
-                    // Wait for a slot: either a new connection was created or
-                    // another consumer returned its client.
-                    let permit = Arc::clone(&self.channel_semaphore)
-                        .acquire_owned()
-                        .await
-                        .unwrap();
-                    self.inner.lock().await.waiters -= 1;
-                    permit
-                }
-            };
-
-            // We hold a permit; see whether there is a connection to go with
-            // it. If not, the permit has been forgotten and we try again.
-            if let Some(client) = Arc::clone(&self).get_conn_with_permit(permit).await {
-                return Ok(client);
-            }
-        }
-    }
-
-    /// Open one new connection and add it to the pool, retrying with a
-    /// jittered, growing backoff until it succeeds or the pool is shut down.
-    ///
-    /// On success the connection is inserted with zero consumers,
-    /// `in_progress` is decremented and `max_consumers` permits are added to
-    /// the semaphore. Every failure records `last_connect_failure` and
-    /// lengthens the backoff, capped at one minute.
-    async fn create_connection(&self) {
-        // Bump one of the `retry_counters` labels, if metrics are enabled.
-        let count = |label: &str| {
-            if let Some(metrics) = &self.aggregate_metrics {
-                metrics.retry_counters.with_label_values(&[label]).inc();
-            }
-        };
-
-        // Generate a random backoff to add some jitter so that connections
-        // don't all retry at the same time.
-        let mut backoff_delay = Duration::from_millis(
-            rand::thread_rng().gen_range(0..=self.connect_backoff.as_millis() as u64),
-        );
-
-        loop {
-            if self.shutdown_token.is_cancelled() {
-                return;
-            }
-
-            // Back off.
-            // Loop because a failure can occur while we are sleeping, so wait
-            // until failures have stopped for at least one backoff period.
-            loop {
-                let remaining = {
-                    let inner = self.inner.lock().await;
-                    inner
-                        .last_connect_failure
-                        // `checked_sub` reads the clock once and cannot
-                        // underflow. Subtracting a second `elapsed()` reading
-                        // panics if the deadline passes between the
-                        // comparison and the subtraction.
-                        .and_then(|at| backoff_delay.checked_sub(at.elapsed()))
-                        .filter(|left| !left.is_zero())
-                };
-                match remaining {
-                    Some(delay) => sleep(delay).await,
-                    None => break, // No delay, so we can create a connection
-                }
-            }
-
-            //
-            // Create a new connection.
-            //
-            // The connect timeout is also the timeout for an individual gRPC request
-            // on this connection. (Requests made later on this channel will time out
-            // with the same timeout.)
-            //
-            count("connection_attempt");
-
-            match self.fact.create(self.connect_timeout).await {
-                // Connection succeeded
-                Ok(Ok(channel)) => {
-                    count("connection_success");
-
-                    let mut inner = self.inner.lock().await;
-                    let id = uuid::Uuid::new_v4();
-                    inner.entries.insert(
-                        id,
-                        ConnectionEntry::<T> {
-                            channel,
-                            active_consumers: 0,
-                            consecutive_errors: 0,
-                            last_used: Instant::now(),
-                        },
-                    );
-                    inner.pq.push(id, 0);
-                    inner.in_progress -= 1;
-                    self.channel_semaphore.add_permits(self.max_consumers);
-                    return;
-                }
-                // Connection failed or timed out, back off and retry
-                Ok(Err(_)) | Err(_) => {
-                    count("connect_failed");
-
-                    self.inner.lock().await.last_connect_failure = Some(Instant::now());
-
-                    // Add some jitter so that every connection doesn't retry at once
-                    let jitter =
-                        rand::thread_rng().gen_range(0..=backoff_delay.as_millis() as u64);
-                    // Do not back off longer than one minute
-                    backoff_delay =
-                        Duration::from_millis(backoff_delay.as_millis() as u64 + jitter)
-                            .min(Duration::from_secs(60));
-                    // continue the loop to retry
-                }
-            }
-        }
-    }
-
-    /// Return client to the pool, indicating success or error.
-    pub async fn return_client(&self, id: uuid::Uuid, success: bool, permit: OwnedSemaphorePermit) {
-        let mut inner = self.inner.lock().await;
-        if let Some(entry) = inner.entries.get_mut(&id) {
-            entry.last_used = Instant::now();
-            if entry.active_consumers <= 0 {
-                panic!("A consumer completed when active_consumers was zero!")
-            }
-            entry.active_consumers = entry.active_consumers - 1;
-            if success {
-                if entry.consecutive_errors < self.error_threshold {
-                    entry.consecutive_errors = 0;
-                }
-            } else {
-                entry.consecutive_errors += 1;
-                if entry.consecutive_errors == self.error_threshold {
-                    match self.aggregate_metrics {
-                        Some(ref metrics) => {
-                            metrics
-                                .retry_counters
-                                .with_label_values(&["connection_dropped"])
-                                .inc();
-                        }
-                        None => {}
-                    }
-                }
-            }
-
-            //
-            // Too many errors on this connection. If there are no active users,
-            // remove it. Otherwise just wait for active_consumers to go to zero.
-            // This connection will not be selected for new consumers.
-            //
-            let active_consumers = entry.active_consumers;
-            if entry.consecutive_errors >= self.error_threshold {
-                // too many errors, remove the connection permanently. Once it drains,
-                // it will be dropped.
-                if inner.pq.get_priority(&id).is_some() {
-                    inner.pq.remove(&id);
-                }
-
-                // remove from entries
-                // check if entry is in inner
-                if inner.entries.contains_key(&id) {
-                    inner.entries.remove(&id);
-                }
-                inner.last_connect_failure = Some(Instant::now());
-
-                // The connection has been removed, it's permits will be
-                // drained because if we look for a connection and it's not there
-                // we just forget the permit. However, this process can be a little
-                // bit faster if we just forget permits as the connections are returned.
-                permit.forget();
-            } else {
-                // update its priority in the queue
-                if inner.pq.get_priority(&id).is_some() {
-                    inner.pq.change_priority(&id, active_consumers);
-                } else {
-                    // This connection is not in the heap, but it has space
-                    // for more consumers. Put it back in the heap.
-                    if active_consumers < self.max_consumers {
-                        inner.pq.push(id, active_consumers);
-                    }
-                }
-            }
-        }
-    }
-}
-
-impl<T: Clone + Send + 'static> PooledClient<T> {
-    /// Get a clone of the underlying pooled connection.
-    pub fn channel(&self) -> T {
-        self.channel.clone()
-    }
-
-    /// Hand the client back to its pool, reporting whether the work done with
-    /// it succeeded (`Ok`) or failed (`Err`).
-    pub async fn finish(mut self, result: Result<(), tonic::Status>) {
-        self.is_ok = result.is_ok();
-        let Self {
-            pool,
-            id,
-            is_ok,
-            permit,
-            ..
-        } = self;
-        pool.return_client(id, is_ok, permit).await;
-    }
-}
--- a/pageserver/client_grpc/src/lib.rs
+++ b/pageserver/client_grpc/src/lib.rs
@@ -1,456 +0,0 @@
-//! Pageserver Data API client
-//!
-//! - Manage connections to pageserver
-//! - Send requests to correct shards
-//!
-use std::collections::HashMap;
-use std::sync::Arc;
-use std::sync::RwLock;
-use std::time::Duration;
-
-use bytes::Bytes;
-use futures::{Stream, StreamExt};
-use thiserror::Error;
-use tonic::metadata::AsciiMetadataValue;
-
-use pageserver_page_api::proto;
-use pageserver_page_api::*;
-
-use pageserver_page_api::proto::PageServiceClient;
-use utils::shard::ShardIndex;
-
-use std::fmt::Debug;
-pub mod client_cache;
-pub mod request_tracker;
-use tonic::transport::Channel;
-
-use metrics::{IntCounterVec, core::Collector};
-use crate::client_cache::{PooledItemFactory};
-
-use tokio::sync::mpsc;
-use async_trait::async_trait;
-
-
-/// Errors surfaced by the pageserver gRPC client.
-#[derive(Error, Debug)]
-pub enum PageserverClientError {
-    /// Setting up the transport failed.
-    #[error("could not connect to service: {0}")]
-    ConnectError(#[from] tonic::transport::Error),
-    /// The gRPC call completed with a non-OK status.
-    #[error("could not perform request: {0}")]
-    RequestError(#[from] tonic::Status),
-    /// Wraps a `ProtocolError`.
-    #[error("protocol error: {0}")]
-    ProtocolError(#[from] ProtocolError),
-
-    /// A URI could not be parsed.
-    #[error("invalid URI: {0}")]
-    InvalidUri(#[from] http::uri::InvalidUri),
-
-    /// Any other failure; the payload is the complete message.
-    #[error("{0}")]
-    Other(String),
-}
-
-/// Request and retry counters shared by a client, labelled by `request_kind`.
-#[derive(Clone, Debug)]
-pub struct PageserverClientAggregateMetrics {
-    pub request_counters: IntCounterVec,
-    pub retry_counters: IntCounterVec,
-}
-
-impl PageserverClientAggregateMetrics {
-    /// Builds both counter vectors. Panics if either one cannot be constructed.
-    pub fn new() -> Self {
-        let labels = &["request_kind"];
-
-        let request_opts = metrics::core::Opts::new(
-            "backend_requests_total",
-            "Number of requests from backends.",
-        );
-        let request_counters = IntCounterVec::new(request_opts, labels).unwrap();
-
-        let retry_opts = metrics::core::Opts::new(
-            "backend_requests_retries_total",
-            "Number of retried requests from backends.",
-        );
-        let retry_counters = IntCounterVec::new(retry_opts, labels).unwrap();
-
-        Self {
-            request_counters,
-            retry_counters,
-        }
-    }
-
-    /// Gathers the metric families of both counters, request counters first.
-    pub fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        let mut families = self.request_counters.collect();
-        families.extend(self.retry_counters.collect());
-        families
-    }
-}
-
-/// Client for the pageserver data API: keeps one connection pool per shard
-/// and attaches tenant/timeline/auth metadata to every request.
-pub struct PageserverClient {
-    // Copies of the constructor arguments; not read by the methods visible here.
-    _tenant_id: String,
-    _timeline_id: String,
-
-    _auth_token: Option<String>,
-
-    // Per-shard string handed to `ChannelFactory::new` when a pool is created
-    // (presumably the pageserver endpoint URL -- confirm against ChannelFactory).
-    shard_map: HashMap<ShardIndex, String>,
-
-    // Connection pools, created lazily by `get_client` the first time a shard is used.
-    channels: RwLock<HashMap<ShardIndex, Arc<client_cache::ConnectionPool<Channel>>>>,
-
-    // Base interceptor; `for_shard` derives the per-request interceptor from it.
-    auth_interceptor: AuthInterceptor,
-
-    // Settings forwarded to every pool and channel factory this client creates.
-    client_cache_options: ClientCacheOptions,
-
-    // Optional shared counters; `get_page` bumps `request_counters` when present.
-    aggregate_metrics: Option<Arc<PageserverClientAggregateMetrics>>,
-}
-/// Tuning knobs forwarded verbatim to `ConnectionPool::new` and
-/// `ChannelFactory::new` when a pool is created.
-#[derive(Clone)]
-pub struct ClientCacheOptions {
-    // The next six values are passed to `ConnectionPool::new`.
-    // NOTE(review): exact semantics live in client_cache -- confirm there.
-    pub max_consumers: usize,
-    pub error_threshold: usize,
-    pub connect_timeout: Duration,
-    pub connect_backoff: Duration,
-    pub max_idle_duration: Duration,
-    pub max_total_connections: usize,
-    // The last three values are passed to `ChannelFactory::new`; every
-    // constructor in this crate sets them to zero.
-    // NOTE(review): they look like fault-injection settings -- confirm.
-    pub max_delay_ms: u64,
-    pub drop_rate: f64,
-    pub hang_rate: f64,
-}
-
-impl PageserverClient {
-    /// Creates a client with the built-in default cache options and no metrics.
-    ///
-    /// TODO: this doesn't currently react to changes in the shard map.
-    pub fn new(
-        tenant_id: &str,
-        timeline_id: &str,
-        auth_token: &Option<String>,
-        shard_map: HashMap<ShardIndex, String>,
-    ) -> Self {
-        let default_options = ClientCacheOptions {
-            // Pool sizing
-            max_consumers: 5000,
-            max_total_connections: 100000,
-            error_threshold: 5,
-            // Timing
-            connect_timeout: Duration::from_secs(5),
-            connect_backoff: Duration::from_secs(1),
-            max_idle_duration: Duration::from_secs(60),
-            // Passed on to the channel factory; all disabled by default
-            max_delay_ms: 0,
-            drop_rate: 0.0,
-            hang_rate: 0.0,
-        };
-        let no_metrics = None;
-
-        Self::new_with_config(
-            tenant_id,
-            timeline_id,
-            auth_token,
-            shard_map,
-            default_options,
-            no_metrics,
-        )
-    }
-    /// Creates a client with explicit cache options and optional metrics.
-    ///
-    /// No connection is opened here; the per-shard pool map starts out empty.
-    /// Panics if the tenant id, timeline id or auth token cannot be parsed
-    /// into a metadata value (see `AuthInterceptor::new`).
-    pub fn new_with_config(
-        tenant_id: &str,
-        timeline_id: &str,
-        auth_token: &Option<String>,
-        shard_map: HashMap<ShardIndex, String>,
-        options: ClientCacheOptions,
-        metrics: Option<Arc<PageserverClientAggregateMetrics>>,
-    ) -> Self {
-        let auth_interceptor =
-            AuthInterceptor::new(tenant_id, timeline_id, auth_token.as_deref());
-        let channels = RwLock::new(HashMap::new());
-
-        Self {
-            _tenant_id: tenant_id.to_owned(),
-            _timeline_id: timeline_id.to_owned(),
-            _auth_token: auth_token.clone(),
-            shard_map,
-            channels,
-            auth_interceptor,
-            client_cache_options: options,
-            aggregate_metrics: metrics,
-        }
-    }
-    /// Asks the pageserver whether a relation exists.
-    ///
-    /// The pooled client is returned to its pool with the outcome of the call
-    /// before this function returns.
-    pub async fn process_check_rel_exists_request(
-        &self,
-        request: CheckRelExistsRequest,
-    ) -> Result<bool, PageserverClientError> {
-        // Current sharding model assumes that all metadata is present only at shard 0.
-        let shard = ShardIndex::unsharded();
-        let pooled = self.get_client(shard).await;
-
-        let mut client = PageServiceClient::with_interceptor(
-            pooled.channel(),
-            self.auth_interceptor.for_shard(shard),
-        );
-
-        let grpc_request = tonic::Request::new(proto::CheckRelExistsRequest::from(request));
-        match client.check_rel_exists(grpc_request).await {
-            Ok(reply) => {
-                pooled.finish(Ok(())).await;
-                Ok(reply.get_ref().exists)
-            }
-            Err(status) => {
-                pooled.finish(Err(status.clone())).await;
-                Err(PageserverClientError::RequestError(status))
-            }
-        }
-    }
-
-    /// Fetches the size of a relation, in blocks.
-    ///
-    /// The pooled client is returned to its pool with the outcome of the call
-    /// before this function returns.
-    pub async fn process_get_rel_size_request(
-        &self,
-        request: GetRelSizeRequest,
-    ) -> Result<u32, PageserverClientError> {
-        // Current sharding model assumes that all metadata is present only at shard 0.
-        let shard = ShardIndex::unsharded();
-        let pooled = self.get_client(shard).await;
-
-        let mut client = PageServiceClient::with_interceptor(
-            pooled.channel(),
-            self.auth_interceptor.for_shard(shard),
-        );
-
-        let grpc_request = tonic::Request::new(proto::GetRelSizeRequest::from(request));
-        match client.get_rel_size(grpc_request).await {
-            Ok(reply) => {
-                pooled.finish(Ok(())).await;
-                Ok(reply.get_ref().num_blocks)
-            }
-            Err(status) => {
-                pooled.finish(Err(status.clone())).await;
-                Err(PageserverClientError::RequestError(status))
-            }
-        }
-    }
-
-    /// Request a single batch of pages.
-    ///
-    /// Opens a one-request `get_pages` stream, waits for the first response and
-    /// returns its page images. On every exit path the pooled client is handed
-    /// back to its pool through `finish`, so the pool sees the outcome.
-    ///
-    /// TODO: This opens a new gRPC stream for every request, which is extremely inefficient
-    pub async fn get_page(
-        &self,
-        request: GetPageRequest,
-    ) -> Result<Vec<Bytes>, PageserverClientError> {
-        // FIXME: calculate the shard number correctly
-        let shard = ShardIndex::unsharded();
-        let pooled_client = self.get_client(shard).await;
-        let chan = pooled_client.channel();
-
-        let mut client =
-            PageServiceClient::with_interceptor(chan, self.auth_interceptor.for_shard(shard));
-
-        let request = proto::GetPageRequest::from(request);
-        let request_stream = futures::stream::once(std::future::ready(request));
-
-        // Do not use `?` here: an early return must still report the failure
-        // to the pool instead of silently dropping the pooled client.
-        let mut response_stream = match client.get_pages(tonic::Request::new(request_stream)).await
-        {
-            Ok(resp) => resp.into_inner(),
-            Err(status) => {
-                pooled_client.finish(Err(status.clone())).await;
-                return Err(PageserverClientError::RequestError(status));
-            }
-        };
-
-        let Some(response) = response_stream.next().await else {
-            // The stream ended without a single message; count it as a failure.
-            let message = "no response received for getpage request";
-            pooled_client
-                .finish(Err(tonic::Status::unknown(message)))
-                .await;
-            return Err(PageserverClientError::Other(message.to_string()));
-        };
-
-        // Counted once a response (successful or not) has arrived.
-        if let Some(metrics) = &self.aggregate_metrics {
-            metrics
-                .request_counters
-                .with_label_values(&["get_page"])
-                .inc();
-        }
-
-        match response {
-            Err(status) => {
-                pooled_client.finish(Err(status.clone())).await;
-                Err(PageserverClientError::RequestError(status))
-            }
-            Ok(resp) => {
-                pooled_client.finish(Ok(())).await;
-                let response: GetPageResponse = resp.into();
-                Ok(response.page_images.to_vec())
-            }
-        }
-    }
-
-    /// Open a stream for requesting pages.
-    ///
-    /// Sends `requests` as the outbound side of a `get_pages` call and returns
-    /// the streaming response. The pooled client is handed back to its pool on
-    /// both the error and the success path.
-    ///
-    /// TODO: This is a pretty low level interface, the caller should not need to be concerned
-    /// with streams. But 'get_page' is currently very naive and inefficient.
-    pub async fn get_pages(
-        &self,
-        requests: impl Stream<Item = proto::GetPageRequest> + Send + 'static,
-    ) -> std::result::Result<
-        tonic::Response<tonic::codec::Streaming<proto::GetPageResponse>>,
-        PageserverClientError,
-    > {
-        // FIXME: calculate the shard number correctly
-        let shard = ShardIndex::unsharded();
-        let pooled_client = self.get_client(shard).await;
-        let chan = pooled_client.channel();
-
-        let mut client =
-            PageServiceClient::with_interceptor(chan, self.auth_interceptor.for_shard(shard));
-
-        match client.get_pages(tonic::Request::new(requests)).await {
-            Err(status) => {
-                pooled_client.finish(Err(status.clone())).await;
-                Err(PageserverClientError::RequestError(status))
-            }
-            Ok(resp) => {
-                // Report success once the stream is established, the same way
-                // `get_base_backup` does for its streaming reply; otherwise the
-                // pooled client is dropped without the pool ever hearing back.
-                pooled_client.finish(Ok(())).await;
-                Ok(resp)
-            }
-        }
-    }
-
-    /// Fetches the size of a database, in bytes.
-    ///
-    /// The pooled client is returned to its pool with the outcome of the call
-    /// before this function returns.
-    pub async fn process_get_dbsize_request(
-        &self,
-        request: GetDbSizeRequest,
-    ) -> Result<u64, PageserverClientError> {
-        // Current sharding model assumes that all metadata is present only at shard 0.
-        let shard = ShardIndex::unsharded();
-        let pooled = self.get_client(shard).await;
-
-        let mut client = PageServiceClient::with_interceptor(
-            pooled.channel(),
-            self.auth_interceptor.for_shard(shard),
-        );
-
-        let grpc_request = tonic::Request::new(proto::GetDbSizeRequest::from(request));
-        match client.get_db_size(grpc_request).await {
-            Ok(reply) => {
-                pooled.finish(Ok(())).await;
-                Ok(reply.get_ref().num_bytes)
-            }
-            Err(status) => {
-                pooled.finish(Err(status.clone())).await;
-                Err(PageserverClientError::RequestError(status))
-            }
-        }
-    }
-    /// Start a base backup and return the stream of response chunks.
-    ///
-    /// When `gzip` is true the client is configured to accept gzip-compressed
-    /// responses. The pooled client is handed back to its pool as soon as the
-    /// call has been answered, before the returned stream is consumed.
-    pub async fn get_base_backup(
-        &self,
-        request: GetBaseBackupRequest,
-        gzip: bool,
-    ) -> std::result::Result<
-        tonic::Response<tonic::codec::Streaming<proto::GetBaseBackupResponseChunk>>,
-        PageserverClientError,
-    > {
-        // Current sharding model assumes that all metadata is present only at shard 0.
-        let shard = ShardIndex::unsharded();
-        let pooled_client = self.get_client(shard).await;
-        let chan = pooled_client.channel();
-
-        let mut client =
-            PageServiceClient::with_interceptor(chan, self.auth_interceptor.for_shard(shard));
-
-        if gzip {
-            client = client.accept_compressed(tonic::codec::CompressionEncoding::Gzip);
-        }
-
-        let request = proto::GetBaseBackupRequest::from(request);
-        let response = client.get_base_backup(tonic::Request::new(request)).await;
-
-        match response {
-            Err(status) => {
-                pooled_client.finish(Err(status.clone())).await; // Pass error to finish
-                return Err(PageserverClientError::RequestError(status));
-            }
-            Ok(resp) => {
-                pooled_client.finish(Ok(())).await; // Pass success to finish
-                return Ok(resp);
-            }
-        }
-    }
-    /// Get a client for given shard
-    ///
-    /// Get a client from the pool for this shard, also creating the pool if it doesn't exist.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `shard` is not in the shard map, if the `channels` lock is
-    /// poisoned, or if the pool fails to hand out a client.
-    async fn get_client(&self, shard: ShardIndex) -> client_cache::PooledClient<Channel> {
-        // Fast path: the pool already exists. The read guard is a temporary
-        // and is released at the end of this statement.
-        let existing = self.channels.read().unwrap().get(&shard).cloned();
-
-        let pool: Arc<client_cache::ConnectionPool<Channel>> = match existing {
-            Some(pool) => pool,
-            None => {
-                // Look the shard up before taking the write lock, so that a
-                // missing shard cannot poison the lock.
-                let endpoint = self.shard_map.get(&shard).unwrap().clone();
-                let opts = &self.client_cache_options;
-
-                let mut channels = self.channels.write().unwrap();
-                // Re-check under the write lock: another task may have created
-                // the pool since the read above. Inserting unconditionally would
-                // replace that pool and strand the connections it already holds.
-                let pool = channels.entry(shard).or_insert_with(|| {
-                    let channel_fact = Arc::new(client_cache::ChannelFactory::new(
-                        endpoint,
-                        opts.max_delay_ms,
-                        opts.drop_rate,
-                        opts.hang_rate,
-                    ));
-                    client_cache::ConnectionPool::new(
-                        channel_fact,
-                        opts.connect_timeout,
-                        opts.connect_backoff,
-                        opts.max_consumers,
-                        opts.error_threshold,
-                        opts.max_idle_duration,
-                        opts.max_total_connections,
-                        self.aggregate_metrics.clone(),
-                    )
-                });
-                Arc::clone(pool)
-                // The write guard is dropped here, before the await below.
-            }
-        };
-
-        pool.get_client().await.unwrap()
-    }
-}
-
-/// Inject tenant_id, timeline_id and authentication token to all pageserver requests.
-#[derive(Clone)]
-pub struct AuthInterceptor {
-    tenant_id: AsciiMetadataValue,
-    shard_id: Option<AsciiMetadataValue>,
-    timeline_id: AsciiMetadataValue,
-
-    auth_header: Option<AsciiMetadataValue>, // including "Bearer " prefix
-}
-
-impl AuthInterceptor {
-    /// Builds an interceptor without a shard id.
-    ///
-    /// Panics if any of the inputs cannot be parsed into a metadata value.
-    pub fn new(tenant_id: &str, timeline_id: &str, auth_token: Option<&str>) -> Self {
-        let tenant_id = tenant_id.parse().expect("could not parse tenant id");
-        let timeline_id = timeline_id.parse().expect("could not parse timeline id");
-        let auth_header = auth_token.map(|token| {
-            format!("Bearer {token}")
-                .parse()
-                .expect("could not parse auth token")
-        });
-
-        Self {
-            tenant_id,
-            shard_id: None,
-            timeline_id,
-            auth_header,
-        }
-    }
-
-    /// Returns a copy of this interceptor that also carries `shard_id`.
-    fn for_shard(&self, shard_id: ShardIndex) -> Self {
-        let shard_value = shard_id
-            .to_string()
-            .parse()
-            .expect("could not parse shard id");
-
-        Self {
-            shard_id: Some(shard_value),
-            ..self.clone()
-        }
-    }
-}
-
-impl tonic::service::Interceptor for AuthInterceptor {
-    /// Stamps the tenant id, the shard id (if set), the timeline id and the
-    /// authorization header (if set) onto the outgoing request's metadata.
-    fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
-        let metadata = req.metadata_mut();
-
-        metadata.insert("neon-tenant-id", self.tenant_id.clone());
-        if let Some(shard_id) = self.shard_id.as_ref() {
-            metadata.insert("neon-shard-id", shard_id.clone());
-        }
-        metadata.insert("neon-timeline-id", self.timeline_id.clone());
-        if let Some(auth_header) = self.auth_header.as_ref() {
-            metadata.insert("authorization", auth_header.clone());
-        }
-
-        Ok(req)
-    }
-}
--- a/pageserver/client_grpc/src/request_tracker.rs
+++ b/pageserver/client_grpc/src/request_tracker.rs
@@ -1,590 +0,0 @@
-
-//
-// API Visible to the spawner, just a function call that is async
-//
-use std::sync::Arc;
-use crate::client_cache;
-use pageserver_page_api::GetPageRequest;
-use pageserver_page_api::GetPageResponse;
-use pageserver_page_api::*;
-use pageserver_page_api::proto;
-use crate::client_cache::ConnectionPool;
-use crate::client_cache::ChannelFactory;
-use crate::AuthInterceptor;
-use tonic::{transport::{Channel}, Request};
-use crate::ClientCacheOptions;
-use crate::PageserverClientAggregateMetrics;
-use tokio::sync::Mutex;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use utils::shard::ShardIndex;
-
-use tokio_stream::wrappers::ReceiverStream;
-use pageserver_page_api::proto::PageServiceClient;
-
-use tonic::{
-    Status,
-    Code,
-};
-
-use async_trait::async_trait;
-use std::time::Duration;
-
-use client_cache::PooledItemFactory;
-//use tracing::info;
-//
-// A mock stream pool that just returns a sending channel, and whenever a GetPageRequest
-// comes in on that channel, it randomly sleeps before sending a GetPageResponse
-//
-
-/// Handle to one open page-request stream, as stored in the stream pool.
-#[derive(Clone)]
-pub struct StreamReturner {
-    // Outbound side: requests pushed here are forwarded onto the stream.
-    sender: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
-    // Waiters keyed by request id; the stream's reader task looks up the
-    // entry matching each response's `request_id` and delivers the result there.
-    sender_hashmap: Arc<Mutex<std::collections::HashMap<u64, tokio::sync::mpsc::Sender<Result<proto::GetPageResponse, Status>>>>>,
-}
-/// Stateless factory that produces mock page-request streams.
-pub struct MockStreamFactory {}
-
-impl MockStreamFactory {
-    /// Creates the factory; there is nothing to configure.
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-#[async_trait]
-impl PooledItemFactory<StreamReturner> for MockStreamFactory {
-    /// Creates a mock stream.
-    ///
-    /// A background task answers every request after a random delay of up to
-    /// 99 ms with a default response carrying the same `request_id`. For each
-    /// received request there is a 0.1% chance that the task stops instead,
-    /// simulating a broken stream; all requests still waiting at that point
-    /// receive a "Stream closed" error. `_connect_timeout` is ignored and the
-    /// call itself never fails.
-    async fn create(&self, _connect_timeout: Duration) -> Result<Result<StreamReturner, tonic::Status>, tokio::time::error::Elapsed> {
-        let (sender, mut receiver) = tokio::sync::mpsc::channel::<proto::GetPageRequest>(1000);
-        // Requests sent through the returned handle arrive on `receiver`.
-        let stream_returner = StreamReturner {
-            sender,
-            sender_hashmap: Arc::new(Mutex::new(std::collections::HashMap::new())),
-        };
-
-        let map = Arc::clone(&stream_returner.sender_hashmap);
-        tokio::spawn(async move {
-            while let Some(request) = receiver.recv().await {
-                // Break out of the loop with 0.1% chance
-                if rand::random::<f32>() < 0.001 {
-                    break;
-                }
-
-                let mapclone = Arc::clone(&map);
-                tokio::spawn(async move {
-                    // Simulate some processing time: 0..=99 ms
-                    let sleep_ms = rand::random::<u64>() % 100;
-                    tokio::time::sleep(tokio::time::Duration::from_millis(sleep_ms)).await;
-                    let response = proto::GetPageResponse {
-                        request_id: request.request_id,
-                        ..Default::default()
-                    };
-
-                    // Take the waiter out of the map first, so the lock is not
-                    // held while awaiting on the send.
-                    let waiter = mapclone.lock().await.remove(&request.request_id);
-                    match waiter {
-                        Some(sender) => {
-                            if let Err(e) = sender.send(Ok(response)).await {
-                                eprintln!("Failed to send response: {}", e);
-                            }
-                        }
-                        None => {
-                            eprintln!("No sender found for request ID: {}", request.request_id);
-                        }
-                    }
-                });
-            }
-
-            // Fail every request that is still waiting for a response.
-            let waiters: Vec<_> = map.lock().await.drain().collect();
-            for (_, sender) in waiters {
-                let error = Status::new(Code::Unknown, "Stream closed");
-                if let Err(e) = sender.send(Err(error)).await {
-                    eprintln!("Failed to send close response: {}", e);
-                }
-            }
-        });
-
-        Ok(Ok(stream_returner))
-    }
-}
-
-
-/// Factory that opens page-request streams over connections borrowed from a
-/// connection pool.
-pub struct StreamFactory {
-    connection_pool: Arc<client_cache::ConnectionPool<Channel>>,
-    auth_interceptor: AuthInterceptor,
-    shard: ShardIndex,
-}
-
-impl StreamFactory {
-    /// Stores the pool, interceptor and shard used for every stream created later.
-    pub fn new(
-        connection_pool: Arc<ConnectionPool<Channel>>,
-        auth_interceptor: AuthInterceptor,
-        shard: ShardIndex,
-    ) -> Self {
-        Self {
-            connection_pool,
-            auth_interceptor,
-            shard,
-        }
-    }
-}
-
-#[async_trait]
-impl PooledItemFactory<StreamReturner> for StreamFactory {
-    /// Opens a `get_pages` stream on a pooled connection.
-    ///
-    /// A background task reads responses from the stream and delivers each one
-    /// to the waiter registered under its `request_id`. When the stream ends,
-    /// cleanly or with an error, every request still waiting receives a
-    /// "Stream closed" error. `_connect_timeout` is currently ignored.
-    ///
-    /// Returns `Ok(Err(status))` when no connection can be obtained or the
-    /// stream cannot be opened.
-    async fn create(&self, _connect_timeout: Duration) ->
-    Result<Result<StreamReturner, tonic::Status>, tokio::time::error::Elapsed>
-    {
-        // NOTE(review): the pooled client is dropped right after its channel is
-        // cloned, without `finish` being called -- confirm this is intended.
-        let channel = match Arc::clone(&self.connection_pool).get_client().await {
-            Ok(pooled_client) => pooled_client.channel(),
-            Err(_) => {
-                return Ok(Err(Status::new(
-                    Code::Unavailable,
-                    "Failed to get a connection from the pool",
-                )));
-            }
-        };
-        let mut client =
-            PageServiceClient::with_interceptor(channel, self.auth_interceptor.for_shard(self.shard));
-
-        let (sender, receiver) = tokio::sync::mpsc::channel::<proto::GetPageRequest>(1000);
-        let outbound = ReceiverStream::new(receiver);
-
-        let resp = match client.get_pages(Request::new(outbound)).await {
-            Ok(resp) => resp,
-            Err(status) => {
-                // TODO: Convert this error correctly
-                return Ok(Err(tonic::Status::new(
-                    status.code(),
-                    format!("Failed to connect to pageserver: {}", status.message()),
-                )));
-            }
-        };
-
-        let stream_returner = StreamReturner {
-            sender,
-            sender_hashmap: Arc::new(Mutex::new(std::collections::HashMap::new())),
-        };
-        let map = Arc::clone(&stream_returner.sender_hashmap);
-
-        tokio::spawn(async move {
-            let mut inner = resp.into_inner();
-
-            // `message()` yields `Ok(None)` on a clean end of stream and `Err`
-            // on failure. Both simply end the loop, so the cleanup below
-            // always runs instead of the task panicking on an unwrap.
-            while let Ok(Some(response)) = inner.message().await {
-                // Take the waiter out of the map first, so the lock is not
-                // held while awaiting on the send.
-                let waiter = map.lock().await.remove(&response.request_id);
-                match waiter {
-                    Some(sender) => {
-                        if let Err(e) = sender.send(Ok(response)).await {
-                            eprintln!("Failed to send response: {}", e);
-                        }
-                    }
-                    None => {
-                        eprintln!("No sender found for request ID: {}", response.request_id);
-                    }
-                }
-            }
-
-            // Fail every request that is still waiting for a response.
-            let waiters: Vec<_> = map.lock().await.drain().collect();
-            for (_, sender) in waiters {
-                let error = Status::new(Code::Unknown, "Stream closed");
-                if let Err(e) = sender.send(Err(error)).await {
-                    eprintln!("Failed to send close response: {}", e);
-                }
-            }
-        });
-
-        Ok(Ok(stream_returner))
-    }
-}
-
-/// Issues requests for a single shard: page requests go over pooled streams,
-/// the other requests over pooled unary channels.
-#[derive(Clone)]
-pub struct RequestTracker {
-    // Starts at 0 in `new`; the only use in this file is commented out.
-    cur_id: Arc<AtomicU64>,
-    // Pool of open page-request streams, used by `send_getpage_request`.
-    stream_pool: Arc<ConnectionPool<StreamReturner>>,
-    // Pool of channels, used by the `send_process_*` requests.
-    unary_pool: Arc<ConnectionPool<Channel>>,
-    // Base interceptor; `for_shard(self.shard)` is applied on each unary request.
-    auth_interceptor: AuthInterceptor,
-    shard: ShardIndex,
-}
-
-impl RequestTracker {
-    /// Creates a tracker over the given pools, with the id counter starting at 0.
-    pub fn new(stream_pool: Arc<ConnectionPool<StreamReturner>>,
-                unary_pool: Arc<ConnectionPool<Channel>>,
-                auth_interceptor: AuthInterceptor,
-                shard: ShardIndex,
-    ) -> Self {
-        Self {
-            cur_id: Arc::new(AtomicU64::new(0)),
-            stream_pool,
-            unary_pool,
-            auth_interceptor,
-            shard,
-        }
-    }
-
-    /// Asks the pageserver whether a relation exists.
-    ///
-    /// NOTE(review): a failed call is retried immediately and without limit,
-    /// so this function never actually returns `Err`.
-    pub async fn send_process_check_rel_exists_request(
-        &self,
-        req: CheckRelExistsRequest,
-    ) -> Result<bool, tonic::Status> {
-        loop {
-            let pooled = Arc::clone(&self.unary_pool).get_client().await.unwrap();
-            let mut ps_client = PageServiceClient::with_interceptor(
-                pooled.channel(),
-                self.auth_interceptor.for_shard(self.shard),
-            );
-
-            let grpc_request = tonic::Request::new(proto::CheckRelExistsRequest::from(req.clone()));
-            match ps_client.check_rel_exists(grpc_request).await {
-                Ok(reply) => {
-                    pooled.finish(Ok(())).await;
-                    return Ok(reply.get_ref().exists);
-                }
-                Err(status) => {
-                    // Report the failure to the pool, then go around again.
-                    pooled.finish(Err(status)).await;
-                }
-            }
-        }
-    }
-
-    /// Fetches the size of a relation, in blocks.
-    ///
-    /// NOTE(review): a failed call is retried immediately and without limit,
-    /// so this function never actually returns `Err`.
-    pub async fn send_process_get_rel_size_request(
-        &self,
-        req: GetRelSizeRequest,
-    ) -> Result<u32, tonic::Status> {
-        loop {
-            // Current sharding model assumes that all metadata is present only at shard 0.
-            let pooled = Arc::clone(&self.unary_pool).get_client().await.unwrap();
-            let mut ps_client = PageServiceClient::with_interceptor(
-                pooled.channel(),
-                self.auth_interceptor.for_shard(self.shard),
-            );
-
-            let grpc_request = tonic::Request::new(proto::GetRelSizeRequest::from(req.clone()));
-            match ps_client.get_rel_size(grpc_request).await {
-                Ok(reply) => {
-                    pooled.finish(Ok(())).await;
-                    return Ok(reply.get_ref().num_blocks);
-                }
-                Err(status) => {
-                    // Report the failure to the pool, then go around again.
-                    pooled.finish(Err(status)).await;
-                }
-            }
-        }
-    }
-
-    /// Fetches the size of a database, in bytes.
-    ///
-    /// NOTE(review): a failed call is retried immediately and without limit,
-    /// so this function never actually returns `Err`.
-    pub async fn send_process_get_dbsize_request(
-        &self,
-        req: GetDbSizeRequest,
-    ) -> Result<u64, tonic::Status> {
-        loop {
-            // Current sharding model assumes that all metadata is present only at shard 0.
-            let pooled = Arc::clone(&self.unary_pool).get_client().await.unwrap();
-            let mut ps_client = PageServiceClient::with_interceptor(
-                pooled.channel(),
-                self.auth_interceptor.for_shard(self.shard),
-            );
-
-            let grpc_request = tonic::Request::new(proto::GetDbSizeRequest::from(req.clone()));
-            match ps_client.get_db_size(grpc_request).await {
-                Ok(reply) => {
-                    pooled.finish(Ok(())).await;
-                    return Ok(reply.get_ref().num_bytes);
-                }
-                Err(status) => {
-                    // Report the failure to the pool, then go around again.
-                    pooled.finish(Err(status)).await;
-                }
-            }
-        }
-    }
-
-    /// Sends one page request over a pooled stream and waits for its response.
-    ///
-    /// The caller-supplied `request_id` is used as the key under which the
-    /// response is awaited. Any failure (no stream available, send failure,
-    /// error response, closed response channel) is reported to the stream pool
-    /// where applicable and then retried from scratch.
-    ///
-    /// NOTE(review): retries are immediate and unbounded, so this function
-    /// never actually returns `Err`.
-    pub async fn send_getpage_request(
-        &mut self,
-        req: GetPageRequest,
-    ) -> Result<GetPageResponse, tonic::Status> {
-        loop {
-            let request = req.clone();
-            let request_id = request.request_id;
-            let (response_sender, mut response_receiver) =
-                tokio::sync::mpsc::channel::<Result<proto::GetPageResponse, Status>>(1);
-
-            // Get a stream from the stream pool; retry if none can be had.
-            let Ok(stream_returner) = Arc::clone(&self.stream_pool).get_client().await else {
-                continue;
-            };
-            let returner = stream_returner.channel();
-            let map = returner.sender_hashmap.clone();
-
-            // Register the waiter before sending, so the response cannot
-            // arrive before there is somewhere to deliver it.
-            map.lock().await.insert(request_id, response_sender);
-
-            let sent = returner
-                .sender
-                .send(proto::GetPageRequest::from(request))
-                .await;
-            if sent.is_err() {
-                // Sending failed: unregister the waiter again.
-                map.lock().await.remove(&request_id);
-                stream_returner.finish(Err(Status::new(Code::Unknown,
-                                                       "Failed to send request"))).await;
-                continue;
-            }
-
-            let failure = match response_receiver.recv().await {
-                Some(Ok(resp)) => {
-                    stream_returner.finish(Ok(())).await;
-                    return Ok(resp.into());
-                }
-                // The stream delivered an error instead of a response.
-                Some(Err(_status)) => "Failed to receive response",
-                // The response channel was closed without delivering anything.
-                None => "Response channel closed",
-            };
-            stream_returner
-                .finish(Err(Status::new(Code::Unknown, failure)))
-                .await;
-        }
-    }
-}
-
-struct ShardedRequestTrackerInner {
-    // Hashmap of shard index to RequestTracker
-    trackers: std::collections::HashMap<ShardIndex, RequestTracker>,
-}
-pub struct ShardedRequestTracker {
-    inner: Arc<Mutex<ShardedRequestTrackerInner>>,
-    tcp_client_cache_options: ClientCacheOptions,
-    stream_client_cache_options: ClientCacheOptions,
-}
-
-//
-// TODO: Functions in the ShardedRequestTracker should be able to timeout and
-// cancel a reqeust. The request should return an error if it is cancelled.
-//
-impl ShardedRequestTracker {
-    pub fn new() -> Self {
-        //
-        // Default configuration for the client. These could be added to a config file
-        //
-        let tcp_client_cache_options = ClientCacheOptions {
-            max_delay_ms:       0,
-            drop_rate:          0.0,
-            hang_rate:          0.0,
-            connect_timeout:    Duration::from_secs(1),
-            connect_backoff:    Duration::from_millis(100),
-            max_consumers:      8, // Streams per connection
-            error_threshold:    10,
-            max_idle_duration:  Duration::from_secs(5),
-            max_total_connections: 8,
-        };
-        let stream_client_cache_options = ClientCacheOptions {
-            max_delay_ms:       0,
-            drop_rate:          0.0,
-            hang_rate:          0.0,
-            connect_timeout:    Duration::from_secs(1),
-            connect_backoff:    Duration::from_millis(100),
-            max_consumers:      64, // Requests per stream
-            error_threshold:    10,
-            max_idle_duration:  Duration::from_secs(5),
-            max_total_connections: 64, // Total allowable number of streams
-        };
-        ShardedRequestTracker {
-            inner: Arc::new(Mutex::new(ShardedRequestTrackerInner {
-                trackers: std::collections::HashMap::new(),
-            })),
-            tcp_client_cache_options,
-            stream_client_cache_options,
-        }
-    }
-
-    pub async fn update_shard_map(&self,
-                            shard_urls: std::collections::HashMap<ShardIndex, String>,
-                            metrics: Option<Arc<PageserverClientAggregateMetrics>>,
-                            tenant_id: String, timeline_id: String, auth_str: Option<&str>) {
-
-
-       let mut trackers = std::collections::HashMap::new();
-        for (shard, endpoint_url) in shard_urls {
-            //
-            // Create a pool of streams for streaming get_page requests
-            //
-            let channel_fact : Arc<dyn PooledItemFactory<Channel> + Send + Sync> = Arc::new(ChannelFactory::new(
-                endpoint_url.clone(),
-                self.tcp_client_cache_options.max_delay_ms,
-                self.tcp_client_cache_options.drop_rate,
-                self.tcp_client_cache_options.hang_rate,
-            ));
-            let new_pool: Arc<ConnectionPool<Channel>>;
-            new_pool = ConnectionPool::new(
-                Arc::clone(&channel_fact),
-                self.tcp_client_cache_options.connect_timeout,
-                self.tcp_client_cache_options.connect_backoff,
-                self.tcp_client_cache_options.max_consumers,
-                self.tcp_client_cache_options.error_threshold,
-                self.tcp_client_cache_options.max_idle_duration,
-                self.tcp_client_cache_options.max_total_connections,
-                metrics.clone(),
-            );
-
-            let auth_interceptor = AuthInterceptor::new(tenant_id.as_str(),
-                                                        timeline_id.as_str(),
-                                                        auth_str);
-
-            let stream_pool = ConnectionPool::<StreamReturner>::new(
-                Arc::new(StreamFactory::new(new_pool.clone(),
-                                            auth_interceptor.clone(), ShardIndex::unsharded())),
-                self.stream_client_cache_options.connect_timeout,
-                self.stream_client_cache_options.connect_backoff,
-                self.stream_client_cache_options.max_consumers,
-                self.stream_client_cache_options.error_threshold,
-                self.stream_client_cache_options.max_idle_duration,
-                self.stream_client_cache_options.max_total_connections,
-                metrics.clone(),
-            );
-
-            //
-            // Create a client pool for unary requests
-            //
-
-            let unary_pool: Arc<ConnectionPool<Channel>>;
-            unary_pool = ConnectionPool::new(
-                Arc::clone(&channel_fact),
-                self.tcp_client_cache_options.connect_timeout,
-                self.tcp_client_cache_options.connect_backoff,
-                self.tcp_client_cache_options.max_consumers,
-                self.tcp_client_cache_options.error_threshold,
-                self.tcp_client_cache_options.max_idle_duration,
-                self.tcp_client_cache_options.max_total_connections,
-                metrics.clone()
-            );
-            //
-            // Create a new RequestTracker for this shard
-            //
-            let new_tracker = RequestTracker::new(stream_pool, unary_pool, auth_interceptor, shard);
-            trackers.insert(shard, new_tracker);
-        }
-        let mut inner = self.inner.lock().await;
-        inner.trackers = trackers;
-    }
-
-    pub async fn get_page(
-        &self,
-        req: GetPageRequest,
-    ) -> Result<GetPageResponse, tonic::Status> {
-
-        // Get shard index from the request
-        let shard_index = ShardIndex::unsharded();
-        let inner = self.inner.lock().await;
-        let mut tracker : RequestTracker;
-        if let Some(t) = inner.trackers.get(&shard_index) {
-            tracker = t.clone();
-        } else {
-            return Err(tonic::Status::not_found(format!("Shard {} not found", shard_index)));
-        }
-        drop(inner);
-        // Call the send_getpage_request method on the tracker
-        let response = tracker.send_getpage_request(req).await;
-        match response {
-            Ok(resp) => Ok(resp),
-            Err(e) => Err(tonic::Status::unknown(format!("Failed to get page: {}", e))),
-        }
-    }
-    pub async fn process_get_dbsize_request(
-        &self,
-        request: GetDbSizeRequest,
-    ) -> Result<u64, tonic::Status> {
-        let shard_index = ShardIndex::unsharded();
-        let inner = self.inner.lock().await;
-        let mut tracker: RequestTracker;
-        if let Some(t) = inner.trackers.get(&shard_index) {
-            tracker = t.clone();
-        } else {
-            return Err(tonic::Status::not_found(format!("Shard {} not found", shard_index)));
-        }
-        drop(inner); // Release the lock before calling send_process_get_dbsize_request
-        // Call the send_process_get_dbsize_request method on the tracker
-        let response = tracker.send_process_get_dbsize_request(request).await;
-        match response {
-            Ok(resp) => Ok(resp),
-            Err(e) => Err(e),
-        }
-    }
-
-    pub async fn process_get_rel_size_request(
-        &self,
-        request: GetRelSizeRequest,
-    ) -> Result<u32, tonic::Status> {
-        let shard_index = ShardIndex::unsharded();
-        let inner = self.inner.lock().await;
-        let mut tracker: RequestTracker;
-        if let Some(t) = inner.trackers.get(&shard_index) {
-            tracker = t.clone();
-        } else {
-            return Err(tonic::Status::not_found(format!("Shard {} not found", shard_index)));
-        }
-        drop(inner); // Release the lock before calling send_process_get_rel_size_request
-        // Call the send_process_get_rel_size_request method on the tracker
-        let response = tracker.send_process_get_rel_size_request(request).await;
-        match response {
-            Ok(resp) => Ok(resp),
-            Err(e) => Err(e),
-        }
-    }
-
-    pub async fn process_check_rel_exists_request(
-        &self,
-        request: CheckRelExistsRequest,
-    ) -> Result<bool, tonic::Status> {
-        let shard_index = ShardIndex::unsharded();
-        let inner = self.inner.lock().await;
-        let mut tracker: RequestTracker;
-        if let Some(t) = inner.trackers.get(&shard_index) {
-            tracker = t.clone();
-        } else {
-            return Err(tonic::Status::not_found(format!("Shard {} not found", shard_index)));
-        }
-        drop(inner); // Release the lock before calling send_process_check_rel_exists_request
-        // Call the send_process_check_rel_exists_request method on the tracker
-        let response = tracker.send_process_check_rel_exists_request(request).await;
-        match response {
-            Ok(resp) => Ok(resp),
-            Err(e) => Err(e),
-        }
-    }
-}
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -195,25 +195,11 @@ impl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
    type Error = ProtocolError;

    fn try_from(pb: proto::GetBaseBackupRequest) -> Result<Self, Self::Error> {
-        // Allow 0 read_lsn for base backups.
-        // TODO: reconsider requiring request_lsn > 0.
-        let zero = proto::ReadLsn {
-            request_lsn: 0,
-            not_modified_since_lsn: 0,
-        };
-        let read_lsn = if pb.read_lsn == Some(zero) || pb.read_lsn.is_none() {
-            ReadLsn {
-                request_lsn: Lsn(0),
-                not_modified_since_lsn: None,
-            }
-        } else {
-            pb.read_lsn
-                .ok_or(ProtocolError::Missing("read_lsn"))?
-                .try_into()?
-        };
-
        Ok(Self {
-            read_lsn,
+            read_lsn: pb
+                .read_lsn
+                .ok_or(ProtocolError::Missing("read_lsn"))?
+                .try_into()?,
            replica: pb.replica,
        })
    }
@@ -501,7 +487,6 @@ impl From<GetPageStatusCode> for i32 {

 // Fetches the size of a relation at a given LSN, as # of blocks. Only valid on shard 0, other
 // shards will error.
-#[derive(Clone)]
 pub struct GetRelSizeRequest {
    pub read_lsn: ReadLsn,
    pub rel: RelTag,
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -24,13 +24,9 @@ tracing.workspace = true
 tokio.workspace = true
 tokio-stream.workspace = true
 tokio-util.workspace = true
-axum.workspace = true
-http.workspace = true
-metrics.workspace = true
 tonic.workspace = true

 pageserver_client.workspace = true
-pageserver_client_grpc.workspace = true
 pageserver_api.workspace = true
 pageserver_page_api.workspace = true
 utils = { path = "../../libs/utils/" }
--- a/pageserver/pagebench/src/cmd/basebackup.rs
+++ b/pageserver/pagebench/src/cmd/basebackup.rs
@@ -9,15 +9,12 @@ use anyhow::Context;
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
 use pageserver_client::page_service::BasebackupRequest;
-use pageserver_page_api::{GetBaseBackupRequest, ReadLsn};
-
 use rand::prelude::*;
 use tokio::sync::Barrier;
 use tokio::task::JoinSet;
 use tracing::{info, instrument};
 use utils::id::TenantTimelineId;
 use utils::lsn::Lsn;
-use utils::shard::ShardIndex;

 use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
 use crate::util::{request_stats, tokio_thread_local_stats};
@@ -25,8 +22,6 @@ use crate::util::{request_stats, tokio_thread_local_stats};
 /// basebackup@LatestLSN
 #[derive(clap::Parser)]
 pub(crate) struct Args {
-    #[clap(long, default_value = "false")]
-    grpc: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
@@ -57,7 +52,7 @@ impl LiveStats {

 struct Target {
    timeline: TenantTimelineId,
-    lsn_range: Range<Lsn>,
+    lsn_range: Option<Range<Lsn>>,
 }

 #[derive(serde::Serialize)]
@@ -110,7 +105,7 @@ async fn main_impl(
                anyhow::Ok(Target {
                    timeline,
                    // TODO: support lsn_range != latest LSN
-                    lsn_range: info.last_record_lsn..(info.last_record_lsn + 1),
+                    lsn_range: Some(info.last_record_lsn..(info.last_record_lsn + 1)),
                })
            }
        });
@@ -154,27 +149,14 @@ async fn main_impl(
    for tl in &timelines {
        let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
        work_senders.insert(tl, sender);
-
-        let client_task = if args.grpc {
-            tokio::spawn(client_grpc(
-                args,
-                *tl,
-                Arc::clone(&start_work_barrier),
-                receiver,
-                Arc::clone(&all_work_done_barrier),
-                Arc::clone(&live_stats),
-            ))
-        } else {
-            tokio::spawn(client(
-                args,
-                *tl,
-                Arc::clone(&start_work_barrier),
-                receiver,
-                Arc::clone(&all_work_done_barrier),
-                Arc::clone(&live_stats),
-            ))
-        };
-        tasks.push(client_task);
+        tasks.push(tokio::spawn(client(
+            args,
+            *tl,
+            Arc::clone(&start_work_barrier),
+            receiver,
+            Arc::clone(&all_work_done_barrier),
+            Arc::clone(&live_stats),
+        )));
    }

    let work_sender = async move {
@@ -183,7 +165,7 @@ async fn main_impl(
            let (timeline, work) = {
                let mut rng = rand::thread_rng();
                let target = all_targets.choose(&mut rng).unwrap();
-                let lsn = rng.gen_range(target.lsn_range.clone());
+                let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
                (
                    target.timeline,
                    Work {
@@ -233,7 +215,7 @@ async fn main_impl(

 #[derive(Copy, Clone)]
 struct Work {
-    lsn: Lsn,
+    lsn: Option<Lsn>,
    gzip: bool,
 }

@@ -258,7 +240,7 @@ async fn client(
            .basebackup(&BasebackupRequest {
                tenant_id: timeline.tenant_id,
                timeline_id: timeline.timeline_id,
-                lsn: Some(lsn),
+                lsn,
                gzip,
            })
            .await
@@ -288,74 +270,3 @@ async fn client(

    all_work_done_barrier.wait().await;
 }
-
-#[instrument(skip_all)]
-async fn client_grpc(
-    args: &'static Args,
-    timeline: TenantTimelineId,
-    start_work_barrier: Arc<Barrier>,
-    mut work: tokio::sync::mpsc::Receiver<Work>,
-    all_work_done_barrier: Arc<Barrier>,
-    live_stats: Arc<LiveStats>,
-) {
-    let shard_map = HashMap::from([(
-        ShardIndex::unsharded(),
-        args.page_service_connstring.clone(),
-    )]);
-    let client = pageserver_client_grpc::PageserverClient::new(
-        &timeline.tenant_id.to_string(),
-        &timeline.timeline_id.to_string(),
-        &None,
-        shard_map,
-    );
-
-    start_work_barrier.wait().await;
-
-    while let Some(Work { lsn, gzip }) = work.recv().await {
-        let start = Instant::now();
-
-        //tokio::time::sleep(std::time::Duration::from_secs(1)).await;
-
-        info!("starting get_base_backup");
-        let mut basebackup_stream = client
-            .get_base_backup(
-                GetBaseBackupRequest {
-                    read_lsn: ReadLsn {
-                        request_lsn: lsn,
-                        not_modified_since_lsn: Some(lsn),
-                    },
-                    replica: false,
-                },
-                gzip,
-            )
-            .await
-            .with_context(|| format!("start basebackup for {timeline}"))
-            .unwrap()
-            .into_inner();
-
-        info!("starting receive");
-        use futures::StreamExt;
-        let mut size = 0;
-        let mut nchunks = 0;
-        while let Some(chunk) = basebackup_stream.next().await {
-            let chunk = chunk
-                .with_context(|| format!("error during basebackup"))
-                .unwrap();
-            size += chunk.chunk.len();
-            nchunks += 1;
-        }
-
-        info!(
-            "basebackup size is {} bytes, avg chunk size {} bytes",
-            size,
-            size as f32 / nchunks as f32
-        );
-        let elapsed = start.elapsed();
-        live_stats.inc();
-        STATS.with(|stats| {
-            stats.borrow().lock().unwrap().observe(elapsed).unwrap();
-        });
-    }
-
-    all_work_done_barrier.wait().await;
-}
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -1,11 +1,10 @@
-use std::collections::{HashSet, HashMap, VecDeque};
+use std::collections::{HashMap, HashSet, VecDeque};
 use std::future::Future;
 use std::num::NonZeroUsize;
 use std::pin::Pin;
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
-use std::io::Error;

 use anyhow::Context;
 use async_trait::async_trait;
@@ -24,20 +23,6 @@ use tracing::info;
 use utils::id::TenantTimelineId;
 use utils::lsn::Lsn;

-use tonic::transport::Channel;
-
-use axum::Router;
-use axum::body::Body;
-use axum::extract::State;
-use axum::response::Response;
-
-use http::StatusCode;
-use http::header::CONTENT_TYPE;
-
-use metrics;
-use metrics::proto::MetricFamily;
-use metrics::{Encoder, TextEncoder};
-
 use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
 use crate::util::{request_stats, tokio_thread_local_stats};

@@ -50,10 +35,6 @@ enum Protocol {
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
-    #[clap(long, default_value = "false")]
-    grpc: bool,
-    #[clap(long, default_value = "false")]
-    grpc_stream: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
@@ -92,9 +73,6 @@ pub(crate) struct Args {
    #[clap(long)]
    set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,

-    #[clap(long)]
-    only_relnode: Option<u32>,
-
    /// Queue depth generated in each client.
    #[clap(long, default_value = "1")]
    queue_depth: NonZeroUsize,
@@ -109,31 +87,10 @@ pub(crate) struct Args {
    #[clap(long, default_value = "1")]
    batch_size: NonZeroUsize,

+    #[clap(long)]
+    only_relnode: Option<u32>,
+
    targets: Option<Vec<TenantTimelineId>>,
-
-    #[clap(long, default_value = "100")]
-    pool_max_consumers: NonZeroUsize,
-
-    #[clap(long, default_value = "5")]
-    pool_error_threshold: NonZeroUsize,
-
-    #[clap(long, default_value = "5000")]
-    pool_connect_timeout: NonZeroUsize,
-
-    #[clap(long, default_value = "1000")]
-    pool_connect_backoff: NonZeroUsize,
-
-    #[clap(long, default_value = "60000")]
-    pool_max_idle_duration: NonZeroUsize,
-
-    #[clap(long, default_value = "0")]
-    max_delay_ms: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_drops: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_hangs: usize,
 }

 /// State shared by all clients
@@ -190,37 +147,6 @@ pub(crate) fn main(args: Args) -> anyhow::Result<()> {
        main_impl(args, thread_local_stats)
    })
 }
-async fn get_metrics(
-    State(state): State<Arc<pageserver_client_grpc::PageserverClientAggregateMetrics>>,
-) -> Response {
-    let metrics = state.collect();
-
-    info!("metrics: {metrics:?}");
-    // When we call TextEncoder::encode() below, it will immediately return an
-    // error if a metric family has no metrics, so we need to preemptively
-    // filter out metric families with no metrics.
-    let metrics = metrics
-        .into_iter()
-        .filter(|m| !m.get_metric().is_empty())
-        .collect::<Vec<MetricFamily>>();
-
-    let encoder = TextEncoder::new();
-    let mut buffer = vec![];
-
-    if let Err(e) = encoder.encode(&metrics, &mut buffer) {
-        Response::builder()
-            .status(StatusCode::INTERNAL_SERVER_ERROR)
-            .header(CONTENT_TYPE, "application/text")
-            .body(Body::from(e.to_string()))
-            .unwrap()
-    } else {
-        Response::builder()
-            .status(StatusCode::OK)
-            .header(CONTENT_TYPE, encoder.format_type())
-            .body(Body::from(buffer))
-            .unwrap()
-    }
-}

 async fn main_impl(
    args: Args,
@@ -228,24 +154,6 @@ async fn main_impl(
 ) -> anyhow::Result<()> {
    let args: &'static Args = Box::leak(Box::new(args));

-    // Vector of pageserver clients
-    let client_metrics = Arc::new(pageserver_client_grpc::PageserverClientAggregateMetrics::new());
-
-    use axum::routing::get;
-    let app = Router::new()
-        .route("/metrics", get(get_metrics))
-        .with_state(client_metrics.clone());
-
-    // TODO: make configurable. Or listen on unix domain socket?
-    let listener = tokio::net::TcpListener::bind("127.0.0.1:9090")
-        .await
-        .unwrap();
-
-    tokio::spawn(async {
-        tracing::info!("metrics listener spawned");
-        axum::serve(listener, app).await.unwrap()
-    });
-
    let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
        reqwest::Client::new(), // TODO: support ssl_ca_file for https APIs in pagebench.
        args.mgmt_api_endpoint.clone(),
@@ -404,7 +312,6 @@ async fn main_impl(
    let rps_period = args
        .per_client_rate
        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
-
    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
        let ss = shared_state.clone();
        let cancel = cancel.clone();
@@ -430,7 +337,6 @@ async fn main_impl(
                        .await
                        .unwrap(),
                ),
-
            };
            run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
        })
@@ -698,7 +604,6 @@ impl Client for LibpqClient {
 struct GrpcClient {
    req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
    resp_rx: tonic::Streaming<proto::GetPageResponse>,
-    start_times: Vec<Instant>,
 }

 impl GrpcClient {
@@ -722,7 +627,6 @@ impl GrpcClient {
        Ok(Self {
            req_tx,
            resp_rx: resp_stream,
-            start_times: Vec::new(),
        })
    }
 }
@@ -747,7 +651,6 @@ impl Client for GrpcClient {
            rel: Some(rel.into()),
            block_number: blks,
        };
-        self.start_times.push(Instant::now());
        self.req_tx.send(req).await?;
        Ok(())
    }
@@ -762,4 +665,3 @@ impl Client for GrpcClient {
        Ok((resp.request_id, resp.page_image))
    }
 }
-
--- a/pageserver/src/controller_upcall_client.rs
+++ b/pageserver/src/controller_upcall_client.rs
@@ -195,8 +195,6 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
                        node_id: conf.id,
                        listen_pg_addr: m.postgres_host,
                        listen_pg_port: m.postgres_port,
-                        listen_grpc_addr: m.grpc_host,
-                        listen_grpc_port: m.grpc_port,
                        listen_http_addr: m.http_host,
                        listen_http_port: m.http_port,
                        listen_https_port: m.https_port,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -14,7 +14,7 @@ use std::{io, str};

 use anyhow::{Context as _, anyhow, bail};
 use async_compression::tokio::write::GzipEncoder;
-use bytes::{Buf, BufMut as _, BytesMut};
+use bytes::{Buf, BytesMut};
 use futures::future::BoxFuture;
 use futures::{FutureExt, Stream};
 use itertools::Itertools;
@@ -3610,24 +3610,20 @@ impl proto::PageService for GrpcPageServiceHandler {

        span_record!(lsn=%req.read_lsn);

-        let mut lsn = None;
-        if req.read_lsn.request_lsn > Lsn(0) {
-            lsn = Some(req.read_lsn.request_lsn);
-            let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
-            timeline
-                .wait_lsn(
-                    req.read_lsn.request_lsn,
-                    WaitLsnWaiter::PageService,
-                    WaitLsnTimeout::Default,
-                    &ctx,
-                )
-                .await?;
-            timeline
-                .check_lsn_is_in_scope(req.read_lsn.request_lsn, &latest_gc_cutoff_lsn)
-                .map_err(|err| {
-                    tonic::Status::invalid_argument(format!("invalid basebackup LSN: {err}"))
-                })?;
-        }
+        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
+        timeline
+            .wait_lsn(
+                req.read_lsn.request_lsn,
+                WaitLsnWaiter::PageService,
+                WaitLsnTimeout::Default,
+                &ctx,
+            )
+            .await?;
+        timeline
+            .check_lsn_is_in_scope(req.read_lsn.request_lsn, &latest_gc_cutoff_lsn)
+            .map_err(|err| {
+                tonic::Status::invalid_argument(format!("invalid basebackup LSN: {err}"))
+            })?;

        // Spawn a task to run the basebackup.
        //
@@ -3638,7 +3634,7 @@ impl proto::PageService for GrpcPageServiceHandler {
            let result = basebackup::send_basebackup_tarball(
                &mut simplex_write,
                &timeline,
-                lsn,
+                Some(req.read_lsn.request_lsn),
                None,
                false,
                req.replica,
@@ -3654,21 +3650,20 @@ impl proto::PageService for GrpcPageServiceHandler {

        // Emit chunks of size CHUNK_SIZE.
        let chunks = async_stream::try_stream! {
+            let mut chunk = BytesMut::with_capacity(CHUNK_SIZE);
            loop {
-                let mut chunk = BytesMut::with_capacity(CHUNK_SIZE).limit(CHUNK_SIZE);
-                let mut n = 1;
-                while n != 0 {
-                    n = simplex_read.read_buf(&mut chunk).await.map_err(|err| {
-                        tonic::Status::internal(format!("failed to read basebackup chunk: {err}"))
-                    })?;
-                }
-                let chunk = chunk.into_inner();
+                let n = simplex_read.read_buf(&mut chunk).await.map_err(|err| {
+                    tonic::Status::internal(format!("failed to read basebackup chunk: {err}"))
+                })?;

                // If we read 0 bytes, either the chunk is full or the stream is closed.
-                if chunk.is_empty() {
-                    break;
+                if n == 0 {
+                    if chunk.is_empty() {
+                        break;
+                    }
+                    yield proto::GetBaseBackupResponseChunk::from(chunk.clone().freeze());
+                    chunk.clear();
                }
-                yield proto::GetBaseBackupResponseChunk::from(chunk.freeze());
            }
            // Wait for the basebackup task to exit and check for errors.
            jh.await.map_err(|err| {
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -4,7 +4,6 @@ MODULE_big = neon
 OBJS = \
 	$(WIN32RES) \
 	communicator.o \
-	communicator_new.o \
 	extension_server.o \
 	file_cache.o \
 	hll.o \
--- a/pgxn/neon/communicator/Cargo.lock
+++ b/pgxn/neon/communicator/Cargo.lock
@@ -1,372 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "addr2line"
-version = "0.24.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
-dependencies = [
- "gimli",
-]
-
-[[package]]
-name = "adler2"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
-
-[[package]]
-name = "backtrace"
-version = "0.3.74"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
-dependencies = [
- "addr2line",
- "cfg-if",
- "libc",
- "miniz_oxide",
- "object",
- "rustc-demangle",
- "windows-targets",
-]
-
-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
-[[package]]
-name = "bytes"
-version = "1.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "communicator"
-version = "0.1.0"
-dependencies = [
- "tonic",
-]
-
-[[package]]
-name = "fnv"
-version = "1.0.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
-
-[[package]]
-name = "futures-core"
-version = "0.3.31"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
-
-[[package]]
-name = "gimli"
-version = "0.31.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
-
-[[package]]
-name = "http"
-version = "1.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
-dependencies = [
- "bytes",
- "fnv",
- "itoa",
-]
-
-[[package]]
-name = "http-body"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
-dependencies = [
- "bytes",
- "http",
-]
-
-[[package]]
-name = "http-body-util"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
-dependencies = [
- "bytes",
- "futures-core",
- "http",
- "http-body",
- "pin-project-lite",
-]
-
-[[package]]
-name = "itoa"
-version = "1.0.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
-
-[[package]]
-name = "libc"
-version = "0.2.171"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
-
-[[package]]
-name = "memchr"
-version = "2.7.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
-
-[[package]]
-name = "miniz_oxide"
-version = "0.8.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff70ce3e48ae43fa075863cef62e8b43b71a4f2382229920e0df362592919430"
-dependencies = [
- "adler2",
-]
-
-[[package]]
-name = "object"
-version = "0.36.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
-dependencies = [
- "memchr",
-]
-
-[[package]]
-name = "once_cell"
-version = "1.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
-
-[[package]]
-name = "percent-encoding"
-version = "2.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
-
-[[package]]
-name = "pin-project"
-version = "1.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
-dependencies = [
- "pin-project-internal",
-]
-
-[[package]]
-name = "pin-project-internal"
-version = "1.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "pin-project-lite"
-version = "0.2.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.94"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.40"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "rustc-demangle"
-version = "0.1.24"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
-
-[[package]]
-name = "syn"
-version = "2.0.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "tokio"
-version = "1.44.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
-dependencies = [
- "backtrace",
- "pin-project-lite",
-]
-
-[[package]]
-name = "tokio-stream"
-version = "0.1.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047"
-dependencies = [
- "futures-core",
- "pin-project-lite",
- "tokio",
-]
-
-[[package]]
-name = "tonic"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85839f0b32fd242bb3209262371d07feda6d780d16ee9d2bc88581b89da1549b"
-dependencies = [
- "base64",
- "bytes",
- "http",
- "http-body",
- "http-body-util",
- "percent-encoding",
- "pin-project",
- "tokio-stream",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
-[[package]]
-name = "tower-layer"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
-
-[[package]]
-name = "tower-service"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
-
-[[package]]
-name = "tracing"
-version = "0.1.41"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
-dependencies = [
- "pin-project-lite",
- "tracing-attributes",
- "tracing-core",
-]
-
-[[package]]
-name = "tracing-attributes"
-version = "0.1.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "tracing-core"
-version = "0.1.33"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
-dependencies = [
- "once_cell",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
-
-[[package]]
-name = "windows-targets"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
-dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
-
-[[package]]
-name = "windows_i686_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
-
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
-
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
-
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
--- a/pgxn/neon/communicator/Cargo.toml
+++ b/pgxn/neon/communicator/Cargo.toml
@@ -3,37 +3,11 @@ name = "communicator"
 version = "0.1.0"
 edition = "2024"

-[features]
-testing = []
-
 [lib]
 crate-type = ["staticlib"]

 [dependencies]
-axum.workspace = true
-bytes.workspace = true
-clashmap.workspace = true
-http.workspace = true
-libc.workspace = true
-nix.workspace = true
-atomic_enum = "0.3.0"
-prometheus.workspace = true
-prost.workspace = true
-tonic = { version = "0.12.0", default-features = false, features=["codegen", "prost", "transport"] }
-tokio = { version = "1.43.1", features = ["macros", "net", "io-util", "rt", "rt-multi-thread"] }
-tokio-pipe = { version = "0.2.12" }
-thiserror.workspace = true
-tracing.workspace = true
-tracing-subscriber.workspace = true
-
-metrics.workspace = true
-uring-common = { workspace = true, features = ["bytes"] }
-
-pageserver_client_grpc.workspace = true
-pageserver_page_api.workspace = true
-
 neon-shmem.workspace = true
-utils.workspace = true

 [build-dependencies]
 cbindgen.workspace = true
--- a/pgxn/neon/communicator/README.md
+++ b/pgxn/neon/communicator/README.md
@@ -1,123 +1,8 @@
-# Communicator
-
-This package provides the so-called "compute-pageserver communicator",
-or just "communicator" in short. It runs in a PostgreSQL server, as
-part of the neon extension, and handles the communication with the
-pageservers. On the PostgreSQL side, the glue code in pgxn/neon/ uses
-the communicator to implement the PostgreSQL Storage Manager (SMGR)
-interface.
-
-## Design criteria
-
- Low latency
- Saturate a 10 Gbit / s network interface without becoming a bottleneck
-
-## Source code view
-
-pgxn/neon/communicator_new.c
-	Contains the glue that interact with PostgreSQL code and the Rust
-	communicator code.
-
-pgxn/neon/communicator/src/backend_interface.rs
-	The entry point for calls from each backend.
-
-pgxn/neon/communicator/src/init.rs
-	Initialization at server startup
-
-pgxn/neon/communicator/src/worker_process/
-    Worker process main loop and glue code
+This package will evolve into the "compute-pageserver communicator"
+process and its machinery. For now, it only provides wrappers around
+the neon-shmem Rust crate, so that the crate can be used from the C
+implementation of the Local File Cache (LFC).

 At compilation time, pgxn/neon/communicator/ produces a static
 library, libcommunicator.a. It is linked to the neon.so extension
-library.
-
-The real networking code, which is independent of PostgreSQL, is in
-the pageserver/client_grpc crate.
-
-## Process view
-
-The communicator runs in a dedicated background worker process, the
-"communicator process". The communicator uses a multi-threaded Tokio
-runtime to execute the IO requests. So the communicator process has
-multiple threads running. That's unusual for Postgres processes and
-care must be taken to make that work.
-
-### Backend <-> worker communication
-
-Each backend has a number of I/O request slots in shared memory. The
-slots are statically allocated for each backend, and must not be
-accessed by other backends. The worker process reads requests from the
-shared memory slots, and writes responses back to the slots.
-
-To submit an IO request, first pick one of your backend's free slots,
-and write the details of the IO request in the slot. Finally, update
-the 'state' field of the slot to Submitted. That informs the worker
-process that it can start processing the request. Once the state has
-been set to Submitted, the backend *must not* access the slot anymore,
-until the worker process sets its state to 'Completed'. In other
-words, each slot is owned by either the backend or the worker process
-at all times, and the 'state' field indicates who has ownership at the
-moment.
-
-To inform the worker process that a request slot has a pending IO
-request, there's a pipe shared by the worker process and all backend
-processes. After you have changed the slot's state to Submitted, write
-the index of the request slot to the pipe. This wakes up the worker
-process.
-
-(Note that the pipe is just used for wakeups, but the worker process
-is free to pick up Submitted IO requests even without receiving the
-wakeup. As of this writing, it doesn't do that, but it might be useful
-in the future to reduce latency even further, for example.)
-
-When the worker process has completed processing the request, it
-writes the result back in the request slot. A GetPage request can also
-contain a pointer to buffer in the shared buffer cache. In that case,
-the worker process writes the resulting page contents directly to the
-buffer, and just a result code in the request slot. It then updates
-the 'state' field to Completed, which passes the owner ship back to
-the originating backend. Finally, it signals the process Latch of the
-originating backend, waking it up.
-
-### Differences between PostgreSQL v16, v17 and v18
-
-PostgreSQL v18 introduced the new AIO mechanism. The PostgreSQL AIO
-mechanism uses a very similar mechanism as described in the previous
-section, for the communication between AIO worker processes and
-backends. With our communicator, the AIO worker processes are not
-used, but we use the same PgAioHandle request slots as in upstream.
-For Neon-specific IO requests like GetDbSize, a neon request slot is
-used. But for the actual IO requests, the request slot merely contains
-a pointer to the PgAioHandle slot. The worker process updates the
-status of that, calls the IO callbacks upon completionetc, just like
-the upstream AIO worker processes do.
-
-## Sequence diagram
-
-                      neon
-    PostgreSQL     extension       backend_interface.rs  worker_process.rs    processor    tonic
-       |               .                    .                   .                 .
-	   | smgr_read()   .                    .                   .                 .
-	   +-------------> +                    .                   .                 .
-	   .               |                    .                   .                 .
-	   .               |  rcommunicator_    .                   .                 .
-	   .               | get_page_at_lsn    .                   .                 .
-	   .               +------------------> +                   .                 .
-                                            |                   .                 .
-                                            | write request to  .                 .                 .
-                                            | slot              .                 .
-                                            |                   .                 .
-                                            |                   .                 .
-											| submit_request()  .                 .
-											+-----------------> +                 .
-											|                   |                 .
-											|					| db_size_request .               .
-																+---------------->.
-																                  . TODO
-
-
-
-### Compute <-> pageserver protocol
-
-The protocol between Compute and the pageserver is based on gRPC. See `protos/`.
-
+library.
--- a/pgxn/neon/communicator/src/backend_comms.rs
+++ b/pgxn/neon/communicator/src/backend_comms.rs
@@ -1,204 +0,0 @@
-//! This module implements a request/response "slot" for submitting requests from backends
-//! to the communicator process.
-//!
-//! NB: The "backend" side of this code runs in Postgres backend processes,
-//! which means that it is not safe to use the 'tracing' crate for logging, nor
-//! to launch threads or use tokio tasks.
-use std::cell::UnsafeCell;
-use std::sync::atomic::fence;
-use std::sync::atomic::{AtomicI32, Ordering};
-
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-
-use atomic_enum::atomic_enum;
-
-/// One request/response slot. Each backend has its own set of slots that it uses.
-///
-/// This is the moral equivalent of PgAioHandle for Postgres AIO requests
-/// Like PgAioHandle, try to keep this small.
-///
-/// There is an array of these in shared memory. Therefore, this must be Sized.
-///
-/// ## Lifecycle of a request
-///
-/// The slot is always owned by either the backend process or the communicator
-/// process, depending on the 'state'. Only the owning process is allowed to
-/// read or modify the slot, except for reading the 'state' itself to check who
-/// owns it.
-///
-/// A slot begins in the Idle state, where it is owned by the backend process.
-/// To submit a request, the backend process fills the slot with the request
-/// data, and changes it to the Submitted state. After changing the state, the
-/// slot is owned by the communicator process, and the backend is not allowed
-/// to access it until the communicator process marks it as Completed.
-///
-/// When the communicator process sees that the slot is in Submitted state, it
-/// starts to process the request. After processing the request, it stores the
-/// result in the slot, and changes the state to Completed. It is now owned by
-/// the backend process again, which may now read the result, and reuse the
-/// slot for a new request.
-///
-/// For correctness of the above protocol, we really only need two states:
-/// "owned by backend" and "owned by communicator process. But to help with
-/// debugging, there are a few more states. When the backend starts to fill in
-/// the request details in the slot, it first sets the state from Idle to
-/// Filling, and when it's done with that, from Filling to Submitted. In the
-/// Filling state, the slot is still owned by the backend. Similarly, when the
-/// communicator process starts to process a request, it sets it to Processing
-/// state first, but the slot is still owned by the communicator process.
-///
-/// This struct doesn't handle waking up the communicator process when a request
-/// has been submitted or when a response is ready. We only store the 'owner_procno'
-/// which can be used for waking up the backend on completion, but the wakeups are
-/// performed elsewhere.
-pub struct NeonIOHandle {
-    /// similar to PgAioHandleState
-    state: AtomicNeonIOHandleState,
-
-    /// The owning process's ProcNumber. The worker process uses this to set the process's
-    /// latch on completion.
-    ///
-    /// (This could be calculated from num_neon_request_slots_per_backend and the index of
-    /// this slot in the overall 'neon_requst_slots array')
-    owner_procno: AtomicI32,
-
-    /// SAFETY: This is modified by fill_request(), after it has established ownership
-    /// of the slot by setting state from Idle to Filling
-    request: UnsafeCell<NeonIORequest>,
-
-    /// valid when state is Completed
-    ///
-    /// SAFETY: This is modified by RequestProcessingGuard::complete(). There can be
-    /// only one RequestProcessingGuard outstanding for a slot at a time, because
-    /// it is returned by start_processing_request() which checks the state, so
-    /// RequestProcessingGuard has exclusive access to the slot.
-    result: UnsafeCell<NeonIOResult>,
-}
-
-// The protocol described in the "Lifecycle of a request" section above ensures
-// the safe access to the fields
-unsafe impl Send for NeonIOHandle {}
-unsafe impl Sync for NeonIOHandle {}
-
-impl Default for NeonIOHandle {
-    fn default() -> NeonIOHandle {
-        NeonIOHandle {
-            owner_procno: AtomicI32::new(-1),
-            request: UnsafeCell::new(NeonIORequest::Empty),
-            result: UnsafeCell::new(NeonIOResult::Empty),
-            state: AtomicNeonIOHandleState::new(NeonIOHandleState::Idle),
-        }
-    }
-}
-
-#[atomic_enum]
-#[derive(Eq, PartialEq)]
-pub enum NeonIOHandleState {
-    Idle,
-
-    /// backend is filling in the request
-    Filling,
-
-    /// Backend has submitted the request to the communicator, but the
-    /// communicator process has not yet started processing it.
-    Submitted,
-
-    /// Communicator is processing the request
-    Processing,
-
-    /// Communicator has completed the request, and the 'result' field is now
-    /// valid, but the backend has not read the result yet.
-    Completed,
-}
-
-pub struct RequestProcessingGuard<'a>(&'a NeonIOHandle);
-
-unsafe impl<'a> Send for RequestProcessingGuard<'a> {}
-unsafe impl<'a> Sync for RequestProcessingGuard<'a> {}
-
-impl<'a> RequestProcessingGuard<'a> {
-    pub fn get_request(&self) -> &NeonIORequest {
-        unsafe { &*self.0.request.get() }
-    }
-
-    pub fn get_owner_procno(&self) -> i32 {
-        self.0.owner_procno.load(Ordering::Relaxed)
-    }
-
-    pub fn completed(self, result: NeonIOResult) {
-        unsafe {
-            *self.0.result.get() = result;
-        };
-
-        // Ok, we have completed the IO. Mark the request as completed. After that,
-        // we no longer have ownership of the slot, and must not modify it.
-        let old_state = self
-            .0
-            .state
-            .swap(NeonIOHandleState::Completed, Ordering::Release);
-        assert!(old_state == NeonIOHandleState::Processing);
-    }
-}
-
-impl NeonIOHandle {
-    pub fn fill_request(&self, request: &NeonIORequest, proc_number: i32) {
-        // Verify that the slot is in Idle state previously, and start filling it.
-        //
-        // XXX: This step isn't strictly necessary. Assuming the caller didn't screw up
-        // and try to use a slot that's already in use, we could fill the slot and
-        // switch it directly from Idle to Submitted state.
-        if let Err(s) = self.state.compare_exchange(
-            NeonIOHandleState::Idle,
-            NeonIOHandleState::Filling,
-            Ordering::Relaxed,
-            Ordering::Relaxed,
-        ) {
-            panic!("unexpected state in request slot: {s:?}");
-        }
-
-        // This fence synchronizes-with store/swap in `communicator_process_main_loop`.
-        fence(Ordering::Acquire);
-
-        self.owner_procno.store(proc_number, Ordering::Relaxed);
-        unsafe { *self.request.get() = *request }
-        self.state
-            .store(NeonIOHandleState::Submitted, Ordering::Release);
-    }
-
-    pub fn try_get_result(&self) -> Option<NeonIOResult> {
-        // FIXME: ordering?
-        let state = self.state.load(Ordering::Relaxed);
-        if state == NeonIOHandleState::Completed {
-            // This fence synchronizes-with store/swap in `communicator_process_main_loop`.
-            fence(Ordering::Acquire);
-            let result = unsafe { *self.result.get() };
-            self.state.store(NeonIOHandleState::Idle, Ordering::Relaxed);
-            Some(result)
-        } else {
-            None
-        }
-    }
-
-    pub fn start_processing_request<'a>(&'a self) -> Option<RequestProcessingGuard<'a>> {
-        // Read the IO request from the slot indicated in the wakeup
-        //
-        // XXX: using compare_exchange for this is not strictly necessary, as long as
-        // the communicator process has _some_ means of tracking which requests it's
-        // already processing. That could be a flag somewhere in communicator's private
-        // memory, for example.
-        if let Err(s) = self.state.compare_exchange(
-            NeonIOHandleState::Submitted,
-            NeonIOHandleState::Processing,
-            Ordering::Relaxed,
-            Ordering::Relaxed,
-        ) {
-            // FIXME surprising state. This is unexpected at the moment, but if we
-            // started to process requests more aggressively, without waiting for the
-            // read from the pipe, then this could happen
-            panic!("unexpected state in request slot: {s:?}");
-        }
-        fence(Ordering::Acquire);
-
-        Some(RequestProcessingGuard(self))
-    }
-}
--- a/pgxn/neon/communicator/src/backend_interface.rs
+++ b/pgxn/neon/communicator/src/backend_interface.rs
@@ -1,199 +0,0 @@
-//! This code runs in each backend process. That means that launching Rust threads, panicking
-//! etc. is forbidden!
-
-use std::os::fd::OwnedFd;
-
-use crate::backend_comms::NeonIOHandle;
-use crate::init::CommunicatorInitStruct;
-use crate::integrated_cache::{BackendCacheReadOp, IntegratedCacheReadAccess};
-use crate::neon_request::CCachedGetPageVResult;
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-
-pub struct CommunicatorBackendStruct<'t> {
-    my_proc_number: i32,
-
-    next_neon_request_idx: u32,
-
-    my_start_idx: u32, // First request slot that belongs to this backend
-    my_end_idx: u32,   // end + 1 request slot that belongs to this backend
-
-    neon_request_slots: &'t [NeonIOHandle],
-
-    submission_pipe_write_fd: OwnedFd,
-
-    pending_cache_read_op: Option<BackendCacheReadOp<'t>>,
-
-    integrated_cache: &'t IntegratedCacheReadAccess<'t>,
-}
-
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_backend_init(
-    cis: Box<CommunicatorInitStruct>,
-    my_proc_number: i32,
-) -> &'static mut CommunicatorBackendStruct<'static> {
-    let start_idx = my_proc_number as u32 * cis.num_neon_request_slots_per_backend;
-    let end_idx = start_idx + cis.num_neon_request_slots_per_backend;
-
-    let integrated_cache = Box::leak(Box::new(cis.integrated_cache_init_struct.backend_init()));
-
-    let bs: &'static mut CommunicatorBackendStruct =
-        Box::leak(Box::new(CommunicatorBackendStruct {
-            my_proc_number,
-            next_neon_request_idx: start_idx,
-            my_start_idx: start_idx,
-            my_end_idx: end_idx,
-            neon_request_slots: cis.neon_request_slots,
-
-            submission_pipe_write_fd: cis.submission_pipe_write_fd,
-            pending_cache_read_op: None,
-
-            integrated_cache,
-        }));
-    bs
-}
-
-/// Start a request. You can poll for its completion and get the result by
-/// calling bcomm_poll_dbsize_request_completion(). The communicator will wake
-/// us up by setting our process latch, so to wait for the completion, wait on
-/// the latch and call bcomm_poll_dbsize_request_completion() every time the
-/// latch is set.
-///
-/// Safety: The C caller must ensure that the references are valid.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_start_io_request<'t>(
-    bs: &'t mut CommunicatorBackendStruct,
-    request: &NeonIORequest,
-    immediate_result_ptr: &mut NeonIOResult,
-) -> i32 {
-    assert!(bs.pending_cache_read_op.is_none());
-
-    // Check if the request can be satisfied from the cache first
-    if let NeonIORequest::RelSize(req) = request {
-        if let Some(nblocks) = bs.integrated_cache.get_rel_size(&req.reltag()) {
-            *immediate_result_ptr = NeonIOResult::RelSize(nblocks);
-            return -1;
-        }
-    }
-
-    // Create neon request and submit it
-    let request_idx = bs.start_neon_request(request);
-
-    // Tell the communicator about it
-    bs.submit_request(request_idx);
-
-    return request_idx;
-}
-
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_start_get_page_v_request<'t>(
-    bs: &'t mut CommunicatorBackendStruct,
-    request: &NeonIORequest,
-    immediate_result_ptr: &mut CCachedGetPageVResult,
-) -> i32 {
-    let NeonIORequest::GetPageV(get_pagev_request) = request else {
-        panic!("invalid request passed to bcomm_start_get_page_v_request()");
-    };
-    assert!(matches!(request, NeonIORequest::GetPageV(_)));
-    assert!(bs.pending_cache_read_op.is_none());
-
-    // Check if the request can be satisfied from the cache first
-    let mut all_cached = true;
-    let mut read_op = bs.integrated_cache.start_read_op();
-    for i in 0..get_pagev_request.nblocks {
-        if let Some(cache_block) = read_op.get_page(
-            &get_pagev_request.reltag(),
-            get_pagev_request.block_number + i as u32,
-        ) {
-            (*immediate_result_ptr).cache_block_numbers[i as usize] = cache_block;
-        } else {
-            // not found in cache
-            all_cached = false;
-            break;
-        }
-    }
-    if all_cached {
-        bs.pending_cache_read_op = Some(read_op);
-        return -1;
-    }
-
-    // Create neon request and submit it
-    let request_idx = bs.start_neon_request(request);
-
-    // Tell the communicator about it
-    bs.submit_request(request_idx);
-
-    request_idx
-}
-
-/// Check if a request has completed. Returns:
-///
-/// -1 if the request is still being processed
-/// 0 on success
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_poll_request_completion(
-    bs: &mut CommunicatorBackendStruct,
-    request_idx: u32,
-    result_p: &mut NeonIOResult,
-) -> i32 {
-    match bs.neon_request_slots[request_idx as usize].try_get_result() {
-        None => -1, // still processing
-        Some(result) => {
-            *result_p = result;
-            0
-        }
-    }
-}
-
-// LFC functions
-
-/// Finish a local file cache read
-///
-//
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_finish_cache_read(bs: &mut CommunicatorBackendStruct) -> bool {
-    if let Some(op) = bs.pending_cache_read_op.take() {
-        op.finish()
-    } else {
-        panic!("bcomm_finish_cache_read() called with no cached read pending");
-    }
-}
-
-impl<'t> CommunicatorBackendStruct<'t> {
-    /// Send a wakeup to the communicator process
-    fn submit_request(self: &CommunicatorBackendStruct<'t>, request_idx: i32) {
-        // wake up communicator by writing the idx to the submission pipe
-        //
-        // This can block, if the pipe is full. That should be very rare,
-        // because the communicator tries hard to drain the pipe to prevent
-        // that. Also, there's a natural upper bound on how many wakeups can be
-        // queued up: there is only a limited number of request slots for each
-        // backend.
-        //
-        // If it does block very briefly, that's not too serious.
-        let idxbuf = request_idx.to_ne_bytes();
-
-        let _res = nix::unistd::write(&self.submission_pipe_write_fd, &idxbuf);
-        // FIXME: check result, return any errors
-    }
-
-    /// Note: there's no guarantee on when the communicator might pick it up. You should ring
-    /// the doorbell. But it might pick it up immediately.
-    pub(crate) fn start_neon_request(&mut self, request: &NeonIORequest) -> i32 {
-        let my_proc_number = self.my_proc_number;
-
-        // Grab next free slot
-        // FIXME: any guarantee that there will be any?
-        let idx = self.next_neon_request_idx;
-
-        let next_idx = idx + 1;
-        self.next_neon_request_idx = if next_idx == self.my_end_idx {
-            self.my_start_idx
-        } else {
-            next_idx
-        };
-
-        self.neon_request_slots[idx as usize].fill_request(request, my_proc_number);
-
-        return idx as i32;
-    }
-}
--- a/pgxn/neon/communicator/src/file_cache.rs
+++ b/pgxn/neon/communicator/src/file_cache.rs
@@ -1,162 +0,0 @@
-//! Implement the "low-level" parts of the file cache.
-//!
-//! This module just deals with reading and writing the file, and keeping track
-//! which blocks in the cache file are in use and which are free. The "high
-//! level" parts of tracking which block in the cache file corresponds to which
-//! relation block is handled in 'integrated_cache' instead.
-//!
-//! This module is only used to access the file from the communicator
-//! process. The backend processes *also* read the file (and sometimes also
-//! write it? ), but the backends use direct C library calls for that.
-use std::fs::File;
-use std::os::unix::fs::FileExt;
-use std::path::Path;
-use std::sync::Arc;
-use std::sync::Mutex;
-
-use crate::BLCKSZ;
-
-use tokio::task::spawn_blocking;
-
-pub type CacheBlock = u64;
-
-pub const INVALID_CACHE_BLOCK: CacheBlock = u64::MAX;
-
-pub struct FileCache {
-    file: Arc<File>,
-
-    free_list: Mutex<FreeList>,
-
-    // metrics
-    max_blocks_gauge: metrics::IntGauge,
-    num_free_blocks_gauge: metrics::IntGauge,
-}
-
-// TODO: We keep track of all free blocks in this vec. That doesn't really scale.
-// Idea: when free_blocks fills up with more than 1024 entries, write them all to
-// one block on disk.
-struct FreeList {
-    next_free_block: CacheBlock,
-    max_blocks: u64,
-
-    free_blocks: Vec<CacheBlock>,
-}
-
-impl FileCache {
-    pub fn new(file_cache_path: &Path, mut initial_size: u64) -> Result<FileCache, std::io::Error> {
-        if initial_size < 100 {
-            tracing::warn!(
-                "min size for file cache is 100 blocks, {} requested",
-                initial_size
-            );
-            initial_size = 100;
-        }
-
-        let file = std::fs::OpenOptions::new()
-            .read(true)
-            .write(true)
-            .truncate(true)
-            .create(true)
-            .open(file_cache_path)?;
-
-        let max_blocks_gauge = metrics::IntGauge::new(
-            "file_cache_max_blocks",
-            "Local File Cache size in 8KiB blocks",
-        )
-        .unwrap();
-        let num_free_blocks_gauge = metrics::IntGauge::new(
-            "file_cache_num_free_blocks",
-            "Number of free 8KiB blocks in Local File Cache",
-        )
-        .unwrap();
-
-        tracing::info!("initialized file cache with {} blocks", initial_size);
-
-        Ok(FileCache {
-            file: Arc::new(file),
-            free_list: Mutex::new(FreeList {
-                next_free_block: 0,
-                max_blocks: initial_size,
-                free_blocks: Vec::new(),
-            }),
-            max_blocks_gauge,
-            num_free_blocks_gauge,
-        })
-    }
-
-    // File cache management
-
-    pub async fn read_block(
-        &self,
-        cache_block: CacheBlock,
-        mut dst: impl uring_common::buf::IoBufMut + Send + Sync,
-    ) -> Result<(), std::io::Error> {
-        assert!(dst.bytes_total() == BLCKSZ);
-        let file = self.file.clone();
-
-        let dst_ref = unsafe { std::slice::from_raw_parts_mut(dst.stable_mut_ptr(), BLCKSZ) };
-
-        spawn_blocking(move || file.read_exact_at(dst_ref, cache_block as u64 * BLCKSZ as u64))
-            .await??;
-        Ok(())
-    }
-
-    pub async fn write_block(
-        &self,
-        cache_block: CacheBlock,
-        src: impl uring_common::buf::IoBuf + Send + Sync,
-    ) -> Result<(), std::io::Error> {
-        assert!(src.bytes_init() == BLCKSZ);
-        let file = self.file.clone();
-
-        let src_ref = unsafe { std::slice::from_raw_parts(src.stable_ptr(), BLCKSZ) };
-
-        spawn_blocking(move || file.write_all_at(src_ref, cache_block as u64 * BLCKSZ as u64))
-            .await??;
-
-        Ok(())
-    }
-
-    pub fn alloc_block(&self) -> Option<CacheBlock> {
-        let mut free_list = self.free_list.lock().unwrap();
-        if let Some(x) = free_list.free_blocks.pop() {
-            return Some(x);
-        }
-        if free_list.next_free_block < free_list.max_blocks {
-            let result = free_list.next_free_block;
-            free_list.next_free_block += 1;
-            return Some(result);
-        }
-        None
-    }
-
-    pub fn dealloc_block(&self, cache_block: CacheBlock) {
-        let mut free_list = self.free_list.lock().unwrap();
-        free_list.free_blocks.push(cache_block);
-    }
-}
-
-impl metrics::core::Collector for FileCache {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-        descs.append(&mut self.max_blocks_gauge.desc());
-        descs.append(&mut self.num_free_blocks_gauge.desc());
-        descs
-    }
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        // Update the gauges with fresh values first
-        {
-            let free_list = self.free_list.lock().unwrap();
-            self.max_blocks_gauge.set(free_list.max_blocks as i64);
-
-            let total_free_blocks: i64 = free_list.free_blocks.len() as i64
-                + (free_list.max_blocks as i64 - free_list.next_free_block as i64);
-            self.num_free_blocks_gauge.set(total_free_blocks as i64);
-        }
-
-        let mut values = Vec::new();
-        values.append(&mut self.max_blocks_gauge.collect());
-        values.append(&mut self.num_free_blocks_gauge.collect());
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/file_cache_hashmap.rs
+++ b/pgxn/neon/communicator/src/file_cache_hashmap.rs
@@ -0,0 +1,240 @@
+//! Glue code to allow using the Rust shmem hash map implementation from C code
+//!
+//! For convenience of adapting existing code, the interface provided somewhat resembles the
+//! dynahash interface.
+//!
+//! NOTE: The caller is responsible for locking! The caller is expected to hold the PostgreSQL
+//! LWLock, 'lfc_lock', while accessing the hash table, in shared or exclusive mode as appropriate.
+
+use std::ffi::c_void;
+use std::marker::PhantomData;
+
+use neon_shmem::hash::entry::Entry;
+use neon_shmem::hash::{HashMapAccess, HashMapInit};
+use neon_shmem::shmem::ShmemHandle;
+
+/// Key type of the hash map. NB: This must match the definition of BufferTag in the Postgres C
+/// headers. We could use bindgen to generate it, but prefer not to depend on bindgen for now.
+///
+/// Note that there are no padding bytes here: the struct is five `u32` fields under `#[repr(C)]`.
+/// If the corresponding C struct has padding bytes, the C code must clear them.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+#[repr(C)]
+pub struct FileCacheKey {
+    pub _spc_id: u32,
+    pub _db_id: u32,
+    pub _rel_number: u32,
+    pub _fork_num: u32,
+    pub _block_num: u32,
+}
+
+/// Like with FileCacheKey, this must match the definition of FileCacheEntry in file_cache.c. We
+/// don't look at the contents here though, it's sufficient that the size and alignment match.
+#[derive(Clone, Debug, Default)]
+#[repr(C)]
+pub struct FileCacheEntry {
+    pub _offset: u32,
+    pub _access_count: u32,
+    pub _prev: *mut FileCacheEntry,
+    pub _next: *mut FileCacheEntry,
+    pub _state: [u32; 8],
+}
+
+/// Opaque wrapper around a boxed `HashMapInit`, handed to and from C as a bare pointer.
+///
+/// XXX: Ideally this would simply be the alias
+/// ```ignore
+/// type FileCacheHashMapHandle = HashMapInit<'a, FileCacheKey, FileCacheEntry>
+/// ```
+/// but with that, cbindgen generates a broken typedef in the C header file which doesn't
+/// compile. It apparently gets confused by the generics.
+#[repr(transparent)]
+pub struct FileCacheHashMapHandle<'a>(
+    pub *mut c_void,
+    PhantomData<HashMapInit<'a, FileCacheKey, FileCacheEntry>>,
+);
+impl<'a> From<Box<HashMapInit<'a, FileCacheKey, FileCacheEntry>>> for FileCacheHashMapHandle<'a> {
+    fn from(init: Box<HashMapInit<'a, FileCacheKey, FileCacheEntry>>) -> Self {
+        Self(Box::into_raw(init).cast(), PhantomData)
+    }
+}
+impl<'a> From<FileCacheHashMapHandle<'a>> for Box<HashMapInit<'a, FileCacheKey, FileCacheEntry>> {
+    fn from(handle: FileCacheHashMapHandle) -> Self {
+        unsafe { Box::from_raw(handle.0.cast()) }
+    }
+}
+
+/// XXX: Same cbindgen workaround as FileCacheHashMapHandle, here wrapping a `HashMapAccess`.
+#[repr(transparent)]
+pub struct FileCacheHashMapAccess<'a>(
+    pub *mut c_void,
+    PhantomData<HashMapAccess<'a, FileCacheKey, FileCacheEntry>>,
+);
+impl<'a> From<Box<HashMapAccess<'a, FileCacheKey, FileCacheEntry>>> for FileCacheHashMapAccess<'a> {
+    fn from(access: Box<HashMapAccess<'a, FileCacheKey, FileCacheEntry>>) -> Self {
+        // Turn the Box into a raw pointer to the HashMapAccess. Ownership of the
+        // HashMapAccess moves into that pointer, so nothing on the Rust side frees it
+        // automatically anymore; the C caller is now responsible for managing this memory.
+        Self(Box::into_raw(access).cast(), PhantomData)
+    }
+}
+impl<'a> FileCacheHashMapAccess<'a> {
+    fn as_ref(self) -> &'a HashMapAccess<'a, FileCacheKey, FileCacheEntry> {
+        let raw = self.0.cast::<HashMapAccess<'a, FileCacheKey, FileCacheEntry>>();
+        unsafe { raw.as_ref() }.unwrap()
+    }
+    fn as_mut(self) -> &'a mut HashMapAccess<'a, FileCacheKey, FileCacheEntry> {
+        let raw = self.0.cast::<HashMapAccess<'a, FileCacheKey, FileCacheEntry>>();
+        unsafe { raw.as_mut() }.unwrap()
+    }
+}
+
+/// Create the shared memory area for the hash table. Intended to be called at postmaster
+/// startup; the returned handle is inherited by all the backend processes across fork().
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_shmem_init<'a>(
+    initial_num_buckets: u32,
+    max_num_buckets: u32,
+) -> FileCacheHashMapHandle<'a> {
+    type Init<'t> = HashMapInit<'t, FileCacheKey, FileCacheEntry>;
+
+    // The shmem area gets the size estimate for `max_num_buckets`, not for the initial count.
+    let area_bytes = Init::estimate_size(max_num_buckets);
+    let area = ShmemHandle::new("lfc mapping", 0, area_bytes);
+    let area = area.expect("shmem initialization failed");
+
+    let init: Init<'a> = Init::init_in_shmem(initial_num_buckets, area);
+
+    FileCacheHashMapHandle::from(Box::new(init))
+}
+
+/// Set up access to the shared memory area in a backend process, by attaching as a writer.
+/// This takes back the Box behind 'handle', so the same handle pointer must not be reused.
+///
+/// XXX: I'm not sure if this actually gets called in each process, or if the returned struct
+/// is also inherited across fork(). It only matters if post-fork initialization gets added.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_shmem_access<'a>(
+    handle: FileCacheHashMapHandle<'a>,
+) -> FileCacheHashMapAccess<'a> {
+    let init = Box::<HashMapInit<'a, FileCacheKey, FileCacheEntry>>::from(handle);
+    FileCacheHashMapAccess::from(Box::new(init.attach_writer()))
+}
+
+/// Return the hash table's current bucket count. Panics if the count does not fit in a u32.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_get_num_buckets<'a>(
+    map: FileCacheHashMapAccess<'static>,
+) -> u32 {
+    let num_buckets = map.as_ref().get_num_buckets();
+    u32::try_from(num_buckets).unwrap()
+}
+
+/// Look up the entry for the given key, using the caller-supplied hash of that key.
+///
+/// This is similar to dynahash's hash_search(..., HASH_FIND).
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_hash_find<'a>(
+    map: FileCacheHashMapAccess<'static>,
+    key: &FileCacheKey,
+    hash: u64,
+) -> Option<&'static FileCacheEntry> {
+    // A shared reference to the map is all a lookup needs.
+    map.as_ref().get_with_hash(key, hash)
+}
+
+/// Fetch the entry stored at the given bucket position, if there is one.
+///
+/// The dynahash interface has no direct counterpart for this, but stepping 'pos' through
+/// the bucket positions is a way to iterate through all entries in the hash table.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_hash_get_at_pos<'a>(
+    map: FileCacheHashMapAccess<'static>,
+    pos: u32,
+) -> Option<&'static FileCacheEntry> {
+    let (_key, value) = map.as_ref().get_at_bucket(pos as usize)?;
+    Some(value)
+}
+
+/// Remove an entry, identified by a pointer to its value.
+///
+/// This corresponds to dynahash hash_search(entry->key, HASH_REMOVE), where 'entry' is an
+/// entry you have previously looked up. If its bucket holds no entry, nothing is done.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_hash_remove_entry<'a, 'b>(
+    map: FileCacheHashMapAccess,
+    entry: *mut FileCacheEntry,
+) {
+    let table = map.as_mut();
+    let bucket = table.get_bucket_for_value(entry);
+
+    // 'entry' is expected to point at a live entry, so its bucket should be occupied.
+    let Some(occupied) = table.entry_at_bucket(bucket) else {
+        // todo: shouldn't happen, panic?
+        return;
+    };
+
+    occupied.remove();
+}
+
+/// Compute the hash value for the given key.
+///
+/// Like dynahash's get_hash_value() function, except that Rust's default hasher is used.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_get_hash_value<'a, 'b>(
+    map: FileCacheHashMapAccess<'static>,
+    key: &FileCacheKey,
+) -> u64 {
+    let table = map.as_ref();
+    table.get_hash_value(key)
+}
+
+/// Find the entry for the given key and hash, inserting a default-initialized one if missing.
+/// '*found' reports which case it was. Panics if the insert fails (presumably: table full).
+/// This is equivalent to dynahash hash_search(..., HASH_ENTER).
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_hash_enter<'a, 'b>(
+    map: FileCacheHashMapAccess,
+    key: &FileCacheKey,
+    hash: u64,
+    found: &mut bool,
+) -> *mut FileCacheEntry {
+    let slot = map.as_mut().entry_with_hash(key.clone(), hash);
+
+    // Tell the caller whether the key was already present, before touching the slot.
+    *found = matches!(slot, Entry::Occupied(_));
+    match slot {
+        Entry::Occupied(mut occupied) => occupied.get_mut(),
+        // A missing key gets a fresh entry with all-default contents.
+        Entry::Vacant(vacant) => vacant
+            .insert(FileCacheEntry::default())
+            .expect("TODO: hash table full"),
+    }
+}
+
+/// Return the key belonging to a given entry, which must be present in the hash table.
+///
+/// Dynahash requires the key to be part of the "value" struct, so the key is always
+/// reachable as something like `entry->key`. This hash table stores the key separately
+/// from the value instead, and this function fetches that separately stored key.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_hash_get_key_for_entry<'a, 'b>(
+    map: FileCacheHashMapAccess,
+    entry: *const FileCacheEntry,
+) -> Option<&FileCacheKey> {
+    let table = map.as_ref();
+    let (key, _value) = table.get_at_bucket(table.get_bucket_for_value(entry) as usize)?;
+    Some(key)
+}
+
+/// Empty the hash table by removing the entry, if any, at every bucket position.
+#[unsafe(no_mangle)]
+pub extern "C" fn bcomm_file_cache_hash_reset<'a, 'b>(map: FileCacheHashMapAccess) {
+    let table = map.as_mut();
+    // The bucket count is read once, when the range below is built.
+    for bucket in 0..table.get_num_buckets() {
+        if let Some(occupied) = table.entry_at_bucket(bucket) {
+            occupied.remove();
+        }
+    }
+}
--- a/pgxn/neon/communicator/src/global_allocator.rs
+++ b/pgxn/neon/communicator/src/global_allocator.rs
@@ -1,109 +0,0 @@
-//! Global allocator, for tracking memory usage of the Rust parts
-//!
-//! Postgres is designed to handle allocation failure (ie. malloc() returning NULL) gracefully.  It
-//! rolls backs the transaction and gives the user an "ERROR: out of memory" error. Rust code
-//! however panics if an allocation fails. We don't want that to ever happen, because an unhandled
-//! panic leads to Postgres crash and restart. Our strategy is to pre-allocate a large enough chunk
-//! of memory for use by the Rust code, so that the allocations never fail.
-//!
-//! To pick the size for the pre-allocated chunk, we have a metric to track the high watermark
-//! memory usage of all the Rust allocations in total.
-//!
-//! TODO:
-//!
-//! - Currently we just export the metrics. Actual allocations are still just passed through to
-//!   the system allocator.
-//! - Take padding etc. overhead into account
-
-use std::alloc::{GlobalAlloc, Layout, System};
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use metrics::IntGauge;
-
-struct MyAllocator {
-    allocations: AtomicU64,
-    deallocations: AtomicU64,
-
-    allocated: AtomicUsize,
-    high: AtomicUsize,
-}
-
-unsafe impl GlobalAlloc for MyAllocator {
-    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-        self.allocations.fetch_add(1, Ordering::Relaxed);
-        let mut allocated = self.allocated.fetch_add(layout.size(), Ordering::Relaxed);
-        allocated += layout.size();
-        self.high.fetch_max(allocated, Ordering::Relaxed);
-        unsafe { System.alloc(layout) }
-    }
-
-    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-        self.deallocations.fetch_add(1, Ordering::Relaxed);
-        self.allocated.fetch_sub(layout.size(), Ordering::Relaxed);
-        unsafe { System.dealloc(ptr, layout) }
-    }
-}
-
-#[global_allocator]
-static GLOBAL: MyAllocator = MyAllocator {
-    allocations: AtomicU64::new(0),
-    deallocations: AtomicU64::new(0),
-    allocated: AtomicUsize::new(0),
-    high: AtomicUsize::new(0),
-};
-
-pub struct MyAllocatorCollector {
-    allocations: IntGauge,
-    deallocations: IntGauge,
-    allocated: IntGauge,
-    high: IntGauge,
-}
-
-impl MyAllocatorCollector {
-    pub fn new() -> MyAllocatorCollector {
-        MyAllocatorCollector {
-            allocations: IntGauge::new("allocations_total", "Number of allocations in Rust code")
-                .unwrap(),
-            deallocations: IntGauge::new(
-                "deallocations_total",
-                "Number of deallocations in Rust code",
-            )
-            .unwrap(),
-            allocated: IntGauge::new("allocated_total", "Bytes currently allocated").unwrap(),
-            high: IntGauge::new("allocated_high", "High watermark of allocated bytes").unwrap(),
-        }
-    }
-}
-
-impl metrics::core::Collector for MyAllocatorCollector {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-
-        descs.append(&mut self.allocations.desc());
-        descs.append(&mut self.deallocations.desc());
-        descs.append(&mut self.allocated.desc());
-        descs.append(&mut self.high.desc());
-
-        descs
-    }
-
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        let mut values = Vec::new();
-
-        // update the gauges
-        self.allocations
-            .set(GLOBAL.allocations.load(Ordering::Relaxed) as i64);
-        self.deallocations
-            .set(GLOBAL.allocations.load(Ordering::Relaxed) as i64);
-        self.allocated
-            .set(GLOBAL.allocated.load(Ordering::Relaxed) as i64);
-        self.high.set(GLOBAL.high.load(Ordering::Relaxed) as i64);
-
-        values.append(&mut self.allocations.collect());
-        values.append(&mut self.deallocations.collect());
-        values.append(&mut self.allocated.collect());
-        values.append(&mut self.high.collect());
-
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/init.rs
+++ b/pgxn/neon/communicator/src/init.rs
@@ -1,184 +0,0 @@
-//! Initialization functions. These are executed in the postmaster process,
-//! at different stages of server startup.
-//!
-//!
-//! Communicator initialization steps:
-//!
-//! 1. At postmaster startup, before shared memory is allocated,
-//!    rcommunicator_shmem_size() is called to get the amount of
-//!    shared memory that this module needs.
-//!
-//! 2. Later, after the shared memory has been allocated,
-//!    rcommunicator_shmem_init() is called to initialize the shmem
-//!    area.
-//!
-//! Per process initialization:
-//!
-//! When a backend process starts up, it calls rcommunicator_backend_init().
-//! In the communicator worker process, other functions are called, see
-//! `worker_process` module.
-
-use std::ffi::c_int;
-use std::mem;
-use std::mem::MaybeUninit;
-use std::os::fd::OwnedFd;
-
-use crate::backend_comms::NeonIOHandle;
-use crate::integrated_cache::IntegratedCacheInitStruct;
-
-const NUM_NEON_REQUEST_SLOTS_PER_BACKEND: u32 = 5;
-
-/// This struct is created in the postmaster process, and inherited to
-/// the communicator process and all backend processes through fork()
-#[repr(C)]
-pub struct CommunicatorInitStruct {
-    #[allow(dead_code)]
-    pub max_procs: u32,
-
-    pub submission_pipe_read_fd: OwnedFd,
-    pub submission_pipe_write_fd: OwnedFd,
-
-    // Shared memory data structures
-    pub num_neon_request_slots_per_backend: u32,
-
-    pub neon_request_slots: &'static [NeonIOHandle],
-
-    pub integrated_cache_init_struct: IntegratedCacheInitStruct<'static>,
-}
-
-impl std::fmt::Debug for CommunicatorInitStruct {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.debug_struct("CommunicatorInitStruct")
-            .field("max_procs", &self.max_procs)
-            .field("submission_pipe_read_fd", &self.submission_pipe_read_fd)
-            .field("submission_pipe_write_fd", &self.submission_pipe_write_fd)
-            .field(
-                "num_neon_request_slots_per_backend",
-                &self.num_neon_request_slots_per_backend,
-            )
-            .field("neon_request_slots length", &self.neon_request_slots.len())
-            .finish()
-    }
-}
-
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_shmem_size(max_procs: u32) -> u64 {
-    let mut size = 0;
-
-    let num_neon_request_slots = max_procs * NUM_NEON_REQUEST_SLOTS_PER_BACKEND;
-    size += mem::size_of::<NeonIOHandle>() * num_neon_request_slots as usize;
-
-    // For integrated_cache's Allocator. TODO: make this adjustable
-    size += IntegratedCacheInitStruct::shmem_size(max_procs);
-
-    size as u64
-}
-
-/// Initialize the shared memory segment. Returns a backend-private
-/// struct, which will be inherited by backend processes through fork
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_shmem_init(
-    submission_pipe_read_fd: c_int,
-    submission_pipe_write_fd: c_int,
-    max_procs: u32,
-    shmem_area_ptr: *mut MaybeUninit<u8>,
-    shmem_area_len: u64,
-    initial_file_cache_size: u64,
-    max_file_cache_size: u64,
-) -> &'static mut CommunicatorInitStruct {
-    let shmem_area: &'static mut [MaybeUninit<u8>] =
-        unsafe { std::slice::from_raw_parts_mut(shmem_area_ptr, shmem_area_len as usize) };
-
-    // Carve out the request slots from the shmem area and initialize them
-    let num_neon_request_slots_per_backend = NUM_NEON_REQUEST_SLOTS_PER_BACKEND as usize;
-    let num_neon_request_slots = max_procs as usize * num_neon_request_slots_per_backend;
-
-    let (neon_request_slots, remaining_area) =
-        alloc_array_from_slice::<NeonIOHandle>(shmem_area, num_neon_request_slots);
-
-    for i in 0..num_neon_request_slots {
-        neon_request_slots[i].write(NeonIOHandle::default());
-    }
-
-    // 'neon_request_slots' is initialized now. (MaybeUninit::slice_assume_init_mut() is nightly-only
-    // as of this writing.)
-    let neon_request_slots = unsafe {
-        std::mem::transmute::<&mut [MaybeUninit<NeonIOHandle>], &mut [NeonIOHandle]>(
-            neon_request_slots,
-        )
-    };
-
-    // Give the rest of the area to the integrated cache
-    let integrated_cache_init_struct = IntegratedCacheInitStruct::shmem_init(
-        max_procs,
-        remaining_area,
-        initial_file_cache_size,
-        max_file_cache_size,
-    );
-
-    let (submission_pipe_read_fd, submission_pipe_write_fd) = unsafe {
-        use std::os::fd::FromRawFd;
-        (
-            OwnedFd::from_raw_fd(submission_pipe_read_fd),
-            OwnedFd::from_raw_fd(submission_pipe_write_fd),
-        )
-    };
-
-    let cis: &'static mut CommunicatorInitStruct = Box::leak(Box::new(CommunicatorInitStruct {
-        max_procs,
-        submission_pipe_read_fd,
-        submission_pipe_write_fd,
-
-        num_neon_request_slots_per_backend: NUM_NEON_REQUEST_SLOTS_PER_BACKEND,
-        neon_request_slots,
-
-        integrated_cache_init_struct,
-    }));
-
-    cis
-}
-
-// fixme: currently unused
-#[allow(dead_code)]
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size());
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { result_ptr.as_mut().unwrap() };
-
-    (result, remain)
-}
-
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() * len > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size() * len);
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) };
-
-    (result, remain)
-}
--- a/pgxn/neon/communicator/src/integrated_cache.rs
+++ b/pgxn/neon/communicator/src/integrated_cache.rs
@@ -1,803 +0,0 @@
-//! Integrated communicator cache
-//!
-//! It tracks:
-//! - Relation sizes and existence
-//! - Last-written LSN
-//! - Block cache (also known as LFC)
-//!
-//! TODO: limit the size
-//! TODO: concurrency
-//!
-//! Note: This deals with "relations" which is really just one "relation fork" in Postgres
-//! terms. RelFileLocator + ForkNumber is the key.
-
-//
-// TODO: Thoughts on eviction:
-//
-// There are two things we need to track, and evict if we run out of space:
-// - blocks in the file cache's file. If the file grows too large, need to evict something.
-//   Also if the cache is resized
-//
-// - entries in the cache map. If we run out of memory in the shmem area, need to evict
-//   something
-//
-
-use std::mem::MaybeUninit;
-use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
-
-use utils::lsn::{AtomicLsn, Lsn};
-
-use crate::file_cache::INVALID_CACHE_BLOCK;
-use crate::file_cache::{CacheBlock, FileCache};
-use pageserver_page_api::RelTag;
-
-use metrics::{IntCounter, IntGauge};
-
-use neon_shmem::hash::HashMapInit;
-use neon_shmem::hash::UpdateAction;
-use neon_shmem::shmem::ShmemHandle;
-
-// in # of entries
-const RELSIZE_CACHE_SIZE: u32 = 64 * 1024;
-
-/// This struct is initialized at postmaster startup, and passed to all the processes via fork().
-pub struct IntegratedCacheInitStruct<'t> {
-    relsize_cache_handle: HashMapInit<'t, RelKey, RelEntry>,
-    block_map_handle: HashMapInit<'t, BlockKey, BlockEntry>,
-}
-
-/// Represents write-access to the integrated cache. This is used by the communicator process.
-pub struct IntegratedCacheWriteAccess<'t> {
-    relsize_cache: neon_shmem::hash::HashMapAccess<'t, RelKey, RelEntry>,
-    block_map: neon_shmem::hash::HashMapAccess<'t, BlockKey, BlockEntry>,
-
-    global_lw_lsn: AtomicU64,
-
-    pub(crate) file_cache: Option<FileCache>,
-
-    // Fields for eviction
-    clock_hand: std::sync::Mutex<usize>,
-
-    // Metrics
-    page_evictions_counter: IntCounter,
-    clock_iterations_counter: IntCounter,
-
-    // metrics from the hash map
-    block_map_num_buckets: IntGauge,
-    block_map_num_buckets_in_use: IntGauge,
-
-    relsize_cache_num_buckets: IntGauge,
-    relsize_cache_num_buckets_in_use: IntGauge,
-}
-
-/// Represents read-only access to the integrated cache. Backend processes have this.
-pub struct IntegratedCacheReadAccess<'t> {
-    relsize_cache: neon_shmem::hash::HashMapAccess<'t, RelKey, RelEntry>,
-    block_map: neon_shmem::hash::HashMapAccess<'t, BlockKey, BlockEntry>,
-}
-
-impl<'t> IntegratedCacheInitStruct<'t> {
-    /// Return the desired size in bytes of the fixed-size shared memory area to reserve for the
-    /// integrated cache.
-    pub fn shmem_size(_max_procs: u32) -> usize {
-        // The relsize cache is fixed-size. The block map is allocated in a separate resizable
-        // area.
-        HashMapInit::<RelKey, RelEntry>::estimate_size(RELSIZE_CACHE_SIZE)
-    }
-
-    /// Initialize the shared memory segment. This runs once in postmaster. Returns a struct which
-    /// will be inherited by all processes through fork.
-    pub fn shmem_init(
-        _max_procs: u32,
-        shmem_area: &'t mut [MaybeUninit<u8>],
-        initial_file_cache_size: u64,
-        max_file_cache_size: u64,
-    ) -> IntegratedCacheInitStruct<'t> {
-        // Initialize the relsize cache in the fixed-size area
-        let relsize_cache_handle =
-            neon_shmem::hash::HashMapInit::init_in_fixed_area(RELSIZE_CACHE_SIZE, shmem_area);
-
-        let max_bytes =
-            HashMapInit::<BlockKey, BlockEntry>::estimate_size(max_file_cache_size as u32);
-
-        // Initialize the block map in a separate resizable shared memory area
-        let shmem_handle = ShmemHandle::new("block mapping", 0, max_bytes).unwrap();
-
-        let block_map_handle = neon_shmem::hash::HashMapInit::init_in_shmem(
-            initial_file_cache_size as u32,
-            shmem_handle,
-        );
-        IntegratedCacheInitStruct {
-            relsize_cache_handle,
-            block_map_handle,
-        }
-    }
-
-    /// Initialize access to the integrated cache for the communicator worker process
-    pub fn worker_process_init(
-        self,
-        lsn: Lsn,
-        file_cache: Option<FileCache>,
-    ) -> IntegratedCacheWriteAccess<'t> {
-        let IntegratedCacheInitStruct {
-            relsize_cache_handle,
-            block_map_handle,
-        } = self;
-        IntegratedCacheWriteAccess {
-            relsize_cache: relsize_cache_handle.attach_writer(),
-            block_map: block_map_handle.attach_writer(),
-            global_lw_lsn: AtomicU64::new(lsn.0),
-            file_cache,
-            clock_hand: std::sync::Mutex::new(0),
-
-            page_evictions_counter: metrics::IntCounter::new(
-                "integrated_cache_evictions",
-                "Page evictions from the Local File Cache",
-            )
-            .unwrap(),
-
-            clock_iterations_counter: metrics::IntCounter::new(
-                "clock_iterations",
-                "Number of times the clock hand has moved",
-            )
-            .unwrap(),
-
-            block_map_num_buckets: metrics::IntGauge::new(
-                "block_map_num_buckets",
-                "Allocated size of the block cache hash map",
-            )
-            .unwrap(),
-            block_map_num_buckets_in_use: metrics::IntGauge::new(
-                "block_map_num_buckets_in_use",
-                "Number of buckets in use in the block cache hash map",
-            )
-            .unwrap(),
-
-            relsize_cache_num_buckets: metrics::IntGauge::new(
-                "relsize_cache_num_buckets",
-                "Allocated size of the relsize cache hash map",
-            )
-            .unwrap(),
-            relsize_cache_num_buckets_in_use: metrics::IntGauge::new(
-                "relsize_cache_num_buckets_in_use",
-                "Number of buckets in use in the relsize cache hash map",
-            )
-            .unwrap(),
-        }
-    }
-
-    /// Initialize access to the integrated cache for a backend process
-    pub fn backend_init(self) -> IntegratedCacheReadAccess<'t> {
-        let IntegratedCacheInitStruct {
-            relsize_cache_handle,
-            block_map_handle,
-        } = self;
-
-        IntegratedCacheReadAccess {
-            relsize_cache: relsize_cache_handle.attach_reader(),
-            block_map: block_map_handle.attach_reader(),
-        }
-    }
-}
-
-/// Value stored in the cache mapping hash table.
-struct BlockEntry {
-    lw_lsn: AtomicLsn,
-    cache_block: AtomicU64,
-
-    pinned: AtomicU64,
-
-    // 'referenced' bit for the clock algorithm
-    referenced: AtomicBool,
-}
-
-/// Value stored in the relsize cache hash table.
-struct RelEntry {
-    /// cached size of the relation
-    /// u32::MAX means 'not known' (that's InvalidBlockNumber in Postgres)
-    nblocks: AtomicU32,
-}
-
-impl std::fmt::Debug for RelEntry {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.debug_struct("Rel")
-            .field("nblocks", &self.nblocks.load(Ordering::Relaxed))
-            .finish()
-    }
-}
-impl std::fmt::Debug for BlockEntry {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.debug_struct("Block")
-            .field("lw_lsn", &self.lw_lsn.load())
-            .field("cache_block", &self.cache_block.load(Ordering::Relaxed))
-            .field("pinned", &self.pinned.load(Ordering::Relaxed))
-            .field("referenced", &self.referenced.load(Ordering::Relaxed))
-            .finish()
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash, Ord)]
-struct RelKey(RelTag);
-
-impl From<&RelTag> for RelKey {
-    fn from(val: &RelTag) -> RelKey {
-        RelKey(val.clone())
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash, Ord)]
-struct BlockKey {
-    rel: RelTag,
-    block_number: u32,
-}
-
-impl From<(&RelTag, u32)> for BlockKey {
-    fn from(val: (&RelTag, u32)) -> BlockKey {
-        BlockKey {
-            rel: val.0.clone(),
-            block_number: val.1,
-        }
-    }
-}
-
-/// Return type used in the cache's get_*() functions. 'Found' means that the page, or other
-/// information that was enqueried, exists in the cache. '
-pub enum CacheResult<V> {
-    /// The enqueried page or other information existed in the cache.
-    Found(V),
-
-    /// The cache doesn't contain the page (or other enqueried information, like relation size). The
-    /// Lsn is the 'not_modified_since' LSN that should be used in the request to the pageserver to
-    /// read the page.
-    NotFound(Lsn),
-}
-
-impl<'t> IntegratedCacheWriteAccess<'t> {
-    pub fn get_rel_size(&'t self, rel: &RelTag) -> CacheResult<u32> {
-        if let Some(nblocks) = get_rel_size(&self.relsize_cache, rel) {
-            CacheResult::Found(nblocks)
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            CacheResult::NotFound(lsn)
-        }
-    }
-
-    pub async fn get_page(
-        &'t self,
-        rel: &RelTag,
-        block_number: u32,
-        dst: impl uring_common::buf::IoBufMut + Send + Sync,
-    ) -> Result<CacheResult<()>, std::io::Error> {
-        let x = if let Some(block_entry) = self.block_map.get(&BlockKey::from((rel, block_number)))
-        {
-            block_entry.referenced.store(true, Ordering::Relaxed);
-
-            let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-            if cache_block != INVALID_CACHE_BLOCK {
-                // pin it and release lock
-                block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-
-                (cache_block, DeferredUnpin(block_entry.pinned.as_ptr()))
-            } else {
-                return Ok(CacheResult::NotFound(block_entry.lw_lsn.load()));
-            }
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            return Ok(CacheResult::NotFound(lsn));
-        };
-
-        let (cache_block, _deferred_pin) = x;
-        self.file_cache
-            .as_ref()
-            .unwrap()
-            .read_block(cache_block, dst)
-            .await?;
-
-        // unpin the entry (by implicitly dropping deferred_pin)
-        Ok(CacheResult::Found(()))
-    }
-
-    pub async fn page_is_cached(
-        &'t self,
-        rel: &RelTag,
-        block_number: u32,
-    ) -> Result<CacheResult<()>, std::io::Error> {
-        if let Some(block_entry) = self.block_map.get(&BlockKey::from((rel, block_number))) {
-            // This is used for prefetch requests. Treat the probe as an 'access', to keep it
-            // in cache.
-            block_entry.referenced.store(true, Ordering::Relaxed);
-
-            let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-
-            if cache_block != INVALID_CACHE_BLOCK {
-                Ok(CacheResult::Found(()))
-            } else {
-                Ok(CacheResult::NotFound(block_entry.lw_lsn.load()))
-            }
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            Ok(CacheResult::NotFound(lsn))
-        }
-    }
-
-    /// Does the relation exists? CacheResult::NotFound means that the cache doesn't contain that
-    /// information, i.e. we don't know if the relation exists or not.
-    pub fn get_rel_exists(&'t self, rel: &RelTag) -> CacheResult<bool> {
-        // we don't currently cache negative entries, so if the relation is in the cache, it exists
-        if let Some(_rel_entry) = self.relsize_cache.get(&RelKey::from(rel)) {
-            CacheResult::Found(true)
-        } else {
-            let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-            CacheResult::NotFound(lsn)
-        }
-    }
-
-    pub fn get_db_size(&'t self, _db_oid: u32) -> CacheResult<u64> {
-        // TODO: it would be nice to cache database sizes too. Getting the database size
-        // is not a very common operation, but when you do it, it's often interactive, with
-        // e.g. psql \l+ command, so the user will feel the latency.
-
-        // fixme: is this right lsn?
-        let lsn = Lsn(self.global_lw_lsn.load(Ordering::Relaxed));
-        CacheResult::NotFound(lsn)
-    }
-
-    pub fn remember_rel_size(&'t self, rel: &RelTag, nblocks: u32) {
-        let result =
-            self.relsize_cache
-                .update_with_fn(&RelKey::from(rel), |existing| match existing {
-                    None => {
-                        tracing::info!("inserting rel entry for {rel:?}, {nblocks} blocks");
-                        UpdateAction::Insert(RelEntry {
-                            nblocks: AtomicU32::new(nblocks),
-                        })
-                    }
-                    Some(e) => {
-                        tracing::info!("updating rel entry for {rel:?}, {nblocks} blocks");
-                        e.nblocks.store(nblocks, Ordering::Relaxed);
-                        UpdateAction::Nothing
-                    }
-                });
-
-        // FIXME: what to do if we run out of memory? Evict other relation entries?
-        result.expect("out of memory");
-    }
-
-    /// Remember the given page contents in the cache.
-    pub async fn remember_page(
-        &'t self,
-        rel: &RelTag,
-        block_number: u32,
-        src: impl uring_common::buf::IoBuf + Send + Sync,
-        lw_lsn: Lsn,
-        is_write: bool,
-    ) {
-        let key = BlockKey::from((rel, block_number));
-
-        // FIXME: make this work when file cache is disabled. Or make it mandatory
-        let file_cache = self.file_cache.as_ref().unwrap();
-
-        if is_write {
-            // there should be no concurrent IOs. If a backend tries to read the page
-            // at the same time, they may get a torn write. That's the same as with
-            // regular POSIX filesystem read() and write()
-
-            // First check if we have a block in cache already
-            let mut old_cache_block = None;
-            let mut found_existing = false;
-
-            let res = self.block_map.update_with_fn(&key, |existing| {
-                if let Some(block_entry) = existing {
-                    found_existing = true;
-
-                    // Prevent this entry from being evicted
-                    let pin_count = block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-                    if pin_count > 0 {
-                        // this is unexpected, because the caller has obtained the io-in-progress lock,
-                        // so no one else should try to modify the page at the same time.
-                        // XXX: and I think a read should not be happening either, because the postgres
-                        // buffer is held locked. TODO: check these conditions and tidy this up a little. Seems fragile to just panic.
-                        panic!("block entry was unexpectedly pinned");
-                    }
-
-                    let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-                    old_cache_block = if cache_block != INVALID_CACHE_BLOCK {
-                        Some(cache_block)
-                    } else {
-                        None
-                    };
-                }
-                // if there was no existing entry, we will insert one, but not yet
-                UpdateAction::Nothing
-            });
-
-            // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-            // block entries first?
-            res.expect("out of memory");
-
-            // Allocate a new block if required
-            let cache_block = old_cache_block.unwrap_or_else(|| {
-                loop {
-                    if let Some(x) = file_cache.alloc_block() {
-                        break x;
-                    }
-                    if let Some(x) = self.try_evict_one_cache_block() {
-                        break x;
-                    }
-                }
-            });
-
-            // Write the page to the cache file
-            file_cache
-                .write_block(cache_block, src)
-                .await
-                .expect("error writing to cache");
-            // FIXME: handle errors gracefully.
-            // FIXME: unpin the block entry on error
-
-            // Update the block entry
-            let res = self.block_map.update_with_fn(&key, |existing| {
-                assert_eq!(found_existing, existing.is_some());
-                if let Some(block_entry) = existing {
-                    // Update the cache block
-                    let old_blk = block_entry.cache_block.compare_exchange(
-                        INVALID_CACHE_BLOCK,
-                        cache_block,
-                        Ordering::Relaxed,
-                        Ordering::Relaxed,
-                    );
-                    assert!(old_blk == Ok(INVALID_CACHE_BLOCK) || old_blk == Err(cache_block));
-
-                    block_entry.lw_lsn.store(lw_lsn);
-
-                    block_entry.referenced.store(true, Ordering::Relaxed);
-
-                    let pin_count = block_entry.pinned.fetch_sub(1, Ordering::Relaxed);
-                    assert!(pin_count > 0);
-                    UpdateAction::Nothing
-                } else {
-                    UpdateAction::Insert(BlockEntry {
-                        lw_lsn: AtomicLsn::new(lw_lsn.0),
-                        cache_block: AtomicU64::new(cache_block),
-                        pinned: AtomicU64::new(0),
-                        referenced: AtomicBool::new(true),
-                    })
-                }
-            });
-
-            // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-            // block entries first?
-            res.expect("out of memory");
-        } else {
-            // !is_write
-            //
-            // We can assume that it doesn't already exist, because the
-            // caller is assumed to have already checked it, and holds
-            // the io-in-progress lock. (The BlockEntry might exist, but no cache block)
-
-            // Allocate a new block first
-            let cache_block = {
-                loop {
-                    if let Some(x) = file_cache.alloc_block() {
-                        break x;
-                    }
-                    if let Some(x) = self.try_evict_one_cache_block() {
-                        break x;
-                    }
-                }
-            };
-
-            // Write the page to the cache file
-            file_cache
-                .write_block(cache_block, src)
-                .await
-                .expect("error writing to cache");
-            // FIXME: handle errors gracefully.
-
-            let res = self.block_map.update_with_fn(&key, |existing| {
-                if let Some(block_entry) = existing {
-                    // FIXME: could there be concurrent readers?
-                    assert!(block_entry.pinned.load(Ordering::Relaxed) == 0);
-
-                    let old_cache_block = block_entry.cache_block.swap(cache_block, Ordering::Relaxed);
-                    if old_cache_block != INVALID_CACHE_BLOCK {
-                        panic!("remember_page called in !is_write mode, but page is already cached at blk {}", old_cache_block);
-                    }
-                    UpdateAction::Nothing
-                } else {
-                    UpdateAction::Insert(BlockEntry {
-                        lw_lsn: AtomicLsn::new(lw_lsn.0),
-                        cache_block: AtomicU64::new(cache_block),
-                        pinned: AtomicU64::new(0),
-                        referenced: AtomicBool::new(true),
-                    })
-                }
-            });
-
-            // FIXME: what to do if we run out of memory? Evict other relation entries? Remove
-            // block entries first?
-            res.expect("out of memory");
-        }
-    }
-
-    /// Forget information about given relation in the cache. (For DROP TABLE and such)
-    pub fn forget_rel(&'t self, rel: &RelTag) {
-        tracing::info!("forgetting rel entry for {rel:?}");
-        self.relsize_cache.remove(&RelKey::from(rel));
-
-        // also forget all cached blocks for the relation
-        // FIXME
-        /*
-            let mut iter = MapIterator::new(&key_range_for_rel_blocks(rel));
-            let r = self.cache_tree.start_read();
-            while let Some((k, _v)) = iter.next(&r) {
-                let w = self.cache_tree.start_write();
-
-                let mut evicted_cache_block = None;
-
-                let res = w.update_with_fn(&k, |e| {
-                    if let Some(e) = e {
-                        let block_entry = if let MapEntry::Block(e) = e {
-                            e
-                        } else {
-                            panic!("unexpected map entry type for block key");
-                        };
-                        let cache_block = block_entry
-                            .cache_block
-                            .swap(INVALID_CACHE_BLOCK, Ordering::Relaxed);
-                        if cache_block != INVALID_CACHE_BLOCK {
-                            evicted_cache_block = Some(cache_block);
-                        }
-                        UpdateAction::Remove
-                    } else {
-                        UpdateAction::Nothing
-                    }
-                });
-
-                // FIXME: It's pretty surprising to run out of memory while removing. But
-                // maybe it can happen because of trying to shrink a node?
-                res.expect("out of memory");
-
-                if let Some(evicted_cache_block) = evicted_cache_block {
-                    self.file_cache
-                        .as_ref()
-                        .unwrap()
-                        .dealloc_block(evicted_cache_block);
-                }
-        }
-
-            */
-    }
-
-    // Maintenance routines
-
-    /// Evict one block from the file cache. This is used when the file cache fills up
-    /// Returns the evicted block. It's not put to the free list, so it's available for the
-    /// caller to use immediately.
-    pub fn try_evict_one_cache_block(&self) -> Option<CacheBlock> {
-        let mut clock_hand = self.clock_hand.lock().unwrap();
-        for _ in 0..100 {
-            self.clock_iterations_counter.inc();
-
-            (*clock_hand) += 1;
-
-            let mut evict_this = false;
-            let num_buckets = self.block_map.get_num_buckets();
-            match self
-                .block_map
-                .get_bucket((*clock_hand) % num_buckets)
-                .as_deref()
-            {
-                None => {
-                    // This bucket was unused
-                }
-                Some(blk_entry) => {
-                    if !blk_entry.referenced.swap(false, Ordering::Relaxed) {
-                        // Evict this. Maybe.
-                        evict_this = true;
-                    }
-                }
-            };
-
-            if evict_this {
-                // grab the write lock
-                let mut evicted_cache_block = None;
-                let res =
-                    self.block_map
-                        .update_with_fn_at_bucket(*clock_hand % num_buckets, |old| {
-                            match old {
-                                None => UpdateAction::Nothing,
-                                Some(old) => {
-                                    // note: all the accesses to 'pinned' currently happen
-                                    // within update_with_fn(), or while holding ValueReadGuard, which protects from concurrent
-                                    // updates. Otherwise, another thread could set the 'pinned'
-                                    // flag just after we have checked it here.
-                                    if old.pinned.load(Ordering::Relaxed) != 0 {
-                                        return UpdateAction::Nothing;
-                                    }
-
-                                    let _ = self
-                                        .global_lw_lsn
-                                        .fetch_max(old.lw_lsn.load().0, Ordering::Relaxed);
-                                    let cache_block = old
-                                        .cache_block
-                                        .swap(INVALID_CACHE_BLOCK, Ordering::Relaxed);
-                                    if cache_block != INVALID_CACHE_BLOCK {
-                                        evicted_cache_block = Some(cache_block);
-                                    }
-                                    UpdateAction::Remove
-                                }
-                            }
-                        });
-
-                // Out of memory should not happen here, as we're only updating existing values,
-                // not inserting new entries to the map.
-                res.expect("out of memory");
-
-                if evicted_cache_block.is_some() {
-                    self.page_evictions_counter.inc();
-                    return evicted_cache_block;
-                }
-            }
-        }
-        // Give up if we didn't find anything
-        None
-    }
-
-    pub fn resize_file_cache(&self, num_blocks: u32) {
-        let old_num_blocks = self.block_map.get_num_buckets() as u32;
-
-        if old_num_blocks < num_blocks {
-            if let Err(err) = self.block_map.grow(num_blocks) {
-                tracing::warn!(
-                    "could not grow file cache to {} blocks (old size {}): {}",
-                    num_blocks,
-                    old_num_blocks,
-                    err
-                );
-            }
-        }
-    }
-
-    pub fn dump_map(&self, _dst: &mut dyn std::io::Write) {
-        //FIXME self.cache_map.start_read().dump(dst);
-    }
-}
-
-impl metrics::core::Collector for IntegratedCacheWriteAccess<'_> {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-        descs.append(&mut self.page_evictions_counter.desc());
-        descs.append(&mut self.clock_iterations_counter.desc());
-
-        descs.append(&mut self.block_map_num_buckets.desc());
-        descs.append(&mut self.block_map_num_buckets_in_use.desc());
-
-        descs.append(&mut self.relsize_cache_num_buckets.desc());
-        descs.append(&mut self.relsize_cache_num_buckets_in_use.desc());
-
-        descs
-    }
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        // Update gauges
-        self.block_map_num_buckets
-            .set(self.block_map.get_num_buckets() as i64);
-        self.block_map_num_buckets_in_use
-            .set(self.block_map.get_num_buckets_in_use() as i64);
-        self.relsize_cache_num_buckets
-            .set(self.relsize_cache.get_num_buckets() as i64);
-        self.relsize_cache_num_buckets_in_use
-            .set(self.relsize_cache.get_num_buckets_in_use() as i64);
-
-        let mut values = Vec::new();
-        values.append(&mut self.page_evictions_counter.collect());
-        values.append(&mut self.clock_iterations_counter.collect());
-
-        values.append(&mut self.block_map_num_buckets.collect());
-        values.append(&mut self.block_map_num_buckets_in_use.collect());
-
-        values.append(&mut self.relsize_cache_num_buckets.collect());
-        values.append(&mut self.relsize_cache_num_buckets_in_use.collect());
-
-        values
-    }
-}
-
-/// Read relation size from the cache.
-///
-/// This is in a separate function so that it can be shared by
-/// IntegratedCacheReadAccess::get_rel_size() and IntegratedCacheWriteAccess::get_rel_size()
-fn get_rel_size<'t>(
-    r: &neon_shmem::hash::HashMapAccess<RelKey, RelEntry>,
-    rel: &RelTag,
-) -> Option<u32> {
-    if let Some(rel_entry) = r.get(&RelKey::from(rel)) {
-        let nblocks = rel_entry.nblocks.load(Ordering::Relaxed);
-        if nblocks != u32::MAX {
-            Some(nblocks)
-        } else {
-            None
-        }
-    } else {
-        None
-    }
-}
-
-/// Accessor for other backends
-///
-/// This allows backends to read pages from the cache directly, on their own, without making a
-/// request to the communicator process.
-impl<'t> IntegratedCacheReadAccess<'t> {
-    pub fn get_rel_size(&'t self, rel: &RelTag) -> Option<u32> {
-        get_rel_size(&self.relsize_cache, rel)
-    }
-
-    pub fn start_read_op(&'t self) -> BackendCacheReadOp<'t> {
-        BackendCacheReadOp {
-            read_guards: Vec::new(),
-            map_access: self,
-        }
-    }
-}
-
-pub struct BackendCacheReadOp<'t> {
-    read_guards: Vec<DeferredUnpin>,
-    map_access: &'t IntegratedCacheReadAccess<'t>,
-}
-
-impl<'e> BackendCacheReadOp<'e> {
-    /// Initiate a read of the page from the cache.
-    ///
-    /// This returns the "cache block number", i.e. the block number within the cache file, where
-    /// the page's contents is stored. To get the page contents, the caller needs to read that block
-    /// from the cache file. This returns a guard object that you must hold while it performs the
-    /// read. It's possible that while you are performing the read, the cache block is invalidated.
-    /// After you have completed the read, call BackendCacheReadResult::finish() to check if the
-    /// read was in fact valid or not. If it was concurrently invalidated, you need to retry.
-    pub fn get_page(&mut self, rel: &RelTag, block_number: u32) -> Option<u64> {
-        if let Some(block_entry) = self
-            .map_access
-            .block_map
-            .get(&BlockKey::from((rel, block_number)))
-        {
-            block_entry.referenced.store(true, Ordering::Relaxed);
-
-            let cache_block = block_entry.cache_block.load(Ordering::Relaxed);
-            if cache_block != INVALID_CACHE_BLOCK {
-                block_entry.pinned.fetch_add(1, Ordering::Relaxed);
-                self.read_guards
-                    .push(DeferredUnpin(block_entry.pinned.as_ptr()));
-                Some(cache_block)
-            } else {
-                None
-            }
-        } else {
-            None
-        }
-    }
-
-    pub fn finish(self) -> bool {
-        // TODO: currently, we hold a pin on the in-memory map, so concurrent invalidations are not
-        // possible. But if we switch to optimistic locking, this would return 'false' if the
-        // optimistic locking failed and you need to retry.
-        true
-    }
-}
-
-/// A hack to decrement an AtomicU64 on drop. This is used to decrement the pin count
-/// of a BlockEntry. The safety depends on the fact that the BlockEntry is not evicted
-/// or moved while it's pinned.
-struct DeferredUnpin(*mut u64);
-
-unsafe impl Sync for DeferredUnpin {}
-unsafe impl Send for DeferredUnpin {}
-
-impl Drop for DeferredUnpin {
-    fn drop(&mut self) {
-        // unpin it
-        unsafe {
-            let pin_ref = AtomicU64::from_ptr(self.0);
-            pin_ref.fetch_sub(1, Ordering::Relaxed);
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/lib.rs
+++ b/pgxn/neon/communicator/src/lib.rs
@@ -1,27 +1 @@
-//!
-//! Three main parts:
-//! - async tokio communicator core, which receives requests and processes them.
-//! - Main loop and requests queues, which routes requests from backends to the core
-//! - the per-backend glue code, which submits requests
-//!
-
-mod backend_comms;
-
-// mark this 'pub', because these functions are called from C code. Otherwise, the compiler
-// complains about a bunch of structs and enum variants being unused, because it thinkgs
-// the functions that use them are never called. There are some C-callable functions in
-// other modules too, but marking this as pub is currently enough to silence the warnings
-//
-// TODO: perhaps collect *all* the extern "C" functions to one module?
-pub mod backend_interface;
-
-mod file_cache;
-mod init;
-mod integrated_cache;
-mod neon_request;
-mod worker_process;
-
-mod global_allocator;
-
-// FIXME get this from postgres headers somehow
-pub const BLCKSZ: usize = 8192;
+pub mod file_cache_hashmap;
--- a/pgxn/neon/communicator/src/neon_request.rs
+++ b/pgxn/neon/communicator/src/neon_request.rs
@@ -1,346 +0,0 @@
-type CLsn = u64;
-type COid = u32;
-
-// This conveniently matches PG_IOV_MAX
-pub const MAX_GETPAGEV_PAGES: usize = 32;
-
-use pageserver_page_api as page_api;
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub enum NeonIORequest {
-    Empty,
-
-    // Read requests. These are C-friendly variants of the corresponding structs in
-    // pageserver_page_api.
-    RelExists(CRelExistsRequest),
-    RelSize(CRelSizeRequest),
-    GetPageV(CGetPageVRequest),
-    PrefetchV(CPrefetchVRequest),
-    DbSize(CDbSizeRequest),
-
-    // Write requests. These are needed to keep the relation size cache and LFC up-to-date.
-    // They are not sent to the pageserver.
-    WritePage(CWritePageRequest),
-    RelExtend(CRelExtendRequest),
-    RelZeroExtend(CRelZeroExtendRequest),
-    RelCreate(CRelCreateRequest),
-    RelTruncate(CRelTruncateRequest),
-    RelUnlink(CRelUnlinkRequest),
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub enum NeonIOResult {
-    Empty,
-    RelExists(bool),
-    RelSize(u32),
-
-    /// the result pages are written to the shared memory addresses given in the request
-    GetPageV,
-
-    /// A prefetch request returns as soon as the request has been received by the communicator.
-    /// It is processed in the background.
-    PrefetchVLaunched,
-
-    DbSize(u64),
-
-    // FIXME design compact error codes. Can't easily pass a string or other dynamic data.
-    // currently, this is 'errno'
-    Error(i32),
-
-    Aborted,
-
-    /// used for all write requests
-    WriteOK,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CCachedGetPageVResult {
-    pub cache_block_numbers: [u64; MAX_GETPAGEV_PAGES],
-}
-
-/// ShmemBuf represents a buffer in shared memory.
-///
-/// SAFETY: The pointer must point to an area in shared memory. The functions allow you to liberally
-/// get a mutable pointer to the contents; it is the caller's responsibility to ensure that you
-/// don't access a buffer that's you're not allowed to. Inappropriate access to the buffer doesn't
-/// violate Rust's safety semantics, but it will mess up and crash Postgres.
-///
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct ShmemBuf {
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub ptr: *mut u8,
-}
-
-unsafe impl Send for ShmemBuf {}
-unsafe impl Sync for ShmemBuf {}
-
-unsafe impl uring_common::buf::IoBuf for ShmemBuf {
-    fn stable_ptr(&self) -> *const u8 {
-        self.ptr
-    }
-
-    fn bytes_init(&self) -> usize {
-        crate::BLCKSZ
-    }
-
-    fn bytes_total(&self) -> usize {
-        crate::BLCKSZ
-    }
-}
-
-unsafe impl uring_common::buf::IoBufMut for ShmemBuf {
-    fn stable_mut_ptr(&mut self) -> *mut u8 {
-        self.ptr
-    }
-
-    unsafe fn set_init(&mut self, pos: usize) {
-        if pos > crate::BLCKSZ as usize {
-            panic!(
-                "set_init called past end of buffer, pos {}, buffer size {}",
-                pos,
-                crate::BLCKSZ
-            );
-        }
-    }
-}
-
-impl ShmemBuf {
-    pub fn as_mut_ptr(&self) -> *mut u8 {
-        self.ptr
-    }
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelExistsRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelSizeRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CGetPageVRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub dest: [ShmemBuf; MAX_GETPAGEV_PAGES],
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CPrefetchVRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CDbSizeRequest {
-    pub db_oid: COid,
-    pub request_lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CWritePageRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub lsn: CLsn,
-
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub src: ShmemBuf,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelExtendRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub lsn: CLsn,
-
-    // These fields define page contents. Must point into a buffer in shared memory!
-    pub src_ptr: usize,
-    pub src_size: u32,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelZeroExtendRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u32,
-    pub lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelCreateRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelTruncateRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub nblocks: u32,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelUnlinkRequest {
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u32,
-}
-
-impl CRelExistsRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelSizeRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CGetPageVRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CPrefetchVRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CWritePageRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelExtendRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelZeroExtendRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelCreateRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelTruncateRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelUnlinkRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/callbacks.rs
+++ b/pgxn/neon/communicator/src/worker_process/callbacks.rs
@@ -1,28 +0,0 @@
-//! C callbacks to PostgreSQL facilities that the neon extension needs
-//! to provide. These are implemented in `neon/pgxn/communicator_new.c`.
-//! The function signatures better match!
-//!
-//! These are called from the communicator threads! Careful what you do, most
-//! Postgres functions are not safe to call in that context.
-
-use utils::lsn::Lsn;
-
-unsafe extern "C" {
-    /// Returns the current request LSN as a raw `u64`; wrapped by
-    /// `get_request_lsn`.
-    pub fn callback_get_request_lsn_unsafe() -> u64;
-    /// Sets the latch of the worker process; wrapped by
-    /// `callback_set_my_latch`.
-    pub fn callback_set_my_latch_unsafe();
-    /// Wakes up the process identified by `procno`; wrapped by `notify_proc`.
-    pub fn notify_proc_unsafe(procno: std::ffi::c_int);
-}
-
-// safe wrappers
-
-/// Safe wrapper around `notify_proc_unsafe`: forwards `procno` unchanged to
-/// the C callback.
-pub(super) fn notify_proc(procno: std::ffi::c_int) {
-    // NOTE(review): soundness relies on the C implementation being callable
-    // from the communicator threads - confirm against the C side.
-    unsafe {
-        notify_proc_unsafe(procno);
-    }
-}
-
-/// Safe wrapper around `callback_set_my_latch_unsafe`.
-pub(super) fn callback_set_my_latch() {
-    // NOTE(review): soundness relies on the C implementation being callable
-    // from the communicator threads - confirm against the C side.
-    unsafe {
-        callback_set_my_latch_unsafe();
-    }
-}
-
-/// Safe wrapper around `callback_get_request_lsn_unsafe`: wraps the raw
-/// `u64` returned by the C callback in an `Lsn`.
-pub(super) fn get_request_lsn() -> Lsn {
-    let raw_lsn = unsafe { callback_get_request_lsn_unsafe() };
-    Lsn(raw_lsn)
-}
--- a/pgxn/neon/communicator/src/worker_process/in_progress_ios.rs
+++ b/pgxn/neon/communicator/src/worker_process/in_progress_ios.rs
@@ -1,84 +0,0 @@
-use std::cmp::Eq;
-use std::hash::Hash;
-use std::sync::Arc;
-
-use tokio::sync::{Mutex, OwnedMutexGuard};
-
-use clashmap::ClashMap;
-use clashmap::Entry;
-
-use pageserver_page_api::RelTag;
-
-/// Key identifying what an in-progress request operates on: a whole
-/// database, a relation, or a single block of a relation.
-#[derive(Clone, PartialEq, Eq, Hash)]
-pub enum RequestInProgressKey {
-    /// A database, by OID.
-    Db(u32),
-    /// A relation.
-    Rel(RelTag),
-    /// One block of a relation, by block number.
-    Block(RelTag, u32),
-}
-
-/// Lock table keyed by `RequestInProgressKey`.
-pub type RequestInProgressTable = MutexHashSet<RequestInProgressKey>;
-
-// more primitive locking thingie:
-
-/// A set of per-key async locks. A key is "locked" while it has an entry in
-/// `lock_table`; the entry holds the mutex that later arrivals wait on.
-pub struct MutexHashSet<K>
-where
-    K: Clone + Eq + Hash,
-{
-    lock_table: ClashMap<K, Arc<Mutex<()>>>,
-}
-
-/// Proof that `key` is locked in `set`. Dropping the guard removes the key
-/// from the table and releases the mutex that waiters are blocked on.
-pub struct MutexHashSetGuard<'a, K>
-where
-    K: Clone + Eq + Hash,
-{
-    pub key: K,
-    set: &'a MutexHashSet<K>,
-    mutex: Arc<Mutex<()>>,
-    _guard: OwnedMutexGuard<()>,
-}
-
-impl<'a, K> Drop for MutexHashSetGuard<'a, K>
-where
-    K: Clone + Eq + Hash,
-{
-    fn drop(&mut self) {
-        // Unpublish our entry first. It must still be the mutex we inserted
-        // in `lock`, since nobody else can replace it while we hold the key.
-        let (_removed_key, published) = self.set.lock_table.remove(&self.key).unwrap();
-        assert!(Arc::ptr_eq(&published, &self.mutex));
-
-        // `_guard` is released when the fields are dropped after this
-        // function returns, which wakes up any waiters.
-    }
-}
-
-impl<K> MutexHashSet<K>
-where
-    K: Clone + Eq + Hash,
-{
-    /// Creates an empty lock table.
-    pub fn new() -> MutexHashSet<K> {
-        Self {
-            lock_table: ClashMap::new(),
-        }
-    }
-
-    /// Locks `key`, waiting for the current holder (if any) to release it.
-    /// The key stays locked until the returned guard is dropped.
-    pub async fn lock<'a>(&'a self, key: K) -> MutexHashSetGuard<'a, K> {
-        // Prepare our own, already-locked mutex. Nobody else has a handle to
-        // it yet, so acquiring it cannot block.
-        let mutex = Arc::new(Mutex::new(()));
-        let guard = Arc::clone(&mutex).lock_owned().await;
-
-        loop {
-            let holder = match self.lock_table.entry(key.clone()) {
-                Entry::Vacant(slot) => {
-                    // Key is free: publish our mutex and we own the key.
-                    slot.insert(Arc::clone(&mutex));
-                    break;
-                }
-                Entry::Occupied(slot) => Arc::clone(slot.get()),
-            };
-            // Somebody else holds the key. Wait until they release their
-            // mutex, then retry the insertion.
-            drop(holder.lock().await);
-        }
-
-        MutexHashSetGuard {
-            key,
-            set: self,
-            mutex,
-            _guard: guard,
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/logging.rs
+++ b/pgxn/neon/communicator/src/worker_process/logging.rs
@@ -1,229 +0,0 @@
-//! Glue code to hook up Rust logging, with the `tracing` crate, to the PostgreSQL log
-//!
-//! In the Rust threads, the log messages are written to a mpsc Channel, and the Postgres
-//! process latch is raised. That wakes up the loop in the main thread. It reads the
-//! message from the channel and ereport()s it. This ensures that only one thread, the main
-//! thread, calls the PostgreSQL logging routines at any time.
-
-use std::sync::mpsc::sync_channel;
-use std::sync::mpsc::{Receiver, SyncSender};
-use std::sync::mpsc::{TryRecvError, TrySendError};
-
-use tracing::info;
-use tracing::{Event, Level, Metadata, Subscriber};
-use tracing_subscriber::filter::LevelFilter;
-use tracing_subscriber::fmt::FmtContext;
-use tracing_subscriber::fmt::FormatEvent;
-use tracing_subscriber::fmt::FormatFields;
-use tracing_subscriber::fmt::FormattedFields;
-use tracing_subscriber::fmt::MakeWriter;
-use tracing_subscriber::fmt::format::Writer;
-use tracing_subscriber::registry::LookupSpan;
-
-use crate::worker_process::callbacks::callback_set_my_latch;
-
-/// Receiving end of the log message queue. Created by `configure_logging`
-/// and drained by `pump_logging`.
-pub struct LoggingState {
-    receiver: Receiver<FormattedEventWithMeta>,
-}
-
-/// Called once, at worker process startup. Installs a `tracing` subscriber
-/// that queues formatted events on a bounded channel, and returns the
-/// receiving end wrapped in a `LoggingState`. The returned state is passed
-/// back in the subsequent calls to `pump_logging`. It is opaque to the C code.
-#[unsafe(no_mangle)]
-pub extern "C" fn configure_logging() -> Box<LoggingState> {
-    use tracing_subscriber::prelude::*;
-
-    let (sender, receiver) = sync_channel(1000);
-
-    // TODO: derive this from log_min_messages?
-    let level_filter = LevelFilter::from_level(Level::INFO);
-    let layer = tracing_subscriber::fmt::layer()
-        .event_format(SimpleFormatter::new())
-        .with_writer(Maker { channel: sender })
-        .with_filter(level_filter);
-    tracing_subscriber::registry().with(layer).init();
-
-    info!("communicator process logging started");
-
-    Box::new(LoggingState { receiver })
-}
-
-/// Read one message from the logging queue. This is essentially a wrapper to Receiver,
-/// with a C-friendly signature.
-///
-/// The message is copied into *errbuf, which is a caller-supplied buffer of size `errbuf_len`.
-/// If the message doesn't fit in the buffer, it is truncated. It is always NUL-terminated.
-/// If `errbuf` is NULL or `errbuf_len` is 0 there is no room even for the terminator, so
-/// nothing is written to the buffer; the message is still consumed and its level reported.
-///
-/// The error level is returned in *elevel_p. It's one of the PostgreSQL error levels, see elog.h
-///
-/// Returns 1 if a message was read, 0 if the queue is empty, and -1 if the sending side
-/// of the queue has been disconnected.
-#[unsafe(no_mangle)]
-pub extern "C" fn pump_logging(
-    state: &mut LoggingState,
-    errbuf: *mut u8,
-    errbuf_len: u32,
-    elevel_p: &mut i32,
-) -> i32 {
-    let msg = match state.receiver.try_recv() {
-        Err(TryRecvError::Empty) => return 0,
-        Err(TryRecvError::Disconnected) => return -1,
-        Ok(msg) => msg,
-    };
-
-    // Guard against an unusable buffer: `errbuf_len - 1` would underflow for
-    // a zero-length buffer and the copy below would then run out of bounds.
-    if !errbuf.is_null() && errbuf_len > 0 {
-        let src: &[u8] = &msg.message;
-        // Leave room for the terminator.
-        let len = std::cmp::min(src.len(), errbuf_len as usize - 1);
-        unsafe {
-            std::ptr::copy_nonoverlapping(src.as_ptr(), errbuf, len);
-            *(errbuf.add(len)) = b'\0'; // NUL terminator
-        }
-    }
-
-    // XXX: these levels are copied from PostgreSQL's elog.h. Introduce another enum
-    // to hide these?
-    *elevel_p = match msg.level {
-        Level::TRACE => 10, // DEBUG5
-        Level::DEBUG => 14, // DEBUG1
-        Level::INFO => 17,  // INFO
-        Level::WARN => 19,  // WARNING
-        Level::ERROR => 21, // ERROR
-    };
-    1
-}
-
-//---- The following functions can be called from any thread ----
-
-/// A formatted log message together with the `tracing` level it was
-/// emitted at.
-#[derive(Clone)]
-struct FormattedEventWithMeta {
-    message: Vec<u8>,
-    level: tracing::Level,
-}
-
-impl Default for FormattedEventWithMeta {
-    /// An empty message at `DEBUG` level.
-    fn default() -> Self {
-        Self {
-            level: Level::DEBUG,
-            message: Vec::default(),
-        }
-    }
-}
-
-/// Writer handed out by `Maker` for a single log event. Bytes written to it
-/// are buffered in `event.message` and handed to the `Maker` when the writer
-/// is flushed or dropped.
-struct EventBuilder<'a> {
-    event: FormattedEventWithMeta,
-
-    maker: &'a Maker,
-}
-
-impl EventBuilder<'_> {
-    /// Sends the bytes buffered so far, if any, and leaves the buffer empty
-    /// so that the same bytes are never sent a second time. The level is
-    /// kept, so later writes are still reported at the right level.
-    fn send_buffered(&mut self) {
-        if self.event.message.is_empty() {
-            return;
-        }
-        let message = std::mem::take(&mut self.event.message);
-        let level = self.event.level;
-        self.maker.send_event(FormattedEventWithMeta { message, level });
-    }
-}
-
-impl std::io::Write for EventBuilder<'_> {
-    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        self.event.message.write(buf)
-    }
-    fn flush(&mut self) -> std::io::Result<()> {
-        // Previously this sent a clone and kept the buffer, so the `Drop`
-        // impl sent the same message again after an explicit flush.
-        self.send_buffered();
-        Ok(())
-    }
-}
-
-impl Drop for EventBuilder<'_> {
-    fn drop(&mut self) {
-        // Send whatever has not been flushed yet.
-        self.send_buffered();
-    }
-}
-
-/// `MakeWriter` implementation that hands out `EventBuilder`s, which queue
-/// their finished messages on `channel`.
-struct Maker {
-    channel: SyncSender<FormattedEventWithMeta>,
-}
-
-impl<'a> MakeWriter<'a> for Maker {
-    type Writer = EventBuilder<'a>;
-
-    fn make_writer(&'a self) -> Self::Writer {
-        panic!("not expected to be called when make_writer_for is implemented");
-    }
-
-    /// Creates a writer with an empty buffer, tagged with the level taken
-    /// from the event's metadata.
-    fn make_writer_for(&'a self, meta: &Metadata<'_>) -> Self::Writer {
-        let event = FormattedEventWithMeta {
-            message: Vec::new(),
-            level: *meta.level(),
-        };
-        EventBuilder { event, maker: self }
-    }
-}
-
-impl Maker {
-    /// Queues `e` without blocking. On success the main thread is woken up
-    /// via its latch; if the queue is full or disconnected the event is
-    /// silently dropped.
-    fn send_event(&self, e: FormattedEventWithMeta) {
-        let Err(send_error) = self.channel.try_send(e) else {
-            // notify the main thread
-            callback_set_my_latch();
-            return;
-        };
-        match send_error {
-            TrySendError::Disconnected(_) => {}
-            TrySendError::Full(_) => {
-                // TODO: record that some messages were lost
-            }
-        }
-    }
-}
-
-/// Simple formatter implementation for tracing_subscriber, which prints the log
-/// spans and message part like the default formatter, but no timestamp or error
-/// level. The error level is captured separately by `FormattedEventWithMeta`,
-/// and when the error is printed by the main thread, with PostgreSQL ereport(),
-/// it gets a timestamp at that point. (The timestamp printed will therefore lag
-/// behind the timestamp on the event here, if the main thread doesn't process
-/// the log message promptly)
-struct SimpleFormatter;
-
-impl<S, N> FormatEvent<S, N> for SimpleFormatter
-where
-    S: Subscriber + for<'a> LookupSpan<'a>,
-    N: for<'a> FormatFields<'a> + 'static,
-{
-    /// Writes `span{fields}: ` for every span in the event's scope, from the
-    /// root span inwards, followed by the event's own fields and a newline.
-    fn format_event(
-        &self,
-        ctx: &FmtContext<'_, S, N>,
-        mut writer: Writer<'_>,
-        event: &Event<'_>,
-    ) -> std::fmt::Result {
-        // All the spans in the event's span context, outermost first. Empty
-        // if the event is not inside any span.
-        let spans = ctx
-            .event_scope()
-            .into_iter()
-            .flat_map(|scope| scope.from_root());
-
-        for span in spans {
-            write!(writer, "{}", span.name())?;
-
-            // The `fmt` layer's `new_span` method stores the span's fields,
-            // already formatted by the same field formatter that the
-            // `FmtContext` provides, in the span's extensions as
-            // `FormattedFields`.
-            let ext = span.extensions();
-            let fields = ext
-                .get::<FormattedFields<N>>()
-                .expect("will never be `None`");
-
-            // Only print the braces if the span actually had fields.
-            if !fields.is_empty() {
-                write!(writer, "{{{}}}", fields)?;
-            }
-            write!(writer, ": ")?;
-        }
-
-        // Finally the fields of the event itself
-        ctx.field_format().format_fields(writer.by_ref(), event)?;
-
-        writeln!(writer)
-    }
-}
-
-impl SimpleFormatter {
-    fn new() -> Self {
-        Self
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/main_loop.rs
+++ b/pgxn/neon/communicator/src/worker_process/main_loop.rs
@@ -1,618 +0,0 @@
-use std::collections::HashMap;
-use std::os::fd::AsRawFd;
-use std::os::fd::OwnedFd;
-use std::path::PathBuf;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use crate::backend_comms::NeonIOHandle;
-use crate::file_cache::FileCache;
-use crate::global_allocator::MyAllocatorCollector;
-use crate::init::CommunicatorInitStruct;
-use crate::integrated_cache::{CacheResult, IntegratedCacheWriteAccess};
-use crate::neon_request::{CGetPageVRequest, CPrefetchVRequest};
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-use crate::worker_process::in_progress_ios::{RequestInProgressKey, RequestInProgressTable};
-use pageserver_client_grpc::request_tracker::ShardedRequestTracker;
-use pageserver_page_api as page_api;
-
-use metrics::{IntCounter, IntCounterVec};
-
-use tokio::io::AsyncReadExt;
-use tokio_pipe::PipeRead;
-use uring_common::buf::IoBuf;
-
-use super::callbacks::{get_request_lsn, notify_proc};
-
-use tracing::{error, info, trace};
-
-use utils::lsn::Lsn;
-
-pub struct CommunicatorWorkerProcessStruct<'a> { // state of the communicator worker process, built by `init`
-    neon_request_slots: &'a [NeonIOHandle], // request slots, indexed by the slot number read from the submission pipe
-
-    request_tracker: ShardedRequestTracker, // issues the requests that cannot be answered from the cache
-
-    pub(crate) cache: IntegratedCacheWriteAccess<'a>, // consulted before a request is sent through `request_tracker`
-
-    submission_pipe_read_fd: OwnedFd, // pipe from which `run` reads 4-byte slot indexes
-
-    next_request_id: AtomicU64, // source of `request_id` for GetPage requests; starts at 1
-
-    in_progress_table: RequestInProgressTable, // per-database / relation / block locks
-
-    // Metrics
-    request_counters: IntCounterVec, // "backend_requests_total", labelled by request kind
-    request_rel_exists_counter: IntCounter,
-    request_rel_size_counter: IntCounter,
-    request_get_pagev_counter: IntCounter,
-    request_prefetchv_counter: IntCounter,
-    request_db_size_counter: IntCounter,
-    request_write_page_counter: IntCounter,
-    request_rel_extend_counter: IntCounter,
-    request_rel_zero_extend_counter: IntCounter,
-    request_rel_create_counter: IntCounter,
-    request_rel_truncate_counter: IntCounter,
-    request_rel_unlink_counter: IntCounter,
-
-    getpage_cache_misses_counter: IntCounter, // blocks of get_pagev requests not found in the cache
-    getpage_cache_hits_counter: IntCounter, // blocks of get_pagev requests found in the cache
-
-    request_nblocks_counters: IntCounterVec, // "request_nblocks_total", labelled by request kind
-    request_get_pagev_nblocks_counter: IntCounter,
-    request_prefetchv_nblocks_counter: IntCounter,
-    request_rel_zero_extend_nblocks_counter: IntCounter,
-
-    allocator_metrics: MyAllocatorCollector,
-}
-
-/// Builds the worker process state: opens the file cache, initializes the
-/// integrated cache and the request tracker with the given shard map, and
-/// creates all the metrics counters.
-///
-/// Panics if the cache file cannot be created.
-pub(super) async fn init(
-    cis: Box<CommunicatorInitStruct>,
-    tenant_id: String,
-    timeline_id: String,
-    auth_token: Option<String>,
-    mut shard_map: HashMap<utils::shard::ShardIndex, String>,
-    initial_file_cache_size: u64,
-    file_cache_path: Option<PathBuf>,
-) -> CommunicatorWorkerProcessStruct<'static> {
-    info!("Test log message");
-    let last_lsn = get_request_lsn();
-
-    // FIXME: temporarily for testing, use LFC even if disabled
-    let file_cache = match file_cache_path {
-        Some(path) => FileCache::new(&path, initial_file_cache_size),
-        None => FileCache::new(&PathBuf::from("new_filecache"), 1000),
-    }
-    .expect("could not create cache file");
-
-    // TODO: for now, just hack in the gRPC port number. This needs to be plumbed through.
-    shard_map
-        .values_mut()
-        .for_each(|connstr| *connstr = connstr.replace(":64000", ":51051"));
-    tracing::warn!("mangled connstrings to use gRPC port 51051 shard_map={shard_map:?}");
-
-    // Initialize subsystems
-    let cache = cis
-        .integrated_cache_init_struct
-        .worker_process_init(last_lsn, Some(file_cache));
-
-    let mut request_tracker = ShardedRequestTracker::new();
-    request_tracker
-        .update_shard_map(
-            shard_map,
-            None,
-            tenant_id,
-            timeline_id,
-            auth_token.as_deref(),
-        )
-        .await;
-
-    // One counter per request kind
-    let request_counters = IntCounterVec::new(
-        metrics::core::Opts::new(
-            "backend_requests_total",
-            "Number of requests from backends.",
-        ),
-        &["request_kind"],
-    )
-    .unwrap();
-    let requests_of_kind = |kind: &str| request_counters.with_label_values(&[kind]);
-    let request_rel_exists_counter = requests_of_kind("rel_exists");
-    let request_rel_size_counter = requests_of_kind("rel_size");
-    let request_get_pagev_counter = requests_of_kind("get_pagev");
-    let request_prefetchv_counter = requests_of_kind("prefetchv");
-    let request_db_size_counter = requests_of_kind("db_size");
-    let request_write_page_counter = requests_of_kind("write_page");
-    let request_rel_extend_counter = requests_of_kind("rel_extend");
-    let request_rel_zero_extend_counter = requests_of_kind("rel_zero_extend");
-    let request_rel_create_counter = requests_of_kind("rel_create");
-    let request_rel_truncate_counter = requests_of_kind("rel_truncate");
-    let request_rel_unlink_counter = requests_of_kind("rel_unlink");
-
-    let getpage_cache_misses_counter = IntCounter::new(
-        "getpage_cache_misses",
-        "Number of file cache misses in get_pagev requests.",
-    )
-    .unwrap();
-    let getpage_cache_hits_counter = IntCounter::new(
-        "getpage_cache_hits",
-        "Number of file cache hits in get_pagev requests.",
-    )
-    .unwrap();
-
-    // For the requests that affect multiple blocks, have separate counters for the # of blocks affected
-    let request_nblocks_counters = IntCounterVec::new(
-        metrics::core::Opts::new(
-            "request_nblocks_total",
-            "Number of blocks in backend requests.",
-        ),
-        &["request_kind"],
-    )
-    .unwrap();
-    let nblocks_of_kind = |kind: &str| request_nblocks_counters.with_label_values(&[kind]);
-    let request_get_pagev_nblocks_counter = nblocks_of_kind("get_pagev");
-    let request_prefetchv_nblocks_counter = nblocks_of_kind("prefetchv");
-    let request_rel_zero_extend_nblocks_counter = nblocks_of_kind("rel_zero_extend");
-
-    CommunicatorWorkerProcessStruct {
-        neon_request_slots: cis.neon_request_slots,
-        request_tracker,
-        cache,
-        submission_pipe_read_fd: cis.submission_pipe_read_fd,
-        next_request_id: AtomicU64::new(1),
-        in_progress_table: RequestInProgressTable::new(),
-
-        // metrics
-        request_counters,
-        request_rel_exists_counter,
-        request_rel_size_counter,
-        request_get_pagev_counter,
-        request_prefetchv_counter,
-        request_db_size_counter,
-        request_write_page_counter,
-        request_rel_extend_counter,
-        request_rel_zero_extend_counter,
-        request_rel_create_counter,
-        request_rel_truncate_counter,
-        request_rel_unlink_counter,
-
-        getpage_cache_misses_counter,
-        getpage_cache_hits_counter,
-
-        request_nblocks_counters,
-        request_get_pagev_nblocks_counter,
-        request_prefetchv_nblocks_counter,
-        request_rel_zero_extend_nblocks_counter,
-
-        allocator_metrics: MyAllocatorCollector::new(),
-    }
-}
-
-impl<'t> CommunicatorWorkerProcessStruct<'t> {
-    /// Main loop of the worker process. Receive requests from the backends and process them.
-    ///
-    /// Never returns. Panics on a read error or short read on the submission
-    /// pipe, and if the indicated slot holds no request.
-    pub(super) async fn run(self: &'static Self) {
-        let mut submission_pipe_read =
-            PipeRead::try_from(self.submission_pipe_read_fd.as_raw_fd()).expect("invalid pipe fd");
-        let mut idxbuf = [0u8; 4];
-
-        loop {
-            // Wait for a backend to ring the doorbell: it writes the index of
-            // its request slot to the pipe.
-            let nbytes = match submission_pipe_read.read(&mut idxbuf).await {
-                Ok(nbytes) => nbytes,
-                Err(e) => panic!("error reading from communicator pipe: {e}"),
-            };
-            if nbytes != 4 {
-                panic!("short read ({nbytes} bytes) on communicator pipe");
-            }
-            let request_idx = u32::from_ne_bytes(idxbuf) as usize;
-
-            // Read the IO request from the slot indicated in the wakeup
-            let slot = match self.neon_request_slots[request_idx].start_processing_request() {
-                Some(slot) => slot,
-                // This currently should not happen. But if we have multiple threads picking up
-                // requests, and without waiting for the notifications, it could.
-                None => panic!("no request in slot"),
-            };
-
-            // Ok, we have ownership of this request now. We must process
-            // it now, there's no going back.
-
-            //trace!("processing request {request_idx}: {request:?}");
-
-            // Every request gets its own task. That's a little excessive for requests that
-            // can be quickly satisfied from the cache, but we expect that to be rare, because the
-            // requesting backend would have already checked the cache.
-            tokio::spawn(async {
-                let result = self.handle_request(slot.get_request()).await;
-                let owner_procno = slot.get_owner_procno();
-
-                // The IO is done. Marking the request as completed gives up our
-                // ownership of the slot; it must not be touched after this.
-                slot.completed(result);
-
-                // Wake up the backend. (It might see the completed status even
-                // before this; this is just a wakeup)
-                notify_proc(owner_procno);
-            });
-        }
-    }
-
-    /// Builds the `ReadLsn` for a request to the pageserver: the current
-    /// request LSN, paired with the given not-modified-since LSN.
-    fn request_lsns(&self, not_modified_since_lsn: Lsn) -> page_api::ReadLsn {
-        let request_lsn = get_request_lsn();
-        page_api::ReadLsn {
-            request_lsn,
-            not_modified_since_lsn: Some(not_modified_since_lsn),
-        }
-    }
-
-    /// Processes one request from a backend and returns the result to store
-    /// in the request slot.
-    ///
-    /// Read requests (`RelExists`, `RelSize`, `DbSize`, `GetPageV`) are
-    /// answered from the cache when possible and otherwise forwarded to the
-    /// pageserver through `request_tracker`. `PrefetchV` is launched as a
-    /// separate task and reported as launched right away. Write requests only
-    /// update the cache. Failures are reported as `NeonIOResult::Error`.
-    ///
-    /// The in-progress locks are awaited and held until the arm returns, so
-    /// that concurrent requests on the same database, relation or block are
-    /// serialized.
-    async fn handle_request<'x>(self: &'static Self, req: &'x NeonIORequest) -> NeonIOResult {
-        match req {
-            NeonIORequest::Empty => {
-                error!("unexpected Empty IO request");
-                NeonIOResult::Error(0)
-            }
-            NeonIORequest::RelExists(req) => {
-                self.request_rel_exists_counter.inc();
-                let rel = req.reltag();
-
-                // `lock` is async: without the `.await` the lock would never
-                // actually be taken.
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Rel(rel.clone()))
-                    .await;
-
-                let not_modified_since = match self.cache.get_rel_exists(&rel) {
-                    CacheResult::Found(exists) => return NeonIOResult::RelExists(exists),
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                match self
-                    .request_tracker
-                    .process_check_rel_exists_request(page_api::CheckRelExistsRequest {
-                        read_lsn: self.request_lsns(not_modified_since),
-                        rel,
-                    })
-                    .await
-                {
-                    Ok(exists) => NeonIOResult::RelExists(exists),
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-
-            NeonIORequest::RelSize(req) => {
-                self.request_rel_size_counter.inc();
-                let rel = req.reltag();
-
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Rel(rel.clone()))
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_rel_size(&rel) {
-                    CacheResult::Found(nblocks) => {
-                        tracing::trace!("found relsize for {:?} in cache: {}", rel, nblocks);
-                        return NeonIOResult::RelSize(nblocks);
-                    }
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                let read_lsn = self.request_lsns(not_modified_since);
-                match self
-                    .request_tracker
-                    .process_get_rel_size_request(page_api::GetRelSizeRequest {
-                        read_lsn,
-                        rel: rel.clone(),
-                    })
-                    .await
-                {
-                    Ok(nblocks) => {
-                        // update the cache
-                        tracing::info!("updated relsize for {:?} in cache: {}", rel, nblocks);
-                        self.cache.remember_rel_size(&rel, nblocks);
-
-                        NeonIOResult::RelSize(nblocks)
-                    }
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-            NeonIORequest::GetPageV(req) => {
-                self.request_get_pagev_counter.inc();
-                self.request_get_pagev_nblocks_counter
-                    .inc_by(req.nblocks as u64);
-                match self.handle_get_pagev_request(req).await {
-                    Ok(()) => NeonIOResult::GetPageV,
-                    Err(errno) => NeonIOResult::Error(errno),
-                }
-            }
-            NeonIORequest::PrefetchV(req) => {
-                self.request_prefetchv_counter.inc();
-                self.request_prefetchv_nblocks_counter
-                    .inc_by(req.nblocks as u64);
-                // The prefetch runs in the background on its own copy of the
-                // request; the backend is not made to wait for it.
-                let req = req.clone();
-                tokio::spawn(async move { self.handle_prefetchv_request(&req).await });
-                NeonIOResult::PrefetchVLaunched
-            }
-            NeonIORequest::DbSize(req) => {
-                self.request_db_size_counter.inc();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Db(req.db_oid))
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_db_size(req.db_oid) {
-                    CacheResult::Found(db_size) => {
-                        // The size was found in the cache, no need to ask the pageserver
-                        return NeonIOResult::DbSize(db_size);
-                    }
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                match self
-                    .request_tracker
-                    .process_get_dbsize_request(page_api::GetDbSizeRequest {
-                        read_lsn: self.request_lsns(not_modified_since),
-                        db_oid: req.db_oid,
-                    })
-                    .await
-                {
-                    Ok(db_size) => NeonIOResult::DbSize(db_size),
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-
-            // Write requests
-            NeonIORequest::WritePage(req) => {
-                self.request_write_page_counter.inc();
-
-                // Also store it in the LFC while we still have it
-                let rel = req.reltag();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Block(rel.clone(), req.block_number))
-                    .await;
-                self.cache
-                    .remember_page(&rel, req.block_number, req.src, Lsn(req.lsn), true)
-                    .await;
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelExtend(req) => {
-                self.request_rel_extend_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.block_number + 1);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelZeroExtend(req) => {
-                self.request_rel_zero_extend_counter.inc();
-                self.request_rel_zero_extend_nblocks_counter
-                    .inc_by(req.nblocks as u64);
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.block_number + req.nblocks);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelCreate(req) => {
-                self.request_rel_create_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.remember_rel_size(&req.reltag(), 0);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelTruncate(req) => {
-                self.request_rel_truncate_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.remember_rel_size(&req.reltag(), req.nblocks);
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelUnlink(req) => {
-                self.request_rel_unlink_counter.inc();
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.forget_rel(&req.reltag());
-                NeonIOResult::WriteOK
-            }
-        }
-    }
-
-    /// Reads `req.nblocks` consecutive blocks, starting at `req.block_number`,
-    /// into the destination buffers given in the request.
-    ///
-    /// Blocks found in the cache are copied by the cache lookup itself. The
-    /// rest are fetched one at a time through `request_tracker`, copied to
-    /// their destination and remembered in the cache. Returns `Err(-1)` on a
-    /// cache I/O error or a failed fetch.
-    async fn handle_get_pagev_request(&'t self, req: &CGetPageVRequest) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Check the cache first
-        //
-        // Note: Because the backends perform a direct lookup in the cache before sending
-        // the request to the communicator process, we expect the pages to almost never
-        // be already in cache. It could happen when:
-        // 1. two backends try to read the same page at the same time, but that should never
-        //    happen because there's higher level locking in the Postgres buffer manager, or
-        // 2. if a prefetch request finished at the same time as a backend requested the
-        //    page. That's much more likely.
-        let mut misses = Vec::with_capacity(req.nblocks as usize);
-        for i in 0..req.nblocks {
-            let blkno = req.block_number + i as u32;
-
-            // note: this is deadlock-safe even though we hold multiple locks at the same time,
-            // because they're always acquired in the same order.
-            let block_key = RequestInProgressKey::Block(rel.clone(), blkno);
-            let guard = self.in_progress_table.lock(block_key).await;
-
-            let dest = req.dest[i as usize];
-            match self.cache.get_page(&rel, blkno, dest).await {
-                Ok(CacheResult::Found(_)) => {
-                    // get_page already copied the block content to the destination.
-                    // The lock on this block is released at the end of the iteration.
-                    trace!("found blk {} in rel {:?} in LFC", blkno, rel);
-                }
-                Ok(CacheResult::NotFound(lsn)) => {
-                    // Keep holding the lock until the block has been fetched
-                    misses.push((blkno, lsn, dest, guard));
-                }
-                Err(_io_error) => return Err(-1), // FIXME errno?
-            }
-        }
-
-        let num_misses = misses.len() as u64;
-        self.getpage_cache_misses_counter.inc_by(num_misses);
-        self.getpage_cache_hits_counter
-            .inc_by(req.nblocks as u64 - num_misses);
-
-        if misses.is_empty() {
-            return Ok(());
-        }
-        // Use the newest of the blocks' LSNs for all the fetches
-        let not_modified_since = misses.iter().map(|miss| miss.1).max().unwrap();
-
-        // TODO: Use batched protocol
-        for (blkno, _lsn, dest, _guard) in &misses {
-            let request = page_api::GetPageRequest {
-                request_id: self.next_request_id.fetch_add(1, Ordering::Relaxed),
-                request_class: page_api::GetPageClass::Normal,
-                read_lsn: self.request_lsns(not_modified_since),
-                rel: rel.clone(),
-                block_numbers: vec![*blkno],
-            };
-            let resp = match self.request_tracker.get_page(request).await {
-                Ok(resp) => resp,
-                Err(err) => {
-                    info!("tonic error: {err:?}");
-                    return Err(-1);
-                }
-            };
-
-            // Write the received page image directly to the shared memory location
-            // that the backend requested.
-            assert!(resp.page_images.len() == 1);
-            let page_image = resp.page_images[0].clone();
-            let src: &[u8] = page_image.as_ref();
-            // Never copy more than the destination can hold
-            let len = std::cmp::min(src.len(), dest.bytes_total() as usize);
-            unsafe {
-                std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr(), len);
-            };
-
-            // Also store it in the LFC while we have it
-            self.cache
-                .remember_page(&rel, *blkno, page_image, not_modified_since, false)
-                .await;
-        }
-        Ok(())
-    }
-
-    async fn handle_prefetchv_request(
-        self: &'static Self,
-        req: &CPrefetchVRequest,
-    ) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Check the cache first
-        let mut cache_misses = Vec::with_capacity(req.nblocks as usize);
-        for i in 0..req.nblocks {
-            let blkno = req.block_number + i as u32;
-
-            // note: this is deadlock-safe even though we hold multiple locks at the same time,
-            // because they're always acquired in the same order.
-            let in_progress_guard = self
-                .in_progress_table
-                .lock(RequestInProgressKey::Block(rel.clone(), blkno))
-                .await;
-
-            let not_modified_since = match self.cache.page_is_cached(&rel, blkno).await {
-                Ok(CacheResult::Found(_)) => {
-                    trace!("found blk {} in rel {:?} in LFC", blkno, rel);
-                    continue;
-                }
-                Ok(CacheResult::NotFound(lsn)) => lsn,
-                Err(_io_error) => return Err(-1), // FIXME errno?
-            };
-            cache_misses.push((blkno, not_modified_since, in_progress_guard));
-        }
-        if cache_misses.is_empty() {
-            return Ok(());
-        }
-        let not_modified_since = cache_misses
-            .iter()
-            .map(|(_blkno, lsn, _guard)| *lsn)
-            .max()
-            .unwrap();
-
-        // TODO: spawn separate tasks for these. Use the integrated cache to keep track of the
-        // in-flight requests
-
-        // TODO: Use batched protocol
-        for (blkno, _lsn, _guard) in cache_misses.iter() {
-            match self
-                .request_tracker
-                .get_page(page_api::GetPageRequest {
-                    request_id: self.next_request_id.fetch_add(1, Ordering::Relaxed),
-                    request_class: page_api::GetPageClass::Prefetch,
-                    read_lsn: self.request_lsns(not_modified_since),
-                    rel: rel.clone(),
-                    block_numbers: vec![*blkno],
-                })
-                .await
-            {
-                Ok(resp) => {
-                    trace!(
-                        "prefetch completed, remembering blk {} in rel {:?} in LFC",
-                        *blkno, rel
-                    );
-                    assert!(resp.page_images.len() == 1);
-                    let page_image = resp.page_images[0].clone();
-                    self.cache
-                        .remember_page(&rel, *blkno, page_image, not_modified_since, false)
-                        .await;
-                }
-                Err(err) => {
-                    info!("tonic error: {err:?}");
-                    return Err(-1);
-                }
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'t> metrics::core::Collector for CommunicatorWorkerProcessStruct<'t> {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-
-        descs.append(&mut self.request_counters.desc());
-        descs.append(&mut self.getpage_cache_misses_counter.desc());
-        descs.append(&mut self.getpage_cache_hits_counter.desc());
-        descs.append(&mut self.request_nblocks_counters.desc());
-
-        if let Some(file_cache) = &self.cache.file_cache {
-            descs.append(&mut file_cache.desc());
-        }
-        descs.append(&mut self.cache.desc());
-        descs.append(&mut self.allocator_metrics.desc());
-
-        descs
-    }
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        let mut values = Vec::new();
-
-        values.append(&mut self.request_counters.collect());
-        values.append(&mut self.getpage_cache_misses_counter.collect());
-        values.append(&mut self.getpage_cache_hits_counter.collect());
-        values.append(&mut self.request_nblocks_counters.collect());
-
-        if let Some(file_cache) = &self.cache.file_cache {
-            values.append(&mut file_cache.collect());
-        }
-        values.append(&mut self.cache.collect());
-        values.append(&mut self.allocator_metrics.collect());
-
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/metrics_exporter.rs
+++ b/pgxn/neon/communicator/src/worker_process/metrics_exporter.rs
@@ -1,83 +0,0 @@
-//! Export information about Postgres, the communicator process, file cache etc. as
-//! prometheus metrics.
-
-use axum::Router;
-use axum::body::Body;
-use axum::extract::State;
-use axum::response::Response;
-use http::StatusCode;
-use http::header::CONTENT_TYPE;
-
-use metrics;
-use metrics::proto::MetricFamily;
-use metrics::{Encoder, TextEncoder};
-
-use std::path::PathBuf;
-
-use tokio::net::UnixListener;
-
-use crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;
-
-impl<'a> CommunicatorWorkerProcessStruct<'a> {
-    pub(crate) async fn launch_exporter_task(&'static self) {
-        use axum::routing::get;
-        let app = Router::new()
-            .route("/metrics", get(get_metrics))
-            .route("/dump_cache_map", get(dump_cache_map))
-            .with_state(self);
-
-        // Listen on unix domain socket, in the data directory. That should be unique.
-        let path = PathBuf::from(".metrics.socket");
-
-        let listener = UnixListener::bind(path.clone()).unwrap();
-
-        tokio::spawn(async {
-            tracing::info!("metrics listener spawned");
-            axum::serve(listener, app).await.unwrap()
-        });
-    }
-}
-
-async fn dump_cache_map(
-    State(state): State<&CommunicatorWorkerProcessStruct<'static>>,
-) -> Response {
-    let mut buf: Vec<u8> = Vec::new();
-    state.cache.dump_map(&mut buf);
-
-    Response::builder()
-        .status(StatusCode::OK)
-        .header(CONTENT_TYPE, "application/text")
-        .body(Body::from(buf))
-        .unwrap()
-}
-
-/// Expose Prometheus metrics.
-async fn get_metrics(State(state): State<&CommunicatorWorkerProcessStruct<'static>>) -> Response {
-    use metrics::core::Collector;
-    let metrics = state.collect();
-
-    // When we call TextEncoder::encode() below, it will immediately return an
-    // error if a metric family has no metrics, so we need to preemptively
-    // filter out metric families with no metrics.
-    let metrics = metrics
-        .into_iter()
-        .filter(|m| !m.get_metric().is_empty())
-        .collect::<Vec<MetricFamily>>();
-
-    let encoder = TextEncoder::new();
-    let mut buffer = vec![];
-
-    if let Err(e) = encoder.encode(&metrics, &mut buffer) {
-        Response::builder()
-            .status(StatusCode::INTERNAL_SERVER_ERROR)
-            .header(CONTENT_TYPE, "application/text")
-            .body(Body::from(e.to_string()))
-            .unwrap()
-    } else {
-        Response::builder()
-            .status(StatusCode::OK)
-            .header(CONTENT_TYPE, encoder.format_type())
-            .body(Body::from(buffer))
-            .unwrap()
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/mod.rs
+++ b/pgxn/neon/communicator/src/worker_process/mod.rs
@@ -1,14 +0,0 @@
-//! This code runs in the communicator worker process. This provides
-//! the glue code to:
-//!
-//! - launch the 'processor',
-//! - receive IO requests from backends and pass them to the processor,
-//! - write results back to backends.
-
-mod callbacks;
-mod logging;
-mod main_loop;
-mod metrics_exporter;
-mod worker_interface;
-
-mod in_progress_ios;
--- a/pgxn/neon/communicator/src/worker_process/worker_interface.rs
+++ b/pgxn/neon/communicator/src/worker_process/worker_interface.rs
@@ -1,112 +0,0 @@
-//! Functions called from the C code in the worker process
-
-use std::collections::HashMap;
-use std::ffi::{CStr, c_char};
-use std::path::PathBuf;
-
-use tracing::error;
-
-use crate::init::CommunicatorInitStruct;
-use crate::worker_process::main_loop;
-use crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;
-
-/// Launch the communicator's tokio tasks, which do most of the work.
-///
-/// The caller has initialized the process as a regular PostgreSQL
-/// background worker process. The shared memory segment used to
-/// communicate with the backends has been allocated and initialized
-/// earlier, at postmaster startup, in rcommunicator_shmem_init().
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_process_launch(
-    cis: Box<CommunicatorInitStruct>,
-    tenant_id: *const c_char,
-    timeline_id: *const c_char,
-    auth_token: *const c_char,
-    shard_map: *mut *mut c_char,
-    nshards: u32,
-    file_cache_path: *const c_char,
-    initial_file_cache_size: u64,
-) -> &'static CommunicatorWorkerProcessStruct<'static> {
-    // Convert the arguments into more convenient Rust types
-    let tenant_id = unsafe { CStr::from_ptr(tenant_id) }.to_str().unwrap();
-    let timeline_id = unsafe { CStr::from_ptr(timeline_id) }.to_str().unwrap();
-    let auth_token = unsafe { auth_token.as_ref() }.map(|s| s.to_string());
-    let file_cache_path = {
-        if file_cache_path.is_null() {
-            None
-        } else {
-            let c_str = unsafe { CStr::from_ptr(file_cache_path) };
-            Some(PathBuf::from(c_str.to_str().unwrap()))
-        }
-    };
-    let shard_map = parse_shard_map(nshards, shard_map);
-
-    // start main loop
-    let runtime = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .thread_name("communicator thread")
-        .build()
-        .unwrap();
-
-    let worker_struct = runtime.block_on(main_loop::init(
-        cis,
-        tenant_id.to_string(),
-        timeline_id.to_string(),
-        auth_token,
-        shard_map,
-        initial_file_cache_size,
-        file_cache_path,
-    ));
-    let worker_struct = Box::leak(Box::new(worker_struct));
-
-    let main_loop_handle = runtime.spawn(worker_struct.run());
-
-    runtime.spawn(async {
-        let err = main_loop_handle.await.unwrap_err();
-        error!("error: {err:?}");
-    });
-
-    runtime.block_on(worker_struct.launch_exporter_task());
-
-    // keep the runtime running after we exit this function
-    Box::leak(Box::new(runtime));
-
-    worker_struct
-}
-
-/// Convert the "shard map" from an array of C strings, indexed by shard no to a rust HashMap
-fn parse_shard_map(
-    nshards: u32,
-    shard_map: *mut *mut c_char,
-) -> HashMap<utils::shard::ShardIndex, String> {
-    use utils::shard::*;
-
-    assert!(nshards <= u8::MAX as u32);
-
-    let mut result: HashMap<ShardIndex, String> = HashMap::new();
-    let mut p = shard_map;
-
-    for i in 0..nshards {
-        let c_str = unsafe { CStr::from_ptr(*p) };
-
-        p = unsafe { p.add(1) };
-
-        let s = c_str.to_str().unwrap();
-        let k = if nshards > 1 {
-            ShardIndex::new(ShardNumber(i as u8), ShardCount(nshards as u8))
-        } else {
-            ShardIndex::unsharded()
-        };
-        result.insert(k, s.into());
-    }
-    result
-}
-
-/// Inform the rust code about a configuration change
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_config_reload(
-    proc_handle: &'static CommunicatorWorkerProcessStruct<'static>,
-    file_cache_size: u64,
-) {
-    proc_handle.cache.resize_file_cache(file_cache_size as u32);
-}
--- a/pgxn/neon/communicator_new.c
+++ b/pgxn/neon/communicator_new.c
--- a/pgxn/neon/communicator_new.h
+++ b/pgxn/neon/communicator_new.h
@@ -1,54 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * communicator_new.h
- *	  new implementation
- *
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *-------------------------------------------------------------------------
- */
-#ifndef COMMUNICATOR_NEW_H
-#define COMMUNICATOR_NEW_H
-
-#include "neon_pgversioncompat.h"
-
-#include "storage/buf_internals.h"
-
-#include "pagestore_client.h"
-
-/* initialization at postmaster startup */
-extern void pg_init_communicator_new(void);
-extern void communicator_new_shmem_request(void);
-extern void communicator_new_shmem_startup(void);
-
-/* initialization at backend startup */
-extern void communicator_new_init(void);
-
-/* Read requests */
-extern bool communicator_new_rel_exists(NRelFileInfo rinfo, ForkNumber forkNum);
-extern BlockNumber communicator_new_rel_nblocks(NRelFileInfo rinfo, ForkNumber forknum);
-extern int64 communicator_new_dbsize(Oid dbNode);
-extern void communicator_new_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum,
-										  BlockNumber base_blockno,
-										  void **buffers, BlockNumber nblocks);
-extern void communicator_new_prefetch_register_bufferv(NRelFileInfo rinfo, ForkNumber forkNum,
-													   BlockNumber blockno,
-													   BlockNumber nblocks);
-extern int	communicator_new_read_slru_segment(SlruKind kind, int64 segno,
-											   void *buffer);
-
-/* Write requests, to keep the caches up-to-date */
-extern void communicator_new_write_page(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-										const void *buffer, XLogRecPtr lsn);
-extern void communicator_new_rel_extend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-										const void *buffer, XLogRecPtr lsn);
-extern void communicator_new_rel_zeroextend(NRelFileInfo rinfo, ForkNumber forkNum,
-											BlockNumber blockno, BlockNumber nblocks,
-											XLogRecPtr lsn);
-extern void communicator_new_rel_create(NRelFileInfo rinfo, ForkNumber forkNum);
-extern void communicator_new_rel_truncate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
-extern void communicator_new_rel_unlink(NRelFileInfo rinfo, ForkNumber forkNum);
-
-#endif							/* COMMUNICATOR_NEW_H */
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -21,7 +21,7 @@
 #include "access/xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
-#include "common/hashfn.h"
+#include "common/file_utils.h"
 #include "pgstat.h"
 #include "port/pg_iovec.h"
 #include "postmaster/bgworker.h"
@@ -36,7 +36,6 @@
 #include "storage/procsignal.h"
 #include "tcop/tcopprot.h"
 #include "utils/builtins.h"
-#include "utils/dynahash.h"
 #include "utils/guc.h"

 #if PG_VERSION_NUM >= 150000
@@ -46,6 +45,7 @@
 #include "hll.h"
 #include "bitmap.h"
 #include "file_cache.h"
+#include "file_cache_rust_hash.h"
 #include "neon.h"
 #include "neon_lwlsncache.h"
 #include "neon_perf_counters.h"
@@ -64,7 +64,7 @@
 *
 * Cache is always reconstructed at node startup, so we do not need to save mapping somewhere and worry about
 * its consistency.
-
+ *
 *
 * ## Holes
 *
@@ -76,13 +76,15 @@
 * fallocate(FALLOC_FL_PUNCH_HOLE) call. The nominal size of the file doesn't
 * shrink, but the disk space it uses does.
 *
- * Each hole is tracked by a dummy FileCacheEntry, which are kept in the
- * 'holes' linked list. They are entered into the chunk hash table, with a
- * special key where the blockNumber is used to store the 'offset' of the
- * hole, and all other fields are zero. Holes are never looked up in the hash
- * table, we only enter them there to have a FileCacheEntry that we can keep
- * in the linked list. If the soft limit is raised again, we reuse the holes
- * before extending the nominal size of the file.
+ * Each hole is tracked in a freelist. The freelist consists of two parts: a
+ * fixed-size array in shared memory, and a linked chain of on-disk
+ * chunks. When the in-memory array fills up, it's flushed to a new on-disk
+ * chunk. If the soft limit is raised again, we reuse the holes before
+ * extending the nominal size of the file.
+ *
+ * The in-memory freelist array is protected by 'lfc_lock', while the on-disk
+ * chain is protected by a separate 'lfc_freelist_lock'.  Locking rule to
+ * avoid deadlocks: always acquire lfc_freelist_lock first, then lfc_lock.
 */

 /* Local file storage allocation chunk.
@@ -92,13 +94,15 @@
 *    1Mb chunks can reduce hash map size to 320Mb.
 * 2. Improve access locality, subsequent pages will be allocated together improving seqscan speed
 */
-#define MAX_BLOCKS_PER_CHUNK_LOG  7 /* 1Mb chunk */
-#define MAX_BLOCKS_PER_CHUNK	  (1 << MAX_BLOCKS_PER_CHUNK_LOG)
+#define BLOCKS_PER_CHUNK_LOG  7 /* 1Mb chunk */
+#define BLOCKS_PER_CHUNK	  (1 << BLOCKS_PER_CHUNK_LOG)

 #define MB					((uint64)1024*1024)

-#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ >> lfc_chunk_size_log))
-#define BLOCK_TO_CHUNK_OFF(blkno) ((blkno) & (lfc_blocks_per_chunk-1))
+#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ >> BLOCKS_PER_CHUNK_LOG))
+#define BLOCK_TO_CHUNK_OFF(blkno) ((blkno) & (BLOCKS_PER_CHUNK-1))
+
+#define INVALID_OFFSET (0xffffffff)

 /*
 * Blocks are read or written to LFC file outside LFC critical section.
@@ -119,15 +123,18 @@ typedef enum FileCacheBlockState

 typedef struct FileCacheEntry
 {
-	BufferTag	key;
-	uint32		hash;
 	uint32		offset;
 	uint32		access_count;
-	dlist_node	list_node;		/* LRU/holes list node */
-	uint32		state[FLEXIBLE_ARRAY_MEMBER]; /* two bits per block */
+	dlist_node	list_node;		/* LRU list node */
+	uint32		state[(BLOCKS_PER_CHUNK * 2 + 31) / 32]; /* two bits per block */
 } FileCacheEntry;

-#define FILE_CACHE_ENRTY_SIZE MAXALIGN(offsetof(FileCacheEntry, state) + (lfc_blocks_per_chunk*2+31)/32*4)
+/* TODO: the alignment of the C and Rust declarations must match too */
+StaticAssertDecl(sizeof(FileCacheEntry) == sizeof(RustFileCacheEntry),
+				 "Rust and C declarations of FileCacheEntry are incompatible");
+StaticAssertDecl(sizeof(BufferTag) == sizeof(RustFileCacheKey),
+				 "Rust and C declarations of FileCacheKey are incompatible");
+
 #define GET_STATE(entry, i) (((entry)->state[(i) / 16] >> ((i) % 16 * 2)) & 3)
 #define SET_STATE(entry, i, new_state) (entry)->state[(i) / 16] = ((entry)->state[(i) / 16] & ~(3 << ((i) % 16 * 2))) | ((new_state) << ((i) % 16 * 2))

@@ -136,6 +143,9 @@ typedef struct FileCacheEntry

 #define MAX_PREWARM_WORKERS 8

+
+#define FREELIST_ENTRIES_PER_CHUNK (BLOCKS_PER_CHUNK * BLCKSZ / sizeof(uint32) - 2)
+
 typedef struct PrewarmWorkerState
 {
 	uint32		prewarmed_pages;
@@ -161,7 +171,6 @@ typedef struct FileCacheControl
 	uint64		evicted_pages;	/* number of evicted pages */
 	dlist_head	lru;			/* double linked list for LRU replacement
 								 * algorithm */
-	dlist_head  holes;          /* double linked list of punched holes */
 	HyperLogLogState wss_estimation; /* estimation of working set size */
 	ConditionVariable cv[N_COND_VARS]; /* turnstile of condition variables */
 	PrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];
@@ -172,24 +181,40 @@ typedef struct FileCacheControl
 	bool   prewarm_active;
 	bool   prewarm_canceled;
 	dsm_handle prewarm_lfc_state_handle;
+
+	/*
+	 * Free list. This is large enough to hold one chunk's worth of entries.
+	 */
+	uint32		freelist_size;
+	uint32		freelist_head;
+	uint32		num_free_pages;
+	uint32		free_pages[FREELIST_ENTRIES_PER_CHUNK];
 } FileCacheControl;

+typedef struct FreeListChunk
+{
+	uint32		next;
+	uint32		num_free_pages;
+	uint32		free_pages[FREELIST_ENTRIES_PER_CHUNK];
+} FreeListChunk;
+
 #define FILE_CACHE_STATE_MAGIC 0xfcfcfcfc

 #define FILE_CACHE_STATE_BITMAP(fcs)	((uint8*)&(fcs)->chunks[(fcs)->n_chunks])
-#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks)	(sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * lfc_blocks_per_chunk)+7)/8)
+#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks)	(sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * BLOCKS_PER_CHUNK)+7)/8)
 #define FILE_CACHE_STATE_SIZE(fcs)		(sizeof(FileCacheState) + (fcs->n_chunks)*sizeof(BufferTag) + (((fcs->n_chunks) << fcs->chunk_size_log)+7)/8)

-static HTAB *lfc_hash;
+static FileCacheHashMapHandle lfc_hash_handle;
+static FileCacheHashMapAccess lfc_hash;
 static int	lfc_desc = -1;
 static LWLockId lfc_lock;
-int	lfc_max_size;
-int	lfc_size_limit;
+static LWLockId lfc_freelist_lock;
+static int	lfc_max_size;
+static int	lfc_size_limit;
 static int	lfc_prewarm_limit;
 static int	lfc_prewarm_batch;
-static int	lfc_chunk_size_log = MAX_BLOCKS_PER_CHUNK_LOG;
-static int	lfc_blocks_per_chunk = MAX_BLOCKS_PER_CHUNK;
-char *lfc_path;
+static int	lfc_blocks_per_chunk_ro = BLOCKS_PER_CHUNK;
+static char *lfc_path;
 static uint64 lfc_generation;
 static FileCacheControl *lfc_ctl;
 static bool lfc_do_prewarm;
@@ -205,6 +230,11 @@ bool AmPrewarmWorker;

 #define LFC_ENABLED() (lfc_ctl->limit != 0)

+static bool freelist_push(uint32 offset);
+static bool freelist_prepare_pop(void);
+static uint32 freelist_pop(void);
+static bool freelist_is_empty(void);
+
 /*
 * Close LFC file if opened.
 * All backends should close their LFC files once LFC is disabled.
@@ -232,15 +262,9 @@ lfc_switch_off(void)

 	if (LFC_ENABLED())
 	{
-		HASH_SEQ_STATUS status;
-		FileCacheEntry *entry;
-
 		/* Invalidate hash */
-		hash_seq_init(&status, lfc_hash);
-		while ((entry = hash_seq_search(&status)) != NULL)
-		{
-			hash_search_with_hash_value(lfc_hash, &entry->key, entry->hash, HASH_REMOVE, NULL);
-		}
+		file_cache_hash_reset(lfc_hash);
+
 		lfc_ctl->generation += 1;
 		lfc_ctl->size = 0;
 		lfc_ctl->pinned = 0;
@@ -248,7 +272,9 @@ lfc_switch_off(void)
 		lfc_ctl->used_pages = 0;
 		lfc_ctl->limit = 0;
 		dlist_init(&lfc_ctl->lru);
-		dlist_init(&lfc_ctl->holes);
+
+		lfc_ctl->freelist_head = INVALID_OFFSET;
+		lfc_ctl->num_free_pages = 0;

 		/*
 		 * We need to use unlink to to avoid races in LFC write, because it is not
@@ -317,8 +343,8 @@ lfc_ensure_opened(void)
 static void
 lfc_shmem_startup(void)
 {
+	size_t		size;
 	bool		found;
-	static HASHCTL info;

 	if (prev_shmem_startup_hook)
 	{
@@ -327,27 +353,29 @@ lfc_shmem_startup(void)

 	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

-	lfc_ctl = (FileCacheControl *) ShmemInitStruct("lfc", sizeof(FileCacheControl), &found);
+	size = sizeof(FileCacheControl);
+
+	lfc_ctl = (FileCacheControl *) ShmemInitStruct("lfc", size, &found);
 	if (!found)
 	{
 		int			fd;
 		uint32		n_chunks = SIZE_MB_TO_CHUNKS(lfc_max_size);

 		lfc_lock = (LWLockId) GetNamedLWLockTranche("lfc_lock");
-		info.keysize = sizeof(BufferTag);
-		info.entrysize = FILE_CACHE_ENRTY_SIZE;
+		lfc_freelist_lock = (LWLockId) GetNamedLWLockTranche("lfc_freelist_lock");

 		/*
 		 * n_chunks+1 because we add new element to hash table before eviction
 		 * of victim
 		 */
-		lfc_hash = ShmemInitHash("lfc_hash",
-								 n_chunks + 1, n_chunks + 1,
-								 &info,
-								 HASH_ELEM | HASH_BLOBS);
-		memset(lfc_ctl, 0, sizeof(FileCacheControl));
+		lfc_hash_handle = file_cache_hash_shmem_init(n_chunks + 1, n_chunks + 1);
+
+		memset(lfc_ctl, 0, offsetof(FileCacheControl, free_pages));
 		dlist_init(&lfc_ctl->lru);
-		dlist_init(&lfc_ctl->holes);
+
+		lfc_ctl->freelist_size = FREELIST_ENTRIES_PER_CHUNK;
+		lfc_ctl->freelist_head = INVALID_OFFSET;
+		lfc_ctl->num_free_pages = 0;

 		/* Initialize hyper-log-log structure for estimating working set size */
 		initSHLL(&lfc_ctl->wss_estimation);
@@ -371,18 +399,25 @@ lfc_shmem_startup(void)

 	}
 	LWLockRelease(AddinShmemInitLock);
+
+	lfc_hash = file_cache_hash_shmem_access(lfc_hash_handle);
 }

 static void
 lfc_shmem_request(void)
 {
+	size_t		size;
+
 #if PG_VERSION_NUM>=150000
 	if (prev_shmem_request_hook)
 		prev_shmem_request_hook();
 #endif

-	RequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size) + 1, FILE_CACHE_ENRTY_SIZE));
+	size = sizeof(FileCacheControl);
+
+	RequestAddinShmemSpace(size);
 	RequestNamedLWLockTranche("lfc_lock", 1);
+	RequestNamedLWLockTranche("lfc_freelist_lock", 2);
 }

 static bool
@@ -398,24 +433,6 @@ is_normal_backend(void)
 	return lfc_ctl && MyProc && UsedShmemSegAddr && !IsParallelWorker();
 }

-static bool
-lfc_check_chunk_size(int *newval, void **extra, GucSource source)
-{
-	if (*newval & (*newval - 1))
-	{
-		elog(ERROR, "LFC chunk size should be power of two");
-		return false;
-	}
-	return true;
-}
-
-static void
-lfc_change_chunk_size(int newval, void* extra)
-{
-	lfc_chunk_size_log = pg_ceil_log2_32(newval);
-}
-
-
 static bool
 lfc_check_limit_hook(int *newval, void **extra, GucSource source)
 {
@@ -435,12 +452,14 @@ lfc_change_limit_hook(int newval, void *extra)
 	if (!lfc_ctl || !is_normal_backend())
 		return;

+	LWLockAcquire(lfc_freelist_lock, LW_EXCLUSIVE);
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);

 	/* Open LFC file only if LFC was enabled or we are going to reenable it */
 	if (newval == 0 && !LFC_ENABLED())
 	{
 		LWLockRelease(lfc_lock);
+		LWLockRelease(lfc_freelist_lock);
 		/* File should be reopened if LFC is reenabled */
 		lfc_close_file();
 		return;
@@ -449,6 +468,7 @@ lfc_change_limit_hook(int newval, void *extra)
 	if (!lfc_ensure_opened())
 	{
 		LWLockRelease(lfc_lock);
+		LWLockRelease(lfc_freelist_lock);
 		return;
 	}

@@ -464,35 +484,30 @@ lfc_change_limit_hook(int newval, void *extra)
 		 * returning their space to file system
 		 */
 		FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
-		FileCacheEntry *hole;
 		uint32		offset = victim->offset;
-		uint32		hash;
-		bool		found;
-		BufferTag	holetag;

 		CriticalAssert(victim->access_count == 0);
 #ifdef FALLOC_FL_PUNCH_HOLE
-		if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * lfc_blocks_per_chunk * BLCKSZ, lfc_blocks_per_chunk * BLCKSZ) < 0)
+		if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * BLOCKS_PER_CHUNK * BLCKSZ, BLOCKS_PER_CHUNK * BLCKSZ) < 0)
 			neon_log(LOG, "Failed to punch hole in file: %m");
 #endif
-		/* We remove the old entry, and re-enter a hole to the hash table */
-		for (int i = 0; i < lfc_blocks_per_chunk; i++)
+		/* Remove the entry and push its chunk offset onto the freelist as a hole */
+		for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 		{
 			bool is_page_cached = GET_STATE(victim, i) == AVAILABLE;
 			lfc_ctl->used_pages -= is_page_cached;
 			lfc_ctl->evicted_pages += is_page_cached;
 		}
-		hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+		file_cache_hash_remove_entry(lfc_hash, victim);

-		memset(&holetag, 0, sizeof(holetag));
-		holetag.blockNum = offset;
-		hash = get_hash_value(lfc_hash, &holetag);
-		hole = hash_search_with_hash_value(lfc_hash, &holetag, hash, HASH_ENTER, &found);
-		hole->hash = hash;
-		hole->offset = offset;
-		hole->access_count = 0;
-		CriticalAssert(!found);
-		dlist_push_tail(&lfc_ctl->holes, &hole->list_node);
+		if (!freelist_push(offset))
+		{
+			/* freelist_push already logged the error */
+			lfc_switch_off();
+			LWLockRelease(lfc_lock);
+			LWLockRelease(lfc_freelist_lock);
+			return;
+		}

 		lfc_ctl->used -= 1;
 	}
@@ -504,6 +519,7 @@ lfc_change_limit_hook(int newval, void *extra)
 	neon_log(DEBUG1, "set local file cache limit to %d", new_size);

 	LWLockRelease(lfc_lock);
+	LWLockRelease(lfc_freelist_lock);
 }

 void
@@ -579,14 +595,14 @@ lfc_init(void)
 	DefineCustomIntVariable("neon.file_cache_chunk_size",
 							"LFC chunk size in blocks (should be power of two)",
 							NULL,
-							&lfc_blocks_per_chunk,
-							MAX_BLOCKS_PER_CHUNK,
-							1,
-							MAX_BLOCKS_PER_CHUNK,
-							PGC_POSTMASTER,
+							&lfc_blocks_per_chunk_ro,
+							BLOCKS_PER_CHUNK,
+							BLOCKS_PER_CHUNK,
+							BLOCKS_PER_CHUNK,
+							PGC_INTERNAL,
 							GUC_UNIT_BLOCKS,
-							lfc_check_chunk_size,
-							lfc_change_chunk_size,
+							NULL,
+							NULL,
 							NULL);

 	DefineCustomIntVariable("neon.file_cache_prewarm_limit",
@@ -649,19 +665,19 @@ lfc_get_state(size_t max_entries)
 		fcs = (FileCacheState*)palloc0(state_size);
 		SET_VARSIZE(fcs, state_size);
 		fcs->magic = FILE_CACHE_STATE_MAGIC;
-		fcs->chunk_size_log = lfc_chunk_size_log;
+		fcs->chunk_size_log = BLOCKS_PER_CHUNK_LOG;
 		fcs->n_chunks = n_entries;
 		bitmap = FILE_CACHE_STATE_BITMAP(fcs);

 		dlist_reverse_foreach(iter, &lfc_ctl->lru)
 		{
 			FileCacheEntry *entry = dlist_container(FileCacheEntry, list_node, iter.cur);
-			fcs->chunks[i] = entry->key;
-			for (int j = 0; j < lfc_blocks_per_chunk; j++)
+			fcs->chunks[i] = *file_cache_hash_get_key_for_entry(lfc_hash, entry);
+			for (int j = 0; j < BLOCKS_PER_CHUNK; j++)
 			{
 				if (GET_STATE(entry, j) != UNAVAILABLE)
 				{
-					BITMAP_SET(bitmap, i*lfc_blocks_per_chunk + j);
+					BITMAP_SET(bitmap, i*BLOCKS_PER_CHUNK + j);
 					n_pages += 1;
 				}
 			}
@@ -670,7 +686,7 @@ lfc_get_state(size_t max_entries)
 		}
 		Assert(i == n_entries);
 		fcs->n_pages = n_pages;
-		Assert(pg_popcount((char*)bitmap, ((n_entries << lfc_chunk_size_log) + 7)/8) == n_pages);
+		Assert(pg_popcount((char*)bitmap, ((n_entries << BLOCKS_PER_CHUNK_LOG) + 7)/8) == n_pages);
 		elog(LOG, "LFC: save state of %d chunks %d pages", (int)n_entries, (int)n_pages);
 	}

@@ -726,7 +742,7 @@ lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
 	}

 	fcs_chunk_size_log = fcs->chunk_size_log;
-	if (fcs_chunk_size_log > MAX_BLOCKS_PER_CHUNK_LOG)
+	if (fcs_chunk_size_log > BLOCKS_PER_CHUNK_LOG)
 	{
 		elog(ERROR, "LFC: Invalid chunk size log: %u", fcs->chunk_size_log);
 	}
@@ -945,7 +961,7 @@ lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
 {
 	BufferTag	tag;
 	FileCacheEntry *entry;
-	uint32		hash;
+	uint64		hash;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;
@@ -958,14 +974,14 @@ lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
 	if (LFC_ENABLED())
 	{
-		for (BlockNumber blkno = 0; blkno < nblocks; blkno += lfc_blocks_per_chunk)
+		for (BlockNumber blkno = 0; blkno < nblocks; blkno += BLOCKS_PER_CHUNK)
 		{
 			tag.blockNum = blkno;
-			hash = get_hash_value(lfc_hash, &tag);
-			entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+			hash = file_cache_hash_get_hash_value(lfc_hash, &tag);
+			entry = file_cache_hash_find(lfc_hash, &tag, hash);
 			if (entry != NULL)
 			{
-				for (int i = 0; i < lfc_blocks_per_chunk; i++)
+				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 				{
 					if (GET_STATE(entry, i) == AVAILABLE)
 					{
@@ -990,7 +1006,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	FileCacheEntry *entry;
 	int			chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
 	bool		found = false;
-	uint32		hash;
+	uint64		hash;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return false;
@@ -1000,12 +1016,12 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	tag.blockNum = blkno - chunk_offs;

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
-	hash = get_hash_value(lfc_hash, &tag);
+	hash = file_cache_hash_get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_SHARED);
 	if (LFC_ENABLED())
 	{
-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+		entry = file_cache_hash_find(lfc_hash, &tag, hash);
 		found = entry != NULL && GET_STATE(entry, chunk_offs) != UNAVAILABLE;
 	}
 	LWLockRelease(lfc_lock);
@@ -1024,7 +1040,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	FileCacheEntry *entry;
 	uint32		chunk_offs;
 	int			found = 0;
-	uint32		hash;
+	uint64		hash;
 	int			i = 0;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
@@ -1037,7 +1053,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
 	tag.blockNum = blkno - chunk_offs;
-	hash = get_hash_value(lfc_hash, &tag);
+	hash = file_cache_hash_get_hash_value(lfc_hash, &tag);

 	LWLockAcquire(lfc_lock, LW_SHARED);

@@ -1048,12 +1064,12 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	}
 	while (true)
 	{
-		int		this_chunk = Min(nblocks - i, lfc_blocks_per_chunk - chunk_offs);
-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+		int		this_chunk = Min(nblocks - i, BLOCKS_PER_CHUNK - chunk_offs);
+		entry = file_cache_hash_find(lfc_hash, &tag, hash);

 		if (entry != NULL)
 		{
-			for (; chunk_offs < lfc_blocks_per_chunk && i < nblocks; chunk_offs++, i++)
+			for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
 			{
 				if (GET_STATE(entry, chunk_offs) != UNAVAILABLE)
 				{
@@ -1079,7 +1095,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		 */
 		chunk_offs = BLOCK_TO_CHUNK_OFF(blkno + i);
 		tag.blockNum = (blkno + i) - chunk_offs;
-		hash = get_hash_value(lfc_hash, &tag);
+		hash = file_cache_hash_get_hash_value(lfc_hash, &tag);
 	}

 	LWLockRelease(lfc_lock);
@@ -1128,7 +1144,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	BufferTag	tag;
 	FileCacheEntry *entry;
 	ssize_t		rc;
-	uint32		hash;
+	uint64		hash;
 	uint64		generation;
 	uint32		entry_offset;
 	int			blocks_read = 0;
@@ -1154,9 +1170,9 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	while (nblocks > 0)
 	{
 		struct iovec iov[PG_IOV_MAX];
-		uint8	chunk_mask[MAX_BLOCKS_PER_CHUNK / 8] = {0};
+		uint8	chunk_mask[BLOCKS_PER_CHUNK / 8] = {0};
 		int		chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
-		int		blocks_in_chunk = Min(nblocks, lfc_blocks_per_chunk - chunk_offs);
+		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
 		int		iteration_hits = 0;
 		int		iteration_misses = 0;
 		uint64	io_time_us = 0;
@@ -1206,7 +1222,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		Assert(iov_last_used - first_block_in_chunk_read >= n_blocks_to_read);

 		tag.blockNum = blkno - chunk_offs;
-		hash = get_hash_value(lfc_hash, &tag);
+		hash = file_cache_hash_get_hash_value(lfc_hash, &tag);
 		cv = &lfc_ctl->cv[hash % N_COND_VARS];

 		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
@@ -1219,13 +1235,13 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			return blocks_read;
 		}

-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+		entry = file_cache_hash_find(lfc_hash, &tag, hash);

 		/* Approximate working set for the blocks assumed in this entry */
 		for (int i = 0; i < blocks_in_chunk; i++)
 		{
 			tag.blockNum = blkno + i;
-			addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
+			addSHLL(&lfc_ctl->wss_estimation, file_cache_hash_get_hash_value(lfc_hash, &tag));
 		}

 		if (entry == NULL)
@@ -1296,7 +1312,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		if (iteration_hits != 0)
 		{
 			/* chunk offset (# of pages) into the LFC file */
-			off_t	first_read_offset = (off_t) entry_offset * lfc_blocks_per_chunk;
+			off_t	first_read_offset = (off_t) entry_offset * BLOCKS_PER_CHUNK;
 			int		nwrite = iov_last_used - first_block_in_chunk_read;
 			/* offset of first IOV */
 			first_read_offset += chunk_offs + first_block_in_chunk_read;
@@ -1373,14 +1389,14 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 * Returns false if there are no unpinned entries and chunk can not be added.
 */
 static bool
-lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
+lfc_init_new_entry(FileCacheEntry *entry)
 {
 	/*-----------
 	 * If the chunk wasn't already in the LFC then we have these
 	 * options, in order of preference:
 	 *
 	 * Unless there is no space available, we can:
-	 *  1. Use an entry from the `holes` list, and
+	 *  1. Use an entry from the freelist, and
 	 *  2. Create a new entry.
 	 * We can always, regardless of space in the LFC:
 	 *  3. evict an entry from LRU, and
@@ -1388,17 +1404,10 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
 	 */
 	if (lfc_ctl->used < lfc_ctl->limit)
 	{
-		if (!dlist_is_empty(&lfc_ctl->holes))
+		if (!freelist_is_empty())
 		{
 			/* We can reuse a hole that was left behind when the LFC was shrunk previously */
-			FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node,
-												   dlist_pop_head_node(&lfc_ctl->holes));
-			uint32 offset = hole->offset;
-			bool hole_found;
-
-			hash_search_with_hash_value(lfc_hash, &hole->key,
-										hole->hash, HASH_REMOVE, &hole_found);
-			CriticalAssert(hole_found);
+			uint32 offset = freelist_pop();

 			lfc_ctl->used += 1;
 			entry->offset = offset;			/* reuse the hole */
@@ -1427,7 +1436,7 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
 		FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node,
 												 dlist_pop_head_node(&lfc_ctl->lru));

-		for (int i = 0; i < lfc_blocks_per_chunk; i++)
+		for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 		{
 			bool is_page_cached = GET_STATE(victim, i) == AVAILABLE;
 			lfc_ctl->used_pages -= is_page_cached;
@@ -1436,24 +1445,21 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)

 		CriticalAssert(victim->access_count == 0);
 		entry->offset = victim->offset; /* grab victim's chunk */
-		hash_search_with_hash_value(lfc_hash, &victim->key,
-									victim->hash, HASH_REMOVE, NULL);
+		file_cache_hash_remove_entry(lfc_hash, victim);
 		neon_log(DEBUG2, "Swap file cache page");
 	}
 	else
 	{
 		/* Can't add this chunk - we don't have the space for it */
-		hash_search_with_hash_value(lfc_hash, &entry->key, hash,
-									HASH_REMOVE, NULL);
+		file_cache_hash_remove_entry(lfc_hash, entry);
 		lfc_ctl->prewarm_canceled = true; /* cancel prewarm if LFC limit is reached */
 		return false;
 	}

 	entry->access_count = 1;
-	entry->hash = hash;
 	lfc_ctl->pinned += 1;

-	for (int i = 0; i < lfc_blocks_per_chunk; i++)
+	for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 		SET_STATE(entry, i, UNAVAILABLE);

 	return true;
@@ -1490,7 +1496,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	FileCacheEntry *entry;
 	ssize_t		rc;
 	bool		found;
-	uint32		hash;
+	uint64		hash;
 	uint64		generation;
 	uint32		entry_offset;
 	instr_time io_start, io_end;
@@ -1509,9 +1515,10 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);

 	tag.blockNum = blkno - chunk_offs;
-	hash = get_hash_value(lfc_hash, &tag);
+	hash = file_cache_hash_get_hash_value(lfc_hash, &tag);
 	cv = &lfc_ctl->cv[hash % N_COND_VARS];

+ retry:
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);

 	if (!LFC_ENABLED() || !lfc_ensure_opened())
@@ -1520,6 +1527,9 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		return false;
 	}

+	if (!freelist_prepare_pop())
+		goto retry;
+
 	lwlsn = neon_get_lwlsn(rinfo, forknum, blkno);

 	if (lwlsn > lsn)
@@ -1530,12 +1540,12 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		return false;
 	}

-	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
+	entry = file_cache_hash_enter(lfc_hash, &tag, hash, &found);

 	if (lfc_prewarm_update_ws_estimation)
 	{
 		tag.blockNum = blkno;
-		addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
+		addSHLL(&lfc_ctl->wss_estimation, file_cache_hash_get_hash_value(lfc_hash, &tag));
 	}
 	if (found)
 	{
@@ -1557,7 +1567,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	}
 	else
 	{
-		if (!lfc_init_new_entry(entry, hash))
+		if (!lfc_init_new_entry(entry))
 		{
 			/*
 			 * We can't process this chunk due to lack of space in LFC,
@@ -1578,7 +1588,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);
 	INSTR_TIME_SET_CURRENT(io_start);
 	rc = pwrite(lfc_desc, buffer, BLCKSZ,
-				((off_t) entry_offset * lfc_blocks_per_chunk + chunk_offs) * BLCKSZ);
+				((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
 	INSTR_TIME_SET_CURRENT(io_end);
 	pgstat_report_wait_end();

@@ -1640,7 +1650,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	FileCacheEntry *entry;
 	ssize_t		rc;
 	bool		found;
-	uint32		hash;
+	uint64		hash;
 	uint64		generation;
 	uint32		entry_offset;
 	int			buf_offset = 0;
@@ -1653,6 +1663,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);

+ retry:
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);

 	if (!LFC_ENABLED() || !lfc_ensure_opened())
@@ -1662,6 +1673,9 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	}
 	generation = lfc_ctl->generation;

+	if (!freelist_prepare_pop())
+		goto retry;
+
 	/*
 	 * For every chunk that has blocks we're interested in, we
 	 * 1. get the chunk header
@@ -1675,7 +1689,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	{
 		struct iovec iov[PG_IOV_MAX];
 		int		chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
-		int		blocks_in_chunk = Min(nblocks, lfc_blocks_per_chunk - chunk_offs);
+		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
 		instr_time io_start, io_end;
 		ConditionVariable* cv;

@@ -1688,16 +1702,16 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		}

 		tag.blockNum = blkno - chunk_offs;
-		hash = get_hash_value(lfc_hash, &tag);
+		hash = file_cache_hash_get_hash_value(lfc_hash, &tag);
 		cv = &lfc_ctl->cv[hash % N_COND_VARS];

-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
+		entry = file_cache_hash_enter(lfc_hash, &tag, hash, &found);

 		/* Approximate working set for the blocks assumed in this entry */
 		for (int i = 0; i < blocks_in_chunk; i++)
 		{
 			tag.blockNum = blkno + i;
-			addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
+			addSHLL(&lfc_ctl->wss_estimation, file_cache_hash_get_hash_value(lfc_hash, &tag));
 		}

 		if (found)
@@ -1714,7 +1728,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		}
 		else
 		{
-			if (!lfc_init_new_entry(entry, hash))
+			if (!lfc_init_new_entry(entry))
 			{
 				/*
 				 * We can't process this chunk due to lack of space in LFC,
@@ -1763,7 +1777,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);
 		INSTR_TIME_SET_CURRENT(io_start);
 		rc = pwritev(lfc_desc, iov, blocks_in_chunk,
-					 ((off_t) entry_offset * lfc_blocks_per_chunk + chunk_offs) * BLCKSZ);
+					 ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
 		INSTR_TIME_SET_CURRENT(io_end);
 		pgstat_report_wait_end();

@@ -1823,6 +1837,140 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	LWLockRelease(lfc_lock);
 }

+/**** freelist management ****/
+
+
+/*
+ * Refill the in-memory freelist from the on-disk freelist chain, if needed.
+ *
+ * Prerequisites:
+ * - The caller is holding 'lfc_lock'.
+ *
+ * Returns true with 'lfc_lock' still held if there was nothing to do. Else the
+ * chunk is read with only 'lfc_freelist_lock' held, and false is returned with
+ * both locks released: the caller must re-acquire 'lfc_lock' and start over.
+ */
+static bool
+freelist_prepare_pop(void)
+{
+	const size_t chunk_size = (size_t) BLOCKS_PER_CHUNK * BLCKSZ;
+
+	/* If the in-memory freelist is empty, but there are more blocks available, load them */
+	if (lfc_ctl->num_free_pages == 0 && lfc_ctl->freelist_head != INVALID_OFFSET)
+	{
+		uint32		freelist_head;
+		FreeListChunk *freelist_chunk;
+		size_t		bytes_read = 0;
+
+		LWLockRelease(lfc_lock);
+		LWLockAcquire(lfc_freelist_lock, LW_EXCLUSIVE);
+
+		if (!(lfc_ctl->num_free_pages == 0 && lfc_ctl->freelist_head != INVALID_OFFSET))
+		{
+			/* someone else did the work for us while we were not holding the lock */
+			LWLockRelease(lfc_freelist_lock);
+			return false;
+		}
+
+		freelist_head = lfc_ctl->freelist_head;
+		freelist_chunk = palloc(chunk_size);
+		while (bytes_read < chunk_size)
+		{
+			/* resume after a short read: advance the buffer along with the file offset */
+			ssize_t		rc = pread(lfc_desc, (char *) freelist_chunk + bytes_read, chunk_size - bytes_read, (off_t) freelist_head * BLOCKS_PER_CHUNK * BLCKSZ + bytes_read);
+			if (rc <= 0)		/* error, or an EOF that would otherwise loop forever */
+			{
+				lfc_disable("read freelist page");
+				pfree(freelist_chunk);
+				LWLockRelease(lfc_freelist_lock);
+				return false;
+			}
+			bytes_read += rc;
+		}
+
+		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+		if (lfc_generation == lfc_ctl->generation)
+		{
+			Assert(lfc_ctl->freelist_head == freelist_head);
+			Assert(lfc_ctl->num_free_pages == 0);
+			lfc_ctl->freelist_head = freelist_chunk->next;
+			lfc_ctl->num_free_pages = freelist_chunk->num_free_pages;
+			memcpy(lfc_ctl->free_pages, freelist_chunk->free_pages, lfc_ctl->num_free_pages * sizeof(uint32));
+		}
+		/* also reached on a generation change: still free the buffer and drop both locks */
+		pfree(freelist_chunk);
+		LWLockRelease(lfc_lock);
+		LWLockRelease(lfc_freelist_lock);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Push chunk 'offset' to the freelist.
+ *
+ * Prerequisites:
+ * - The caller is holding 'lfc_lock' and 'lfc_freelist_lock'.
+ *
+ * Returns 'false' on error.
+ */
+static bool
+freelist_push(uint32 offset)
+{
+	FreeListChunk *spill;
+	struct iovec iov;
+	ssize_t		nwritten;
+
+	Assert(lfc_ctl->freelist_size == FREELIST_ENTRIES_PER_CHUNK);
+
+	/* Common case: there is still room in the in-memory array */
+	if (lfc_ctl->num_free_pages != lfc_ctl->freelist_size)
+	{
+		lfc_ctl->free_pages[lfc_ctl->num_free_pages++] = offset;
+		return true;
+	}
+
+	/*
+	 * The in-memory array is full. Spill it into the chunk at 'offset' itself,
+	 * linked in front of the current on-disk list, and make it the new head.
+	 */
+	spill = palloc(BLOCKS_PER_CHUNK * BLCKSZ);
+	spill->next = lfc_ctl->freelist_head;
+	spill->num_free_pages = lfc_ctl->num_free_pages;
+	memcpy(spill->free_pages, lfc_ctl->free_pages, lfc_ctl->num_free_pages * sizeof(uint32));
+	iov.iov_base = spill;
+	iov.iov_len = BLOCKS_PER_CHUNK * BLCKSZ;
+	nwritten = pg_pwritev_with_retry(lfc_desc, &iov, 1, (off_t) offset * BLOCKS_PER_CHUNK * BLCKSZ);
+	pfree(spill);
+	if (nwritten < 0)
+		return false;
+
+	lfc_ctl->freelist_head = offset;
+	lfc_ctl->num_free_pages = 0;
+	return true;
+}
+
+/*
+ * Remove and return the last entry of the in-memory freelist array.
+ *
+ * The caller should've checked that the list is not empty, e.g. with
+ * freelist_is_empty().
+ */
+static uint32
+freelist_pop(void)
+{
+	Assert(lfc_ctl->num_free_pages > 0);
+
+	return lfc_ctl->free_pages[--lfc_ctl->num_free_pages];
+}
+
+static bool
+freelist_is_empty(void)
+{
+	return lfc_ctl->num_free_pages == 0;	/* in-memory entries only; 'freelist_head' is not consulted */
+}
+
 typedef struct
 {
 	TupleDesc	tupdesc;
@@ -1919,7 +2067,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
 			break;
 		case 8:
 			key = "file_cache_chunk_size_pages";
-			value = lfc_blocks_per_chunk;
+			value = BLOCKS_PER_CHUNK;
 			break;
 		case 9:
 			key = "file_cache_chunks_pinned";
@@ -1990,7 +2138,6 @@ local_cache_pages(PG_FUNCTION_ARGS)

 	if (SRF_IS_FIRSTCALL())
 	{
-		HASH_SEQ_STATUS status;
 		FileCacheEntry *entry;
 		uint32		n_pages = 0;

@@ -2046,15 +2193,16 @@ local_cache_pages(PG_FUNCTION_ARGS)

 			if (LFC_ENABLED())
 			{
-				hash_seq_init(&status, lfc_hash);
-				while ((entry = hash_seq_search(&status)) != NULL)
+				uint32		num_buckets = file_cache_hash_get_num_buckets(lfc_hash);
+
+				for (uint32 pos = 0; pos < num_buckets; pos++)
 				{
-					/* Skip hole tags */
-					if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)
-					{
-						for (int i = 0; i < lfc_blocks_per_chunk; i++)
-							n_pages += GET_STATE(entry, i) == AVAILABLE;
-					}
+					entry = file_cache_hash_get_at_pos(lfc_hash, pos);
+					if (entry == NULL)
+						continue;
+
+					for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
+						n_pages += GET_STATE(entry, i) == AVAILABLE;
 				}
 			}
 		}
@@ -2076,25 +2224,28 @@ local_cache_pages(PG_FUNCTION_ARGS)
 			 * in the fctx->record structure.
 			 */
 			uint32		n = 0;
+			uint32		num_buckets = file_cache_hash_get_num_buckets(lfc_hash);

-			hash_seq_init(&status, lfc_hash);
-			while ((entry = hash_seq_search(&status)) != NULL)
+			for (uint32 pos = 0; pos < num_buckets; pos++)
 			{
-				for (int i = 0; i < lfc_blocks_per_chunk; i++)
+				entry = file_cache_hash_get_at_pos(lfc_hash, pos);
+				if (entry == NULL)
+					continue;
+
+				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 				{
-					if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)
+					const BufferTag *key = file_cache_hash_get_key_for_entry(lfc_hash, entry);
+
+					if (GET_STATE(entry, i) == AVAILABLE)
 					{
-						if (GET_STATE(entry, i) == AVAILABLE)
-						{
-							fctx->record[n].pageoffs = entry->offset * lfc_blocks_per_chunk + i;
-							fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
-							fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
-							fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
-							fctx->record[n].forknum = entry->key.forkNum;
-							fctx->record[n].blocknum = entry->key.blockNum + i;
-							fctx->record[n].accesscount = entry->access_count;
-							n += 1;
-						}
+						fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i;
+						fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(*key));
+						fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(*key));
+						fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(*key));
+						fctx->record[n].forknum = key->forkNum;
+						fctx->record[n].blocknum = key->blockNum + i;
+						fctx->record[n].accesscount = entry->access_count;
+						n += 1;
 					}
 				}
 			}
--- a/pgxn/neon/file_cache.h
+++ b/pgxn/neon/file_cache.h
@@ -26,9 +26,6 @@ typedef struct FileCacheState

 /* GUCs */
 extern bool lfc_store_prefetch_result;
-extern int	lfc_max_size;
-extern int	lfc_size_limit;
-extern char *lfc_path;

 /* functions for local file cache */
 extern void lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -281,54 +281,6 @@ AssignPageserverConnstring(const char *newval, void *extra)
 	}
 }

-/* Return a copy of the whole shard map from shared memory */
-void
-get_shard_map(char ***connstrs_p, shardno_t *num_shards_p)
-{
-	uint64		begin_update_counter;
-	uint64		end_update_counter;
-	ShardMap   *shard_map = &pagestore_shared->shard_map;
-	shardno_t	num_shards;
-	char	   *buf;
-	char	  **connstrs;
-
-	buf = palloc(MAX_SHARDS*MAX_PAGESERVER_CONNSTRING_SIZE);
-	connstrs = palloc(sizeof(char *) * MAX_SHARDS);
-
-	/*
-	 * Postmaster can update the shared memory values concurrently, in which
-	 * case we would copy a garbled mix of the old and new values. We will
-	 * detect it because the counter's won't match, and retry. But it's
-	 * important that we don't do anything within the retry-loop that would
-	 * depend on the string having valid contents.
-	 */
-	do
-	{
-		char		*p;
-
-		begin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
-		end_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);
-
-		num_shards = shard_map->num_shards;
-
-		p = buf;
-		for (int i = 0; i < Min(num_shards, MAX_SHARDS); i++)
-		{
-			strlcpy(p, shard_map->connstring[i], MAX_PAGESERVER_CONNSTRING_SIZE);
-			connstrs[i] = p;
-			p += MAX_PAGESERVER_CONNSTRING_SIZE;
-		}
-
-		pg_memory_barrier();
-	}
-	while (begin_update_counter != end_update_counter
-		   || begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)
-		   || end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));
-
-	*connstrs_p = connstrs;
-	*num_shards_p = num_shards;
-}
-
 /*
 * Get the current number of shards, and/or the connection string for a
 * particular shard from the shard map in shared memory.
--- a/pgxn/neon/neon.c
+++ b/pgxn/neon/neon.c
@@ -21,7 +21,6 @@
 #include "replication/logicallauncher.h"
 #include "replication/slot.h"
 #include "replication/walsender.h"
-#include "storage/ipc.h"
 #include "storage/proc.h"
 #include "funcapi.h"
 #include "access/htup_details.h"
@@ -31,7 +30,6 @@
 #include "utils/guc_tables.h"

 #include "communicator.h"
-#include "communicator_new.h"
 #include "extension_server.h"
 #include "file_cache.h"
 #include "neon.h"
@@ -49,7 +47,6 @@ PG_MODULE_MAGIC;
 void		_PG_init(void);


-bool neon_enable_new_communicator;
 static int  running_xacts_overflow_policy;
 static bool monitor_query_exec_time = false;

@@ -59,14 +56,11 @@ static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 static void neon_ExecutorStart(QueryDesc *queryDesc, int eflags);
 static void neon_ExecutorEnd(QueryDesc *queryDesc);

+#if PG_MAJORVERSION_NUM >= 16
 static shmem_startup_hook_type prev_shmem_startup_hook;
-#if PG_VERSION_NUM>=150000
-static shmem_request_hook_type prev_shmem_request_hook;
-#endif

-static void neon_shmem_request(void);
 static void neon_shmem_startup_hook(void);
-
+#endif
 #if PG_MAJORVERSION_NUM >= 17
 uint32		WAIT_EVENT_NEON_LFC_MAINTENANCE;
 uint32		WAIT_EVENT_NEON_LFC_READ;
@@ -445,36 +439,17 @@ _PG_init(void)
 	 */
 #if PG_VERSION_NUM >= 160000
 	load_file("$libdir/neon_rmgr", false);
-#endif

 	prev_shmem_startup_hook = shmem_startup_hook;
 	shmem_startup_hook = neon_shmem_startup_hook;
-#if PG_VERSION_NUM>=150000
-	prev_shmem_request_hook = shmem_request_hook;
-	shmem_request_hook = neon_shmem_request;
-#else
-	neon_shmem_request();
 #endif

-	DefineCustomBoolVariable(
-							"neon.enable_new_communicator",
-							"Enables new communicator implementation",
-							NULL,
-							&neon_enable_new_communicator,
-							true,
-							PGC_POSTMASTER,
-							0,
-							NULL, NULL, NULL);
-
 	pg_init_libpagestore();
 	lfc_init();
 	pg_init_walproposer();
 	init_lwlsncache();

 	pg_init_communicator();
-	if (neon_enable_new_communicator)
-		pg_init_communicator_new();
-
 	Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;

 	InitUnstableExtensionsSupport();
@@ -608,17 +583,7 @@ backpressure_throttling_time(PG_FUNCTION_ARGS)
 	PG_RETURN_UINT64(BackpressureThrottlingTime());
 }

-static void
-neon_shmem_request(void)
-{
-#if PG_VERSION_NUM>=150000
-	if (prev_shmem_request_hook)
-		prev_shmem_request_hook();
-#endif
-
-	communicator_new_shmem_request();
-}
-
+#if PG_MAJORVERSION_NUM >= 16
 static void
 neon_shmem_startup_hook(void)
 {
@@ -638,9 +603,8 @@ neon_shmem_startup_hook(void)
 	WAIT_EVENT_NEON_PS_READ = WaitEventExtensionNew("Neon/PS_ReadIO");
 	WAIT_EVENT_NEON_WAL_DL = WaitEventExtensionNew("Neon/WAL_Download");
 #endif
-
-	communicator_new_shmem_startup();
 }
+#endif

 /*
 * ExecutorStart hook: start up tracking if needed
--- a/pgxn/neon/neon.h
+++ b/pgxn/neon/neon.h
@@ -13,7 +13,6 @@
 #include "utils/wait_event.h"

 /* GUCs */
-extern bool neon_enable_new_communicator;
 extern char *neon_auth_token;
 extern char *neon_timeline;
 extern char *neon_tenant;
--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -9,10 +9,6 @@
 #include "fmgr.h"
 #include "storage/buf_internals.h"

-#if PG_MAJORVERSION_NUM < 16
-typedef PGAlignedBlock PGIOAlignedBlock;
-#endif
-
 #if PG_MAJORVERSION_NUM < 17
 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
 #else
@@ -164,10 +160,6 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
 #define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
 #endif

-#if PG_MAJORVERSION_NUM < 17
-#define	MyProcNumber (MyProc - &ProcGlobal->allProcs[0])
-#endif
-
 #if PG_MAJORVERSION_NUM < 15
 extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
 extern TimeLineID GetWALInsertionTimeLine(void);
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -228,7 +228,6 @@ extern char *neon_tenant;
 extern int32 max_cluster_size;
 extern int  neon_protocol_version;

-extern void get_shard_map(char ***connstrs_p, shardno_t *num_shards_p);
 extern shardno_t get_shard_number(BufferTag* tag);

 extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -62,7 +62,6 @@

 #include "bitmap.h"
 #include "communicator.h"
-#include "communicator_new.h"
 #include "file_cache.h"
 #include "neon.h"
 #include "neon_lwlsncache.h"
@@ -73,6 +72,10 @@
 #include "access/xlogrecovery.h"
 #endif

+#if PG_VERSION_NUM < 160000
+typedef PGAlignedBlock PGIOAlignedBlock;
+#endif
+
 /*
 * If DEBUG_COMPARE_LOCAL is defined, we pass through all the SMGR API
 * calls to md.c, and *also* do the calls to the Page Server. On every
@@ -94,7 +97,7 @@ static char *hexdump_page(char *page);
 		NInfoGetRelNumber(InfoFromSMgrRel(reln)) >= FirstNormalObjectId \
 )

-const int	SmgrTrace = DEBUG1;
+const int	SmgrTrace = DEBUG5;

 /* unlogged relation build states */
 typedef enum
@@ -776,15 +779,10 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
 		return false;
 	}

-	if (neon_enable_new_communicator)
-		return communicator_new_rel_exists(InfoFromSMgrRel(reln), forkNum);
-	else
-	{
-		neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum,
-							  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum,
+						  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);

-		return communicator_exists(InfoFromSMgrRel(reln), forkNum, &request_lsns);
-	}
+	return communicator_exists(InfoFromSMgrRel(reln), forkNum, &request_lsns);
 }

 /*
@@ -822,40 +820,33 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 		 RelFileInfoFmt(InfoFromSMgrRel(reln)),
 		 forkNum);

-	if (neon_enable_new_communicator)
+	/*
+	 * Newly created relation is empty, remember that in the relsize cache.
+	 *
+	 * Note that in REDO, this is called to make sure the relation fork
+	 * exists, but it does not truncate the relation. So, we can only update
+	 * the relsize if it didn't exist before.
+	 *
+	 * Also, in redo, we must make sure to update the cached size of the
+	 * relation, as that is the primary source of truth for REDO's file length
+	 * considerations, and as file extension isn't (perfectly) logged, we need
+	 * to take care of that before we hit file size checks.
+	 *
+	 * FIXME: This is currently not just an optimization, but required for
+	 * correctness. Postgres can call smgrnblocks() on the newly-created
+	 * relation. Currently, we don't call SetLastWrittenLSN() when a new
+	 * relation created, so if we didn't remember the size in the relsize
+	 * cache, we might call smgrnblocks() on the newly-created relation before
+	 * the creation WAL record has been received by the page server.
+	 */
+	if (isRedo)
 	{
-		communicator_new_rel_create(InfoFromSMgrRel(reln), forkNum);
+		update_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
+		get_cached_relsize(InfoFromSMgrRel(reln), forkNum,
+						   &reln->smgr_cached_nblocks[forkNum]);
 	}
 	else
-	{
-		/*
-		 * Newly created relation is empty, remember that in the relsize cache.
-		 *
-		 * Note that in REDO, this is called to make sure the relation fork
-		 * exists, but it does not truncate the relation. So, we can only update
-		 * the relsize if it didn't exist before.
-		 *
-		 * Also, in redo, we must make sure to update the cached size of the
-		 * relation, as that is the primary source of truth for REDO's file length
-		 * considerations, and as file extension isn't (perfectly) logged, we need
-		 * to take care of that before we hit file size checks.
-		 *
-		 * FIXME: This is currently not just an optimization, but required for
-		 * correctness. Postgres can call smgrnblocks() on the newly-created
-		 * relation. Currently, we don't call SetLastWrittenLSN() when a new
-		 * relation created, so if we didn't remember the size in the relsize
-		 * cache, we might call smgrnblocks() on the newly-created relation before
-		 * the creation WAL record hass been received by the page server.
-		 */
-		if (isRedo)
-		{
-			update_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
-			get_cached_relsize(InfoFromSMgrRel(reln), forkNum,
-							   &reln->smgr_cached_nblocks[forkNum]);
-		}
-		else
-			set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
-	}
+		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);

 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -890,15 +881,9 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
 	 * unlink, it won't do any harm if the file doesn't exist.
 	 */
 	mdunlink(rinfo, forkNum, isRedo);
-
 	if (!NRelFileInfoBackendIsTemp(rinfo))
 	{
-		if (neon_enable_new_communicator)
-		{
-			communicator_new_rel_unlink(InfoFromNInfoB(rinfo), forkNum);
-		}
-		else
-			forget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);
+		forget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);
 	}
 }

@@ -986,43 +971,34 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 		 forkNum, blkno,
 		 (uint32) (lsn >> 32), (uint32) lsn);

-	if (neon_enable_new_communicator)
-	{
-		// FIXME: this can pass lsn == invalid. Is that ok?
-		communicator_new_rel_extend(InfoFromSMgrRel(reln), forkNum, blkno, (const void *) buffer, lsn);
-	}
-	else
-	{
-		lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
+	lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);

 #ifdef DEBUG_COMPARE_LOCAL
-		if (IS_LOCAL_REL(reln))
-			mdextend(reln, forkNum, blkno, buffer, skipFsync);
+	if (IS_LOCAL_REL(reln))
+		mdextend(reln, forkNum, blkno, buffer, skipFsync);
 #endif

-		/*
-		 * smgr_extend is often called with an all-zeroes page, so
-		 * lsn==InvalidXLogRecPtr. An smgr_write() call will come for the buffer
-		 * later, after it has been initialized with the real page contents, and
-		 * it is eventually evicted from the buffer cache. But we need a valid LSN
-		 * to the relation metadata update now.
-		 */
-		if (lsn == InvalidXLogRecPtr)
-		{
-			lsn = GetXLogInsertRecPtr();
-			neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
-		}
-		neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
+	/*
+	 * smgr_extend is often called with an all-zeroes page, so
+	 * lsn==InvalidXLogRecPtr. An smgr_write() call will come for the buffer
+	 * later, after it has been initialized with the real page contents, and
+	 * it is eventually evicted from the buffer cache. But we need a valid LSN
+	 * to the relation metadata update now.
+	 */
+	if (lsn == InvalidXLogRecPtr)
+	{
+		lsn = GetXLogInsertRecPtr();
+		neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
 	}
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
 }

 #if PG_MAJORVERSION_NUM >= 16
 static void
-neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber start_block,
+neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 				int nblocks, bool skipFsync)
 {
 	const PGIOAlignedBlock buffer = {0};
-	BlockNumber blocknum = start_block;
 	int			remblocks = nblocks;
 	XLogRecPtr	lsn = 0;

@@ -1117,15 +1093,8 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber start_block,

 	Assert(lsn != 0);

-	if (neon_enable_new_communicator)
-	{
-		communicator_new_rel_zeroextend(InfoFromSMgrRel(reln), forkNum, start_block, nblocks, lsn);
-	}
-	else
-	{
-		neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
-		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
-	}
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
+	set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
 }
 #endif

@@ -1185,17 +1154,11 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_enable_new_communicator)
-	{
-		communicator_new_prefetch_register_bufferv(InfoFromSMgrRel(reln), forknum, blocknum, nblocks);
-		return false;
-	}
-
 	tag.spcOid = reln->smgr_rlocator.locator.spcOid;
 	tag.dbOid = reln->smgr_rlocator.locator.dbOid;
 	tag.relNumber = reln->smgr_rlocator.locator.relNumber;
 	tag.forkNum = forknum;
-	
+
 	while (nblocks > 0)
 	{
 		int		iterblocks = Min(nblocks, PG_IOV_MAX);
@@ -1217,8 +1180,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		blocknum += iterblocks;
 	}

-	if (!neon_enable_new_communicator)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 	return false;
 }
@@ -1255,13 +1217,9 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)

 	CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));

-	if (neon_enable_new_communicator)
-		communicator_new_prefetch_register_bufferv(InfoFromSMgrRel(reln), forknum, blocknum, 1);
-	else
-		communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
+	communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);

-	if (!neon_enable_new_communicator)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 	return false;
 }
@@ -1305,8 +1263,7 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
 	 */
 	neon_log(SmgrTrace, "writeback noop");

-	if (!neon_enable_new_communicator)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -1322,14 +1279,7 @@ void
 neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				 neon_request_lsns request_lsns, void *buffer)
 {
-	if (neon_enable_new_communicator)
-	{
-		// FIXME: request_lsns is ignored. That affects the neon_test_utils callers.
-		// Add the capability to specify the LSNs explicitly, for the sake of neon_test_utils ?
-		communicator_new_read_at_lsnv(rinfo, forkNum, blkno, &buffer, 1);
-	}
-	else
-		communicator_read_at_lsnv(rinfo, forkNum, blkno, &request_lsns, &buffer, 1, NULL);
+	communicator_read_at_lsnv(rinfo, forkNum, blkno, &request_lsns, &buffer, 1, NULL);
 }

 #ifdef DEBUG_COMPARE_LOCAL
@@ -1457,49 +1407,41 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_enable_new_communicator)
+	/* Try to read PS results if they are available */
+	communicator_prefetch_pump_state();
+
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1);
+
+	present = 0;
+	bufferp = buffer;
+	if (communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, &bufferp, &present))
 	{
-		communicator_new_read_at_lsnv(InfoFromSMgrRel(reln), forkNum, blkno,
-									  (void *) &buffer, 1);
+		/* Prefetch hit */
+#ifdef DEBUG_COMPARE_LOCAL
+		compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
+#else
+		return;
+#endif
 	}
-	else
+
+	/* Try to read from local file cache */
+	if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
 	{
-		/* Try to read PS results if they are available */
-		communicator_prefetch_pump_state();
-
-		neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1);
-
-		present = 0;
-		bufferp = buffer;
-		if (communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, &bufferp, &present))
-		{
-			/* Prefetch hit */
+		MyNeonCounters->file_cache_hits_total++;
 #ifdef DEBUG_COMPARE_LOCAL
-			compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
+		compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
 #else
-			return;
+		return;
 #endif
-		}
-
-		/* Try to read from local file cache */
-		if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
-		{
-			MyNeonCounters->file_cache_hits_total++;
-#ifdef DEBUG_COMPARE_LOCAL
-			compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
-#else
-			return;
-#endif
-		}
-
-		neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
-
-		/*
-		 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
-		 */
-		communicator_prefetch_pump_state();
 	}

+	neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
+
+	/*
+	 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
+	 */
+	communicator_prefetch_pump_state();
+
 #ifdef DEBUG_COMPARE_LOCAL
 	compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
 #endif
@@ -1563,57 +1505,48 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				 nblocks, PG_IOV_MAX);

 	/* Try to read PS results if they are available */
-	if (!neon_enable_new_communicator)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();
+
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum,
+						  request_lsns, nblocks);

 	memset(read_pages, 0, sizeof(read_pages));

-	if (neon_enable_new_communicator)
-	{
-		communicator_new_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum,
-									  buffers, nblocks);
-	}
-	else
-	{
-		neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum,
-							  request_lsns, nblocks);
-		
-		prefetch_result = communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forknum,
-														blocknum, request_lsns, nblocks,
-														buffers, read_pages);
+	prefetch_result = communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forknum,
+													blocknum, request_lsns, nblocks,
+													buffers, read_pages);

 #ifdef DEBUG_COMPARE_LOCAL
-		compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
-		memset(read_pages, 0, sizeof(read_pages));
+	compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
+	memset(read_pages, 0, sizeof(read_pages));
 #else
-		if (prefetch_result == nblocks)
-			return;
+	if (prefetch_result == nblocks)
+		return;
 #endif

-		/* Try to read from local file cache */
-		lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
-									  nblocks, read_pages);
+	/* Try to read from local file cache */
+	lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
+								  nblocks, read_pages);

-		if (lfc_result > 0)
-			MyNeonCounters->file_cache_hits_total += lfc_result;
+	if (lfc_result > 0)
+		MyNeonCounters->file_cache_hits_total += lfc_result;

 #ifdef DEBUG_COMPARE_LOCAL
-		compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
-		memset(read_pages, 0, sizeof(read_pages));
+	compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
+	memset(read_pages, 0, sizeof(read_pages));
 #else
-		/* Read all blocks from LFC, so we're done */
-		if (prefetch_result + lfc_result == nblocks)
-			return;
+	/* Read all blocks from LFC, so we're done */
+	if (prefetch_result + lfc_result == nblocks)
+		return;
 #endif

-		communicator_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns,
-								  buffers, nblocks, read_pages);
+	communicator_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns,
+							  buffers, nblocks, read_pages);

-		/*
-		 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
-		 */
-		communicator_prefetch_pump_state();
-	}
+	/*
+	 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
+	 */
+	communicator_prefetch_pump_state();

 #ifdef DEBUG_COMPARE_LOCAL
 	memset(read_pages, 0xFF, sizeof(read_pages));
@@ -1719,16 +1652,9 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
 		 forknum, blocknum,
 		 (uint32) (lsn >> 32), (uint32) lsn);

-	if (neon_enable_new_communicator)
-	{
-		communicator_new_write_page(InfoFromSMgrRel(reln), forknum, blocknum, buffer, lsn);
-	}
-	else
-	{
-		lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
+	lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);

-		communicator_prefetch_pump_state();
-	}
+	communicator_prefetch_pump_state();

 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -1790,21 +1716,9 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,

 	neon_wallog_pagev(reln, forknum, blkno, nblocks, (const char **) buffers, false);

-	if (neon_enable_new_communicator)
-	{
-		for (int i = 0; i < nblocks; i++)
-		{
-			XLogRecPtr lsn = PageGetLSN((Page) buffers[i]);
+	lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);

-			communicator_new_write_page(InfoFromSMgrRel(reln), forknum, blkno + i, buffers[i], lsn);
-		}
-	}
-	else
-	{
-		lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);
-
-		communicator_prefetch_pump_state();
-	}
+	communicator_prefetch_pump_state();

 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -1844,26 +1758,19 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_enable_new_communicator)
+	if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
 	{
-		n_blocks = communicator_new_rel_nblocks(InfoFromSMgrRel(reln), forknum);
+		neon_log(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
+			 RelFileInfoFmt(InfoFromSMgrRel(reln)),
+			 forknum, n_blocks);
+		return n_blocks;
 	}
-	else
-	{
-		if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
-		{
-			neon_log(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
-					 RelFileInfoFmt(InfoFromSMgrRel(reln)),
-					 forknum, n_blocks);
-			return n_blocks;
-		}

-		neon_get_request_lsns(InfoFromSMgrRel(reln), forknum,
-							  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forknum,
+						  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);

-		n_blocks = communicator_nblocks(InfoFromSMgrRel(reln), forknum, &request_lsns);
-		update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
-	}
+	n_blocks = communicator_nblocks(InfoFromSMgrRel(reln), forknum, &request_lsns);
+	update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);

 	neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
 			 RelFileInfoFmt(InfoFromSMgrRel(reln)),
@@ -1884,17 +1791,10 @@ neon_dbsize(Oid dbNode)
 	neon_request_lsns request_lsns;
 	NRelFileInfo dummy_node = {0};

-	if (neon_enable_new_communicator)
-	{
-		db_size = communicator_new_dbsize(dbNode);
-	}
-	else
-	{
-		neon_get_request_lsns(dummy_node, MAIN_FORKNUM,
-							  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);
+	neon_get_request_lsns(dummy_node, MAIN_FORKNUM,
+						  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);

-		db_size = communicator_dbsize(dbNode, &request_lsns);
-	}
+	db_size = communicator_dbsize(dbNode, &request_lsns);

 	neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
 			 dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);
@@ -1908,6 +1808,8 @@ neon_dbsize(Oid dbNode)
 static void
 neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
 {
+	XLogRecPtr	lsn;
+
 	switch (reln->smgr_relpersistence)
 	{
 		case 0:
@@ -1931,43 +1833,34 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, Blo
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_enable_new_communicator)
-	{
-		communicator_new_rel_truncate(InfoFromSMgrRel(reln), forknum, nblocks);
-	}
-	else
-	{
-		XLogRecPtr	lsn;
+	set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);

-		set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
+	/*
+	 * Truncating a relation drops all its buffers from the buffer cache
+	 * without calling smgrwrite() on them. But we must account for that in
+	 * our tracking of last-written-LSN all the same: any future smgrnblocks()
+	 * request must return the new size after the truncation. We don't know
+	 * what the LSN of the truncation record was, so be conservative and use
+	 * the most recently inserted WAL record's LSN.
+	 */
+	lsn = GetXLogInsertRecPtr();
+	lsn = nm_adjust_lsn(lsn);

-		/*
-		 * Truncating a relation drops all its buffers from the buffer cache
-		 * without calling smgrwrite() on them. But we must account for that in
-		 * our tracking of last-written-LSN all the same: any future smgrnblocks()
-		 * request must return the new size after the truncation. We don't know
-		 * what the LSN of the truncation record was, so be conservative and use
-		 * the most recently inserted WAL record's LSN.
-		 */
-		lsn = GetXLogInsertRecPtr();
-		lsn = nm_adjust_lsn(lsn);
+	/*
+	 * Flush it, too. We don't actually care about it here, but let's uphold
+	 * the invariant that last-written LSN <= flush LSN.
+	 */
+	XLogFlush(lsn);

-		/*
-		 * Flush it, too. We don't actually care about it here, but let's uphold
-		 * the invariant that last-written LSN <= flush LSN.
-		 */
-		XLogFlush(lsn);
-
-		/*
-		 * Truncate may affect several chunks of relations. So we should either
-		 * update last written LSN for all of them, or update LSN for "dummy"
-		 * metadata block. Second approach seems more efficient. If the relation
-		 * is extended again later, the extension will update the last-written LSN
-		 * for the extended pages, so there's no harm in leaving behind obsolete
-		 * entries for the truncated chunks.
-		 */
-		neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forknum);
-	}
+	/*
+	 * Truncate may affect several chunks of relations. So we should either
+	 * update last written LSN for all of them, or update LSN for "dummy"
+	 * metadata block. Second approach seems more efficient. If the relation
+	 * is extended again later, the extension will update the last-written LSN
+	 * for the extended pages, so there's no harm in leaving behind obsolete
+	 * entries for the truncated chunks.
+	 */
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forknum);

 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -2009,8 +1902,7 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)

 	neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop");

-	if (!neon_enable_new_communicator)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -2280,10 +2172,7 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
 	request_lsns.not_modified_since = not_modified_since;
 	request_lsns.effective_request_lsn = request_lsn;

-	if (neon_enable_new_communicator)
-		n_blocks = communicator_new_read_slru_segment(kind, segno, buffer);
-	else
-		n_blocks = communicator_read_slru_segment(kind, segno, &request_lsns, buffer);
+	n_blocks = communicator_read_slru_segment(kind, segno, &request_lsns, buffer);

 	return n_blocks;
 }
@@ -2320,8 +2209,7 @@ AtEOXact_neon(XactEvent event, void *arg)
 			}
 			break;
 	}
-	if (!neon_enable_new_communicator)
-		communicator_reconfigure_timeout_if_needed();
+	communicator_reconfigure_timeout_if_needed();
 }

 static const struct f_smgr neon_smgr =
@@ -2379,10 +2267,7 @@ smgr_init_neon(void)

 	smgr_init_standard();
 	neon_init();
-	if (neon_enable_new_communicator)
-		communicator_new_init();
-	else
-		communicator_init();
+	communicator_init();
 }


@@ -2394,12 +2279,6 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	/* This is only used in WAL replay */
 	Assert(RecoveryInProgress());

-	if (neon_enable_new_communicator)
-	{
-		// FIXME: broken, but this is only used in replica
-		elog(ERROR, "not implemented yet");
-	}
-
 	/* Extend the relation if we know its size */
 	if (get_cached_relsize(rinfo, forknum, &relsize))
 	{
--- a/storage_controller/migrations/2025-06-07-043910_pageserver_grpc_addr/down.sql
+++ b/storage_controller/migrations/2025-06-07-043910_pageserver_grpc_addr/down.sql
@@ -1 +0,0 @@
-ALTER TABLE nodes DROP listen_grpc_addr, listen_grpc_port;
--- a/storage_controller/migrations/2025-06-07-043910_pageserver_grpc_addr/up.sql
+++ b/storage_controller/migrations/2025-06-07-043910_pageserver_grpc_addr/up.sql
@@ -1 +0,0 @@
-ALTER TABLE nodes ADD listen_grpc_addr VARCHAR NULL, ADD listen_grpc_port INTEGER NULL;
--- a/storage_controller/src/compute_hook.rs
+++ b/storage_controller/src/compute_hook.rs
@@ -5,7 +5,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::Context;
-use control_plane::endpoint::{ComputeControlPlane, EndpointStatus, PageserverProtocol};
+use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
 use control_plane::local_env::LocalEnv;
 use futures::StreamExt;
 use hyper::StatusCode;
@@ -428,8 +428,7 @@ impl ComputeHook {
                    .expect("Unknown pageserver");
                let (pg_host, pg_port) = parse_host_port(&ps_conf.listen_pg_addr)
                    .expect("Unable to parse listen_pg_addr");
-                // TODO: plumb gRPC through storage-controller.
-                (PageserverProtocol::Libpq, pg_host, pg_port.unwrap_or(5432))
+                (pg_host, pg_port.unwrap_or(5432))
            })
            .collect::<Vec<_>>();

--- a/storage_controller/src/node.rs
+++ b/storage_controller/src/node.rs
@@ -37,9 +37,6 @@ pub(crate) struct Node {
    listen_pg_addr: String,
    listen_pg_port: u16,

-    listen_grpc_addr: Option<String>,
-    listen_grpc_port: Option<u16>,
-
    availability_zone_id: AvailabilityZone,

    // Flag from storcon's config to use https for pageserver admin API.
@@ -102,8 +99,8 @@ impl Node {
        self.id == register_req.node_id
            && self.listen_http_addr == register_req.listen_http_addr
            && self.listen_http_port == register_req.listen_http_port
-            // Note: HTTPS and gRPC ports/addresses may change, to allow for migrations. See
-            // [`Self::need_update`] for more details.
+            // Note: listen_https_port may change. See [`Self::need_update`] for more details.
+            // && self.listen_https_port == register_req.listen_https_port
            && self.listen_pg_addr == register_req.listen_pg_addr
            && self.listen_pg_port == register_req.listen_pg_port
            && self.availability_zone_id == register_req.availability_zone_id
@@ -111,10 +108,9 @@ impl Node {

    // Do we need to update an existing record in DB on this registration request?
    pub(crate) fn need_update(&self, register_req: &NodeRegisterRequest) -> bool {
-        // These are checked here, since they may change before we're fully migrated.
+        // listen_https_port is checked here because it may change during migration to https.
+        // After migration, this check may be moved to registration_match.
        self.listen_https_port != register_req.listen_https_port
-            || self.listen_grpc_addr != register_req.listen_grpc_addr
-            || self.listen_grpc_port != register_req.listen_grpc_port
    }

    /// For a shard located on this node, populate a response object
@@ -128,8 +124,6 @@ impl Node {
            listen_https_port: self.listen_https_port,
            listen_pg_addr: self.listen_pg_addr.clone(),
            listen_pg_port: self.listen_pg_port,
-            listen_grpc_addr: self.listen_grpc_addr.clone(),
-            listen_grpc_port: self.listen_grpc_port,
        }
    }

@@ -216,8 +210,6 @@ impl Node {
        listen_https_port: Option<u16>,
        listen_pg_addr: String,
        listen_pg_port: u16,
-        listen_grpc_addr: Option<String>,
-        listen_grpc_port: Option<u16>,
        availability_zone_id: AvailabilityZone,
        use_https: bool,
    ) -> anyhow::Result<Self> {
@@ -235,8 +227,6 @@ impl Node {
            listen_https_port,
            listen_pg_addr,
            listen_pg_port,
-            listen_grpc_addr,
-            listen_grpc_port,
            scheduling: NodeSchedulingPolicy::Active,
            availability: NodeAvailability::Offline,
            availability_zone_id,
@@ -254,8 +244,6 @@ impl Node {
            listen_https_port: self.listen_https_port.map(|x| x as i32),
            listen_pg_addr: self.listen_pg_addr.clone(),
            listen_pg_port: self.listen_pg_port as i32,
-            listen_grpc_addr: self.listen_grpc_addr.clone(),
-            listen_grpc_port: self.listen_grpc_port.map(|x| x as i32),
            availability_zone_id: self.availability_zone_id.0.clone(),
        }
    }
@@ -280,8 +268,6 @@ impl Node {
            listen_https_port: np.listen_https_port.map(|x| x as u16),
            listen_pg_addr: np.listen_pg_addr,
            listen_pg_port: np.listen_pg_port as u16,
-            listen_grpc_addr: np.listen_grpc_addr,
-            listen_grpc_port: np.listen_grpc_port.map(|x| x as u16),
            availability_zone_id: AvailabilityZone(np.availability_zone_id),
            use_https,
            cancel: CancellationToken::new(),
@@ -371,8 +357,6 @@ impl Node {
            listen_https_port: self.listen_https_port,
            listen_pg_addr: self.listen_pg_addr.clone(),
            listen_pg_port: self.listen_pg_port,
-            listen_grpc_addr: self.listen_grpc_addr.clone(),
-            listen_grpc_port: self.listen_grpc_port,
        }
    }
 }
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -2048,8 +2048,6 @@ pub(crate) struct NodePersistence {
    pub(crate) listen_pg_port: i32,
    pub(crate) availability_zone_id: String,
    pub(crate) listen_https_port: Option<i32>,
-    pub(crate) listen_grpc_addr: Option<String>,
-    pub(crate) listen_grpc_port: Option<i32>,
 }

 /// Tenant metadata health status that are stored durably.
--- a/storage_controller/src/scheduler.rs
+++ b/storage_controller/src/scheduler.rs
@@ -945,8 +945,6 @@ pub(crate) mod test_utils {
                        None,
                        format!("pghost-{i}"),
                        5432 + i as u16,
-                        None,
-                        None,
                        az_iter
                            .next()
                            .cloned()
--- a/storage_controller/src/schema.rs
+++ b/storage_controller/src/schema.rs
@@ -33,8 +33,6 @@ diesel::table! {
        listen_pg_port -> Int4,
        availability_zone_id -> Varchar,
        listen_https_port -> Nullable<Int4>,
-        listen_grpc_addr -> Nullable<Varchar>,
-        listen_grpc_port -> Nullable<Int4>,
    }
 }

--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -1681,8 +1681,6 @@ impl Service {
                    None,
                    "".to_string(),
                    123,
-                    None,
-                    None,
                    AvailabilityZone("test_az".to_string()),
                    false,
                )
@@ -7217,8 +7215,6 @@ impl Service {
            register_req.listen_https_port,
            register_req.listen_pg_addr,
            register_req.listen_pg_port,
-            register_req.listen_grpc_addr,
-            register_req.listen_grpc_port,
            register_req.availability_zone_id.clone(),
            self.config.use_https_pageserver_api,
        );
--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -564,7 +564,6 @@ class NeonLocalCli(AbstractNeonCli):
        basebackup_request_tries: int | None = None,
        timeout: str | None = None,
        env: dict[str, str] | None = None,
-        grpc: bool = False,
    ) -> subprocess.CompletedProcess[str]:
        args = [
            "endpoint",
@@ -584,8 +583,6 @@ class NeonLocalCli(AbstractNeonCli):
            args.append(endpoint_id)
        if pageserver_id is not None:
            args.extend(["--pageserver-id", str(pageserver_id)])
-        if grpc:
-            args.extend(["--grpc"])
        if allow_multiple:
            args.extend(["--allow-multiple"])
        if create_test_user:
@@ -602,7 +599,6 @@ class NeonLocalCli(AbstractNeonCli):
        endpoint_id: str,
        tenant_id: TenantId | None = None,
        pageserver_id: int | None = None,
-        grpc: bool = False,
        safekeepers: list[int] | None = None,
        check_return_code=True,
    ) -> subprocess.CompletedProcess[str]:
@@ -611,8 +607,6 @@ class NeonLocalCli(AbstractNeonCli):
            args.extend(["--tenant-id", str(tenant_id)])
        if pageserver_id is not None:
            args.extend(["--pageserver-id", str(pageserver_id)])
-        if grpc:
-            args.extend(["--grpc"])
        if safekeepers is not None:
            args.extend(["--safekeepers", (",".join(map(str, safekeepers)))])
        return self.raw_cli(args, check_return_code=check_return_code)
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1228,7 +1228,6 @@ class NeonEnv:
        ):
            pageserver_port = PageserverPort(
                pg=self.port_distributor.get_port(),
-                grpc=self.port_distributor.get_port(),
                http=self.port_distributor.get_port(),
                https=self.port_distributor.get_port() if config.use_https_pageserver_api else None,
            )
@@ -1244,14 +1243,13 @@ class NeonEnv:
            ps_cfg: dict[str, Any] = {
                "id": ps_id,
                "listen_pg_addr": f"localhost:{pageserver_port.pg}",
-                "listen_grpc_addr": f"localhost:{pageserver_port.grpc}",
                "listen_http_addr": f"localhost:{pageserver_port.http}",
                "listen_https_addr": f"localhost:{pageserver_port.https}"
                if config.use_https_pageserver_api
                else None,
                "pg_auth_type": pg_auth_type,
-                "grpc_auth_type": grpc_auth_type,
                "http_auth_type": http_auth_type,
+                "grpc_auth_type": grpc_auth_type,
                "availability_zone": availability_zone,
                # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
                # the pageserver taking a long time to start up due to syncfs flushing other tests' data
@@ -1764,7 +1762,6 @@ def neon_env_builder(
@dataclass
 class PageserverPort:
    pg: int
-    grpc: int
    http: int
    https: int | None = None

@@ -4176,7 +4173,6 @@ class Endpoint(PgProtocol, LogUtils):
        pageserver_id: int | None = None,
        allow_multiple: bool = False,
        update_catalog: bool = False,
-        grpc: bool = False,
    ) -> Self:
        """
        Create a new Postgres endpoint.
@@ -4210,12 +4206,7 @@ class Endpoint(PgProtocol, LogUtils):

        # set small 'max_replication_write_lag' to enable backpressure
        # and make tests more stable.
-        config_lines += ["max_replication_write_lag=15MB"]
-
-        # If gRPC is enabled, use the new communicator too.
-        #
-        # NB: the communicator is enabled by default, so force it to false otherwise.
-        config_lines += [f"neon.enable_new_communicator={str(grpc).lower()}"]
+        config_lines = ["max_replication_write_lag=15MB"] + config_lines

        # Delete file cache if it exists (and we're recreating the endpoint)
        if USE_LFC:
@@ -4268,7 +4259,6 @@ class Endpoint(PgProtocol, LogUtils):
        basebackup_request_tries: int | None = None,
        timeout: str | None = None,
        env: dict[str, str] | None = None,
-        grpc: bool = False,
    ) -> Self:
        """
        Start the Postgres instance.
@@ -4293,7 +4283,6 @@ class Endpoint(PgProtocol, LogUtils):
            basebackup_request_tries=basebackup_request_tries,
            timeout=timeout,
            env=env,
-            grpc=grpc,
        )
        self._running.release(1)
        self.log_config_value("shared_buffers")
@@ -4364,14 +4353,14 @@ class Endpoint(PgProtocol, LogUtils):
    def is_running(self):
        return self._running._value > 0

-    def reconfigure(self, pageserver_id: int | None = None, grpc: bool = False, safekeepers: list[int] | None = None):
+    def reconfigure(self, pageserver_id: int | None = None, safekeepers: list[int] | None = None):
        assert self.endpoint_id is not None
        # If `safekeepers` is not None, they are remember them as active and use
        # in the following commands.
        if safekeepers is not None:
            self.active_safekeepers = safekeepers
        self.env.neon_cli.endpoint_reconfigure(
-            self.endpoint_id, self.tenant_id, pageserver_id, grpc, self.active_safekeepers
+            self.endpoint_id, self.tenant_id, pageserver_id, self.active_safekeepers
        )

    def respec(self, **kwargs: Any) -> None:
@@ -4506,7 +4495,6 @@ class Endpoint(PgProtocol, LogUtils):
        pageserver_id: int | None = None,
        allow_multiple: bool = False,
        basebackup_request_tries: int | None = None,
-        grpc: bool = False,
    ) -> Self:
        """
        Create an endpoint, apply config, and start Postgres.
@@ -4521,13 +4509,11 @@ class Endpoint(PgProtocol, LogUtils):
            lsn=lsn,
            pageserver_id=pageserver_id,
            allow_multiple=allow_multiple,
-            grpc=grpc,
        ).start(
            remote_ext_base_url=remote_ext_base_url,
            pageserver_id=pageserver_id,
            allow_multiple=allow_multiple,
            basebackup_request_tries=basebackup_request_tries,
-            grpc=grpc,
        )

        return self
@@ -4611,7 +4597,6 @@ class EndpointFactory:
        remote_ext_base_url: str | None = None,
        pageserver_id: int | None = None,
        basebackup_request_tries: int | None = None,
-        grpc: bool = False,
    ) -> Endpoint:
        ep = Endpoint(
            self.env,
@@ -4632,7 +4617,6 @@ class EndpointFactory:
            remote_ext_base_url=remote_ext_base_url,
            pageserver_id=pageserver_id,
            basebackup_request_tries=basebackup_request_tries,
-            grpc=grpc,
        )

    def create(
@@ -4645,7 +4629,6 @@ class EndpointFactory:
        config_lines: list[str] | None = None,
        pageserver_id: int | None = None,
        update_catalog: bool = False,
-        grpc: bool = False,
    ) -> Endpoint:
        ep = Endpoint(
            self.env,
@@ -4668,7 +4651,6 @@ class EndpointFactory:
            config_lines=config_lines,
            pageserver_id=pageserver_id,
            update_catalog=update_catalog,
-            grpc=grpc,
        )

    def stop_all(self, fail_on_error=True) -> Self:
--- a/test_runner/regress/test_normal_work.py
+++ b/test_runner/regress/test_normal_work.py
@@ -17,7 +17,7 @@ def check_tenant(
    config_lines = [
        f"neon.safekeeper_proto_version = {safekeeper_proto_version}",
    ]
-    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, config_lines=config_lines, grpc=True)
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, config_lines=config_lines)
    # we rely upon autocommit after each statement
    res_1 = endpoint.safe_psql_many(
        queries=[
Author	SHA1	Message	Date
Heikki Linnakangas	16d6898e44	git add missing file	2025-06-12 02:37:59 +03:00
Heikki Linnakangas	10b936bf03	Use a custom Rust implementation to replace the LFC hash table The new implementation lives in a separately allocated shared memory area, which could be resized. Resizing it isn't actually implemented yet, though. It would require some co-operation from the LFC code.	2025-06-05 18:31:29 +03:00
Heikki Linnakangas	6145cfd1c2	Move neon-shmem facility to separate module within the crate	2025-06-05 18:13:03 +03:00
Heikki Linnakangas	96b4de1de6	Make LFC chunk size a compile-time constant A runtime setting is nicer, but the next commit will replace the hash table with a different implementation that requires the value size to be a compile-time constant.	2025-06-05 18:08:40 +03:00
Heikki Linnakangas	9fdf5fbb7e	Use a separate freelist to track LFC "holes" When the LFC is shrunk, we punch holes in the underlying file to release the disk space to the OS. We tracked it in the same hash table as the in-use entries, because that was convenient. However, I'm working on being able to shrink the hash table too, and once we do that, we'll need some other place to track the holes. Implement a simple scheme of an in-memory array and a chain of on-disk blocks for that.	2025-06-05 18:08:35 +03:00
a-masterov	f64eb0cbaf	Remove the Flaky Test computed-columns from postgis v16 (#12132) ## Problem The `computed_columns` test assumes that computed columns are always faster than the request itself. However, this is not always the case on Neon, which can lead to flaky results. ## Summary of changes The `computed_columns` test is excluded from the PostGIS test for PostgreSQL v16, accompanied by related patch refactoring.	2025-06-05 15:02:38 +00:00
				`@@ -1 +0,0 @@`
				`ALTER TABLE nodes DROP listen_grpc_addr, listen_grpc_port;`
				`@@ -1 +0,0 @@`
				`ALTER TABLE nodes ADD listen_grpc_addr VARCHAR NULL, ADD listen_grpc_port INTEGER NULL;`