build(deps): bump tracing-subscriber

Bumps the cargo group with 1 update in the / directory: [tracing-subscriber](https://github.com/tokio-rs/tracing). Updates `tracing-subscriber` from 0.3.19 to 0.3.20 - [Release notes](https://github.com/tokio-rs/tracing/releases) - [Commits](https://github.com/tokio-rs/tracing/compare/tracing-subscriber-0.3.19...tracing-subscriber-0.3.20) --- updated-dependencies: - dependency-name: tracing-subscriber dependency-version: 0.3.20 dependency-type: direct:production dependency-group: cargo ... Signed-off-by: dependabot[bot] <support@github.com>
remove obsolete comment - this is a dummy commit (#12816 )
2026-05-25 00:50:36 +00:00 · 2025-08-29 20:55:15 +00:00 · 2025-08-25 07:36:41 +00:00 · 2025-07-31 13:05:09 +00:00 · 2025-07-31 12:40:32 +00:00 · 2025-07-31 11:51:19 +00:00
85 changed files with 2405 additions and 8118 deletions
--- a/.github/workflows/large_oltp_growth.yml
+++ b/.github/workflows/large_oltp_growth.yml
@@ -2,9 +2,6 @@ name: large oltp growth
 # workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)

 on:
-  # uncomment to run on push for debugging your PR
-  # push:
-  #  branches: [ bodobolero/increase_large_oltp_workload ]

  schedule:
    # * is a special character in YAML so you have to quote this string
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,6 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
-pgxn/neon/communicator/communicator_bindings.h
 docker-compose/docker-compose-parallel.yml

 # Coverage
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -259,17 +259,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"

-[[package]]
-name = "atomic_enum"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.100",
-]
-
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -1307,30 +1296,13 @@ dependencies = [
 name = "communicator"
 version = "0.1.0"
 dependencies = [
- "atomic_enum",
 "axum",
- "bytes",
 "cbindgen",
- "clashmap",
 "http 1.3.1",
- "libc",
 "measured",
- "metrics",
- "neon-shmem",
- "nix 0.30.1",
- "pageserver_api",
- "pageserver_client_grpc",
- "pageserver_page_api",
- "prometheus",
- "prost 0.13.5",
- "strum_macros",
- "thiserror 1.0.69",
 "tokio",
- "tokio-pipe",
- "tonic",
 "tracing",
 "tracing-subscriber",
- "uring-common",
 "utils",
 "workspace_hack",
 ]
@@ -1671,9 +1643,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.21"
+version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"

 [[package]]
 name = "crossterm"
@@ -2389,12 +2361,6 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

-[[package]]
-name = "foldhash"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
-
 [[package]]
 name = "form_urlencoded"
 version = "1.2.1"
@@ -2776,16 +2742,6 @@ version = "0.15.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"

-[[package]]
-name = "hashbrown"
-version = "0.15.4"
-source = "git+https://github.com/quantumish/hashbrown.git?rev=6610e6d#6610e6d2b1f288ef7b0709a3efefbc846395dc5e"
-dependencies = [
- "allocator-api2",
- "equivalent",
- "foldhash",
-]
-
 [[package]]
 name = "hashlink"
 version = "0.9.1"
@@ -3751,11 +3707,11 @@ dependencies = [

 [[package]]
 name = "matchers"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
 dependencies = [
- "regex-automata 0.1.10",
+ "regex-automata 0.4.9",
 ]

 [[package]]
@@ -3866,7 +3822,7 @@ dependencies = [
 "prometheus",
 "rand 0.9.1",
 "rand_distr",
- "twox-hash 1.6.3",
+ "twox-hash",
 ]

 [[package]]
@@ -3972,21 +3928,15 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
 name = "neon-shmem"
 version = "0.1.0"
 dependencies = [
- "ahash",
- "criterion",
- "hashbrown 0.15.4",
 "libc",
 "lock_api",
 "nix 0.30.1",
 "rand 0.9.1",
 "rand_distr",
 "rustc-hash 2.1.1",
- "seahash",
 "tempfile",
 "thiserror 1.0.69",
- "twox-hash 2.1.1",
 "workspace_hack",
- "xxhash-rust",
 ]

 [[package]]
@@ -4085,12 +4035,11 @@ dependencies = [

 [[package]]
 name = "nu-ansi-term"
-version = "0.46.0"
+version = "0.50.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
+checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
 dependencies = [
- "overload",
- "winapi",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -4396,12 +4345,6 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"

-[[package]]
-name = "overload"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
-
 [[package]]
 name = "p256"
 version = "0.11.1"
@@ -4441,16 +4384,13 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-trait",
- "axum",
 "bytes",
 "camino",
 "clap",
 "futures",
 "hdrhistogram",
- "http 1.3.1",
 "humantime",
 "humantime-serde",
- "metrics",
 "pageserver_api",
 "pageserver_client",
 "pageserver_client_grpc",
@@ -4540,7 +4480,6 @@ dependencies = [
 "pageserver_client",
 "pageserver_compaction",
 "pageserver_page_api",
- "peekable",
 "pem",
 "pin-project-lite",
 "postgres-protocol",
@@ -4554,7 +4493,6 @@ dependencies = [
 "pprof",
 "pq_proto",
 "procfs",
- "prost 0.13.5",
 "rand 0.9.1",
 "range-set-blaze",
 "regex",
@@ -4591,7 +4529,7 @@ dependencies = [
 "tower 0.5.2",
 "tracing",
 "tracing-utils",
- "twox-hash 1.6.3",
+ "twox-hash",
 "url",
 "utils",
 "uuid",
@@ -4803,7 +4741,7 @@ dependencies = [
 "paste",
 "seq-macro",
 "thrift",
- "twox-hash 1.6.3",
+ "twox-hash",
 "zstd",
 "zstd-sys",
 ]
@@ -4849,15 +4787,6 @@ dependencies = [
 "sha2",
 ]

-[[package]]
-name = "peekable"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
-dependencies = [
- "smallvec",
-]
-
 [[package]]
 name = "pem"
 version = "3.0.3"
@@ -5869,7 +5798,7 @@ dependencies = [
 "aho-corasick",
 "memchr",
 "regex-automata 0.4.9",
- "regex-syntax 0.8.5",
+ "regex-syntax",
 ]

 [[package]]
@@ -5877,9 +5806,6 @@ name = "regex-automata"
 version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
-dependencies = [
- "regex-syntax 0.6.29",
-]

 [[package]]
 name = "regex-automata"
@@ -5889,7 +5815,7 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
 dependencies = [
 "aho-corasick",
 "memchr",
- "regex-syntax 0.8.5",
+ "regex-syntax",
 ]

 [[package]]
@@ -5898,12 +5824,6 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"

-[[package]]
-name = "regex-syntax"
-version = "0.6.29"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
-
 [[package]]
 name = "regex-syntax"
 version = "0.8.5"
@@ -6557,12 +6477,6 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "621e3680f3e07db4c9c2c3fb07c6223ab2fab2e54bd3c04c3ae037990f428c32"

-[[package]]
-name = "seahash"
-version = "4.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
-
 [[package]]
 name = "sec1"
 version = "0.3.0"
@@ -7716,16 +7630,6 @@ dependencies = [
 "syn 2.0.100",
 ]

-[[package]]
-name = "tokio-pipe"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
-dependencies = [
- "libc",
- "tokio",
-]
-
 [[package]]
 name = "tokio-postgres"
 version = "0.7.10"
@@ -8154,14 +8058,14 @@ dependencies = [

 [[package]]
 name = "tracing-subscriber"
-version = "0.3.19"
+version = "0.3.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
+checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
 dependencies = [
 "matchers",
 "nu-ansi-term",
 "once_cell",
- "regex",
+ "regex-automata 0.4.9",
 "serde",
 "serde_json",
 "sharded-slab",
@@ -8263,15 +8167,6 @@ dependencies = [
 "static_assertions",
 ]

-[[package]]
-name = "twox-hash"
-version = "2.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
-dependencies = [
- "rand 0.9.1",
-]
-
 [[package]]
 name = "typed-json"
 version = "0.1.1"
@@ -9102,8 +8997,8 @@ dependencies = [
 "clap",
 "clap_builder",
 "const-oid",
- "criterion",
 "crossbeam-epoch",
+ "crossbeam-utils",
 "crypto-bigint 0.5.5",
 "der 0.7.8",
 "deranged",
@@ -9146,6 +9041,7 @@ dependencies = [
 "num-iter",
 "num-rational",
 "num-traits",
+ "once_cell",
 "p256 0.13.2",
 "parquet",
 "portable-atomic",
@@ -9156,7 +9052,7 @@ dependencies = [
 "rand 0.9.1",
 "regex",
 "regex-automata 0.4.9",
- "regex-syntax 0.8.5",
+ "regex-syntax",
 "reqwest",
 "rustls 0.23.29",
 "rustls-pki-types",
@@ -9254,12 +9150,6 @@ version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"

-[[package]]
-name = "xxhash-rust"
-version = "0.8.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
-
 [[package]]
 name = "yansi"
 version = "1.0.1"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -93,7 +93,6 @@ clap = { version = "4.0", features = ["derive", "env"] }
 clashmap = { version = "1.0", features = ["raw-api"] }
 comfy-table = "7.1"
 const_format = "0.2"
-crossbeam-utils = "0.8.21"
 crc32c = "0.6"
 diatomic-waker = { version = "0.2.3" }
 either = "1.8"
@@ -153,7 +152,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pem = "3.0.3"
-peekable = "0.3.0"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
@@ -192,7 +190,6 @@ smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
 spki = "0.7.3"
-spin = "0.9.8"
 strum = "0.26"
 strum_macros = "0.26"
 "subtle"  = "2.5.0"
@@ -204,6 +201,7 @@ thiserror = "1.0"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
+tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
@@ -244,9 +242,6 @@ zeroize = "1.8"
 env_logger = "0.11"
 log = "0.4"

-tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
-
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -32,8 +32,12 @@ use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
 use tokio::{spawn, sync::watch, task::JoinHandle, time};
+use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
+use utils::backoff::{
+    DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff_duration,
+};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 use utils::measured_stream::MeasuredReader;
@@ -192,6 +196,7 @@ pub struct ComputeState {
    pub startup_span: Option<tracing::span::Span>,

    pub lfc_prewarm_state: LfcPrewarmState,
+    pub lfc_prewarm_token: CancellationToken,
    pub lfc_offload_state: LfcOffloadState,

    /// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
@@ -217,6 +222,7 @@ impl ComputeState {
            lfc_offload_state: LfcOffloadState::default(),
            terminate_flush_lsn: None,
            promote_state: None,
+            lfc_prewarm_token: CancellationToken::new(),
        }
    }

@@ -1554,6 +1560,41 @@ impl ComputeNode {
        Ok(lsn)
    }

+    fn sync_safekeepers_with_retries(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
+        let max_retries = 5;
+        let mut attempts = 0;
+        loop {
+            let result = self.sync_safekeepers(storage_auth_token.clone());
+            match &result {
+                Ok(_) => {
+                    if attempts > 0 {
+                        tracing::info!("sync_safekeepers succeeded after {attempts} retries");
+                    }
+                    return result;
+                }
+                Err(e) if attempts < max_retries => {
+                    tracing::info!(
+                        "sync_safekeepers failed, will retry (attempt {attempts}): {e:#}"
+                    );
+                }
+                Err(err) => {
+                    tracing::warn!(
+                        "sync_safekeepers still failed after {attempts} retries, giving up: {err:?}"
+                    );
+                    return result;
+                }
+            }
+            // sleep and retry
+            let backoff = exponential_backoff_duration(
+                attempts,
+                DEFAULT_BASE_BACKOFF_SECONDS,
+                DEFAULT_MAX_BACKOFF_SECONDS,
+            );
+            std::thread::sleep(backoff);
+            attempts += 1;
+        }
+    }
+
    /// Do all the preparations like PGDATA directory creation, configuration,
    /// safekeepers sync, basebackup, etc.
    #[instrument(skip_all)]
@@ -1589,7 +1630,7 @@ impl ComputeNode {
                    lsn
                } else {
                    info!("starting safekeepers syncing");
-                    self.sync_safekeepers(pspec.storage_auth_token.clone())
+                    self.sync_safekeepers_with_retries(pspec.storage_auth_token.clone())
                        .with_context(|| "failed to sync safekeepers")?
                };
                info!("safekeepers synced at LSN {}", lsn);
@@ -2739,7 +2780,7 @@ LIMIT 100",
                // 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
                if matches!(
                    prewarm_state,
-                    LfcPrewarmState::Completed
+                    LfcPrewarmState::Completed { .. }
                        | LfcPrewarmState::NotPrewarmed
                        | LfcPrewarmState::Skipped
                ) {
--- a/compute_tools/src/compute_prewarm.rs
+++ b/compute_tools/src/compute_prewarm.rs
@@ -7,18 +7,11 @@ use http::StatusCode;
 use reqwest::Client;
 use std::mem::replace;
 use std::sync::Arc;
-use tokio::{io::AsyncReadExt, spawn};
+use std::time::Instant;
+use tokio::{io::AsyncReadExt, select, spawn};
+use tokio_util::sync::CancellationToken;
 use tracing::{error, info};

-#[derive(serde::Serialize, Default)]
-pub struct LfcPrewarmStateWithProgress {
-    #[serde(flatten)]
-    base: LfcPrewarmState,
-    total: i32,
-    prewarmed: i32,
-    skipped: i32,
-}
-
 /// A pair of url and a token to query endpoint storage for LFC prewarm-related tasks
 struct EndpointStoragePair {
    url: String,
@@ -27,7 +20,7 @@ struct EndpointStoragePair {

 const KEY: &str = "lfc_state";
 impl EndpointStoragePair {
-    /// endpoint_id is set to None while prewarming from other endpoint, see replica promotion
+    /// endpoint_id is set to None while prewarming from other endpoint, see compute_promote.rs
    /// If not None, takes precedence over pspec.spec.endpoint_id
    fn from_spec_and_endpoint(
        pspec: &crate::compute::ParsedSpec,
@@ -53,36 +46,8 @@ impl EndpointStoragePair {
 }

 impl ComputeNode {
-    // If prewarm failed, we want to get overall number of segments as well as done ones.
-    // However, this function should be reliable even if querying postgres failed.
-    pub async fn lfc_prewarm_state(&self) -> LfcPrewarmStateWithProgress {
-        info!("requesting LFC prewarm state from postgres");
-        let mut state = LfcPrewarmStateWithProgress::default();
-        {
-            state.base = self.state.lock().unwrap().lfc_prewarm_state.clone();
-        }
-
-        let client = match ComputeNode::get_maintenance_client(&self.tokio_conn_conf).await {
-            Ok(client) => client,
-            Err(err) => {
-                error!(%err, "connecting to postgres");
-                return state;
-            }
-        };
-        let row = match client
-            .query_one("select * from neon.get_prewarm_info()", &[])
-            .await
-        {
-            Ok(row) => row,
-            Err(err) => {
-                error!(%err, "querying LFC prewarm status");
-                return state;
-            }
-        };
-        state.total = row.try_get(0).unwrap_or_default();
-        state.prewarmed = row.try_get(1).unwrap_or_default();
-        state.skipped = row.try_get(2).unwrap_or_default();
-        state
+    pub async fn lfc_prewarm_state(&self) -> LfcPrewarmState {
+        self.state.lock().unwrap().lfc_prewarm_state.clone()
    }

    pub fn lfc_offload_state(&self) -> LfcOffloadState {
@@ -92,34 +57,35 @@ impl ComputeNode {
    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.
    /// Has a failpoint "compute-prewarm"
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
+        let token: CancellationToken;
        {
-            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
-            if let LfcPrewarmState::Prewarming = replace(state, LfcPrewarmState::Prewarming) {
+            let state = &mut self.state.lock().unwrap();
+            token = state.lfc_prewarm_token.clone();
+            if let LfcPrewarmState::Prewarming =
+                replace(&mut state.lfc_prewarm_state, LfcPrewarmState::Prewarming)
+            {
                return false;
            }
        }
        crate::metrics::LFC_PREWARMS.inc();

-        let cloned = self.clone();
+        let this = self.clone();
        spawn(async move {
-            let state = match cloned.prewarm_impl(from_endpoint).await {
-                Ok(true) => LfcPrewarmState::Completed,
-                Ok(false) => {
-                    info!(
-                        "skipping LFC prewarm because LFC state is not found in endpoint storage"
-                    );
-                    LfcPrewarmState::Skipped
-                }
+            let prewarm_state = match this.prewarm_impl(from_endpoint, token).await {
+                Ok(state) => state,
                Err(err) => {
                    crate::metrics::LFC_PREWARM_ERRORS.inc();
                    error!(%err, "could not prewarm LFC");
-                    LfcPrewarmState::Failed {
-                        error: format!("{err:#}"),
-                    }
+                    let error = format!("{err:#}");
+                    LfcPrewarmState::Failed { error }
                }
            };

-            cloned.state.lock().unwrap().lfc_prewarm_state = state;
+            let state = &mut this.state.lock().unwrap();
+            if let LfcPrewarmState::Cancelled = prewarm_state {
+                state.lfc_prewarm_token = CancellationToken::new();
+            }
+            state.lfc_prewarm_state = prewarm_state;
        });
        true
    }
@@ -131,55 +97,101 @@ impl ComputeNode {
    }

    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.
-    /// Returns a result with `false` if the LFC state is not found in endpoint storage.
-    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
-        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;
+    async fn prewarm_impl(
+        &self,
+        from_endpoint: Option<String>,
+        token: CancellationToken,
+    ) -> Result<LfcPrewarmState> {
+        let EndpointStoragePair {
+            url,
+            token: storage_token,
+        } = self.endpoint_storage_pair(from_endpoint)?;

        #[cfg(feature = "testing")]
-        fail::fail_point!("compute-prewarm", |_| {
-            bail!("prewarm configured to fail because of a failpoint")
-        });
+        fail::fail_point!("compute-prewarm", |_| bail!("compute-prewarm failpoint"));

        info!(%url, "requesting LFC state from endpoint storage");
-        let request = Client::new().get(&url).bearer_auth(token);
-        let res = request.send().await.context("querying endpoint storage")?;
-        match res.status() {
+        let mut now = Instant::now();
+        let request = Client::new().get(&url).bearer_auth(storage_token);
+        let response = select! {
+            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
+            response = request.send() => response
+        }
+        .context("querying endpoint storage")?;
+
+        match response.status() {
            StatusCode::OK => (),
-            StatusCode::NOT_FOUND => {
-                return Ok(false);
-            }
+            StatusCode::NOT_FOUND => return Ok(LfcPrewarmState::Skipped),
            status => bail!("{status} querying endpoint storage"),
        }
+        let state_download_time_ms = now.elapsed().as_millis() as u32;
+        now = Instant::now();

        let mut uncompressed = Vec::new();
-        let lfc_state = res
-            .bytes()
-            .await
-            .context("getting request body from endpoint storage")?;
-        ZstdDecoder::new(lfc_state.iter().as_slice())
-            .read_to_end(&mut uncompressed)
-            .await
-            .context("decoding LFC state")?;
+        let lfc_state = select! {
+            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
+            lfc_state = response.bytes() => lfc_state
+        }
+        .context("getting request body from endpoint storage")?;
+
+        let mut decoder = ZstdDecoder::new(lfc_state.iter().as_slice());
+        select! {
+            _ = token.cancelled() => return Ok(LfcPrewarmState::Cancelled),
+            read = decoder.read_to_end(&mut uncompressed) => read
+        }
+        .context("decoding LFC state")?;
+        let uncompress_time_ms = now.elapsed().as_millis() as u32;
+        now = Instant::now();
+
        let uncompressed_len = uncompressed.len();
+        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}");

-        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");
-
-        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
+        // Client connection and prewarm info querying are fast and therefore don't need
+        // cancellation
+        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
-            .context("connecting to postgres")?
-            .query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
-            .await
-            .context("loading LFC state into postgres")
-            .map(|_| ())?;
+            .context("connecting to postgres")?;
+        let pg_token = client.cancel_token();

-        Ok(true)
+        let params: Vec<&(dyn postgres_types::ToSql + Sync)> = vec![&uncompressed];
+        select! {
+            res = client.query_one("select neon.prewarm_local_cache($1)", &params) => res,
+            _ = token.cancelled() => {
+                pg_token.cancel_query(postgres::NoTls).await
+                    .context("cancelling neon.prewarm_local_cache()")?;
+                return Ok(LfcPrewarmState::Cancelled)
+            }
+        }
+        .context("loading LFC state into postgres")
+        .map(|_| ())?;
+        let prewarm_time_ms = now.elapsed().as_millis() as u32;
+
+        let row = client
+            .query_one("select * from neon.get_prewarm_info()", &[])
+            .await
+            .context("querying prewarm info")?;
+        let total = row.try_get(0).unwrap_or_default();
+        let prewarmed = row.try_get(1).unwrap_or_default();
+        let skipped = row.try_get(2).unwrap_or_default();
+
+        Ok(LfcPrewarmState::Completed {
+            total,
+            prewarmed,
+            skipped,
+            state_download_time_ms,
+            uncompress_time_ms,
+            prewarm_time_ms,
+        })
    }

    /// If offload request is ongoing, return false, true otherwise
    pub fn offload_lfc(self: &Arc<Self>) -> bool {
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+            if matches!(
+                replace(state, LfcOffloadState::Offloading),
+                LfcOffloadState::Offloading
+            ) {
                return false;
            }
        }
@@ -191,7 +203,10 @@ impl ComputeNode {
    pub async fn offload_lfc_async(self: &Arc<Self>) {
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+            if matches!(
+                replace(state, LfcOffloadState::Offloading),
+                LfcOffloadState::Offloading
+            ) {
                return;
            }
        }
@@ -200,23 +215,23 @@ impl ComputeNode {

    async fn offload_lfc_with_state_update(&self) {
        crate::metrics::LFC_OFFLOADS.inc();
-
-        let Err(err) = self.offload_lfc_impl().await else {
-            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
-            return;
-        };
-
-        crate::metrics::LFC_OFFLOAD_ERRORS.inc();
-        error!(%err, "could not offload LFC state to endpoint storage");
-        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
-            error: format!("{err:#}"),
+        let state = match self.offload_lfc_impl().await {
+            Ok(state) => state,
+            Err(err) => {
+                crate::metrics::LFC_OFFLOAD_ERRORS.inc();
+                error!(%err, "could not offload LFC");
+                let error = format!("{err:#}");
+                LfcOffloadState::Failed { error }
+            }
        };
+        self.state.lock().unwrap().lfc_offload_state = state;
    }

-    async fn offload_lfc_impl(&self) -> Result<()> {
+    async fn offload_lfc_impl(&self) -> Result<LfcOffloadState> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
        info!(%url, "requesting LFC state from Postgres");

+        let mut now = Instant::now();
        let row = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?
@@ -228,26 +243,41 @@ impl ComputeNode {
            .context("deserializing LFC state")?;
        let Some(state) = state else {
            info!(%url, "empty LFC state, not exporting");
-            return Ok(());
+            return Ok(LfcOffloadState::Skipped);
        };
+        let state_query_time_ms = now.elapsed().as_millis() as u32;
+        now = Instant::now();

        let mut compressed = Vec::new();
        ZstdEncoder::new(state)
            .read_to_end(&mut compressed)
            .await
            .context("compressing LFC state")?;
+        let compress_time_ms = now.elapsed().as_millis() as u32;
+        now = Instant::now();

        let compressed_len = compressed.len();
-        info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");
+        info!(%url, "downloaded LFC state, compressed size {compressed_len}");

        let request = Client::new().put(url).bearer_auth(token).body(compressed);
-        match request.send().await {
-            Ok(res) if res.status() == StatusCode::OK => Ok(()),
-            Ok(res) => bail!(
-                "Request to endpoint storage failed with status: {}",
-                res.status()
-            ),
-            Err(err) => Err(err).context("writing to endpoint storage"),
+        let response = request
+            .send()
+            .await
+            .context("writing to endpoint storage")?;
+        let state_upload_time_ms = now.elapsed().as_millis() as u32;
+        let status = response.status();
+        if status != StatusCode::OK {
+            bail!("request to endpoint storage failed: {status}");
        }
+
+        Ok(LfcOffloadState::Completed {
+            compress_time_ms,
+            state_query_time_ms,
+            state_upload_time_ms,
+        })
+    }
+
+    pub fn cancel_prewarm(self: &Arc<Self>) {
+        self.state.lock().unwrap().lfc_prewarm_token.cancel();
    }
 }
--- a/compute_tools/src/compute_promote.rs
+++ b/compute_tools/src/compute_promote.rs
@@ -1,32 +1,24 @@
 use crate::compute::ComputeNode;
-use anyhow::{Context, Result, bail};
+use anyhow::{Context, bail};
 use compute_api::responses::{LfcPrewarmState, PromoteConfig, PromoteState};
-use compute_api::spec::ComputeMode;
-use itertools::Itertools;
-use std::collections::HashMap;
-use std::{sync::Arc, time::Duration};
-use tokio::time::sleep;
+use std::time::Instant;
 use tracing::info;
-use utils::lsn::Lsn;

 impl ComputeNode {
-    /// Returns only when promote fails or succeeds. If a network error occurs
-    /// and http client disconnects, this does not stop promotion, and subsequent
-    /// calls block until promote finishes.
+    /// Returns only when promote fails or succeeds. If http client calling this function
+    /// disconnects, this does not stop promotion, and subsequent calls block until promote finishes.
    /// Called by control plane on secondary after primary endpoint is terminated
    /// Has a failpoint "compute-promotion"
-    pub async fn promote(self: &Arc<Self>, cfg: PromoteConfig) -> PromoteState {
-        let cloned = self.clone();
-        let promote_fn = async move || {
-            let Err(err) = cloned.promote_impl(cfg).await else {
-                return PromoteState::Completed;
-            };
-            tracing::error!(%err, "promoting");
-            PromoteState::Failed {
-                error: format!("{err:#}"),
+    pub async fn promote(self: &std::sync::Arc<Self>, cfg: PromoteConfig) -> PromoteState {
+        let this = self.clone();
+        let promote_fn = async move || match this.promote_impl(cfg).await {
+            Ok(state) => state,
+            Err(err) => {
+                tracing::error!(%err, "promoting replica");
+                let error = format!("{err:#}");
+                PromoteState::Failed { error }
            }
        };
-
        let start_promotion = || {
            let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);
            tokio::spawn(async move { tx.send(promote_fn().await) });
@@ -34,36 +26,31 @@ impl ComputeNode {
        };

        let mut task;
-        // self.state is unlocked after block ends so we lock it in promote_impl
-        // and task.changed() is reached
+        // promote_impl locks self.state so we need to unlock it before calling task.changed()
        {
-            task = self
-                .state
-                .lock()
-                .unwrap()
-                .promote_state
-                .get_or_insert_with(start_promotion)
-                .clone()
+            let promote_state = &mut self.state.lock().unwrap().promote_state;
+            task = promote_state.get_or_insert_with(start_promotion).clone()
+        }
+        if task.changed().await.is_err() {
+            let error = "promote sender dropped".to_string();
+            return PromoteState::Failed { error };
        }
-        task.changed().await.expect("promote sender dropped");
        task.borrow().clone()
    }

-    async fn promote_impl(&self, mut cfg: PromoteConfig) -> Result<()> {
+    async fn promote_impl(&self, cfg: PromoteConfig) -> anyhow::Result<PromoteState> {
        {
            let state = self.state.lock().unwrap();
            let mode = &state.pspec.as_ref().unwrap().spec.mode;
-            if *mode != ComputeMode::Replica {
-                bail!("{} is not replica", mode.to_type_str());
+            if *mode != compute_api::spec::ComputeMode::Replica {
+                bail!("compute mode \"{}\" is not replica", mode.to_type_str());
            }
-
-            // we don't need to query Postgres so not self.lfc_prewarm_state()
            match &state.lfc_prewarm_state {
-                LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => {
-                    bail!("prewarm not requested or pending")
+                status @ (LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming) => {
+                    bail!("compute {status}")
                }
                LfcPrewarmState::Failed { error } => {
-                    tracing::warn!(%error, "replica prewarm failed")
+                    tracing::warn!(%error, "compute prewarm failed")
                }
                _ => {}
            }
@@ -72,9 +59,10 @@ impl ComputeNode {
        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?;
+        let mut now = Instant::now();

        let primary_lsn = cfg.wal_flush_lsn;
-        let mut last_wal_replay_lsn: Lsn = Lsn::INVALID;
+        let mut standby_lsn = utils::lsn::Lsn::INVALID;
        const RETRIES: i32 = 20;
        for i in 0..=RETRIES {
            let row = client
@@ -82,16 +70,18 @@ impl ComputeNode {
                .await
                .context("getting last replay lsn")?;
            let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();
-            last_wal_replay_lsn = lsn.into();
-            if last_wal_replay_lsn >= primary_lsn {
+            standby_lsn = lsn.into();
+            if standby_lsn >= primary_lsn {
                break;
            }
-            info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}");
-            sleep(Duration::from_secs(1)).await;
+            info!(%standby_lsn, %primary_lsn, "catching up, try {i}");
+            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        }
-        if last_wal_replay_lsn < primary_lsn {
+        if standby_lsn < primary_lsn {
            bail!("didn't catch up with primary in {RETRIES} retries");
        }
+        let lsn_wait_time_ms = now.elapsed().as_millis() as u32;
+        now = Instant::now();

        // using $1 doesn't work with ALTER SYSTEM SET
        let safekeepers_sql = format!(
@@ -102,27 +92,33 @@ impl ComputeNode {
            .query(&safekeepers_sql, &[])
            .await
            .context("setting safekeepers")?;
+        client
+            .query(
+                "ALTER SYSTEM SET synchronous_standby_names=walproposer",
+                &[],
+            )
+            .await
+            .context("setting synchronous_standby_names")?;
        client
            .query("SELECT pg_catalog.pg_reload_conf()", &[])
            .await
            .context("reloading postgres config")?;

        #[cfg(feature = "testing")]
-        fail::fail_point!("compute-promotion", |_| {
-            bail!("promotion configured to fail because of a failpoint")
-        });
+        fail::fail_point!("compute-promotion", |_| bail!(
+            "compute-promotion failpoint"
+        ));

        let row = client
            .query_one("SELECT * FROM pg_catalog.pg_promote()", &[])
            .await
            .context("pg_promote")?;
        if !row.get::<usize, bool>(0) {
-            bail!("pg_promote() returned false");
+            bail!("pg_promote() failed");
        }
+        let pg_promote_time_ms = now.elapsed().as_millis() as u32;
+        let now = Instant::now();

-        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
-            .await
-            .context("connecting to postgres")?;
        let row = client
            .query_one("SHOW transaction_read_only", &[])
            .await
@@ -131,36 +127,47 @@ impl ComputeNode {
            bail!("replica in read only mode after promotion");
        }

+        // Already checked validity in http handler
+        #[allow(unused_mut)]
+        let mut new_pspec = crate::compute::ParsedSpec::try_from(cfg.spec).expect("invalid spec");
        {
            let mut state = self.state.lock().unwrap();
-            let spec = &mut state.pspec.as_mut().unwrap().spec;
-            spec.mode = ComputeMode::Primary;
-            let new_conf = cfg.spec.cluster.postgresql_conf.as_mut().unwrap();
-            let existing_conf = spec.cluster.postgresql_conf.as_ref().unwrap();
-            Self::merge_spec(new_conf, existing_conf);
+
+            // Local setup has different ports for pg process (port=) for primary and secondary.
+            // Primary is stopped so we need secondary's "port" value
+            #[cfg(feature = "testing")]
+            {
+                let old_spec = &state.pspec.as_ref().unwrap().spec;
+                let Some(old_conf) = old_spec.cluster.postgresql_conf.as_ref() else {
+                    bail!("pspec.spec.cluster.postgresql_conf missing for endpoint");
+                };
+                let set: std::collections::HashMap<&str, &str> = old_conf
+                    .split_terminator('\n')
+                    .map(|e| e.split_once("=").expect("invalid item"))
+                    .collect();
+
+                let Some(new_conf) = new_pspec.spec.cluster.postgresql_conf.as_mut() else {
+                    bail!("pspec.spec.cluster.postgresql_conf missing for supplied config");
+                };
+                new_conf.push_str(&format!("port={}\n", set["port"]));
+            }
+
+            tracing::debug!("applied spec: {:#?}", new_pspec.spec);
+            if self.params.lakebase_mode {
+                ComputeNode::set_spec(&self.params, &mut state, new_pspec);
+            } else {
+                state.pspec = Some(new_pspec);
+            }
        }
+
        info!("applied new spec, reconfiguring as primary");
-        self.reconfigure()
-    }
+        self.reconfigure()?;
+        let reconfigure_time_ms = now.elapsed().as_millis() as u32;

-    /// Merge old and new Postgres conf specs to apply on secondary.
-    /// Change new spec's port and safekeepers since they are supplied
-    /// differenly
-    fn merge_spec(new_conf: &mut String, existing_conf: &str) {
-        let mut new_conf_set: HashMap<&str, &str> = new_conf
-            .split_terminator('\n')
-            .map(|e| e.split_once("=").expect("invalid item"))
-            .collect();
-        new_conf_set.remove("neon.safekeepers");
-
-        let existing_conf_set: HashMap<&str, &str> = existing_conf
-            .split_terminator('\n')
-            .map(|e| e.split_once("=").expect("invalid item"))
-            .collect();
-        new_conf_set.insert("port", existing_conf_set["port"]);
-        *new_conf = new_conf_set
-            .iter()
-            .map(|(k, v)| format!("{k}={v}"))
-            .join("\n");
+        Ok(PromoteState::Completed {
+            lsn_wait_time_ms,
+            pg_promote_time_ms,
+            reconfigure_time_ms,
+        })
    }
 }
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -73,13 +73,12 @@ pub fn write_postgres_conf(
        if let Some(stripe_size) = conninfo.stripe_size {
            writeln!(
                file,
-                "# from compute spec's pageserver_conninfo.stripe_size field"
+                "# from compute spec's pageserver_connection_info.stripe_size field"
            )?;
            writeln!(file, "neon.stripe_size={stripe_size}")?;
        }

        let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
-        let mut grpc_urls: Option<Vec<String>> = Some(Vec::new());
        let num_shards = if conninfo.shard_count.0 == 0 {
            1 // unsharded, treat it as a single shard
        } else {
@@ -112,43 +111,20 @@ pub fn write_postgres_conf(
            } else {
                libpq_urls = None
            }
-            // Similarly for gRPC URLs
-            if let Some(url) = &first_pageserver.grpc_url {
-                if let Some(ref mut urls) = grpc_urls {
-                    urls.push(url.clone());
-                }
-            } else {
-                grpc_urls = None
-            }
        }
        if let Some(libpq_urls) = libpq_urls {
            writeln!(
                file,
-                "# derived from compute spec's pageserver_conninfo field"
+                "# derived from compute spec's pageserver_connection_info field"
            )?;
            writeln!(
                file,
                "neon.pageserver_connstring={}",
                escape_conf_value(&libpq_urls.join(","))
            )?;
-            writeln!(file, "neon.use_communicator_worker=false")?;
        } else {
            writeln!(file, "# no neon.pageserver_connstring")?;
        }
-        if let Some(grpc_urls) = grpc_urls {
-            writeln!(
-                file,
-                "# derived from compute spec's pageserver_conninfo field"
-            )?;
-            writeln!(
-                file,
-                "neon.pageserver_grpc_urls={}",
-                escape_conf_value(&grpc_urls.join(","))
-            )?;
-            writeln!(file, "neon.use_communicator_worker=true")?;
-        } else {
-            writeln!(file, "# no neon.pageserver_grpc_urls")?;
-        }
    } else {
        // Stripe size GUC should be defined prior to connection string
        if let Some(stripe_size) = spec.shard_stripe_size {
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -139,6 +139,15 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/LfcPrewarmState"
+    delete:
+      tags:
+        - Prewarm
+      summary: Cancel ongoing LFC prewarm
+      description: ""
+      operationId: cancelLfcPrewarm
+      responses:
+        202:
+          description: Prewarm cancelled

  /lfc/offload:
    post:
@@ -608,9 +617,6 @@ components:
      type: object
      required:
        - status
-        - total
-        - prewarmed
-        - skipped
      properties:
        status:
          description: LFC prewarm status
@@ -628,6 +634,15 @@ components:
        skipped:
          description: Pages processed but not prewarmed
          type: integer
+        state_download_time_ms:
+          description: Time it takes to download LFC state to compute
+          type: integer
+        uncompress_time_ms:
+          description: Time it takes to uncompress LFC state
+          type: integer
+        prewarm_time_ms:
+          description: Time it takes to prewarm LFC state in Postgres
+          type: integer

    LfcOffloadState:
      type: object
@@ -636,11 +651,21 @@ components:
      properties:
        status:
          description: LFC offload status
-          enum: [not_offloaded, offloading, completed, failed]
+          enum: [not_offloaded, offloading, completed, skipped, failed]
          type: string
        error:
          description: LFC offload error, if any
          type: string
+        state_query_time_ms:
+          description: Time it takes to get LFC state from Postgres
+          type: integer
+        compress_time_ms:
+          description: Time it takes to compress LFC state
+          type: integer
+        state_upload_time_ms:
+          description: Time it takes to upload LFC state to endpoint storage
+          type: integer
+

    PromoteState:
      type: object
@@ -654,6 +679,15 @@ components:
        error:
          description: Promote error, if any
          type: string
+        lsn_wait_time_ms:
+          description: Time it takes for secondary to catch up with primary WAL flush LSN
+          type: integer
+        pg_promote_time_ms:
+          description: Time it takes to call pg_promote on secondary
+          type: integer
+        reconfigure_time_ms:
+          description: Time it takes to reconfigure promoted secondary
+          type: integer

    SetRoleGrantsRequest:
      type: object
--- a/compute_tools/src/http/routes/lfc.rs
+++ b/compute_tools/src/http/routes/lfc.rs
@@ -1,12 +1,11 @@
-use crate::compute_prewarm::LfcPrewarmStateWithProgress;
 use crate::http::JsonResponse;
 use axum::response::{IntoResponse, Response};
 use axum::{Json, http::StatusCode};
 use axum_extra::extract::OptionalQuery;
-use compute_api::responses::LfcOffloadState;
+use compute_api::responses::{LfcOffloadState, LfcPrewarmState};
 type Compute = axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>;

-pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmStateWithProgress> {
+pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmState> {
    Json(compute.lfc_prewarm_state().await)
 }

@@ -46,3 +45,8 @@ pub(in crate::http) async fn offload(compute: Compute) -> Response {
        )
    }
 }
+
+pub(in crate::http) async fn cancel_prewarm(compute: Compute) -> StatusCode {
+    compute.cancel_prewarm();
+    StatusCode::ACCEPTED
+}
--- a/compute_tools/src/http/routes/promote.rs
+++ b/compute_tools/src/http/routes/promote.rs
@@ -1,11 +1,22 @@
 use crate::http::JsonResponse;
 use axum::extract::Json;
+use compute_api::responses::PromoteConfig;
 use http::StatusCode;

 pub(in crate::http) async fn promote(
    compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,
-    Json(cfg): Json<compute_api::responses::PromoteConfig>,
+    Json(cfg): Json<PromoteConfig>,
 ) -> axum::response::Response {
+    // Return early at the cost of extra parsing spec
+    let pspec = match crate::compute::ParsedSpec::try_from(cfg.spec) {
+        Ok(p) => p,
+        Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
+    };
+
+    let cfg = PromoteConfig {
+        spec: pspec.spec,
+        wal_flush_lsn: cfg.wal_flush_lsn,
+    };
    let state = compute.promote(cfg).await;
    if let compute_api::responses::PromoteState::Failed { error: _ } = state {
        return JsonResponse::create_response(StatusCode::INTERNAL_SERVER_ERROR, state);
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -99,7 +99,12 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
                    );

                let authenticated_router = Router::<Arc<ComputeNode>>::new()
-                    .route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm))
+                    .route(
+                        "/lfc/prewarm",
+                        get(lfc::prewarm_state)
+                            .post(lfc::prewarm)
+                            .delete(lfc::cancel_prewarm),
+                    )
                    .route("/lfc/offload", get(lfc::offload_state).post(lfc::offload))
                    .route("/promote", post(promote::promote))
                    .route("/check_writability", post(check_writability::is_writable))
--- a/compute_tools/src/sql/anon_ext_fn_reassign.sql
+++ b/compute_tools/src/sql/anon_ext_fn_reassign.sql
@@ -1,13 +0,0 @@
-DO $$
-DECLARE
-    query varchar;
-BEGIN
-    FOR query IN
-    SELECT pg_catalog.format('ALTER FUNCTION %I(%s) OWNER TO {db_owner};', p.oid::regproc, pg_catalog.pg_get_function_identity_arguments(p.oid))
-    FROM pg_catalog.pg_proc p
-        WHERE p.pronamespace OPERATOR(pg_catalog.=) 'anon'::regnamespace::oid
-    LOOP
-        EXECUTE query;
-    END LOOP;
-END
-$$;
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -71,8 +71,9 @@ const DEFAULT_PG_VERSION_NUM: &str = "17";

 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

+/// Neon CLI.
 #[derive(clap::Parser)]
-#[command(version = GIT_VERSION, about, name = "Neon CLI")]
+#[command(version = GIT_VERSION, name = "Neon CLI")]
 struct Cli {
    #[command(subcommand)]
    command: NeonLocalCmd,
@@ -107,30 +108,31 @@ enum NeonLocalCmd {
    Stop(StopCmdArgs),
 }

+/// Initialize a new Neon repository, preparing configs for services to start with.
 #[derive(clap::Args)]
-#[clap(about = "Initialize a new Neon repository, preparing configs for services to start with")]
 struct InitCmdArgs {
-    #[clap(long, help("How many pageservers to create (default 1)"))]
+    /// How many pageservers to create (default 1).
+    #[clap(long)]
    num_pageservers: Option<u16>,

    #[clap(long)]
    config: Option<PathBuf>,

-    #[clap(long, help("Force initialization even if the repository is not empty"))]
+    /// Force initialization even if the repository is not empty.
+    #[clap(long, default_value = "must-not-exist")]
    #[arg(value_parser)]
-    #[clap(default_value = "must-not-exist")]
    force: InitForceMode,
 }

+/// Start pageserver and safekeepers.
 #[derive(clap::Args)]
-#[clap(about = "Start pageserver and safekeepers")]
 struct StartCmdArgs {
    #[clap(long = "start-timeout", default_value = "10s")]
    timeout: humantime::Duration,
 }

+/// Stop pageserver and safekeepers.
 #[derive(clap::Args)]
-#[clap(about = "Stop pageserver and safekeepers")]
 struct StopCmdArgs {
    #[arg(value_enum)]
    #[clap(long, default_value_t = StopMode::Fast)]
@@ -143,8 +145,8 @@ enum StopMode {
    Immediate,
 }

+/// Manage tenants.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage tenants")]
 enum TenantCmd {
    List,
    Create(TenantCreateCmdArgs),
@@ -155,38 +157,36 @@ enum TenantCmd {

 #[derive(clap::Args)]
 struct TenantCreateCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,

-    #[clap(
-        long,
-        help = "Use a specific timeline id when creating a tenant and its initial timeline"
-    )]
+    /// Use a specific timeline id when creating a tenant and its initial timeline.
+    #[clap(long)]
    timeline_id: Option<TimelineId>,

    #[clap(short = 'c')]
    config: Vec<String>,

+    /// Postgres version to use for the initial timeline.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long, help = "Postgres version to use for the initial timeline")]
+    #[clap(long)]
    pg_version: PgMajorVersion,

-    #[clap(
-        long,
-        help = "Use this tenant in future CLI commands where tenant_id is needed, but not specified"
-    )]
+    /// Use this tenant in future CLI commands where tenant_id is needed, but not specified.
+    #[clap(long)]
    set_default: bool,

-    #[clap(long, help = "Number of shards in the new tenant")]
+    /// Number of shards in the new tenant.
+    #[clap(long)]
    #[arg(default_value_t = 0)]
    shard_count: u8,
-    #[clap(long, help = "Sharding stripe size in pages")]
+    /// Sharding stripe size in pages.
+    #[clap(long)]
    shard_stripe_size: Option<u32>,

-    #[clap(long, help = "Placement policy shards in this tenant")]
+    /// Placement policy shards in this tenant.
+    #[clap(long)]
    #[arg(value_parser = parse_placement_policy)]
    placement_policy: Option<PlacementPolicy>,
 }
@@ -195,44 +195,35 @@ fn parse_placement_policy(s: &str) -> anyhow::Result<PlacementPolicy> {
    Ok(serde_json::from_str::<PlacementPolicy>(s)?)
 }

+/// Set a particular tenant as default in future CLI commands where tenant_id is needed, but not
+/// specified.
 #[derive(clap::Args)]
-#[clap(
-    about = "Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"
-)]
 struct TenantSetDefaultCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: TenantId,
 }

 #[derive(clap::Args)]
 struct TenantConfigCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,

    #[clap(short = 'c')]
    config: Vec<String>,
 }

+/// Import a tenant that is present in remote storage, and create branches for its timelines.
 #[derive(clap::Args)]
-#[clap(
-    about = "Import a tenant that is present in remote storage, and create branches for its timelines"
-)]
 struct TenantImportCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: TenantId,
 }

+/// Manage timelines.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage timelines")]
 enum TimelineCmd {
    List(TimelineListCmdArgs),
    Branch(TimelineBranchCmdArgs),
@@ -240,98 +231,87 @@ enum TimelineCmd {
    Import(TimelineImportCmdArgs),
 }

+/// List all timelines available to this pageserver.
 #[derive(clap::Args)]
-#[clap(about = "List all timelines available to this pageserver")]
 struct TimelineListCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_shard_id: Option<TenantShardId>,
 }

+/// Create a new timeline, branching off from another timeline.
 #[derive(clap::Args)]
-#[clap(about = "Create a new timeline, branching off from another timeline")]
 struct TimelineBranchCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,
-
-    #[clap(long, help = "New timeline's ID")]
+    /// New timeline's ID, as a 32-byte hexadecimal string.
+    #[clap(long)]
    timeline_id: Option<TimelineId>,
-
-    #[clap(long, help = "Human-readable alias for the new timeline")]
+    /// Human-readable alias for the new timeline.
+    #[clap(long)]
    branch_name: String,
-
-    #[clap(
-        long,
-        help = "Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name."
-    )]
+    /// Use last Lsn of another timeline (and its data) as base when creating the new timeline. The
+    /// timeline gets resolved by its branch name.
+    #[clap(long)]
    ancestor_branch_name: Option<String>,
-
-    #[clap(
-        long,
-        help = "When using another timeline as base, use a specific Lsn in it instead of the latest one"
-    )]
+    /// When using another timeline as base, use a specific Lsn in it instead of the latest one.
+    #[clap(long)]
    ancestor_start_lsn: Option<Lsn>,
 }

+/// Create a new blank timeline.
 #[derive(clap::Args)]
-#[clap(about = "Create a new blank timeline")]
 struct TimelineCreateCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,
-
-    #[clap(long, help = "New timeline's ID")]
+    /// New timeline's ID, as a 32-byte hexadecimal string.
+    #[clap(long)]
    timeline_id: Option<TimelineId>,
-
-    #[clap(long, help = "Human-readable alias for the new timeline")]
+    /// Human-readable alias for the new timeline.
+    #[clap(long)]
    branch_name: String,

+    /// Postgres version.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long, help = "Postgres version")]
+    #[clap(long)]
    pg_version: PgMajorVersion,
 }

+/// Import a timeline from a basebackup directory.
 #[derive(clap::Args)]
-#[clap(about = "Import timeline from a basebackup directory")]
 struct TimelineImportCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,
-
-    #[clap(long, help = "New timeline's ID")]
+    /// New timeline's ID, as a 32-byte hexadecimal string.
+    #[clap(long)]
    timeline_id: TimelineId,
-
-    #[clap(long, help = "Human-readable alias for the new timeline")]
+    /// Human-readable alias for the new timeline.
+    #[clap(long)]
    branch_name: String,
-
-    #[clap(long, help = "Basebackup tarfile to import")]
+    /// Basebackup tarfile to import.
+    #[clap(long)]
    base_tarfile: PathBuf,
-
-    #[clap(long, help = "Lsn the basebackup starts at")]
+    /// LSN the basebackup starts at.
+    #[clap(long)]
    base_lsn: Lsn,
-
-    #[clap(long, help = "Wal to add after base")]
+    /// WAL to add after base.
+    #[clap(long)]
    wal_tarfile: Option<PathBuf>,
-
-    #[clap(long, help = "Lsn the basebackup ends at")]
+    /// LSN the basebackup ends at.
+    #[clap(long)]
    end_lsn: Option<Lsn>,

+    /// Postgres version of the basebackup being imported.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long, help = "Postgres version of the backup being imported")]
+    #[clap(long)]
    pg_version: PgMajorVersion,
 }

+/// Manage pageservers.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage pageservers")]
 enum PageserverCmd {
    Status(PageserverStatusCmdArgs),
    Start(PageserverStartCmdArgs),
@@ -339,223 +319,202 @@ enum PageserverCmd {
    Restart(PageserverRestartCmdArgs),
 }

+/// Show status of a local pageserver.
 #[derive(clap::Args)]
-#[clap(about = "Show status of a local pageserver")]
 struct PageserverStatusCmdArgs {
-    #[clap(long = "id", help = "pageserver id")]
+    /// Pageserver ID.
+    #[clap(long = "id")]
    pageserver_id: Option<NodeId>,
 }

+/// Start local pageserver.
 #[derive(clap::Args)]
-#[clap(about = "Start local pageserver")]
 struct PageserverStartCmdArgs {
-    #[clap(long = "id", help = "pageserver id")]
+    /// Pageserver ID.
+    #[clap(long = "id")]
    pageserver_id: Option<NodeId>,
-
-    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long)]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

+/// Stop local pageserver.
 #[derive(clap::Args)]
-#[clap(about = "Stop local pageserver")]
 struct PageserverStopCmdArgs {
-    #[clap(long = "id", help = "pageserver id")]
+    /// Pageserver ID.
+    #[clap(long = "id")]
    pageserver_id: Option<NodeId>,
-
-    #[clap(
-        short = 'm',
-        help = "If 'immediate', don't flush repository data at shutdown"
-    )]
+    /// If 'immediate', don't flush repository data at shutdown
+    #[clap(short = 'm')]
    #[arg(value_enum, default_value = "fast")]
    stop_mode: StopMode,
 }

+/// Restart local pageserver.
 #[derive(clap::Args)]
-#[clap(about = "Restart local pageserver")]
 struct PageserverRestartCmdArgs {
-    #[clap(long = "id", help = "pageserver id")]
+    /// Pageserver ID.
+    #[clap(long = "id")]
    pageserver_id: Option<NodeId>,
-
-    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long)]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

+/// Manage storage controller.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage storage controller")]
 enum StorageControllerCmd {
    Start(StorageControllerStartCmdArgs),
    Stop(StorageControllerStopCmdArgs),
 }

+/// Start storage controller.
 #[derive(clap::Args)]
-#[clap(about = "Start storage controller")]
 struct StorageControllerStartCmdArgs {
-    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long)]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
-
-    #[clap(
-        long,
-        help = "Identifier used to distinguish storage controller instances"
-    )]
+    /// Identifier used to distinguish storage controller instances.
+    #[clap(long)]
    #[arg(default_value_t = 1)]
    instance_id: u8,
-
-    #[clap(
-        long,
-        help = "Base port for the storage controller instance idenfified by instance-id (defaults to pageserver cplane api)"
-    )]
+    /// Base port for the storage controller instance identified by instance-id (defaults to
+    /// pageserver cplane api).
+    #[clap(long)]
    base_port: Option<u16>,

-    #[clap(
-        long,
-        help = "Whether the storage controller should handle pageserver-reported local disk loss events."
-    )]
+    /// Whether the storage controller should handle pageserver-reported local disk loss events.
+    #[clap(long)]
    handle_ps_local_disk_loss: Option<bool>,
 }

+/// Stop storage controller.
 #[derive(clap::Args)]
-#[clap(about = "Stop storage controller")]
 struct StorageControllerStopCmdArgs {
-    #[clap(
-        short = 'm',
-        help = "If 'immediate', don't flush repository data at shutdown"
-    )]
+    /// If 'immediate', don't flush repository data at shutdown
+    #[clap(short = 'm')]
    #[arg(value_enum, default_value = "fast")]
    stop_mode: StopMode,
-
-    #[clap(
-        long,
-        help = "Identifier used to distinguish storage controller instances"
-    )]
+    /// Identifier used to distinguish storage controller instances.
+    #[clap(long)]
    #[arg(default_value_t = 1)]
    instance_id: u8,
 }

+/// Manage storage broker.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage storage broker")]
 enum StorageBrokerCmd {
    Start(StorageBrokerStartCmdArgs),
    Stop(StorageBrokerStopCmdArgs),
 }

+/// Start broker.
 #[derive(clap::Args)]
-#[clap(about = "Start broker")]
 struct StorageBrokerStartCmdArgs {
-    #[clap(short = 't', long, help = "timeout until we fail the command")]
-    #[arg(default_value = "10s")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long, default_value = "10s")]
    start_timeout: humantime::Duration,
 }

+/// Stop broker.
 #[derive(clap::Args)]
-#[clap(about = "stop broker")]
 struct StorageBrokerStopCmdArgs {
-    #[clap(
-        short = 'm',
-        help = "If 'immediate', don't flush repository data at shutdown"
-    )]
+    /// If 'immediate', don't flush repository data on shutdown.
+    #[clap(short = 'm')]
    #[arg(value_enum, default_value = "fast")]
    stop_mode: StopMode,
 }

+/// Manage safekeepers.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage safekeepers")]
 enum SafekeeperCmd {
    Start(SafekeeperStartCmdArgs),
    Stop(SafekeeperStopCmdArgs),
    Restart(SafekeeperRestartCmdArgs),
 }

+/// Manage object storage.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage object storage")]
 enum EndpointStorageCmd {
    Start(EndpointStorageStartCmd),
    Stop(EndpointStorageStopCmd),
 }

+/// Start object storage.
 #[derive(clap::Args)]
-#[clap(about = "Start object storage")]
 struct EndpointStorageStartCmd {
-    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long)]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

+/// Stop object storage.
 #[derive(clap::Args)]
-#[clap(about = "Stop object storage")]
 struct EndpointStorageStopCmd {
+    /// If 'immediate', don't flush repository data on shutdown.
+    #[clap(short = 'm')]
    #[arg(value_enum, default_value = "fast")]
-    #[clap(
-        short = 'm',
-        help = "If 'immediate', don't flush repository data at shutdown"
-    )]
    stop_mode: StopMode,
 }

+/// Start local safekeeper.
 #[derive(clap::Args)]
-#[clap(about = "Start local safekeeper")]
 struct SafekeeperStartCmdArgs {
-    #[clap(help = "safekeeper id")]
+    /// Safekeeper ID.
    #[arg(default_value_t = NodeId(1))]
    id: NodeId,

-    #[clap(
-        short = 'e',
-        long = "safekeeper-extra-opt",
-        help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
-    )]
+    /// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.
+    #[clap(short = 'e', long = "safekeeper-extra-opt")]
    extra_opt: Vec<String>,

-    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long)]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

+/// Stop local safekeeper.
 #[derive(clap::Args)]
-#[clap(about = "Stop local safekeeper")]
 struct SafekeeperStopCmdArgs {
-    #[clap(help = "safekeeper id")]
+    /// Safekeeper ID.
    #[arg(default_value_t = NodeId(1))]
    id: NodeId,

+    /// If 'immediate', don't flush repository data on shutdown.
    #[arg(value_enum, default_value = "fast")]
-    #[clap(
-        short = 'm',
-        help = "If 'immediate', don't flush repository data at shutdown"
-    )]
+    #[clap(short = 'm')]
    stop_mode: StopMode,
 }

+/// Restart local safekeeper.
 #[derive(clap::Args)]
-#[clap(about = "Restart local safekeeper")]
 struct SafekeeperRestartCmdArgs {
-    #[clap(help = "safekeeper id")]
+    /// Safekeeper ID.
    #[arg(default_value_t = NodeId(1))]
    id: NodeId,

+    /// If 'immediate', don't flush repository data on shutdown.
    #[arg(value_enum, default_value = "fast")]
-    #[clap(
-        short = 'm',
-        help = "If 'immediate', don't flush repository data at shutdown"
-    )]
+    #[clap(short = 'm')]
    stop_mode: StopMode,

-    #[clap(
-        short = 'e',
-        long = "safekeeper-extra-opt",
-        help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
-    )]
+    /// Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo.
+    #[clap(short = 'e', long = "safekeeper-extra-opt")]
    extra_opt: Vec<String>,

-    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long)]
    #[arg(default_value = "10s")]
    start_timeout: humantime::Duration,
 }

+/// Manage Postgres instances.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage Postgres instances")]
 enum EndpointCmd {
    List(EndpointListCmdArgs),
    Create(EndpointCreateCmdArgs),
@@ -567,33 +526,27 @@ enum EndpointCmd {
    GenerateJwt(EndpointGenerateJwtCmdArgs),
 }

+/// List endpoints.
 #[derive(clap::Args)]
-#[clap(about = "List endpoints")]
 struct EndpointListCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_shard_id: Option<TenantShardId>,
 }

+/// Create a compute endpoint.
 #[derive(clap::Args)]
-#[clap(about = "Create a compute endpoint")]
 struct EndpointCreateCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,
-
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint ID.
    endpoint_id: Option<String>,
-    #[clap(long, help = "Name of the branch the endpoint will run on")]
+    /// Name of the branch the endpoint will run on.
+    #[clap(long)]
    branch_name: Option<String>,
-    #[clap(
-        long,
-        help = "Specify Lsn on the timeline to start from. By default, end of the timeline would be used"
-    )]
+    /// Specify LSN on the timeline to start from. By default, end of the timeline would be used.
+    #[clap(long)]
    lsn: Option<Lsn>,
    #[clap(long)]
    pg_port: Option<u16>,
@@ -604,16 +557,13 @@ struct EndpointCreateCmdArgs {
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,

-    #[clap(
-        long,
-        help = "Don't do basebackup, create endpoint directory with only config files",
-        action = clap::ArgAction::Set,
-        default_value_t = false
-    )]
+    /// Don't do basebackup, create endpoint directory with only config files.
+    #[clap(long, action = clap::ArgAction::Set, default_value_t = false)]
    config_only: bool,

+    /// Postgres version.
    #[arg(default_value = DEFAULT_PG_VERSION_NUM)]
-    #[clap(long, help = "Postgres version")]
+    #[clap(long)]
    pg_version: PgMajorVersion,

    /// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.
@@ -624,170 +574,140 @@ struct EndpointCreateCmdArgs {
    #[clap(long)]
    grpc: bool,

-    #[clap(
-        long,
-        help = "If set, the node will be a hot replica on the specified timeline",
-        action = clap::ArgAction::Set,
-        default_value_t = false
-    )]
+    /// If set, the node will be a hot replica on the specified timeline.
+    #[clap(long, action = clap::ArgAction::Set, default_value_t = false)]
    hot_standby: bool,
-
-    #[clap(long, help = "If set, will set up the catalog for neon_superuser")]
+    /// If set, will set up the catalog for neon_superuser.
+    #[clap(long)]
    update_catalog: bool,
-
-    #[clap(
-        long,
-        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
-    )]
+    /// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but
+    /// useful for tests.
+    #[clap(long)]
    allow_multiple: bool,

-    /// Only allow changing it on creation
-    #[clap(long, help = "Name of the privileged role for the endpoint")]
+    /// Name of the privileged role for the endpoint.
+    // Only allow changing it on creation.
+    #[clap(long)]
    privileged_role_name: Option<String>,
 }

+/// Start Postgres. If the endpoint doesn't exist yet, it is created.
 #[derive(clap::Args)]
-#[clap(about = "Start postgres. If the endpoint doesn't exist yet, it is created.")]
 struct EndpointStartCmdArgs {
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint ID.
    endpoint_id: String,
+    /// Pageserver ID.
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,
-
-    #[clap(
-        long,
-        help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations."
-    )]
+    /// Safekeepers membership generation to prefix neon.safekeepers with.
+    #[clap(long)]
    safekeepers_generation: Option<u32>,
-    #[clap(
-        long,
-        help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override."
-    )]
+    /// List of safekeepers endpoint will talk to.
+    #[clap(long)]
    safekeepers: Option<String>,
-
-    #[clap(
-        long,
-        help = "Configure the remote extensions storage proxy gateway URL to request for extensions.",
-        alias = "remote-ext-config"
-    )]
+    /// Configure the remote extensions storage proxy gateway URL to request for extensions.
+    #[clap(long, alias = "remote-ext-config")]
    remote_ext_base_url: Option<String>,
-
-    #[clap(
-        long,
-        help = "If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`"
-    )]
+    /// If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`
+    #[clap(long)]
    create_test_user: bool,
-
-    #[clap(
-        long,
-        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
-    )]
+    /// Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but
+    /// useful for tests.
+    #[clap(long)]
    allow_multiple: bool,
-
-    #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
+    /// Timeout until we fail the command.
+    #[clap(short = 't', long, value_parser= humantime::parse_duration)]
    #[arg(default_value = "90s")]
    start_timeout: Duration,

-    #[clap(
-        long,
-        help = "Download LFC cache from endpoint storage on endpoint startup",
-        default_value = "false"
-    )]
+    /// Download LFC cache from endpoint storage on endpoint startup
+    #[clap(long, default_value = "false")]
    autoprewarm: bool,

-    #[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
+    /// Upload LFC cache to endpoint storage periodically
+    #[clap(long)]
    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,

-    #[clap(
-        long,
-        help = "Run in development mode, skipping VM-specific operations like process termination",
-        action = clap::ArgAction::SetTrue
-    )]
+    /// Run in development mode, skipping VM-specific operations like process termination
+    #[clap(long, action = clap::ArgAction::SetTrue)]
    dev: bool,
 }

+/// Reconfigure an endpoint.
 #[derive(clap::Args)]
-#[clap(about = "Reconfigure an endpoint")]
 struct EndpointReconfigureCmdArgs {
-    #[clap(
-        long = "tenant-id",
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant id. Represented as a hexadecimal string 32 symbols length
+    #[clap(long = "tenant-id")]
    tenant_id: Option<TenantId>,
-
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint ID.
    endpoint_id: String,
+    /// Pageserver ID.
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,
-
    #[clap(long)]
    safekeepers: Option<String>,
 }

+/// Refresh the endpoint's configuration by forcing it reload it's spec
 #[derive(clap::Args)]
-#[clap(about = "Refresh the endpoint's configuration by forcing it reload it's spec")]
 struct EndpointRefreshConfigurationArgs {
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint id
    endpoint_id: String,
 }

+/// Stop an endpoint.
 #[derive(clap::Args)]
-#[clap(about = "Stop an endpoint")]
 struct EndpointStopCmdArgs {
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint ID.
    endpoint_id: String,
-
-    #[clap(
-        long,
-        help = "Also delete data directory (now optional, should be default in future)"
-    )]
+    /// Also delete data directory (now optional, should be default in future).
+    #[clap(long)]
    destroy: bool,

-    #[clap(long, help = "Postgres shutdown mode")]
+    /// Postgres shutdown mode, passed to `pg_ctl -m <mode>`.
+    #[clap(long)]
    #[clap(default_value = "fast")]
    mode: EndpointTerminateMode,
 }

+/// Update the pageservers in the spec file of the compute endpoint
 #[derive(clap::Args)]
-#[clap(about = "Update the pageservers in the spec file of the compute endpoint")]
 struct EndpointUpdatePageserversCmdArgs {
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint id
    endpoint_id: String,

-    #[clap(short = 'p', long, help = "Specified pageserver id")]
+    /// Specified pageserver id
+    #[clap(short = 'p', long)]
    pageserver_id: Option<NodeId>,
 }

+/// Generate a JWT for an endpoint.
 #[derive(clap::Args)]
-#[clap(about = "Generate a JWT for an endpoint")]
 struct EndpointGenerateJwtCmdArgs {
-    #[clap(help = "Postgres endpoint id")]
+    /// Postgres endpoint ID.
    endpoint_id: String,
-
-    #[clap(short = 's', long, help = "Scope to generate the JWT with", value_parser = ComputeClaimsScope::from_str)]
+    /// Scope to generate the JWT with.
+    #[clap(short = 's', long, value_parser = ComputeClaimsScope::from_str)]
    scope: Option<ComputeClaimsScope>,
 }

+/// Manage neon_local branch name mappings.
 #[derive(clap::Subcommand)]
-#[clap(about = "Manage neon_local branch name mappings")]
 enum MappingsCmd {
    Map(MappingsMapCmdArgs),
 }

+/// Create new mapping which cannot exist already.
 #[derive(clap::Args)]
-#[clap(about = "Create new mapping which cannot exist already")]
 struct MappingsMapCmdArgs {
-    #[clap(
-        long,
-        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Tenant ID, as a 32-byte hexadecimal string.
+    #[clap(long)]
    tenant_id: TenantId,
-    #[clap(
-        long,
-        help = "Timeline id. Represented as a hexadecimal string 32 symbols length"
-    )]
+    /// Timeline ID, as a 32-byte hexadecimal string.
+    #[clap(long)]
    timeline_id: TimelineId,
-    #[clap(long, help = "Branch name to give to the timeline")]
+    /// Branch name to give to the timeline.
+    #[clap(long)]
    branch_name: String,
 }

--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -303,6 +303,13 @@ enum Command {
        #[arg(long, required = true, value_delimiter = ',')]
        new_sk_set: Vec<NodeId>,
    },
+    /// Abort ongoing safekeeper migration.
+    TimelineSafekeeperMigrateAbort {
+        #[arg(long)]
+        tenant_id: TenantId,
+        #[arg(long)]
+        timeline_id: TimelineId,
+    },
 }

 #[derive(Parser)]
@@ -1396,6 +1403,17 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
+        Command::TimelineSafekeeperMigrateAbort {
+            tenant_id,
+            timeline_id,
+        } => {
+            let path =
+                format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate_abort");
+
+            storcon_client
+                .dispatch::<(), ()>(Method::POST, path, None)
+                .await?;
+        }
    }

    Ok(())
--- a/docker-compose/compute_wrapper/var/db/postgres/configs/config.json
+++ b/docker-compose/compute_wrapper/var/db/postgres/configs/config.json
@@ -120,11 +120,6 @@
                    "value": "host=pageserver port=6400",
                    "vartype": "string"
                },
-                {
-                    "name": "neon.pageserver_grpc_urls",
-                    "value": "grpc://pageserver:6401/",
-                    "vartype": "string"
-                },
                {
                    "name": "max_replication_write_lag",
                    "value": "500MB",
--- a/docker-compose/pageserver_config/pageserver.toml
+++ b/docker-compose/pageserver_config/pageserver.toml
@@ -1,7 +1,6 @@
 broker_endpoint='http://storage_broker:50051'
 pg_distrib_dir='/usr/local/'
 listen_pg_addr='0.0.0.0:6400'
-listen_grpc_addr='0.0.0.0:6401'
 listen_http_addr='0.0.0.0:9898'
 remote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }
 control_plane_api='http://0.0.0.0:6666' # No storage controller in docker compose, specify a junk address
--- a/docs/rfcs/2025-07-07-node-deletion-api-improvement.md
+++ b/docs/rfcs/2025-07-07-node-deletion-api-improvement.md
@@ -0,0 +1,246 @@
+# Node deletion API improvement
+
+Created on 2025-07-07
+Implemented on _TBD_
+
+## Summary
+
+This RFC describes improvements to the storage controller API for gracefully deleting pageserver
+nodes.
+
+## Motivation
+
+The basic node deletion API introduced in [#8226](https://github.com/neondatabase/neon/issues/8333)
+has several limitations:
+
+- Deleted nodes can re-add themselves if they restart (e.g., a flaky node that keeps restarting and
+we cannot reach via SSH to stop the pageserver). This issue has been resolved by tombstone
+mechanism in [#12036](https://github.com/neondatabase/neon/issues/12036)
+- Process of node deletion is not graceful, i.e. it just imitates a node failure
+
+In this context, "graceful" node deletion means that users do not experience any disruption or
+negative effects, provided the system remains in a healthy state (i.e., the remaining pageservers
+can handle the workload and all requirements are met). To achieve this, the system must perform
+live migration of all tenant shards from the node being deleted while the node is still running
+and continue processing all incoming requests. The node is removed only after all tenant shards
+have been safely migrated.
+
+Although live migrations can be achieved with the drain functionality, it leads to incorrect shard
+placement, such as not matching availability zones. This results in unnecessary work to optimize
+the placement that was just recently performed.
+
+If we delete a node before its tenant shards are fully moved, the new node won't have all the
+needed data (e.g. heatmaps) ready. This means user requests to the new node will be much slower at
+first. If there are many tenant shards, this slowdown affects a huge amount of users.
+
+Graceful node deletion is more complicated and can introduce new issues. It takes longer because
+live migration of each tenant shard can last several minutes. Using non-blocking accessors may
+also cause deletion to wait if other processes are holding inner state lock. It also gets trickier
+because we need to handle other requests, like drain and fill, at the same time.
+
+## Impacted components (e.g. pageserver, safekeeper, console, etc)
+
+- storage controller
+- pageserver (indirectly)
+
+## Proposed implementation
+
+### Tombstones
+
+To resolve the problem of deleted nodes re-adding themselves, a tombstone mechanism was introduced
+as part of the node stored information. Each node has a separate `NodeLifecycle` field with two
+possible states: `Active` and `Deleted`. When node deletion completes, the database row is not
+deleted but instead has its `NodeLifecycle` column switched to `Deleted`. Nodes with `Deleted`
+lifecycle are treated as if the row is absent for most handlers, with several exceptions: reattach
+and register functionality must be aware of tombstones. Additionally, new debug handlers are
+available for listing and deleting tombstones via the `/debug/v1/tombstone` path.
+
+### Gracefulness
+
+The problem of making node deletion graceful is complex and involves several challenges:
+
+- **Cancellable**: The operation must be cancellable to allow administrators to abort the process
+if needed, e.g. if run by mistake.
+- **Non-blocking**: We don't want to block deployment operations like draining/filling on the node
+deletion process. We need clear policies for handling concurrent operations: what happens when a
+drain/fill request arrives while deletion is in progress, and what happens when a delete request
+arrives while drain/fill is in progress.
+- **Persistent**: If the storage controller restarts during this long-running operation, we must
+preserve progress and automatically resume the deletion process after the storage controller
+restarts.
+- **Migrated correctly**: We cannot simply use the existing drain mechanism for nodes scheduled
+for deletion, as this would move shards to irrelevant locations. The drain process expects the
+node to return, so it only moves shards to backup locations, not to their preferred AZs. It also
+leaves secondary locations unmoved. This could result in unnecessary load on the storage
+controller and inefficient resource utilization.
+- **Force option**: Administrators need the ability to force immediate, non-graceful deletion when
+time constraints or emergency situations require it, bypassing the normal graceful migration
+process.
+
+See below for a detailed breakdown of the proposed changes and mechanisms.
+
+#### Node lifecycle
+
+New `NodeLifecycle` enum and a matching database field with these values:
+- `Active`: The normal state. All operations are allowed.
+- `ScheduledForDeletion`: The node is marked to be deleted soon. Deletion may be in progress or
+will happen later, but the node will eventually be removed. All operations are allowed.
+- `Deleted`: The node is fully deleted. No operations are allowed, and the node cannot be brought
+back. The only action left is to remove its record from the database. Any attempt to register a
+node in this state will fail.
+
+This state persists across storage controller restarts.
+
+**State transition**
+```
+        +--------------------+
+    +---|       Active       |<---------------------+
+    |   +--------------------+                      |
+    |                     ^                         |
+    | start_node_delete   | cancel_node_delete      |
+    v                     |                         |
+  +----------------------------------+              |
+  |       ScheduledForDeletion       |              |
+  +----------------------------------+              |
+       |                                            |
+       |                              node_register |
+       |                                            |
+       | delete_node (at the finish)                |
+       |                                            |
+       v                                            |
+  +---------+         tombstone_delete        +----------+
+  | Deleted |-------------------------------->|  no row  |
+  +---------+                                 +----------+
+```
+
+#### NodeSchedulingPolicy::Deleting
+
+A `Deleting` variant to the `NodeSchedulingPolicy` enum. This means the deletion function is
+running for the node right now. Only one node can have the `Deleting` policy at a time.
+
+The `NodeSchedulingPolicy::Deleting` state is persisted in the database. However, after a storage
+controller restart, any node previously marked as `Deleting` will have its scheduling policy reset
+to `Pause`. The policy will only transition back to `Deleting` when the deletion operation is
+actively started again, as triggered by the node's `NodeLifecycle::ScheduledForDeletion` state.
+
+`NodeSchedulingPolicy` transition details:
+1. When `node_delete` begins, set the policy to `NodeSchedulingPolicy::Deleting`.
+2. If `node_delete` is cancelled (for example, due to a concurrent drain operation), revert the
+policy to its previous value. The policy is persisted in storcon DB.
+3. After `node_delete` completes, the final value of the scheduling policy is irrelevant, since
+`NodeLifecycle::Deleted` prevents any further access to this field.
+
+The deletion process cannot be initiated for nodes currently undergoing deployment-related
+operations (`Draining`, `Filling`, or `PauseForRestart` policies). Deletion will only be triggered
+once the node transitions to either the `Active` or `Pause` state.
+
+#### OperationTracker
+
+A replacement for `Option<OperationHandler> ongoing_operation`, the `OperationTracker` is a
+dedicated service state object responsible for managing all long-running node operations (drain,
+fill, delete) with robust concurrency control.
+
+Key responsibilities:
+- Orchestrates the execution of operations
+- Supports cancellation of currently running operations
+- Enforces operation constraints, e.g. allowing only single drain/fill operation at a time
+- Persists deletion state, enabling recovery of pending deletions across restarts
+- Ensures thread safety across concurrent requests
+
+#### Attached tenant shard processing
+
+When deleting a node, handle each attached tenant shard as follows:
+
+1. Pick the best node to become the new attached (the candidate).
+2. If the candidate already has this shard as a secondary:
+    - Create a new secondary for the shard on another suitable node.
+   Otherwise:
+    - Create a secondary for the shard on the candidate node.
+3. Wait until all secondaries are ready and pre-warmed.
+4. Promote the candidate's secondary to attached.
+5. Remove the secondary from the node being deleted.
+
+This process safely moves all attached shards before deleting the node.
+
+#### Secondary tenant shard processing
+
+When deleting a node, handle each secondary tenant shard as follows:
+
+1. Choose the best node to become the new secondary.
+2. Create a secondary for the shard on that node.
+3. Wait until the new secondary is ready.
+4. Remove the secondary from the node being deleted.
+
+This ensures all secondary shards are safely moved before deleting the node.
+
+### Reliability, failure modes and corner cases
+
+In case of a storage controller failure and following restart, the system behavior depends on the
+`NodeLifecycle` state:
+
+- If `NodeLifecycle` is `Active`: No action is taken for this node.
+- If `NodeLifecycle` is `Deleted`: The node will not be re-added.
+- If `NodeLifecycle` is `ScheduledForDeletion`: A deletion background task will be launched for
+this node.
+
+In case of a pageserver node failure during deletion, the behavior depends on the `force` flag:
+- If `force` is set: The node deletion will proceed regardless of the node's availability.
+- If `force` is not set: The deletion will be retried a limited number of times. If the node
+remains unavailable, the deletion process will pause and automatically resume when the node
+becomes healthy again.
+
+### Operations concurrency
+
+The following sections describe the behavior when different types of requests arrive at the storage
+controller and how they interact with ongoing operations.
+
+#### Delete request
+
+Handler: `PUT /control/v1/node/:node_id/delete`
+
+1. If node lifecycle is `NodeLifecycle::ScheduledForDeletion`:
+    - Return `200 OK`: there is already an ongoing deletion request for this node
+2. Update & persist lifecycle to `NodeLifecycle::ScheduledForDeletion`
+3. Persist current scheduling policy
+4. If there is no active operation (drain/fill/delete):
+    - Run deletion process for this node
+
+#### Cancel delete request
+
+Handler: `DELETE /control/v1/node/:node_id/delete`
+
+1. If node lifecycle is not `NodeLifecycle::ScheduledForDeletion`:
+    - Return `404 Not Found`: there is no current deletion request for this node
+2. If the active operation is deleting this node, cancel it
+3. Update & persist lifecycle to `NodeLifecycle::Active`
+4. Restore the last scheduling policy from persistence
+
+#### Drain/fill request
+
+1. If there are already ongoing drain/fill processes:
+    - Return `409 Conflict`: queueing of drain/fill processes is not supported
+2. If there is an ongoing delete process:
+    - Cancel it and wait until it is cancelled
+3. Run the drain/fill process
+4. After the drain/fill process is cancelled or finished:
+    - Try to find another candidate to delete and run the deletion process for that node
+
+#### Drain/fill cancel request
+
+1. If the active operation is not the related process:
+    - Return `400 Bad Request`: cancellation request is incorrect, operations are not the same
+2. Cancel the active operation
+3. Try to find another candidate to delete and run the deletion process for that node
+
+## Definition of Done
+
+- [x] Fix flaky node scenario and introduce related debug handlers
+- [ ] Node deletion intent is persistent - a node will be eventually deleted after a deletion
+request regardless of draining/filling requests and restarts
+- [ ] Node deletion can be graceful - deletion completes only after moving all tenant shards to
+recommended locations
+- [ ] Deploying does not break due to long deletions - drain/fill operations override deletion
+process and deletion resumes after drain/fill completes
+- [ ] `force` flag is implemented and provides fast, failure-tolerant node removal (e.g., when a
+pageserver node does not respond)
+- [ ] Legacy delete handler code is removed from storage_controller, test_runner, and storcon_cli
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -1,10 +1,9 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.

-use std::fmt::Display;
-
 use chrono::{DateTime, Utc};
 use jsonwebtoken::jwk::JwkSet;
 use serde::{Deserialize, Serialize, Serializer};
+use std::fmt::Display;

 use crate::privilege::Privilege;
 use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role};
@@ -49,7 +48,7 @@ pub struct ExtensionInstallResponse {
 /// Status of the LFC prewarm process. The same state machine is reused for
 /// both autoprewarm (prewarm after compute/Postgres start using the previously
 /// stored LFC state) and explicit prewarming via API.
-#[derive(Serialize, Default, Debug, Clone, PartialEq)]
+#[derive(Serialize, Default, Debug, Clone)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcPrewarmState {
    /// Default value when compute boots up.
@@ -59,7 +58,14 @@ pub enum LfcPrewarmState {
    Prewarming,
    /// We found requested LFC state in the endpoint storage and
    /// completed prewarming successfully.
-    Completed,
+    Completed {
+        total: i32,
+        prewarmed: i32,
+        skipped: i32,
+        state_download_time_ms: u32,
+        uncompress_time_ms: u32,
+        prewarm_time_ms: u32,
+    },
    /// Unexpected error happened during prewarming. Note, `Not Found 404`
    /// response from the endpoint storage is explicitly excluded here
    /// because it can normally happen on the first compute start,
@@ -68,11 +74,15 @@ pub enum LfcPrewarmState {
    /// We tried to fetch the corresponding LFC state from the endpoint storage,
    /// but received `Not Found 404`. This should normally happen only during the
    /// first endpoint start after creation with `autoprewarm: true`.
+    /// This may also happen if LFC is turned off or not initialized
    ///
    /// During the orchestrated prewarm via API, when a caller explicitly
    /// provides the LFC state key to prewarm from, it's the caller responsibility
    /// to handle this status as an error state in this case.
    Skipped,
+    /// LFC prewarm was cancelled. Some pages in LFC cache may be prewarmed if query
+    /// has started working before cancellation
+    Cancelled,
 }

 impl Display for LfcPrewarmState {
@@ -80,32 +90,44 @@ impl Display for LfcPrewarmState {
        match self {
            LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
            LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
-            LfcPrewarmState::Completed => f.write_str("Completed"),
+            LfcPrewarmState::Completed { .. } => f.write_str("Completed"),
            LfcPrewarmState::Skipped => f.write_str("Skipped"),
            LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
+            LfcPrewarmState::Cancelled => f.write_str("Cancelled"),
        }
    }
 }

-#[derive(Serialize, Default, Debug, Clone, PartialEq)]
+#[derive(Serialize, Default, Debug, Clone)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcOffloadState {
    #[default]
    NotOffloaded,
    Offloading,
-    Completed,
+    Completed {
+        state_query_time_ms: u32,
+        compress_time_ms: u32,
+        state_upload_time_ms: u32,
+    },
    Failed {
        error: String,
    },
+    /// LFC state was empty so it wasn't offloaded
+    Skipped,
 }

-#[derive(Serialize, Debug, Clone, PartialEq)]
+#[derive(Serialize, Debug, Clone)]
 #[serde(tag = "status", rename_all = "snake_case")]
-/// Response of /promote
 pub enum PromoteState {
    NotPromoted,
-    Completed,
-    Failed { error: String },
+    Completed {
+        lsn_wait_time_ms: u32,
+        pg_promote_time_ms: u32,
+        reconfigure_time_ms: u32,
+    },
+    Failed {
+        error: String,
+    },
 }

 #[derive(Deserialize, Default, Debug)]
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -6,26 +6,15 @@ license.workspace = true

 [dependencies]
 thiserror.workspace = true
-nix.workspace = true
+nix.workspace=true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
 libc.workspace = true
 lock_api.workspace = true
 rustc-hash.workspace = true

-[dev-dependencies]
-criterion = { workspace = true, features = ["html_reports"] }
-rand = "0.9"
-rand_distr = "0.5.1"
-xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
-ahash.workspace = true
-twox-hash = { version = "2.1.1" }
-seahash = "4.1.0"
-hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" }
-
-
 [target.'cfg(target_os = "macos")'.dependencies]
 tempfile = "3.14.0"

-[[bench]]
-name = "hmap_resize"
-harness = false
+[dev-dependencies]
+rand.workspace = true
+rand_distr = "0.5.1"
--- a/libs/neon-shmem/benches/hmap_resize.rs
+++ b/libs/neon-shmem/benches/hmap_resize.rs
@@ -1,330 +0,0 @@
-use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
-use neon_shmem::hash::HashMapAccess;
-use neon_shmem::hash::HashMapInit;
-use neon_shmem::hash::entry::Entry;
-use rand::distr::{Distribution, StandardUniform};
-use rand::prelude::*;
-use std::default::Default;
-use std::hash::BuildHasher;
-
-// Taken from bindings to C code
-
-#[derive(Clone, Debug, Hash, Eq, PartialEq)]
-#[repr(C)]
-pub struct FileCacheKey {
-    pub _spc_id: u32,
-    pub _db_id: u32,
-    pub _rel_number: u32,
-    pub _fork_num: u32,
-    pub _block_num: u32,
-}
-
-impl Distribution<FileCacheKey> for StandardUniform {
-    // questionable, but doesn't need to be good randomness
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
-        FileCacheKey {
-            _spc_id: rng.random(),
-            _db_id: rng.random(),
-            _rel_number: rng.random(),
-            _fork_num: rng.random(),
-            _block_num: rng.random(),
-        }
-    }
-}
-
-#[derive(Clone, Debug)]
-#[repr(C)]
-pub struct FileCacheEntry {
-    pub _offset: u32,
-    pub _access_count: u32,
-    pub _prev: *mut FileCacheEntry,
-    pub _next: *mut FileCacheEntry,
-    pub _state: [u32; 8],
-}
-
-impl FileCacheEntry {
-    fn dummy() -> Self {
-        Self {
-            _offset: 0,
-            _access_count: 0,
-            _prev: std::ptr::null_mut(),
-            _next: std::ptr::null_mut(),
-            _state: [0; 8],
-        }
-    }
-}
-
-// Utilities for applying operations.
-
-#[derive(Clone, Debug)]
-struct TestOp<K, V>(K, Option<V>);
-
-fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
-    op: TestOp<K, V>,
-    map: &mut HashMapAccess<K, V, S>,
-) {
-    let entry = map.entry(op.0);
-
-    match op.1 {
-        Some(new) => match entry {
-            Entry::Occupied(mut e) => Some(e.insert(new)),
-            Entry::Vacant(e) => {
-                _ = e.insert(new).unwrap();
-                None
-            }
-        },
-        None => match entry {
-            Entry::Occupied(e) => Some(e.remove()),
-            Entry::Vacant(_) => None,
-        },
-    };
-}
-
-// Hash utilities
-
-struct SeaRandomState {
-    k1: u64,
-    k2: u64,
-    k3: u64,
-    k4: u64,
-}
-
-impl std::hash::BuildHasher for SeaRandomState {
-    type Hasher = seahash::SeaHasher;
-
-    fn build_hasher(&self) -> Self::Hasher {
-        seahash::SeaHasher::with_seeds(self.k1, self.k2, self.k3, self.k4)
-    }
-}
-
-impl SeaRandomState {
-    fn new() -> Self {
-        let mut rng = rand::rng();
-        Self {
-            k1: rng.random(),
-            k2: rng.random(),
-            k3: rng.random(),
-            k4: rng.random(),
-        }
-    }
-}
-
-fn small_benchs(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Small maps");
-    group.sample_size(10);
-
-    group.bench_function("small_rehash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2).attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_xxhash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(twox_hash::xxhash64::RandomState::default())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_ahash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(ahash::RandomState::default())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("small_rehash_seahash", |b| {
-        let ideal_filled = 4_000_000;
-        let size = 5_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size * 2)
-            .with_hasher(SeaRandomState::new())
-            .attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.finish();
-}
-
-fn real_benchs(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Realistic workloads");
-    group.sample_size(10);
-    group.bench_function("real_bulk_insert", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut rng = rand::rng();
-        b.iter_batched(
-            || HashMapInit::new_resizeable(size, size * 2).attach_writer(),
-            |writer| {
-                for _ in 0..ideal_filled {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    let entry = writer.entry(key);
-                    match entry {
-                        Entry::Occupied(mut e) => {
-                            std::hint::black_box(e.insert(val));
-                        }
-                        Entry::Vacant(e) => {
-                            let _ = std::hint::black_box(e.insert(val).unwrap());
-                        }
-                    }
-                }
-            },
-            BatchSize::SmallInput,
-        )
-    });
-
-    group.bench_function("real_rehash", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
-        let mut rng = rand::rng();
-        while writer.get_num_buckets_in_use() < ideal_filled {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            apply_op(TestOp(key, Some(val)), &mut writer);
-        }
-        b.iter(|| writer.shuffle());
-    });
-
-    group.bench_function("real_rehash_hashbrown", |b| {
-        let size = 125_000_000;
-        let ideal_filled = 100_000_000;
-        let mut writer = hashbrown::raw::RawTable::new();
-        let mut rng = rand::rng();
-        let hasher = rustc_hash::FxBuildHasher;
-        unsafe {
-            writer
-                .resize(
-                    size,
-                    |(k, _)| hasher.hash_one(k),
-                    hashbrown::raw::Fallibility::Infallible,
-                )
-                .unwrap();
-        }
-        while writer.len() < ideal_filled as usize {
-            let key: FileCacheKey = rng.random();
-            let val = FileCacheEntry::dummy();
-            writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
-                hasher.hash_one(k)
-            });
-        }
-        b.iter(|| unsafe {
-            writer.table.rehash_in_place(
-                &|table, index| {
-                    hasher.hash_one(
-                        &table
-                            .bucket::<(FileCacheKey, FileCacheEntry)>(index)
-                            .as_ref()
-                            .0,
-                    )
-                },
-                std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
-                if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
-                    Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
-                } else {
-                    None
-                },
-            )
-        });
-    });
-
-    for elems in [2, 4, 8, 16, 32, 64, 96, 112] {
-        group.bench_with_input(
-            BenchmarkId::new("real_rehash_varied", elems),
-            &elems,
-            |b, &size| {
-                let ideal_filled = size * 1_000_000;
-                let size = 125_000_000;
-                let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
-                let mut rng = rand::rng();
-                while writer.get_num_buckets_in_use() < ideal_filled as usize {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    apply_op(TestOp(key, Some(val)), &mut writer);
-                }
-                b.iter(|| writer.shuffle());
-            },
-        );
-        group.bench_with_input(
-            BenchmarkId::new("real_rehash_varied_hashbrown", elems),
-            &elems,
-            |b, &size| {
-                let ideal_filled = size * 1_000_000;
-                let size = 125_000_000;
-                let mut writer = hashbrown::raw::RawTable::new();
-                let mut rng = rand::rng();
-                let hasher = rustc_hash::FxBuildHasher;
-                unsafe {
-                    writer
-                        .resize(
-                            size,
-                            |(k, _)| hasher.hash_one(k),
-                            hashbrown::raw::Fallibility::Infallible,
-                        )
-                        .unwrap();
-                }
-                while writer.len() < ideal_filled as usize {
-                    let key: FileCacheKey = rng.random();
-                    let val = FileCacheEntry::dummy();
-                    writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
-                        hasher.hash_one(k)
-                    });
-                }
-                b.iter(|| unsafe {
-                    writer.table.rehash_in_place(
-                        &|table, index| {
-                            hasher.hash_one(
-                                &table
-                                    .bucket::<(FileCacheKey, FileCacheEntry)>(index)
-                                    .as_ref()
-                                    .0,
-                            )
-                        },
-                        std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
-                        if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
-                            Some(|ptr| {
-                                std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry))
-                            })
-                        } else {
-                            None
-                        },
-                    )
-                });
-            },
-        );
-    }
-
-    group.finish();
-}
-
-criterion_group!(benches, small_benchs, real_benchs);
-criterion_main!(benches);
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -16,7 +16,6 @@
 //!
 //! Concurrency is managed very simply: the entire map is guarded by one shared-memory RwLock.

-use std::fmt::Debug;
 use std::hash::{BuildHasher, Hash};
 use std::mem::MaybeUninit;

@@ -57,22 +56,6 @@ pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
    num_buckets: u32,
 }

-impl<'a, K, V, S> Debug for HashMapInit<'a, K, V, S>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("HashMapInit")
-            .field("shmem_handle", &self.shmem_handle)
-            .field("shared_ptr", &self.shared_ptr)
-            .field("shared_size", &self.shared_size)
-            // .field("hasher", &self.hasher)
-            .field("num_buckets", &self.num_buckets)
-            .finish()
-    }
-}
-
 /// This is a per-process handle to a hash table that (possibly) lives in shared memory.
 /// If a child process is launched with fork(), the child process should
 /// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().
@@ -88,20 +71,6 @@ pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
 unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
 unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}

-impl<'a, K, V, S> Debug for HashMapAccess<'a, K, V, S>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("HashMapAccess")
-            .field("shmem_handle", &self.shmem_handle)
-            .field("shared_ptr", &self.shared_ptr)
-            // .field("hasher", &self.hasher)
-            .finish()
-    }
-}
-
 impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
    /// Change the 'hasher' used by the hash table.
    ///
@@ -329,7 +298,7 @@ where

    /// Get a reference to the entry containing a key.
    ///
-    /// NB: This takes a write lock as there's no way to distinguish whether the intention
+    /// NB: THis takes a write lock as there's no way to distinguish whether the intention
    /// is to use the entry for reading or for writing in advance.
    pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
        let hash = self.get_hash_value(&key);
--- a/libs/neon-shmem/src/hash/core.rs
+++ b/libs/neon-shmem/src/hash/core.rs
@@ -1,6 +1,5 @@
 //! Simple hash table with chaining.

-use std::fmt::Debug;
 use std::hash::Hash;
 use std::mem::MaybeUninit;

@@ -18,19 +17,6 @@ pub(crate) struct Bucket<K, V> {
    pub(crate) inner: Option<(K, V)>,
 }

-impl<K, V> Debug for Bucket<K, V>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("Bucket")
-            .field("next", &self.next)
-            .field("inner", &self.inner)
-            .finish()
-    }
-}
-
 /// Core hash table implementation.
 pub(crate) struct CoreHashMap<'a, K, V> {
    /// Dictionary used to map hashes to bucket indices.
@@ -45,22 +31,6 @@ pub(crate) struct CoreHashMap<'a, K, V> {
    pub(crate) buckets_in_use: u32,
 }

-impl<'a, K, V> Debug for CoreHashMap<'a, K, V>
-where
-    K: Debug,
-    V: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("CoreHashMap")
-            .field("dictionary", &self.dictionary)
-            .field("buckets", &self.buckets)
-            .field("free_head", &self.free_head)
-            .field("alloc_limit", &self.alloc_limit)
-            .field("buckets_in_use", &self.buckets_in_use)
-            .finish()
-    }
-}
-
 /// Error for when there are no empty buckets left but one is needed.
 #[derive(Debug, PartialEq)]
 pub struct FullError;
--- a/libs/neon-shmem/src/hash/entry.rs
+++ b/libs/neon-shmem/src/hash/entry.rs
@@ -61,10 +61,6 @@ impl<K, V> OccupiedEntry<'_, '_, K, V> {
    ///
    /// This may result in multiple bucket accesses if the entry was obtained by index as the
    /// previous chain entry needs to be discovered in this case.
-    ///
-    /// # Panics
-    /// Panics if the `prev_pos` field is equal to [`PrevPos::Unknown`]. In practice, this means
-    /// the entry was obtained via calling something like [`super::HashMapAccess::entry_at_bucket`].
    pub fn remove(mut self) -> V {
        // If this bucket was queried by index, go ahead and follow its chain from the start.
        let prev = if let PrevPos::Unknown(hash) = self.prev_pos {
--- a/libs/neon-shmem/src/shmem.rs
+++ b/libs/neon-shmem/src/shmem.rs
@@ -21,7 +21,6 @@ use nix::unistd::ftruncate as nix_ftruncate;
 /// the underlying file is resized. Do not access the area beyond the current size. Currently, that
 /// will cause the file to be expanded, but we might use `mprotect()` etc. to enforce that in the
 /// future.
-#[derive(Debug)]
 pub struct ShmemHandle {
    /// memfd file descriptor
    fd: OwnedFd,
@@ -36,7 +35,6 @@ pub struct ShmemHandle {
 }

 /// This is stored at the beginning in the shared memory area.
-#[derive(Debug)]
 struct SharedStruct {
    max_size: usize,

--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -310,11 +310,6 @@ impl AtomicLsn {
        }
    }

-    /// Consumes the atomic and returns the contained value.
-    pub const fn into_inner(self) -> Lsn {
-        Lsn(self.inner.into_inner())
-    }
-
    /// Atomically retrieve the `Lsn` value from memory.
    pub fn load(&self) -> Lsn {
        Lsn(self.inner.load(Ordering::Acquire))
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -54,7 +54,6 @@ pageserver_api.workspace = true
 pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
 pageserver_page_api.workspace = true
-peekable.workspace = true
 pem.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
@@ -67,7 +66,6 @@ postgres-types.workspace = true
 posthog_client_lite.workspace = true
 pprof.workspace = true
 pq_proto.workspace = true
-prost.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
--- a/pageserver/client_grpc/src/lib.rs
+++ b/pageserver/client_grpc/src/lib.rs
@@ -3,4 +3,3 @@ mod pool;
 mod retry;

 pub use client::{PageserverClient, ShardSpec};
-pub use pageserver_api::shard::ShardStripeSize; // used in ShardSpec
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -33,8 +33,6 @@ pub enum ProtocolError {
    Invalid(&'static str, String),
    #[error("required field '{0}' is missing")]
    Missing(&'static str),
-    #[error("invalid combination of not_modified_lsn '{0}' and request_lsn '{1}'")]
-    InvalidLsns(Lsn, Lsn),
 }

 impl ProtocolError {
@@ -87,9 +85,9 @@ impl TryFrom<proto::ReadLsn> for ReadLsn {
            return Err(ProtocolError::invalid("request_lsn", pb.request_lsn));
        }
        if pb.not_modified_since_lsn > pb.request_lsn {
-            return Err(ProtocolError::InvalidLsns(
-                Lsn(pb.not_modified_since_lsn),
-                Lsn(pb.request_lsn),
+            return Err(ProtocolError::invalid(
+                "not_modified_since_lsn",
+                pb.not_modified_since_lsn,
            ));
        }
        Ok(Self {
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -25,9 +25,6 @@ tracing.workspace = true
 tokio.workspace = true
 tokio-stream.workspace = true
 tokio-util.workspace = true
-axum.workspace = true
-http.workspace = true
-metrics.workspace = true
 tonic.workspace = true
 url.workspace = true

--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -34,10 +34,6 @@ use crate::util::{request_stats, tokio_thread_local_stats};
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
-    #[clap(long, default_value = "false")]
-    grpc: bool,
-    #[clap(long, default_value = "false")]
-    grpc_stream: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    /// Pageserver connection string. Supports postgresql:// and grpc:// protocols.
@@ -82,9 +78,6 @@ pub(crate) struct Args {
    #[clap(long)]
    set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,

-    #[clap(long)]
-    only_relnode: Option<u32>,
-
    /// Queue depth generated in each client.
    #[clap(long, default_value = "1")]
    queue_depth: NonZeroUsize,
@@ -99,31 +92,10 @@ pub(crate) struct Args {
    #[clap(long, default_value = "1")]
    batch_size: NonZeroUsize,

+    #[clap(long)]
+    only_relnode: Option<u32>,
+
    targets: Option<Vec<TenantTimelineId>>,
-
-    #[clap(long, default_value = "100")]
-    pool_max_consumers: NonZeroUsize,
-
-    #[clap(long, default_value = "5")]
-    pool_error_threshold: NonZeroUsize,
-
-    #[clap(long, default_value = "5000")]
-    pool_connect_timeout: NonZeroUsize,
-
-    #[clap(long, default_value = "1000")]
-    pool_connect_backoff: NonZeroUsize,
-
-    #[clap(long, default_value = "60000")]
-    pool_max_idle_duration: NonZeroUsize,
-
-    #[clap(long, default_value = "0")]
-    max_delay_ms: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_drops: usize,
-
-    #[clap(long, default_value = "0")]
-    percent_hangs: usize,
 }

 /// State shared by all clients
@@ -180,6 +152,7 @@ pub(crate) fn main(args: Args) -> anyhow::Result<()> {
        main_impl(args, thread_local_stats)
    })
 }
+
 async fn main_impl(
    args: Args,
    all_thread_local_stats: AllThreadLocalStats<request_stats::Stats>,
@@ -344,7 +317,6 @@ async fn main_impl(
    let rps_period = args
        .per_client_rate
        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
-
    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
        let ss = shared_state.clone();
        let cancel = cancel.clone();
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -453,7 +453,6 @@ impl TimelineHandles {
            handles: Default::default(),
        }
    }
-
    async fn get(
        &mut self,
        tenant_id: TenantId,
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -5,12 +5,10 @@ MODULE_big = neon
 OBJS = \
 	$(WIN32RES) \
 	communicator.o \
-	communicator_new.o \
 	communicator_process.o \
 	extension_server.o \
 	file_cache.o \
 	hll.o \
-	lfc_prewarm.o \
 	libpagestore.o \
 	logical_replication_monitor.o \
 	neon.o \
@@ -69,7 +67,6 @@ WALPROP_OBJS = \

 # libcommunicator.a is built by cargo from the Rust sources under communicator/
 # subdirectory. `cargo build` also generates communicator_bindings.h.
-communicator_new.o: communicator/communicator_bindings.h
 communicator_process.o: communicator/communicator_bindings.h
 file_cache.o: communicator/communicator_bindings.h

--- a/pgxn/neon/communicator/Cargo.toml
+++ b/pgxn/neon/communicator/Cargo.toml
@@ -17,31 +17,12 @@ rest_broker = []

 [dependencies]
 axum.workspace = true
-bytes.workspace = true
-clashmap.workspace = true
 http.workspace = true
-libc.workspace = true
-nix.workspace = true
-atomic_enum = "0.3.0"
-measured.workspace = true
-prometheus.workspace = true
-prost.workspace = true
-strum_macros.workspace = true
-thiserror.workspace = true
-tonic = { workspace = true, default-features = false, features=["codegen", "prost", "transport"] }
 tokio = { workspace = true, features = ["macros", "net", "io-util", "rt", "rt-multi-thread"] }
-tokio-pipe = { version = "0.2.12" }
 tracing.workspace = true
 tracing-subscriber.workspace = true

-metrics.workspace = true
-uring-common = { workspace = true, features = ["bytes"] }
-
-pageserver_client_grpc.workspace = true
-pageserver_api.workspace = true
-pageserver_page_api.workspace = true
-
-neon-shmem.workspace = true
+measured.workspace = true
 utils.workspace = true
 workspace_hack = { version = "0.1", path = "../../../workspace_hack" }

--- a/pgxn/neon/communicator/README.md
+++ b/pgxn/neon/communicator/README.md
@@ -3,18 +3,9 @@
 This package provides the so-called "compute-pageserver communicator",
 or just "communicator" in short. The communicator is a separate
 background worker process that runs in the PostgreSQL server. It's
-part of the neon extension.
-
-The commuicator handles the communication with the pageservers, and
-also provides an HTTP endpoint for metrics over a local Unix Domain
-socket (aka. the "communicator control socket"). On the PostgreSQL
-side, the glue code in pgxn/neon/ uses the communicator to implement
-the PostgreSQL Storage Manager (SMGR) interface.
-
-## Design criteria
-
- Low latency
- Saturate a 10 Gbit / s network interface without becoming a bottleneck
+part of the neon extension. Currently, it only provides an HTTP
+endpoint for metrics, but in the future it will evolve to handle all
+communications with the pageservers.

 ## Source code view

@@ -23,122 +14,10 @@ pgxn/neon/communicator_process.c
    the glue that interacts with PostgreSQL code and the Rust
    code in the communicator process.

-pgxn/neon/communicator_new.c
-	Contains the backend code that interacts with the communicator
-	process.

-pgxn/neon/communicator/src/backend_interface.rs
-	The entry point for calls from each backend.
-
-pgxn/neon/communicator/src/init.rs
-	Initialization at server startup
+pgxn/neon/communicator/src/worker_process/
+    Worker process main loop and glue code

 At compilation time, pgxn/neon/communicator/ produces a static
 library, libcommunicator.a. It is linked to the neon.so extension
 library.
-
-The real networking code, which is independent of PostgreSQL, is in
-the pageserver/client_grpc crate.
-
-## Process view
-
-The communicator runs in a dedicated background worker process, the
-"communicator process". The communicator uses a multi-threaded Tokio
-runtime to execute the IO requests. So the communicator process has
-multiple threads running. That's unusual for Postgres processes and
-care must be taken to make that work.
-
-### Backend <-> worker communication
-
-Each backend has a number of I/O request slots in shared memory. The
-slots are statically allocated for each backend, and must not be
-accessed by other backends. The worker process reads requests from the
-shared memory slots, and writes responses back to the slots.
-
-Here's an example snapshot of the system, when two requests from two
-different backends are in progress:
-
-```
-Backends           Request slots          Communicator process
---------          -------------          --------------------
-
-Backend 1          1: Idle
-                   2: Idle
-                   3: Processing          tokio task handling request 3
-
-Backend 2          4: Completed
-                   5: Processing          tokio task handling request 5
-                   6: Idle
-
-...                ...
-```
-
-To submit an IO request, the backend first picks one of its Idle
-slots, writes the IO request in the slot, and updates it to
-'Submitted' state. That transfers the ownership of the slot to the
-worker process, until the worker process marks the request as
-Completed. The worker process spawns a separate Tokio task for each
-request.
-
-To inform the worker process that a request slot has a pending IO
-request, there's a pipe shared by the worker process and all backend
-processes. The backend writes the index of the request slot to the
-pipe after changing the slot's state to Submitted. This wakes up the
-worker process.
-
-(Note that the pipe is just used for wakeups, but the worker process
-is free to pick up Submitted IO requests even without receiving the
-wakeup. As of this writing, it doesn't do that, but it might be useful
-in the future to reduce latency even further, for example.)
-
-When the worker process has completed processing the request, it
-writes the result back in the request slot. A GetPage request can also
-contain a pointer to buffer in the shared buffer cache. In that case,
-the worker process writes the resulting page contents directly to the
-buffer, and just a result code in the request slot. It then updates
-the 'state' field to Completed, which passes the owner ship back to
-the originating backend. Finally, it signals the process Latch of the
-originating backend, waking it up.
-
-### Differences between PostgreSQL v16, v17 and v18
-
-PostgreSQL v18 introduced the new AIO mechanism. The PostgreSQL AIO
-mechanism uses a very similar mechanism as described in the previous
-section, for the communication between AIO worker processes and
-backends. With our communicator, the AIO worker processes are not
-used, but we use the same PgAioHandle request slots as in upstream.
-For Neon-specific IO requests like GetDbSize, a neon request slot is
-used. But for the actual IO requests, the request slot merely contains
-a pointer to the PgAioHandle slot. The worker process updates the
-status of that, calls the IO callbacks upon completionetc, just like
-the upstream AIO worker processes do.
-
-## Sequence diagram
-
-                      neon
-    PostgreSQL     extension       backend_interface.rs  worker_process.rs    processor    tonic
-       |               .                    .                   .                 .
-	   | smgr_read()   .                    .                   .                 .
-	   +-------------> +                    .                   .                 .
-	   .               |                    .                   .                 .
-	   .               |  rcommunicator_    .                   .                 .
-	   .               | get_page_at_lsn    .                   .                 .
-	   .               +------------------> +                   .                 .
-                                            |                   .                 .
-                                            | write request to  .                 .                 .
-                                            | slot              .                 .
-                                            |                   .                 .
-                                            |                   .                 .
-											| submit_request()  .                 .
-											+-----------------> +                 .
-											|                   |                 .
-											|					| db_size_request .               .
-																+---------------->.
-																                  . TODO
-
-
-
-### Compute <-> pageserver protocol
-
-The protocol between Compute and the pageserver is based on gRPC. See `protos/`.
-
--- a/pgxn/neon/communicator/src/backend_comms.rs
+++ b/pgxn/neon/communicator/src/backend_comms.rs
@@ -1,224 +0,0 @@
-//! This module implements a request/response "slot" for submitting
-//! requests from backends to the communicator process.
-//!
-//! NB: The "backend" side of this code runs in Postgres backend processes,
-//! which means that it is not safe to use the 'tracing' crate for logging, nor
-//! to launch threads or use tokio tasks!
-
-use std::cell::UnsafeCell;
-use std::sync::atomic::{AtomicI32, Ordering};
-
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-
-use atomic_enum::atomic_enum;
-
-/// One request/response slot. Each backend has its own set of slots that it
-/// uses.
-///
-/// This is the moral equivalent of PgAioHandle for Postgres AIO requests
-/// Like PgAioHandle, try to keep this small.
-///
-/// There is an array of these in shared memory. Therefore, this must be Sized.
-///
-/// ## Lifecycle of a request
-///
-/// A slot is always owned by either the backend process or the communicator
-/// process, depending on the 'state'. Only the owning process is allowed to
-/// read or modify the slot, except for reading the 'state' itself to check who
-/// owns it.
-///
-/// A slot begins in the Idle state, where it is owned by the backend process.
-/// To submit a request, the backend process fills the slot with the request
-/// data, and changes it to the Submitted state. After changing the state, the
-/// slot is owned by the communicator process, and the backend is not allowed
-/// to access it until the communicator process marks it as Completed.
-///
-/// When the communicator process sees that the slot is in Submitted state, it
-/// starts to process the request. After processing the request, it stores the
-/// result in the slot, and changes the state to Completed. It is now owned by
-/// the backend process again, which may now read the result, and reuse the
-/// slot for a new request.
-///
-/// For correctness of the above protocol, we really only need two states:
-/// "owned by backend" and "owned by communicator process". But to help with
-/// debugging and better assertions, there are a few more states. When the
-/// backend starts to fill in the request details in the slot, it first sets the
-/// state from Idle to Filling, and when it's done with that, from Filling to
-/// Submitted. In the Filling state, the slot is still owned by the
-/// backend. Similarly, when the communicator process starts to process a
-/// request, it sets it to Processing state first, but the slot is still owned
-/// by the communicator process.
-///
-/// This struct doesn't handle waking up the communicator process when a request
-/// has been submitted or when a response is ready. The 'owner_procno' is used
-/// for waking up the backend on completion, but that happens elsewhere.
-pub struct NeonIORequestSlot {
-    /// similar to PgAioHandleState
-    state: AtomicNeonIORequestSlotState,
-
-    /// The owning process's ProcNumber. The worker process uses this to set the
-    /// process's latch on completion.
-    ///
-    /// (This could be calculated from num_neon_request_slots_per_backend and
-    /// the index of this slot in the overall 'neon_requst_slots array'. But we
-    /// prefer the communicator process to not know how the request slots are
-    /// divided between the backends.)
-    owner_procno: AtomicI32,
-
-    /// SAFETY: This is modified by submit_request(), after it has established
-    /// ownership of the slot by setting state from Idle to Filling
-    request: UnsafeCell<NeonIORequest>,
-
-    /// Valid when state is Completed
-    ///
-    /// SAFETY: This is modified by RequestProcessingGuard::complete(). There
-    /// can be only one RequestProcessingGuard outstanding for a slot at a time,
-    /// because it is returned by start_processing_request() which checks the
-    /// state, so RequestProcessingGuard has exclusive access to the slot.
-    result: UnsafeCell<NeonIOResult>,
-}
-
-// The protocol described in the "Lifecycle of a request" section above ensures
-// the safe access to the fields
-unsafe impl Send for NeonIORequestSlot {}
-unsafe impl Sync for NeonIORequestSlot {}
-
-impl Default for NeonIORequestSlot {
-    fn default() -> NeonIORequestSlot {
-        NeonIORequestSlot {
-            owner_procno: AtomicI32::new(-1),
-            request: UnsafeCell::new(NeonIORequest::Empty),
-            result: UnsafeCell::new(NeonIOResult::Empty),
-            state: AtomicNeonIORequestSlotState::new(NeonIORequestSlotState::Idle),
-        }
-    }
-}
-
-#[atomic_enum]
-#[derive(Eq, PartialEq)]
-pub enum NeonIORequestSlotState {
-    Idle,
-
-    /// Backend is filling in the request
-    Filling,
-
-    /// Backend has submitted the request to the communicator, but the
-    /// communicator process has not yet started processing it.
-    Submitted,
-
-    /// Communicator is processing the request
-    Processing,
-
-    /// Communicator has completed the request, and the 'result' field is now
-    /// valid, but the backend has not read the result yet.
-    Completed,
-}
-
-impl NeonIORequestSlot {
-    /// Write a request to the slot, and mark it as Submitted.
-    ///
-    /// Note: This does not wake up the worker process to actually process
-    /// the request. It's the caller's responsibility to do that.
-    pub fn submit_request(&self, request: &NeonIORequest, proc_number: i32) {
-        // Verify that the slot is in Idle state previously, and put it in
-        // Filling state.
-        //
-        // XXX: This step isn't strictly necessary. Assuming the caller didn't
-        // screw up and try to use a slot that's already in use, we could fill
-        // the slot and switch it directly from Idle to Submitted state.
-        if let Err(s) = self.state.compare_exchange(
-            NeonIORequestSlotState::Idle,
-            NeonIORequestSlotState::Filling,
-            Ordering::Relaxed,
-            Ordering::Relaxed,
-        ) {
-            panic!("unexpected state in request slot: {s:?}");
-        }
-
-        // Fill in the request details
-        self.owner_procno.store(proc_number, Ordering::Relaxed);
-        unsafe { *self.request.get() = *request }
-
-        // This synchronizes-with store/swap in [`start_processing_request`].
-        // Note that this ensures that the previous non-atomic writes visible
-        // to other threads too.
-        self.state
-            .store(NeonIORequestSlotState::Submitted, Ordering::Release);
-    }
-
-    pub fn get_state(&self) -> NeonIORequestSlotState {
-        self.state.load(Ordering::Relaxed)
-    }
-
-    pub fn try_get_result(&self) -> Option<NeonIOResult> {
-        // This synchronizes-with the store/swap in [`RequestProcessingGuard::completed`]
-        let state = self.state.load(Ordering::Acquire);
-        if state == NeonIORequestSlotState::Completed {
-            let result = unsafe { *self.result.get() };
-            self.state
-                .store(NeonIORequestSlotState::Idle, Ordering::Relaxed);
-            Some(result)
-        } else {
-            None
-        }
-    }
-
-    /// Read the IO request from the slot indicated in the wakeup
-    pub fn start_processing_request<'a>(&'a self) -> Option<RequestProcessingGuard<'a>> {
-        // XXX: using atomic load rather than compare_exchange would be
-        // sufficient here, as long as the communicator process has _some_ means
-        // of tracking which requests it's already processing. That could be a
-        // flag somewhere in communicator's private memory, for example.
-        //
-        // This synchronizes-with the store in [`submit_request`].
-        if let Err(s) = self.state.compare_exchange(
-            NeonIORequestSlotState::Submitted,
-            NeonIORequestSlotState::Processing,
-            Ordering::Acquire,
-            Ordering::Relaxed,
-        ) {
-            // FIXME surprising state. This is unexpected at the moment, but if we
-            // started to process requests more aggressively, without waiting for the
-            // read from the pipe, then this could happen
-            panic!("unexpected state in request slot: {s:?}");
-        }
-
-        Some(RequestProcessingGuard(self))
-    }
-}
-
-/// [`NeonIORequestSlot::start_processing_request`] returns this guard object to
-/// indicate that the the caller now "owns" the slot, until it calls
-/// [`RequestProcessingGuard::completed`].
-///
-/// TODO: implement Drop on this, to mark the request as Aborted or Errored
-/// if [`RequestProcessingGuard::completed`] is not called.
-pub struct RequestProcessingGuard<'a>(&'a NeonIORequestSlot);
-
-unsafe impl<'a> Send for RequestProcessingGuard<'a> {}
-unsafe impl<'a> Sync for RequestProcessingGuard<'a> {}
-
-impl<'a> RequestProcessingGuard<'a> {
-    pub fn get_request(&self) -> &NeonIORequest {
-        unsafe { &*self.0.request.get() }
-    }
-
-    pub fn get_owner_procno(&self) -> i32 {
-        self.0.owner_procno.load(Ordering::Relaxed)
-    }
-
-    pub fn completed(self, result: NeonIOResult) {
-        // Store the result to the slot.
-        unsafe {
-            *self.0.result.get() = result;
-        };
-
-        // Mark the request as completed. After that, we no longer have
-        // ownership of the slot, and must not modify it.
-        let old_state = self
-            .0
-            .state
-            .swap(NeonIORequestSlotState::Completed, Ordering::Release);
-        assert!(old_state == NeonIORequestSlotState::Processing);
-    }
-}
--- a/pgxn/neon/communicator/src/backend_interface.rs
+++ b/pgxn/neon/communicator/src/backend_interface.rs
@@ -1,296 +0,0 @@
-//! This code runs in each backend process. That means that launching Rust threads, panicking
-//! etc. is forbidden!
-
-use std::os::fd::OwnedFd;
-
-use crate::backend_comms::NeonIORequestSlot;
-use crate::init::CommunicatorInitStruct;
-use crate::integrated_cache::{BackendCacheReadOp, IntegratedCacheReadAccess};
-use crate::neon_request::{CCachedGetPageVResult, CLsn, COid};
-use crate::neon_request::{NeonIORequest, NeonIOResult};
-
-use utils::lsn::Lsn;
-
-pub struct CommunicatorBackendStruct<'t> {
-    my_proc_number: i32,
-
-    neon_request_slots: &'t [NeonIORequestSlot],
-
-    submission_pipe_write_fd: OwnedFd,
-
-    pending_cache_read_op: Option<BackendCacheReadOp<'t>>,
-
-    integrated_cache: &'t IntegratedCacheReadAccess<'t>,
-}
-
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_backend_init(
-    cis: Box<CommunicatorInitStruct>,
-    my_proc_number: i32,
-) -> &'static mut CommunicatorBackendStruct<'static> {
-    if my_proc_number < 0 {
-        panic!("cannot attach to communicator shared memory with procnumber {my_proc_number}");
-    }
-
-    let integrated_cache = Box::leak(Box::new(cis.integrated_cache_init_struct.backend_init()));
-
-    let bs: &'static mut CommunicatorBackendStruct =
-        Box::leak(Box::new(CommunicatorBackendStruct {
-            my_proc_number,
-            neon_request_slots: cis.neon_request_slots,
-
-            submission_pipe_write_fd: cis.submission_pipe_write_fd,
-            pending_cache_read_op: None,
-
-            integrated_cache,
-        }));
-    bs
-}
-
-/// Start a request. You can poll for its completion and get the result by
-/// calling bcomm_poll_dbsize_request_completion(). The communicator will wake
-/// us up by setting our process latch, so to wait for the completion, wait on
-/// the latch and call bcomm_poll_dbsize_request_completion() every time the
-/// latch is set.
-///
-/// Safety: The C caller must ensure that the references are valid.
-/// The requested slot must be free, or this panics.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_start_io_request(
-    bs: &'_ mut CommunicatorBackendStruct,
-    slot_idx: i32,
-    request: &NeonIORequest,
-    immediate_result_ptr: &mut NeonIOResult,
-) -> i32 {
-    assert!(bs.pending_cache_read_op.is_none());
-
-    // Check if the request can be satisfied from the cache first
-    if let NeonIORequest::RelSize(req) = request {
-        if let Some(nblocks) = bs.integrated_cache.get_rel_size(&req.reltag()) {
-            *immediate_result_ptr = NeonIOResult::RelSize(nblocks);
-            return -1;
-        }
-    }
-
-    // Create neon request and submit it
-    bs.start_neon_io_request(slot_idx, request);
-
-    slot_idx
-}
-
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_start_get_page_v_request(
-    bs: &mut CommunicatorBackendStruct,
-    slot_idx: i32,
-    request: &NeonIORequest,
-    immediate_result_ptr: &mut CCachedGetPageVResult,
-) -> i32 {
-    let NeonIORequest::GetPageV(get_pagev_request) = request else {
-        panic!("invalid request passed to bcomm_start_get_page_v_request()");
-    };
-    assert!(matches!(request, NeonIORequest::GetPageV(_)));
-    assert!(bs.pending_cache_read_op.is_none());
-
-    // Check if the request can be satisfied from the cache first
-    let mut all_cached = true;
-    let mut read_op = bs.integrated_cache.start_read_op();
-    for i in 0..get_pagev_request.nblocks {
-        if let Some(cache_block) = read_op.get_page(
-            &get_pagev_request.reltag(),
-            get_pagev_request.block_number + i as u32,
-        ) {
-            immediate_result_ptr.cache_block_numbers[i as usize] = cache_block;
-        } else {
-            // not found in cache
-            all_cached = false;
-            break;
-        }
-    }
-    if all_cached {
-        bs.pending_cache_read_op = Some(read_op);
-        return -1;
-    }
-
-    // Create neon request and submit it
-    bs.start_neon_io_request(slot_idx, request);
-
-    slot_idx
-}
-
-/// Check if a request has completed. Returns:
-///
-/// -1 if the request is still being processed
-/// 0 on success
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_poll_request_completion(
-    bs: &mut CommunicatorBackendStruct,
-    request_slot_idx: u32,
-    result_p: &mut NeonIOResult,
-) -> i32 {
-    match bs.neon_request_slots[request_slot_idx as usize].try_get_result() {
-        None => -1, // still processing
-        Some(result) => {
-            *result_p = result;
-            0
-        }
-    }
-}
-
-/// Check if a request has completed. Returns:
-///
-/// 'false' if the slot is Idle. The backend process has ownership.
-/// 'true' if the slot is busy, and should be polled for result.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_get_request_slot_status(
-    bs: &mut CommunicatorBackendStruct,
-    request_slot_idx: u32,
-) -> bool {
-    use crate::backend_comms::NeonIORequestSlotState;
-    match bs.neon_request_slots[request_slot_idx as usize].get_state() {
-        NeonIORequestSlotState::Idle => false,
-        NeonIORequestSlotState::Filling => {
-            // 'false' would be the right result here. However, this
-            // is a very transient state. The C code should never
-            // leave a slot in this state, so if it sees that,
-            // something's gone wrong and it's not clear what to do
-            // with it.
-            panic!("unexpected Filling state in request slot {request_slot_idx}");
-        }
-        NeonIORequestSlotState::Submitted => true,
-        NeonIORequestSlotState::Processing => true,
-        NeonIORequestSlotState::Completed => true,
-    }
-}
-
-// LFC functions
-
-/// Finish a local file cache read
-///
-//
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_finish_cache_read(bs: &mut CommunicatorBackendStruct) -> bool {
-    if let Some(op) = bs.pending_cache_read_op.take() {
-        op.finish()
-    } else {
-        panic!("bcomm_finish_cache_read() called with no cached read pending");
-    }
-}
-
-/// Check if LFC contains the given buffer, and update its last-written LSN if not.
-///
-/// This is used in WAL replay in read replica, to skip updating pages that are
-/// not in cache.
-#[unsafe(no_mangle)]
-pub extern "C" fn bcomm_update_lw_lsn_for_block_if_not_cached(
-    bs: &mut CommunicatorBackendStruct,
-    spc_oid: COid,
-    db_oid: COid,
-    rel_number: u32,
-    fork_number: u8,
-    block_number: u32,
-    lsn: CLsn,
-) -> bool {
-    bs.integrated_cache.update_lw_lsn_for_block_if_not_cached(
-        &pageserver_page_api::RelTag {
-            spcnode: spc_oid,
-            dbnode: db_oid,
-            relnode: rel_number,
-            forknum: fork_number,
-        },
-        block_number,
-        Lsn(lsn),
-    )
-}
-
-#[repr(C)]
-#[derive(Clone, Debug)]
-pub struct FileCacheIterator {
-    next_bucket: u64,
-
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-}
-
-/// Iterate over LFC contents
-#[allow(clippy::missing_safety_doc)]
-#[unsafe(no_mangle)]
-pub unsafe extern "C" fn bcomm_cache_iterate_begin(
-    _bs: &mut CommunicatorBackendStruct,
-    iter: *mut FileCacheIterator,
-) {
-    unsafe { (*iter).next_bucket = 0 };
-}
-
-#[allow(clippy::missing_safety_doc)]
-#[unsafe(no_mangle)]
-pub unsafe extern "C" fn bcomm_cache_iterate_next(
-    bs: &mut CommunicatorBackendStruct,
-    iter: *mut FileCacheIterator,
-) -> bool {
-    use crate::integrated_cache::GetBucketResult;
-    loop {
-        let next_bucket = unsafe { (*iter).next_bucket } as usize;
-        match bs.integrated_cache.get_bucket(next_bucket) {
-            GetBucketResult::Occupied(rel, blk) => {
-                unsafe {
-                    (*iter).spc_oid = rel.spcnode;
-                    (*iter).db_oid = rel.dbnode;
-                    (*iter).rel_number = rel.relnode;
-                    (*iter).fork_number = rel.forknum;
-                    (*iter).block_number = blk;
-
-                    (*iter).next_bucket += 1;
-                }
-                break true;
-            }
-            GetBucketResult::Vacant => {
-                unsafe {
-                    (*iter).next_bucket += 1;
-                }
-                continue;
-            }
-            GetBucketResult::OutOfBounds => {
-                break false;
-            }
-        }
-    }
-}
-
-#[allow(clippy::missing_safety_doc)]
-#[unsafe(no_mangle)]
-pub unsafe extern "C" fn bcomm_cache_get_num_pages_used(bs: &mut CommunicatorBackendStruct) -> u64 {
-    bs.integrated_cache.get_num_buckets_in_use() as u64
-}
-
-impl<'t> CommunicatorBackendStruct<'t> {
-    /// The slot must be free, or this panics.
-    pub(crate) fn start_neon_io_request(&mut self, request_slot_idx: i32, request: &NeonIORequest) {
-        let my_proc_number = self.my_proc_number;
-
-        self.neon_request_slots[request_slot_idx as usize].submit_request(request, my_proc_number);
-
-        // Tell the communicator about it
-        self.notify_about_request(request_slot_idx);
-    }
-
-    /// Send a wakeup to the communicator process
-    fn notify_about_request(self: &CommunicatorBackendStruct<'t>, request_slot_idx: i32) {
-        // wake up communicator by writing the idx to the submission pipe
-        //
-
-        // This can block, if the pipe is full. That should be very rare,
-        // because the communicator tries hard to drain the pipe to prevent
-        // that. Also, there's a natural upper bound on how many wakeups can be
-        // queued up: there is only a limited number of request slots for each
-        // backend.
-        //
-        // If it does block very briefly, that's not too serious.
-        let idxbuf = request_slot_idx.to_ne_bytes();
-
-        let _res = nix::unistd::write(&self.submission_pipe_write_fd, &idxbuf);
-        // FIXME: check result, return any errors
-    }
-}
--- a/pgxn/neon/communicator/src/file_cache.rs
+++ b/pgxn/neon/communicator/src/file_cache.rs
@@ -1,162 +0,0 @@
-//! Implement the "low-level" parts of the file cache.
-//!
-//! This module just deals with reading and writing the file, and keeping track
-//! which blocks in the cache file are in use and which are free. The "high
-//! level" parts of tracking which block in the cache file corresponds to which
-//! relation block is handled in 'integrated_cache' instead.
-//!
-//! This module is only used to access the file from the communicator
-//! process. The backend processes *also* read the file (and sometimes also
-//! write it? ), but the backends use direct C library calls for that.
-use std::fs::File;
-use std::os::unix::fs::FileExt;
-use std::path::Path;
-use std::sync::Arc;
-use std::sync::Mutex;
-
-use crate::BLCKSZ;
-
-use tokio::task::spawn_blocking;
-
-pub type CacheBlock = u64;
-
-pub const INVALID_CACHE_BLOCK: CacheBlock = u64::MAX;
-
-#[derive(Debug)]
-pub struct FileCache {
-    file: Arc<File>,
-
-    free_list: Mutex<FreeList>,
-
-    // metrics
-    max_blocks_gauge: metrics::IntGauge,
-    num_free_blocks_gauge: metrics::IntGauge,
-}
-
-// TODO: We keep track of all free blocks in this vec. That doesn't really scale.
-// Idea: when free_blocks fills up with more than 1024 entries, write them all to
-// one block on disk.
-#[derive(Debug)]
-struct FreeList {
-    next_free_block: CacheBlock,
-    max_blocks: u64,
-
-    free_blocks: Vec<CacheBlock>,
-}
-
-impl FileCache {
-    pub fn new(file_cache_path: &Path, mut initial_size: u64) -> Result<FileCache, std::io::Error> {
-        if initial_size < 100 {
-            tracing::warn!(
-                "min size for file cache is 100 blocks, {} requested",
-                initial_size
-            );
-            initial_size = 100;
-        }
-
-        let file = std::fs::OpenOptions::new()
-            .read(true)
-            .write(true)
-            .truncate(true)
-            .create(true)
-            .open(file_cache_path)?;
-
-        let max_blocks_gauge = metrics::IntGauge::new(
-            "file_cache_max_blocks",
-            "Local File Cache size in 8KiB blocks",
-        )
-        .unwrap();
-        let num_free_blocks_gauge = metrics::IntGauge::new(
-            "file_cache_num_free_blocks",
-            "Number of free 8KiB blocks in Local File Cache",
-        )
-        .unwrap();
-
-        tracing::info!("initialized file cache with {} blocks", initial_size);
-
-        Ok(FileCache {
-            file: Arc::new(file),
-            free_list: Mutex::new(FreeList {
-                next_free_block: 0,
-                max_blocks: initial_size,
-                free_blocks: Vec::new(),
-            }),
-            max_blocks_gauge,
-            num_free_blocks_gauge,
-        })
-    }
-
-    // File cache management
-
-    pub async fn read_block(
-        &self,
-        cache_block: CacheBlock,
-        mut dst: impl uring_common::buf::IoBufMut + Send + Sync,
-    ) -> Result<(), std::io::Error> {
-        assert!(dst.bytes_total() == BLCKSZ);
-        let file = self.file.clone();
-
-        let dst_ref = unsafe { std::slice::from_raw_parts_mut(dst.stable_mut_ptr(), BLCKSZ) };
-
-        spawn_blocking(move || file.read_exact_at(dst_ref, cache_block * BLCKSZ as u64)).await??;
-        Ok(())
-    }
-
-    pub async fn write_block(
-        &self,
-        cache_block: CacheBlock,
-        src: impl uring_common::buf::IoBuf + Send + Sync,
-    ) -> Result<(), std::io::Error> {
-        assert!(src.bytes_init() == BLCKSZ);
-        let file = self.file.clone();
-
-        let src_ref = unsafe { std::slice::from_raw_parts(src.stable_ptr(), BLCKSZ) };
-
-        spawn_blocking(move || file.write_all_at(src_ref, cache_block * BLCKSZ as u64)).await??;
-
-        Ok(())
-    }
-
-    pub fn alloc_block(&self) -> Option<CacheBlock> {
-        let mut free_list = self.free_list.lock().unwrap();
-        if let Some(x) = free_list.free_blocks.pop() {
-            return Some(x);
-        }
-        if free_list.next_free_block < free_list.max_blocks {
-            let result = free_list.next_free_block;
-            free_list.next_free_block += 1;
-            return Some(result);
-        }
-        None
-    }
-
-    pub fn dealloc_block(&self, cache_block: CacheBlock) {
-        let mut free_list = self.free_list.lock().unwrap();
-        free_list.free_blocks.push(cache_block);
-    }
-}
-
-impl metrics::core::Collector for FileCache {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-        descs.append(&mut self.max_blocks_gauge.desc());
-        descs.append(&mut self.num_free_blocks_gauge.desc());
-        descs
-    }
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        // Update the gauges with fresh values first
-        {
-            let free_list = self.free_list.lock().unwrap();
-            self.max_blocks_gauge.set(free_list.max_blocks as i64);
-
-            let total_free_blocks: i64 = free_list.free_blocks.len() as i64
-                + (free_list.max_blocks as i64 - free_list.next_free_block as i64);
-            self.num_free_blocks_gauge.set(total_free_blocks);
-        }
-
-        let mut values = Vec::new();
-        values.append(&mut self.max_blocks_gauge.collect());
-        values.append(&mut self.num_free_blocks_gauge.collect());
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/global_allocator.rs
+++ b/pgxn/neon/communicator/src/global_allocator.rs
@@ -1,109 +0,0 @@
-//! Global allocator, for tracking memory usage of the Rust parts
-//!
-//! Postgres is designed to handle allocation failure (ie. malloc() returning NULL) gracefully.  It
-//! rolls backs the transaction and gives the user an "ERROR: out of memory" error. Rust code
-//! however panics if an allocation fails. We don't want that to ever happen, because an unhandled
-//! panic leads to Postgres crash and restart. Our strategy is to pre-allocate a large enough chunk
-//! of memory for use by the Rust code, so that the allocations never fail.
-//!
-//! To pick the size for the pre-allocated chunk, we have a metric to track the high watermark
-//! memory usage of all the Rust allocations in total.
-//!
-//! TODO:
-//!
-//! - Currently we just export the metrics. Actual allocations are still just passed through to
-//!   the system allocator.
-//! - Take padding etc. overhead into account
-
-use std::alloc::{GlobalAlloc, Layout, System};
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
-
-use metrics::IntGauge;
-
-struct MyAllocator {
-    allocations: AtomicU64,
-    deallocations: AtomicU64,
-
-    allocated: AtomicUsize,
-    high: AtomicUsize,
-}
-
-unsafe impl GlobalAlloc for MyAllocator {
-    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-        self.allocations.fetch_add(1, Ordering::Relaxed);
-        let mut allocated = self.allocated.fetch_add(layout.size(), Ordering::Relaxed);
-        allocated += layout.size();
-        self.high.fetch_max(allocated, Ordering::Relaxed);
-        unsafe { System.alloc(layout) }
-    }
-
-    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-        self.deallocations.fetch_add(1, Ordering::Relaxed);
-        self.allocated.fetch_sub(layout.size(), Ordering::Relaxed);
-        unsafe { System.dealloc(ptr, layout) }
-    }
-}
-
-#[global_allocator]
-static GLOBAL: MyAllocator = MyAllocator {
-    allocations: AtomicU64::new(0),
-    deallocations: AtomicU64::new(0),
-    allocated: AtomicUsize::new(0),
-    high: AtomicUsize::new(0),
-};
-
-pub struct MyAllocatorCollector {
-    allocations: IntGauge,
-    deallocations: IntGauge,
-    allocated: IntGauge,
-    high: IntGauge,
-}
-
-impl MyAllocatorCollector {
-    pub fn new() -> MyAllocatorCollector {
-        MyAllocatorCollector {
-            allocations: IntGauge::new("allocations_total", "Number of allocations in Rust code")
-                .unwrap(),
-            deallocations: IntGauge::new(
-                "deallocations_total",
-                "Number of deallocations in Rust code",
-            )
-            .unwrap(),
-            allocated: IntGauge::new("allocated_total", "Bytes currently allocated").unwrap(),
-            high: IntGauge::new("allocated_high", "High watermark of allocated bytes").unwrap(),
-        }
-    }
-}
-
-impl metrics::core::Collector for MyAllocatorCollector {
-    fn desc(&self) -> Vec<&metrics::core::Desc> {
-        let mut descs = Vec::new();
-
-        descs.append(&mut self.allocations.desc());
-        descs.append(&mut self.deallocations.desc());
-        descs.append(&mut self.allocated.desc());
-        descs.append(&mut self.high.desc());
-
-        descs
-    }
-
-    fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
-        let mut values = Vec::new();
-
-        // update the gauges
-        self.allocations
-            .set(GLOBAL.allocations.load(Ordering::Relaxed) as i64);
-        self.deallocations
-            .set(GLOBAL.allocations.load(Ordering::Relaxed) as i64);
-        self.allocated
-            .set(GLOBAL.allocated.load(Ordering::Relaxed) as i64);
-        self.high.set(GLOBAL.high.load(Ordering::Relaxed) as i64);
-
-        values.append(&mut self.allocations.collect());
-        values.append(&mut self.deallocations.collect());
-        values.append(&mut self.allocated.collect());
-        values.append(&mut self.high.collect());
-
-        values
-    }
-}
--- a/pgxn/neon/communicator/src/init.rs
+++ b/pgxn/neon/communicator/src/init.rs
@@ -1,166 +0,0 @@
-//! Initialization functions. These are executed in the postmaster process,
-//! at different stages of server startup.
-//!
-//!
-//! Communicator initialization steps:
-//!
-//! 1. At postmaster startup, before shared memory is allocated,
-//!    rcommunicator_shmem_size() is called to get the amount of
-//!    shared memory that this module needs.
-//!
-//! 2. Later, after the shared memory has been allocated,
-//!    rcommunicator_shmem_init() is called to initialize the shmem
-//!    area.
-//!
-//! Per process initialization:
-//!
-//! When a backend process starts up, it calls rcommunicator_backend_init().
-//! In the communicator worker process, other functions are called, see
-//! `worker_process` module.
-
-use std::ffi::c_int;
-use std::mem;
-use std::mem::MaybeUninit;
-use std::os::fd::OwnedFd;
-
-use crate::backend_comms::NeonIORequestSlot;
-use crate::integrated_cache::IntegratedCacheInitStruct;
-
-/// This struct is created in the postmaster process, and inherited to
-/// the communicator process and all backend processes through fork()
-#[repr(C)]
-pub struct CommunicatorInitStruct {
-    pub submission_pipe_read_fd: OwnedFd,
-    pub submission_pipe_write_fd: OwnedFd,
-
-    // Shared memory data structures
-    pub num_neon_request_slots: u32,
-
-    pub neon_request_slots: &'static [NeonIORequestSlot],
-
-    pub integrated_cache_init_struct: IntegratedCacheInitStruct<'static>,
-}
-
-impl std::fmt::Debug for CommunicatorInitStruct {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        fmt.debug_struct("CommunicatorInitStruct")
-            .field("submission_pipe_read_fd", &self.submission_pipe_read_fd)
-            .field("submission_pipe_write_fd", &self.submission_pipe_write_fd)
-            .field("num_neon_request_slots", &self.num_neon_request_slots)
-            .field("neon_request_slots length", &self.neon_request_slots.len())
-            .finish()
-    }
-}
-
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_shmem_size(num_neon_request_slots: u32) -> u64 {
-    let mut size = 0;
-
-    size += mem::size_of::<NeonIORequestSlot>() * num_neon_request_slots as usize;
-
-    // For integrated_cache's Allocator. TODO: make this adjustable
-    size += IntegratedCacheInitStruct::shmem_size();
-
-    size as u64
-}
-
-/// Initialize the shared memory segment. Returns a backend-private
-/// struct, which will be inherited by backend processes through fork
-#[unsafe(no_mangle)]
-pub extern "C" fn rcommunicator_shmem_init(
-    submission_pipe_read_fd: c_int,
-    submission_pipe_write_fd: c_int,
-    num_neon_request_slots: u32,
-    shmem_area_ptr: *mut MaybeUninit<u8>,
-    shmem_area_len: u64,
-    initial_file_cache_size: u64,
-    max_file_cache_size: u64,
-) -> &'static mut CommunicatorInitStruct {
-    let shmem_area: &'static mut [MaybeUninit<u8>] =
-        unsafe { std::slice::from_raw_parts_mut(shmem_area_ptr, shmem_area_len as usize) };
-
-    let (neon_request_slots, remaining_area) =
-        alloc_array_from_slice::<NeonIORequestSlot>(shmem_area, num_neon_request_slots as usize);
-
-    for slot in neon_request_slots.iter_mut() {
-        slot.write(NeonIORequestSlot::default());
-    }
-
-    // 'neon_request_slots' is initialized now. (MaybeUninit::slice_assume_init_mut() is nightly-only
-    // as of this writing.)
-    let neon_request_slots = unsafe {
-        std::mem::transmute::<&mut [MaybeUninit<NeonIORequestSlot>], &mut [NeonIORequestSlot]>(
-            neon_request_slots,
-        )
-    };
-
-    // Give the rest of the area to the integrated cache
-    let integrated_cache_init_struct = IntegratedCacheInitStruct::shmem_init(
-        remaining_area,
-        initial_file_cache_size,
-        max_file_cache_size,
-    );
-
-    let (submission_pipe_read_fd, submission_pipe_write_fd) = unsafe {
-        use std::os::fd::FromRawFd;
-        (
-            OwnedFd::from_raw_fd(submission_pipe_read_fd),
-            OwnedFd::from_raw_fd(submission_pipe_write_fd),
-        )
-    };
-
-    let cis: &'static mut CommunicatorInitStruct = Box::leak(Box::new(CommunicatorInitStruct {
-        submission_pipe_read_fd,
-        submission_pipe_write_fd,
-
-        num_neon_request_slots,
-        neon_request_slots,
-
-        integrated_cache_init_struct,
-    }));
-
-    cis
-}
-
-pub fn alloc_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-) -> (&mut MaybeUninit<T>, &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size());
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { result_ptr.as_mut().unwrap() };
-
-    (result, remain)
-}
-
-pub fn alloc_array_from_slice<T>(
-    area: &mut [MaybeUninit<u8>],
-    len: usize,
-) -> (&mut [MaybeUninit<T>], &mut [MaybeUninit<u8>]) {
-    let layout = std::alloc::Layout::new::<T>();
-
-    let area_start = area.as_mut_ptr();
-
-    // pad to satisfy alignment requirements
-    let padding = area_start.align_offset(layout.align());
-    if padding + layout.size() * len > area.len() {
-        panic!("out of memory");
-    }
-    let area = &mut area[padding..];
-    let (result_area, remain) = area.split_at_mut(layout.size() * len);
-
-    let result_ptr: *mut MaybeUninit<T> = result_area.as_mut_ptr().cast();
-    let result = unsafe { std::slice::from_raw_parts_mut(result_ptr.as_mut().unwrap(), len) };
-
-    (result, remain)
-}
--- a/pgxn/neon/communicator/src/integrated_cache.rs
+++ b/pgxn/neon/communicator/src/integrated_cache.rs
--- a/pgxn/neon/communicator/src/lib.rs
+++ b/pgxn/neon/communicator/src/lib.rs
@@ -1,29 +1,5 @@
-//! Three main parts:
-//! - async tokio communicator core, which receives requests and processes them.
-//! - Main loop and requests queues, which routes requests from backends to the core
-//! - the per-backend glue code, which submits requests
-
-mod backend_comms;
-
-// mark this 'pub', because these functions are called from C code. Otherwise, the compiler
-// complains about a bunch of structs and enum variants being unused, because it thinkgs
-// the functions that use them are never called. There are some C-callable functions in
-// other modules too, but marking this as pub is currently enough to silence the warnings
-//
-// TODO: perhaps collect *all* the extern "C" functions to one module?
-pub mod backend_interface;
-
-mod file_cache;
-mod init;
-mod integrated_cache;
-mod neon_request;
 mod worker_process;

-mod global_allocator;
-
 /// Name of the Unix Domain Socket that serves the metrics, and other APIs in the
 /// future. This is within the Postgres data directory.
 const NEON_COMMUNICATOR_SOCKET_NAME: &str = "neon-communicator.socket";
-
-// FIXME: get this from postgres headers somehow
-pub const BLCKSZ: usize = 8192;
--- a/pgxn/neon/communicator/src/neon_request.rs
+++ b/pgxn/neon/communicator/src/neon_request.rs
@@ -1,466 +0,0 @@
-// Definitions of some core PostgreSQL datatypes.
-
-/// XLogRecPtr is defined in "access/xlogdefs.h" as:
-///
-/// ```
-/// typedef uint64 XLogRecPtr;
-/// ```
-/// cbindgen:no-export
-pub type XLogRecPtr = u64;
-
-pub type CLsn = XLogRecPtr;
-pub type COid = u32;
-
-// This conveniently matches PG_IOV_MAX
-pub const MAX_GETPAGEV_PAGES: usize = 32;
-
-pub const INVALID_BLOCK_NUMBER: u32 = u32::MAX;
-
-use std::ffi::CStr;
-
-use pageserver_page_api::{self as page_api, SlruKind};
-
-/// Request from a Postgres backend to the communicator process
-#[allow(clippy::large_enum_variant)]
-#[repr(C)]
-#[derive(Copy, Clone, Debug, strum_macros::EnumDiscriminants)]
-#[strum_discriminants(derive(measured::FixedCardinalityLabel))]
-pub enum NeonIORequest {
-    Empty,
-
-    // Read requests. These are C-friendly variants of the corresponding structs in
-    // pageserver_page_api.
-    RelSize(CRelSizeRequest),
-    GetPageV(CGetPageVRequest),
-    ReadSlruSegment(CReadSlruSegmentRequest),
-    PrefetchV(CPrefetchVRequest),
-    DbSize(CDbSizeRequest),
-
-    /// This is like GetPageV, but bypasses the LFC and allows specifiying the
-    /// request LSNs directly. For debugging purposes only.
-    GetPageVUncached(CGetPageVUncachedRequest),
-
-    // Write requests. These are needed to keep the relation size cache and LFC up-to-date.
-    // They are not sent to the pageserver.
-    WritePage(CWritePageRequest),
-    RelExtend(CRelExtendRequest),
-    RelZeroExtend(CRelZeroExtendRequest),
-    RelCreate(CRelCreateRequest),
-    RelTruncate(CRelTruncateRequest),
-    RelUnlink(CRelUnlinkRequest),
-
-    // Other requests
-    UpdateCachedRelSize(CUpdateCachedRelSizeRequest),
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub enum NeonIOResult {
-    Empty,
-    /// InvalidBlockNumber == 0xffffffff means "rel does not exist"
-    RelSize(u32),
-
-    /// the result pages are written to the shared memory addresses given in the request
-    GetPageV,
-    /// The result is written to the file, path to which is provided
-    /// in the request. The [`u64`] value here is the number of blocks.
-    ReadSlruSegment(u64),
-
-    /// A prefetch request returns as soon as the request has been received by the communicator.
-    /// It is processed in the background.
-    PrefetchVLaunched,
-
-    DbSize(u64),
-
-    // FIXME design compact error codes. Can't easily pass a string or other dynamic data.
-    // currently, this is 'errno'
-    Error(i32),
-
-    Aborted,
-
-    /// used for all write requests
-    WriteOK,
-}
-
-impl NeonIORequest {
-    /// All requests include a unique request ID, which can be used to trace the execution
-    /// of a request all the way to the pageservers. The request ID needs to be unique
-    /// within the lifetime of the Postgres instance (but not across servers or across
-    /// restarts of the same server).
-    pub fn request_id(&self) -> u64 {
-        use NeonIORequest::*;
-        match self {
-            Empty => 0,
-            RelSize(req) => req.request_id,
-            GetPageV(req) => req.request_id,
-            GetPageVUncached(req) => req.request_id,
-            ReadSlruSegment(req) => req.request_id,
-            PrefetchV(req) => req.request_id,
-            DbSize(req) => req.request_id,
-            WritePage(req) => req.request_id,
-            RelExtend(req) => req.request_id,
-            RelZeroExtend(req) => req.request_id,
-            RelCreate(req) => req.request_id,
-            RelTruncate(req) => req.request_id,
-            RelUnlink(req) => req.request_id,
-            UpdateCachedRelSize(req) => req.request_id,
-        }
-    }
-}
-
-/// Special quick result to a CGetPageVRequest request, indicating that the
-/// the requested pages are present in the local file cache. The backend can
-/// read the blocks directly from the given LFC blocks.
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CCachedGetPageVResult {
-    pub cache_block_numbers: [u64; MAX_GETPAGEV_PAGES],
-}
-
-/// ShmemBuf represents a buffer in shared memory.
-///
-/// SAFETY: The pointer must point to an area in shared memory. The functions allow you to liberally
-/// get a mutable pointer to the contents; it is the caller's responsibility to ensure that you
-/// don't access a buffer that's you're not allowed to. Inappropriate access to the buffer doesn't
-/// violate Rust's safety semantics, but it will mess up and crash Postgres.
-///
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct ShmemBuf {
-    // Pointer to where the result is written or where to read from. Must point into a buffer in shared memory!
-    pub ptr: *mut u8,
-}
-
-unsafe impl Send for ShmemBuf {}
-unsafe impl Sync for ShmemBuf {}
-
-unsafe impl uring_common::buf::IoBuf for ShmemBuf {
-    fn stable_ptr(&self) -> *const u8 {
-        self.ptr
-    }
-
-    fn bytes_init(&self) -> usize {
-        crate::BLCKSZ
-    }
-
-    fn bytes_total(&self) -> usize {
-        crate::BLCKSZ
-    }
-}
-
-unsafe impl uring_common::buf::IoBufMut for ShmemBuf {
-    fn stable_mut_ptr(&mut self) -> *mut u8 {
-        self.ptr
-    }
-
-    unsafe fn set_init(&mut self, pos: usize) {
-        if pos > crate::BLCKSZ {
-            panic!(
-                "set_init called past end of buffer, pos {}, buffer size {}",
-                pos,
-                crate::BLCKSZ
-            );
-        }
-    }
-}
-
-impl ShmemBuf {
-    pub fn as_mut_ptr(&self) -> *mut u8 {
-        self.ptr
-    }
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelSizeRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub allow_missing: bool,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CGetPageVRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub dest: [ShmemBuf; MAX_GETPAGEV_PAGES],
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CGetPageVUncachedRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-
-    pub request_lsn: CLsn,
-    pub not_modified_since: CLsn,
-
-    // These fields define where the result is written. Must point into a buffer in shared memory!
-    pub dest: [ShmemBuf; MAX_GETPAGEV_PAGES],
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CReadSlruSegmentRequest {
-    pub request_id: u64,
-    pub slru_kind: SlruKind,
-    pub segment_number: u32,
-    pub request_lsn: CLsn,
-    /// Must be a null-terminated C string containing the file path
-    /// where the communicator will write the SLRU segment.
-    pub destination_file_path: ShmemBuf,
-}
-
-impl CReadSlruSegmentRequest {
-    /// Returns the file path where the communicator will write the
-    /// SLRU segment.
-    pub(crate) fn destination_file_path(&self) -> String {
-        unsafe { CStr::from_ptr(self.destination_file_path.as_mut_ptr() as *const _) }
-            .to_string_lossy()
-            .into_owned()
-    }
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CPrefetchVRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u8,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CDbSizeRequest {
-    pub request_id: u64,
-    pub db_oid: COid,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CWritePageRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub lsn: CLsn,
-
-    // `src` defines the new page contents. Must point into a buffer in shared memory!
-    pub src: ShmemBuf,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelExtendRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub lsn: CLsn,
-
-    // `src` defines the new page contents. Must point into a buffer in shared memory!
-    pub src: ShmemBuf,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelZeroExtendRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub block_number: u32,
-    pub nblocks: u32,
-    pub lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelCreateRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelTruncateRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub nblocks: u32,
-    pub lsn: CLsn,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CRelUnlinkRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub lsn: CLsn,
-}
-
-impl CRelSizeRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CGetPageVRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CGetPageVUncachedRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CPrefetchVRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CWritePageRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelExtendRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelZeroExtendRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelCreateRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelTruncateRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-impl CRelUnlinkRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Copy, Clone, Debug)]
-pub struct CUpdateCachedRelSizeRequest {
-    pub request_id: u64,
-    pub spc_oid: COid,
-    pub db_oid: COid,
-    pub rel_number: u32,
-    pub fork_number: u8,
-    pub nblocks: u32,
-    pub lsn: CLsn,
-}
-
-impl CUpdateCachedRelSizeRequest {
-    pub fn reltag(&self) -> page_api::RelTag {
-        page_api::RelTag {
-            spcnode: self.spc_oid,
-            dbnode: self.db_oid,
-            relnode: self.rel_number,
-            forknum: self.fork_number,
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/callbacks.rs
+++ b/pgxn/neon/communicator/src/worker_process/callbacks.rs
@@ -4,13 +4,10 @@
 //!
 //! These are called from the communicator threads! Careful what you do, most Postgres
 //! functions are not safe to call in that context.
-use utils::lsn::Lsn;

 #[cfg(not(test))]
 unsafe extern "C" {
-    pub fn notify_proc_unsafe(procno: std::ffi::c_int);
    pub fn callback_set_my_latch_unsafe();
-    pub fn callback_get_request_lsn_unsafe() -> crate::neon_request::CLsn;
    pub fn callback_get_lfc_metrics_unsafe() -> LfcMetrics;
 }

@@ -19,36 +16,20 @@ unsafe extern "C" {
 // package, but the code coverage build still builds these and tries to link with the
 // external C code.)
 #[cfg(test)]
-unsafe fn notify_proc_unsafe(_procno: std::ffi::c_int) {
-    panic!("not usable in unit tests");
-}
-#[cfg(test)]
 unsafe fn callback_set_my_latch_unsafe() {
    panic!("not usable in unit tests");
 }
 #[cfg(test)]
-unsafe fn callback_get_request_lsn_unsafe() -> crate::neon_request::CLsn {
-    panic!("not usable in unit tests");
-}
-#[cfg(test)]
 unsafe fn callback_get_lfc_metrics_unsafe() -> LfcMetrics {
    panic!("not usable in unit tests");
 }

 // safe wrappers

-pub(super) fn notify_proc(procno: std::ffi::c_int) {
-    unsafe { notify_proc_unsafe(procno) };
-}
-
 pub(super) fn callback_set_my_latch() {
    unsafe { callback_set_my_latch_unsafe() };
 }

-pub(super) fn get_request_lsn() -> Lsn {
-    Lsn(unsafe { callback_get_request_lsn_unsafe() })
-}
-
 pub(super) fn callback_get_lfc_metrics() -> LfcMetrics {
    unsafe { callback_get_lfc_metrics_unsafe() }
 }
--- a/pgxn/neon/communicator/src/worker_process/control_socket.rs
+++ b/pgxn/neon/communicator/src/worker_process/control_socket.rs
@@ -19,105 +19,71 @@ use http::StatusCode;
 use http::header::CONTENT_TYPE;

 use measured::MetricGroup;
-use measured::metric::MetricEncoding;
-use measured::metric::gauge::GaugeState;
-use measured::metric::group::Encoding;
 use measured::text::BufferedTextEncoder;

 use std::io::ErrorKind;
-use std::sync::Arc;

 use tokio::net::UnixListener;

 use crate::NEON_COMMUNICATOR_SOCKET_NAME;
-use crate::worker_process::lfc_metrics::LfcMetricsCollector;
 use crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;

-enum ControlSocketState<'a> {
-    Full(&'a CommunicatorWorkerProcessStruct<'a>),
-    Legacy(LegacyControlSocketState),
-}
+impl CommunicatorWorkerProcessStruct {
+    /// Launch the listener
+    pub(crate) async fn launch_control_socket_listener(
+        &'static self,
+    ) -> Result<(), std::io::Error> {
+        use axum::routing::get;
+        let app = Router::new()
+            .route("/metrics", get(get_metrics))
+            .route("/autoscaling_metrics", get(get_autoscaling_metrics))
+            .route("/debug/panic", get(handle_debug_panic))
+            .with_state(self);

-struct LegacyControlSocketState {
-    pub(crate) lfc_metrics: LfcMetricsCollector,
-}
+        // If the server is restarted, there might be an old socket still
+        // lying around. Remove it first.
+        match std::fs::remove_file(NEON_COMMUNICATOR_SOCKET_NAME) {
+            Ok(()) => {
+                tracing::warn!("removed stale control socket");
+            }
+            Err(e) if e.kind() == ErrorKind::NotFound => {}
+            Err(e) => {
+                tracing::error!("could not remove stale control socket: {e:#}");
+                // Try to proceed anyway. It will likely fail below though.
+            }
+        };
+
+        // Create the unix domain socket and start listening on it
+        let listener = UnixListener::bind(NEON_COMMUNICATOR_SOCKET_NAME)?;
+
+        tokio::spawn(async {
+            tracing::info!("control socket listener spawned");
+            axum::serve(listener, app)
+                .await
+                .expect("axum::serve never returns")
+        });

-impl<T> MetricGroup<T> for LegacyControlSocketState
-where
-    T: Encoding,
-    GaugeState: MetricEncoding<T>,
-{
-    fn collect_group_into(&self, enc: &mut T) -> Result<(), T::Err> {
-        self.lfc_metrics.collect_group_into(enc)?;
        Ok(())
    }
 }

-/// Launch the listener
-pub(crate) async fn launch_listener(
-    worker: Option<&'static CommunicatorWorkerProcessStruct<'static>>,
-) -> Result<(), std::io::Error> {
-    use axum::routing::get;
-
-    let state = match worker {
-        Some(worker) => ControlSocketState::Full(worker),
-        None => ControlSocketState::Legacy(LegacyControlSocketState {
-            lfc_metrics: LfcMetricsCollector,
-        }),
-    };
-
-    let app = Router::new()
-        .route("/metrics", get(get_metrics))
-        .route("/autoscaling_metrics", get(get_autoscaling_metrics))
-        .route("/debug/panic", get(handle_debug_panic))
-        .route("/debug/dump_cache_map", get(dump_cache_map))
-        .with_state(Arc::new(state));
-
-    // If the server is restarted, there might be an old socket still
-    // lying around. Remove it first.
-    match std::fs::remove_file(NEON_COMMUNICATOR_SOCKET_NAME) {
-        Ok(()) => {
-            tracing::warn!("removed stale control socket");
-        }
-        Err(e) if e.kind() == ErrorKind::NotFound => {}
-        Err(e) => {
-            tracing::error!("could not remove stale control socket: {e:#}");
-            // Try to proceed anyway. It will likely fail below though.
-        }
-    };
-
-    // Create the unix domain socket and start listening on it
-    let listener = UnixListener::bind(NEON_COMMUNICATOR_SOCKET_NAME)?;
-
-    tokio::spawn(async {
-        tracing::info!("control socket listener spawned");
-        axum::serve(listener, app)
-            .await
-            .expect("axum::serve never returns")
-    });
-
-    Ok(())
-}
-
 /// Expose all Prometheus metrics.
-async fn get_metrics(State(state): State<Arc<ControlSocketState<'_>>>) -> Response {
-    match state.as_ref() {
-        ControlSocketState::Full(worker) => metrics_to_response(&worker).await,
-        ControlSocketState::Legacy(legacy) => metrics_to_response(&legacy).await,
-    }
+async fn get_metrics(State(state): State<&CommunicatorWorkerProcessStruct>) -> Response {
+    tracing::trace!("/metrics requested");
+    metrics_to_response(&state).await
 }

 /// Expose Prometheus metrics, for use by the autoscaling agent.
 ///
 /// This is a subset of all the metrics.
-async fn get_autoscaling_metrics(State(state): State<Arc<ControlSocketState<'_>>>) -> Response {
-    match state.as_ref() {
-        ControlSocketState::Full(worker) => metrics_to_response(&worker.lfc_metrics).await,
-        ControlSocketState::Legacy(legacy) => metrics_to_response(&legacy.lfc_metrics).await,
-    }
+async fn get_autoscaling_metrics(
+    State(state): State<&CommunicatorWorkerProcessStruct>,
+) -> Response {
+    tracing::trace!("/metrics requested");
+    metrics_to_response(&state.lfc_metrics).await
 }

-async fn handle_debug_panic(State(_state): State<Arc<ControlSocketState<'_>>>) -> Response {
+async fn handle_debug_panic(State(_state): State<&CommunicatorWorkerProcessStruct>) -> Response {
    panic!("test HTTP handler task panic");
 }

@@ -134,23 +100,3 @@ async fn metrics_to_response(metrics: &(dyn MetricGroup<BufferedTextEncoder> + S
        .body(Body::from(enc.finish()))
        .unwrap()
 }
-
-async fn dump_cache_map(State(state): State<Arc<ControlSocketState<'_>>>) -> Response {
-    match state.as_ref() {
-        ControlSocketState::Full(worker) => {
-            let mut buf: Vec<u8> = Vec::new();
-            worker.cache.dump_map(&mut buf);
-
-            Response::builder()
-                .status(StatusCode::OK)
-                .header(CONTENT_TYPE, "application/text")
-                .body(Body::from(buf))
-                .unwrap()
-        }
-        ControlSocketState::Legacy(_) => Response::builder()
-            .status(StatusCode::NOT_FOUND)
-            .header(CONTENT_TYPE, "application/text")
-            .body(Body::from(Vec::new()))
-            .unwrap(),
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/in_progress_ios.rs
+++ b/pgxn/neon/communicator/src/worker_process/in_progress_ios.rs
@@ -1,95 +0,0 @@
-//! Lock table to ensure that only one IO request is in flight for a given
-//! block (or relation or database metadata) at a time
-
-use std::cmp::Eq;
-use std::hash::Hash;
-use std::sync::Arc;
-
-use tokio::sync::{Mutex, OwnedMutexGuard};
-
-use clashmap::ClashMap;
-use clashmap::Entry;
-
-use pageserver_page_api::RelTag;
-
-#[derive(Clone, Eq, Hash, PartialEq)]
-pub enum RequestInProgressKey {
-    Db(u32),
-    Rel(RelTag),
-    Block(RelTag, u32),
-}
-
-type RequestId = u64;
-
-pub type RequestInProgressTable = MutexHashMap<RequestInProgressKey, RequestId>;
-
-// more primitive locking thingie:
-
-pub struct MutexHashMap<K, V>
-where
-    K: Clone + Eq + Hash,
-{
-    lock_table: ClashMap<K, (V, Arc<Mutex<()>>)>,
-}
-
-pub struct MutexHashMapGuard<'a, K, V>
-where
-    K: Clone + Eq + Hash,
-{
-    pub key: K,
-    map: &'a MutexHashMap<K, V>,
-    mutex: Arc<Mutex<()>>,
-    _guard: OwnedMutexGuard<()>,
-}
-
-impl<'a, K, V> Drop for MutexHashMapGuard<'a, K, V>
-where
-    K: Clone + Eq + Hash,
-{
-    fn drop(&mut self) {
-        let (_old_key, old_val) = self.map.lock_table.remove(&self.key).unwrap();
-        assert!(Arc::ptr_eq(&old_val.1, &self.mutex));
-
-        // the guard will be dropped as we return
-    }
-}
-
-impl<K, V> MutexHashMap<K, V>
-where
-    K: Clone + Eq + Hash,
-    V: std::fmt::Display + Copy,
-{
-    pub fn new() -> MutexHashMap<K, V> {
-        MutexHashMap {
-            lock_table: ClashMap::new(),
-        }
-    }
-
-    pub async fn lock<'a>(&'a self, key: K, val: V) -> MutexHashMapGuard<'a, K, V> {
-        let my_mutex = Arc::new(Mutex::new(()));
-        let my_guard = Arc::clone(&my_mutex).lock_owned().await;
-
-        loop {
-            let (request_id, lock) = match self.lock_table.entry(key.clone()) {
-                Entry::Occupied(e) => {
-                    let e = e.get();
-                    (e.0, Arc::clone(&e.1))
-                }
-                Entry::Vacant(e) => {
-                    e.insert((val, Arc::clone(&my_mutex)));
-                    break;
-                }
-            };
-            tracing::info!("waiting for conflicting IO {request_id} to complete");
-            let _ = lock.lock().await;
-            tracing::info!("conflicting IO {request_id} completed");
-        }
-
-        MutexHashMapGuard {
-            key,
-            map: self,
-            mutex: my_mutex,
-            _guard: my_guard,
-        }
-    }
-}
--- a/pgxn/neon/communicator/src/worker_process/main_loop.rs
+++ b/pgxn/neon/communicator/src/worker_process/main_loop.rs
@@ -1,126 +1,34 @@
-use std::collections::HashMap;
-use std::os::fd::AsRawFd;
-use std::os::fd::OwnedFd;
-use std::path::PathBuf;
 use std::str::FromStr as _;

-use crate::backend_comms::NeonIORequestSlot;
-use crate::file_cache::FileCache;
-use crate::global_allocator::MyAllocatorCollector;
-use crate::init::CommunicatorInitStruct;
-use crate::integrated_cache::{CacheResult, IntegratedCacheWriteAccess};
-use crate::neon_request::{CGetPageVRequest, CGetPageVUncachedRequest, CPrefetchVRequest};
-use crate::neon_request::{INVALID_BLOCK_NUMBER, NeonIORequest, NeonIOResult};
-use crate::worker_process::control_socket;
-use crate::worker_process::in_progress_ios::{RequestInProgressKey, RequestInProgressTable};
 use crate::worker_process::lfc_metrics::LfcMetricsCollector;
-use pageserver_client_grpc::{PageserverClient, ShardSpec, ShardStripeSize};
-use pageserver_page_api as page_api;
-
-use tokio::io::AsyncReadExt;
-use tokio_pipe::PipeRead;
-use uring_common::buf::IoBuf;

 use measured::MetricGroup;
 use measured::metric::MetricEncoding;
 use measured::metric::gauge::GaugeState;
 use measured::metric::group::Encoding;
-use measured::{Gauge, GaugeVec};
 use utils::id::{TenantId, TimelineId};

-use super::callbacks::{get_request_lsn, notify_proc};
-
-use tracing::{debug, error, info, info_span, trace};
-
-use utils::lsn::Lsn;
-
-pub struct CommunicatorWorkerProcessStruct<'a> {
-    /// Tokio runtime that the main loop and any other related tasks runs in.
+pub struct CommunicatorWorkerProcessStruct {
    runtime: tokio::runtime::Runtime,

-    /// Client to communicate with the pageserver
-    client: PageserverClient,
-
-    /// Request slots that backends use to send IO requests to the communicator.
-    neon_request_slots: &'a [NeonIORequestSlot],
-
-    /// Notification pipe. Backends use this to notify the communicator that a request is waiting to
-    /// be processed in one of the request slots.
-    submission_pipe_read_fd: OwnedFd,
-
-    /// Locking table for all in-progress IO requests.
-    in_progress_table: RequestInProgressTable,
-
-    /// Local File Cache, relation size tracking, last-written LSN tracking
-    pub(crate) cache: IntegratedCacheWriteAccess<'a>,
-
    /*** Metrics ***/
    pub(crate) lfc_metrics: LfcMetricsCollector,
-
-    request_counters: GaugeVec<RequestTypeLabelGroupSet>,
-
-    getpage_cache_misses_counter: Gauge,
-    getpage_cache_hits_counter: Gauge,
-
-    // For the requests that affect multiple blocks, have separate counters for the # of blocks affected
-    request_nblocks_counters: GaugeVec<RequestTypeLabelGroupSet>,
-
-    #[allow(dead_code)]
-    allocator_metrics: MyAllocatorCollector,
-}
-
-// Define a label group, consisting of 1 or more label values
-#[derive(measured::LabelGroup)]
-#[label(set = RequestTypeLabelGroupSet)]
-struct RequestTypeLabelGroup {
-    request_type: crate::neon_request::NeonIORequestDiscriminants,
-}
-
-impl RequestTypeLabelGroup {
-    fn from_req(req: &NeonIORequest) -> Self {
-        RequestTypeLabelGroup {
-            request_type: req.into(),
-        }
-    }
 }

 /// Launch the communicator process's Rust subsystems
-#[allow(clippy::too_many_arguments)]
-pub(super) fn init_legacy() -> Result<(), String> {
-    let runtime = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .thread_name("communicator thread")
-        .build()
-        .unwrap();
-
-    // Start the listener on the control socket
-    runtime
-        .block_on(control_socket::launch_listener(None))
-        .map_err(|e| e.to_string())?;
-
-    Box::leak(Box::new(runtime));
-
-    Ok(())
-}
-
-/// Launch the communicator process's Rust subsystems
-#[allow(clippy::too_many_arguments)]
 pub(super) fn init(
-    cis: CommunicatorInitStruct,
-    tenant_id: &str,
-    timeline_id: &str,
-    auth_token: Option<&str>,
-    shard_map: HashMap<utils::shard::ShardIndex, String>,
-    stripe_size: Option<ShardStripeSize>,
-    initial_file_cache_size: u64,
-    file_cache_path: Option<PathBuf>,
-) -> Result<&'static CommunicatorWorkerProcessStruct<'static>, String> {
+    tenant_id: Option<&str>,
+    timeline_id: Option<&str>,
+) -> Result<&'static CommunicatorWorkerProcessStruct, String> {
    // The caller validated these already
-    let tenant_id = TenantId::from_str(tenant_id).map_err(|e| format!("invalid tenant ID: {e}"))?;
-    let timeline_id =
-        TimelineId::from_str(timeline_id).map_err(|e| format!("invalid timeline ID: {e}"))?;
-    let shard_spec =
-        ShardSpec::new(shard_map, stripe_size).map_err(|e| format!("invalid shard spec: {e}:"))?;
+    let _tenant_id = tenant_id
+        .map(TenantId::from_str)
+        .transpose()
+        .map_err(|e| format!("invalid tenant ID: {e}"))?;
+    let _timeline_id = timeline_id
+        .map(TimelineId::from_str)
+        .transpose()
+        .map_err(|e| format!("invalid timeline ID: {e}"))?;

    let runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
@@ -128,710 +36,31 @@ pub(super) fn init(
        .build()
        .unwrap();

-    let last_lsn = get_request_lsn();
-
-    let file_cache = if let Some(path) = file_cache_path {
-        Some(FileCache::new(&path, initial_file_cache_size).expect("could not create cache file"))
-    } else {
-        // FIXME: temporarily for testing, use LFC even if disabled
-        Some(
-            FileCache::new(&PathBuf::from("new_filecache"), 1000)
-                .expect("could not create cache file"),
-        )
-    };
-
-    // Initialize subsystems
-    let cache = cis
-        .integrated_cache_init_struct
-        .worker_process_init(last_lsn, file_cache);
-
-    debug!("Initialised integrated cache: {cache:?}");
-
-    let client = {
-        let _guard = runtime.enter();
-        PageserverClient::new(
-            tenant_id,
-            timeline_id,
-            shard_spec,
-            auth_token.map(|s| s.to_string()),
-            None,
-        )
-        .expect("could not create client")
-    };
-
    let worker_struct = CommunicatorWorkerProcessStruct {
        // Note: it's important to not drop the runtime, or all the tasks are dropped
        // too. Including it in the returned struct is one way to keep it around.
        runtime,
-        neon_request_slots: cis.neon_request_slots,
-        client,
-        cache,
-        submission_pipe_read_fd: cis.submission_pipe_read_fd,
-        in_progress_table: RequestInProgressTable::new(),

        // metrics
        lfc_metrics: LfcMetricsCollector,
-
-        request_counters: GaugeVec::new(),
-
-        getpage_cache_misses_counter: Gauge::new(),
-        getpage_cache_hits_counter: Gauge::new(),
-
-        request_nblocks_counters: GaugeVec::new(),
-
-        allocator_metrics: MyAllocatorCollector::new(),
    };
-
    let worker_struct = Box::leak(Box::new(worker_struct));

-    let main_loop_handle = worker_struct.runtime.spawn(worker_struct.run());
-    worker_struct.runtime.spawn(async {
-        let err = main_loop_handle.await.unwrap_err();
-        error!("error: {err:?}");
-    });
-
    // Start the listener on the control socket
    worker_struct
        .runtime
-        .block_on(control_socket::launch_listener(Some(worker_struct)))
+        .block_on(worker_struct.launch_control_socket_listener())
        .map_err(|e| e.to_string())?;

    Ok(worker_struct)
 }

-impl<'t> CommunicatorWorkerProcessStruct<'t> {
-    /// Update the configuration
-    pub(super) fn update_shard_map(
-        &self,
-        new_shard_map: HashMap<utils::shard::ShardIndex, String>,
-        stripe_size: Option<ShardStripeSize>,
-    ) {
-        let shard_spec = ShardSpec::new(new_shard_map, stripe_size).expect("invalid shard spec");
-
-        {
-            let _in_runtime = self.runtime.enter();
-            if let Err(err) = self.client.update_shards(shard_spec) {
-                tracing::error!("could not update shard map: {err:?}");
-            }
-        }
-    }
-
-    /// Main loop of the worker process. Receive requests from the backends and process them.
-    pub(super) async fn run(&'static self) {
-        let mut idxbuf: [u8; 4] = [0; 4];
-
-        let mut submission_pipe_read =
-            PipeRead::try_from(self.submission_pipe_read_fd.as_raw_fd()).expect("invalid pipe fd");
-
-        loop {
-            // Wait for a backend to ring the doorbell
-            match submission_pipe_read.read(&mut idxbuf).await {
-                Ok(4) => {}
-                Ok(nbytes) => panic!("short read ({nbytes} bytes) on communicator pipe"),
-                Err(e) => panic!("error reading from communicator pipe: {e}"),
-            }
-            let slot_idx = u32::from_ne_bytes(idxbuf) as usize;
-
-            // Read the IO request from the slot indicated in the wakeup
-            let Some(slot) = self.neon_request_slots[slot_idx].start_processing_request() else {
-                // This currently should not happen. But if we had multiple threads picking up
-                // requests, and without waiting for the notifications, it could.
-                panic!("no request in slot");
-            };
-
-            // Ok, we have ownership of this request now. We must process it now, there's no going
-            // back.
-            //
-            // Spawn a separate task for every request. That's a little excessive for requests that
-            // can be quickly satisfied from the cache, but we expect that to be rare, because the
-            // requesting backend would have already checked the cache.
-            tokio::spawn(async move {
-                use tracing::Instrument;
-
-                let request_id = slot.get_request().request_id();
-                let owner_procno = slot.get_owner_procno();
-
-                let span = info_span!(
-                    "processing",
-                    request_id = request_id,
-                    slot_idx = slot_idx,
-                    procno = owner_procno,
-                );
-                async {
-                    // FIXME: as a temporary hack, abort the request if we don't get a response
-                    // promptly.
-                    //
-                    // Lots of regression tests are getting stuck and failing at the moment,
-                    // this makes them fail a little faster, which it faster to iterate.
-                    // This needs to be removed once more regression tests are passing.
-                    // See also similar hack in the backend code, in wait_request_completion()
-                    let result = tokio::time::timeout(
-                        tokio::time::Duration::from_secs(60),
-                        self.handle_request(slot.get_request()),
-                    )
-                    .await
-                    .unwrap_or_else(|_elapsed| {
-                        info!("request {request_id} timed out");
-                        NeonIOResult::Error(libc::ETIMEDOUT)
-                    });
-                    trace!("request {request_id} at slot {slot_idx} completed");
-
-                    // Ok, we have completed the IO. Mark the request as completed. After that,
-                    // we no longer have ownership of the slot, and must not modify it.
-                    slot.completed(result);
-
-                    // Notify the backend about the completion. (Note that the backend might see
-                    // the completed status even before this; this is just a wakeup)
-                    notify_proc(owner_procno);
-                }
-                .instrument(span)
-                .await
-            });
-        }
-    }
-
-    /// Compute the 'request_lsn' to use for a pageserver request
-    fn request_lsns(&self, not_modified_since_lsn: Lsn) -> page_api::ReadLsn {
-        let mut request_lsn = get_request_lsn();
-
-        // Is it possible that the last-written LSN is ahead of last flush LSN? Generally not, we
-        // shouldn't evict a page from the buffer cache before all its modifications have been
-        // safely flushed. That's the "WAL before data" rule. However, there are a few exceptions:
-        //
-        // - when creation an index: _bt_blwritepage logs the full page without flushing WAL before
-        // smgrextend (files are fsynced before build ends).
-        //
-        // XXX: If we make a request LSN greater than the current WAL flush LSN, the pageserver would
-        // block waiting for the WAL arrive, until we flush it and it propagates through the
-        // safekeepers to the pageserver. If there's nothing that forces the WAL to be flushed,
-        // the pageserver would get stuck waiting forever. To avoid that, all the write-
-        // functions in communicator_new.c call XLogSetAsyncXactLSN(). That nudges the WAL writer to
-        // perform the flush relatively soon.
-        //
-        // It would perhaps be nicer to do the WAL flush here, but it's tricky to call back into
-        // Postgres code to do that from here. That's why we rely on communicator_new.c to do the
-        // calls "pre-emptively".
-        //
-        // FIXME: Because of the above, it can still happen that the flush LSN is ahead of
-        // not_modified_since, if the WAL writer hasn't done the flush yet. It would be nice to know
-        // if there are other cases like that that we have mised, but unfortunately we cannot turn
-        // this into an assertion because of that legit case.
-        //
-        // See also the old logic in neon_get_request_lsns() C function
-        if not_modified_since_lsn > request_lsn {
-            tracing::info!(
-                "not_modified_since_lsn {} is ahead of last flushed LSN {}",
-                not_modified_since_lsn,
-                request_lsn
-            );
-            request_lsn = not_modified_since_lsn;
-        }
-
-        page_api::ReadLsn {
-            request_lsn,
-            not_modified_since_lsn: Some(not_modified_since_lsn),
-        }
-    }
-
-    /// Handle one IO request
-    async fn handle_request(&'static self, request: &'_ NeonIORequest) -> NeonIOResult {
-        self.request_counters
-            .inc(RequestTypeLabelGroup::from_req(request));
-        match request {
-            NeonIORequest::Empty => {
-                error!("unexpected Empty IO request");
-                NeonIOResult::Error(0)
-            }
-            NeonIORequest::RelSize(req) => {
-                let rel = req.reltag();
-
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Rel(rel), req.request_id)
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_rel_size(&rel) {
-                    CacheResult::Found(nblocks) => {
-                        tracing::trace!("found relsize for {:?} in cache: {}", rel, nblocks);
-                        return NeonIOResult::RelSize(nblocks);
-                    }
-                    // XXX: we don't cache negative entries, so if there's no entry in the cache, it could mean
-                    // that the relation doesn't exist or that we don't have it cached.
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                let read_lsn = self.request_lsns(not_modified_since);
-                match self
-                    .client
-                    .get_rel_size(page_api::GetRelSizeRequest {
-                        read_lsn,
-                        rel,
-                        allow_missing: req.allow_missing,
-                    })
-                    .await
-                {
-                    Ok(Some(nblocks)) => {
-                        // update the cache
-                        tracing::trace!(
-                            "updated relsize for {:?} in cache: {}, lsn {}",
-                            rel,
-                            nblocks,
-                            read_lsn
-                        );
-                        self.cache
-                            .remember_rel_size(&rel, nblocks, not_modified_since);
-
-                        NeonIOResult::RelSize(nblocks)
-                    }
-                    Ok(None) => {
-                        // TODO: cache negative entry?
-                        NeonIOResult::RelSize(INVALID_BLOCK_NUMBER)
-                    }
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-            NeonIORequest::GetPageV(req) => match self.handle_get_pagev_request(req).await {
-                Ok(()) => NeonIOResult::GetPageV,
-                Err(errno) => NeonIOResult::Error(errno),
-            },
-            NeonIORequest::GetPageVUncached(req) => {
-                match self.handle_get_pagev_uncached_request(req).await {
-                    Ok(()) => NeonIOResult::GetPageV,
-                    Err(errno) => NeonIOResult::Error(errno),
-                }
-            }
-            NeonIORequest::ReadSlruSegment(req) => {
-                let lsn = Lsn(req.request_lsn);
-                let file_path = req.destination_file_path();
-
-                match self
-                    .client
-                    .get_slru_segment(page_api::GetSlruSegmentRequest {
-                        read_lsn: self.request_lsns(lsn),
-                        kind: req.slru_kind,
-                        segno: req.segment_number,
-                    })
-                    .await
-                {
-                    Ok(slru_bytes) => {
-                        if let Err(e) = tokio::fs::write(&file_path, &slru_bytes).await {
-                            error!("could not write slru segment to file {file_path}: {e}");
-                            return NeonIOResult::Error(e.raw_os_error().unwrap_or(libc::EIO));
-                        }
-
-                        let blocks_count = slru_bytes.len() / crate::BLCKSZ;
-
-                        NeonIOResult::ReadSlruSegment(blocks_count as _)
-                    }
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-            NeonIORequest::PrefetchV(req) => {
-                self.request_nblocks_counters
-                    .inc_by(RequestTypeLabelGroup::from_req(request), req.nblocks as i64);
-                let req = *req;
-                tokio::spawn(async move { self.handle_prefetchv_request(&req).await });
-                NeonIOResult::PrefetchVLaunched
-            }
-            NeonIORequest::DbSize(req) => {
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(RequestInProgressKey::Db(req.db_oid), req.request_id)
-                    .await;
-
-                // Check the cache first
-                let not_modified_since = match self.cache.get_db_size(req.db_oid) {
-                    CacheResult::Found(db_size) => {
-                        // get_page already copied the block content to the destination
-                        return NeonIOResult::DbSize(db_size);
-                    }
-                    CacheResult::NotFound(lsn) => lsn,
-                };
-
-                match self
-                    .client
-                    .get_db_size(page_api::GetDbSizeRequest {
-                        read_lsn: self.request_lsns(not_modified_since),
-                        db_oid: req.db_oid,
-                    })
-                    .await
-                {
-                    Ok(db_size) => NeonIOResult::DbSize(db_size),
-                    Err(err) => {
-                        info!("tonic error: {err:?}");
-                        NeonIOResult::Error(0)
-                    }
-                }
-            }
-
-            // Write requests
-            NeonIORequest::WritePage(req) => {
-                let rel = req.reltag();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(
-                        RequestInProgressKey::Block(rel, req.block_number),
-                        req.request_id,
-                    )
-                    .await;
-
-                // We must at least update the last-written LSN on the page, but also store the page
-                // image in the LFC while we still have it
-                self.cache
-                    .remember_page(&rel, req.block_number, req.src, Lsn(req.lsn), true)
-                    .await;
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelExtend(req) => {
-                let rel = req.reltag();
-                let _in_progress_guard = self
-                    .in_progress_table
-                    .lock(
-                        RequestInProgressKey::Block(rel, req.block_number),
-                        req.request_id,
-                    )
-                    .await;
-
-                // We must at least update the last-written LSN on the page and the relation size,
-                // but also store the page image in the LFC while we still have it
-                self.cache
-                    .remember_page(&rel, req.block_number, req.src, Lsn(req.lsn), true)
-                    .await;
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.block_number + 1, Lsn(req.lsn));
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelZeroExtend(req) => {
-                self.request_nblocks_counters
-                    .inc_by(RequestTypeLabelGroup::from_req(request), req.nblocks as i64);
-
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                // TODO: We could put the empty pages to the cache. Maybe have
-                // a marker on the block entries for all-zero pages, instead of
-                // actually storing the empty pages.
-                self.cache.remember_rel_size(
-                    &req.reltag(),
-                    req.block_number + req.nblocks,
-                    Lsn(req.lsn),
-                );
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelCreate(req) => {
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.remember_rel_size(&req.reltag(), 0, Lsn(req.lsn));
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelTruncate(req) => {
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.nblocks, Lsn(req.lsn));
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::RelUnlink(req) => {
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache.forget_rel(&req.reltag(), None, Lsn(req.lsn));
-                NeonIOResult::WriteOK
-            }
-            NeonIORequest::UpdateCachedRelSize(req) => {
-                // TODO: need to grab an io-in-progress lock for this? I guess not
-                self.cache
-                    .remember_rel_size(&req.reltag(), req.nblocks, Lsn(req.lsn));
-                NeonIOResult::WriteOK
-            }
-        }
-    }
-
-    /// Subroutine to handle a GetPageV request, since it's a little more complicated than
-    /// others.
-    async fn handle_get_pagev_request(&'t self, req: &CGetPageVRequest) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Check the cache first
-        //
-        // Note: Because the backends perform a direct lookup in the cache before sending
-        // the request to the communicator process, we expect the pages to almost never
-        // be already in cache. It could happen if:
-        // 1. two backends try to read the same page at the same time, but that should never
-        //    happen because there's higher level locking in the Postgres buffer manager, or
-        // 2. a prefetch request finished at the same time as a backend requested the
-        //    page. That's much more likely.
-        let mut cache_misses = Vec::with_capacity(req.nblocks as usize);
-        for i in 0..req.nblocks {
-            let blkno = req.block_number + i as u32;
-
-            // note: this is deadlock-safe even though we hold multiple locks at the same time,
-            // because they're always acquired in the same order.
-            let in_progress_guard = self
-                .in_progress_table
-                .lock(RequestInProgressKey::Block(rel, blkno), req.request_id)
-                .await;
-
-            let dest = req.dest[i as usize];
-            let not_modified_since = match self.cache.get_page(&rel, blkno, dest).await {
-                Ok(CacheResult::Found(_)) => {
-                    // get_page already copied the block content to the destination
-                    trace!("found blk {} in rel {:?} in LFC", blkno, rel);
-                    continue;
-                }
-                Ok(CacheResult::NotFound(lsn)) => lsn,
-                Err(_io_error) => return Err(-1), // FIXME errno?
-            };
-            cache_misses.push((blkno, not_modified_since, dest, in_progress_guard));
-        }
-        self.getpage_cache_misses_counter
-            .inc_by(cache_misses.len() as i64);
-        self.getpage_cache_hits_counter
-            .inc_by(req.nblocks as i64 - cache_misses.len() as i64);
-
-        if cache_misses.is_empty() {
-            return Ok(());
-        }
-        let not_modified_since = cache_misses
-            .iter()
-            .map(|(_blkno, lsn, _dest, _guard)| *lsn)
-            .max()
-            .unwrap();
-
-        // Construct a pageserver request for the cache misses
-        let block_numbers: Vec<u32> = cache_misses
-            .iter()
-            .map(|(blkno, _lsn, _dest, _guard)| *blkno)
-            .collect();
-        let read_lsn = self.request_lsns(not_modified_since);
-        trace!(
-            "sending getpage request for blocks {:?} in rel {:?} lsns {}",
-            block_numbers, rel, read_lsn
-        );
-        match self
-            .client
-            .get_page(page_api::GetPageRequest {
-                request_id: req.request_id.into(),
-                request_class: page_api::GetPageClass::Normal,
-                read_lsn,
-                rel,
-                block_numbers: block_numbers.clone(),
-            })
-            .await
-        {
-            Ok(resp) => {
-                // Write the received page images directly to the shared memory location
-                // that the backend requested.
-                if resp.pages.len() != block_numbers.len() {
-                    error!(
-                        "received unexpected response with {} page images from pageserver for a request for {} pages",
-                        resp.pages.len(),
-                        block_numbers.len(),
-                    );
-                    return Err(-1);
-                }
-
-                trace!(
-                    "received getpage response for blocks {:?} in rel {:?} lsns {}",
-                    block_numbers, rel, read_lsn
-                );
-
-                for (page, (blkno, _lsn, dest, _guard)) in resp.pages.into_iter().zip(cache_misses)
-                {
-                    let src: &[u8] = page.image.as_ref();
-                    let len = std::cmp::min(src.len(), dest.bytes_total());
-                    unsafe {
-                        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr(), len);
-                    };
-
-                    // Also store it in the LFC while we have it
-                    self.cache
-                        .remember_page(
-                            &rel,
-                            blkno,
-                            page.image,
-                            read_lsn.not_modified_since_lsn.unwrap(),
-                            false,
-                        )
-                        .await;
-                }
-            }
-            Err(err) => {
-                info!("tonic error: {err:?}");
-                return Err(-1);
-            }
-        }
-        Ok(())
-    }
-
-    /// Subroutine to handle an GetPageVUncached request.
-    ///
-    /// Note: this bypasses the cache, in-progress IO locking, and all other side-effects.
-    /// This request type is only used in tests.
-    async fn handle_get_pagev_uncached_request(
-        &'t self,
-        req: &CGetPageVUncachedRequest,
-    ) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Construct a pageserver request
-        let block_numbers: Vec<u32> =
-            (req.block_number..(req.block_number + (req.nblocks as u32))).collect();
-        let read_lsn = page_api::ReadLsn {
-            request_lsn: Lsn(req.request_lsn),
-            not_modified_since_lsn: Some(Lsn(req.not_modified_since)),
-        };
-        trace!(
-            "sending (uncached) getpage request for blocks {:?} in rel {:?} lsns {}",
-            block_numbers, rel, read_lsn
-        );
-        match self
-            .client
-            .get_page(page_api::GetPageRequest {
-                request_id: req.request_id.into(),
-                request_class: page_api::GetPageClass::Normal,
-                read_lsn,
-                rel,
-                block_numbers: block_numbers.clone(),
-            })
-            .await
-        {
-            Ok(resp) => {
-                // Write the received page images directly to the shared memory location
-                // that the backend requested.
-                if resp.pages.len() != block_numbers.len() {
-                    error!(
-                        "received unexpected response with {} page images from pageserver for a request for {} pages",
-                        resp.pages.len(),
-                        block_numbers.len(),
-                    );
-                    return Err(-1);
-                }
-
-                trace!(
-                    "received getpage response for blocks {:?} in rel {:?} lsns {}",
-                    block_numbers, rel, read_lsn
-                );
-
-                for (page, dest) in resp.pages.into_iter().zip(req.dest) {
-                    let src: &[u8] = page.image.as_ref();
-                    let len = std::cmp::min(src.len(), dest.bytes_total());
-                    unsafe {
-                        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr(), len);
-                    };
-                }
-            }
-            Err(err) => {
-                info!("tonic error: {err:?}");
-                return Err(-1);
-            }
-        }
-        Ok(())
-    }
-
-    /// Subroutine to handle a PrefetchV request, since it's a little more complicated than
-    /// others.
-    ///
-    /// This is very similar to a GetPageV request, but the results are only stored in the cache.
-    async fn handle_prefetchv_request(&'static self, req: &CPrefetchVRequest) -> Result<(), i32> {
-        let rel = req.reltag();
-
-        // Check the cache first
-        let mut cache_misses = Vec::with_capacity(req.nblocks as usize);
-        for i in 0..req.nblocks {
-            let blkno = req.block_number + i as u32;
-
-            // note: this is deadlock-safe even though we hold multiple locks at the same time,
-            // because they're always acquired in the same order.
-            let in_progress_guard = self
-                .in_progress_table
-                .lock(RequestInProgressKey::Block(rel, blkno), req.request_id)
-                .await;
-
-            let not_modified_since = match self.cache.page_is_cached(&rel, blkno).await {
-                Ok(CacheResult::Found(_)) => {
-                    trace!("found blk {} in rel {:?} in LFC", blkno, rel);
-                    continue;
-                }
-                Ok(CacheResult::NotFound(lsn)) => lsn,
-                Err(_io_error) => return Err(-1), // FIXME errno?
-            };
-            cache_misses.push((blkno, not_modified_since, in_progress_guard));
-        }
-        if cache_misses.is_empty() {
-            return Ok(());
-        }
-        let not_modified_since = cache_misses
-            .iter()
-            .map(|(_blkno, lsn, _guard)| *lsn)
-            .max()
-            .unwrap();
-        let block_numbers: Vec<u32> = cache_misses
-            .iter()
-            .map(|(blkno, _lsn, _guard)| *blkno)
-            .collect();
-
-        // TODO: spawn separate tasks for these. Use the integrated cache to keep track of the
-        // in-flight requests
-
-        match self
-            .client
-            .get_page(page_api::GetPageRequest {
-                request_id: req.request_id.into(),
-                request_class: page_api::GetPageClass::Prefetch,
-                read_lsn: self.request_lsns(not_modified_since),
-                rel,
-                block_numbers: block_numbers.clone(),
-            })
-            .await
-        {
-            Ok(resp) => {
-                trace!(
-                    "prefetch completed, remembering blocks {:?} in rel {:?} in LFC",
-                    block_numbers, rel
-                );
-                if resp.pages.len() != block_numbers.len() {
-                    error!(
-                        "received unexpected response with {} page images from pageserver for a request for {} pages",
-                        resp.pages.len(),
-                        block_numbers.len(),
-                    );
-                    return Err(-1);
-                }
-
-                for (page, (blkno, _lsn, _guard)) in resp.pages.into_iter().zip(cache_misses) {
-                    self.cache
-                        .remember_page(&rel, blkno, page.image, not_modified_since, false)
-                        .await;
-                }
-            }
-            Err(err) => {
-                info!("tonic error: {err:?}");
-                return Err(-1);
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<T> MetricGroup<T> for CommunicatorWorkerProcessStruct<'_>
+impl<T> MetricGroup<T> for CommunicatorWorkerProcessStruct
 where
    T: Encoding,
    GaugeState: MetricEncoding<T>,
 {
    fn collect_group_into(&self, enc: &mut T) -> Result<(), T::Err> {
-        use measured::metric::MetricFamilyEncoding;
-        use measured::metric::name::MetricName;
-
-        self.lfc_metrics.collect_group_into(enc)?;
-        self.request_counters
-            .collect_family_into(MetricName::from_str("request_counters"), enc)?;
-        self.request_nblocks_counters
-            .collect_family_into(MetricName::from_str("request_nblocks_counters"), enc)?;
-
-        // FIXME: allocator metrics
-
-        Ok(())
+        self.lfc_metrics.collect_group_into(enc)
    }
 }
--- a/pgxn/neon/communicator/src/worker_process/mod.rs
+++ b/pgxn/neon/communicator/src/worker_process/mod.rs
@@ -4,9 +4,9 @@
 //! - launch the main loop,
 //! - receive IO requests from backends and process them,
 //! - write results back to backends.
+
 mod callbacks;
 mod control_socket;
-mod in_progress_ios;
 mod lfc_metrics;
 mod logging;
 mod main_loop;
--- a/pgxn/neon/communicator/src/worker_process/worker_interface.rs
+++ b/pgxn/neon/communicator/src/worker_process/worker_interface.rs
@@ -1,21 +1,14 @@
 //! Functions called from the C code in the worker process

-use std::collections::HashMap;
 use std::ffi::{CStr, CString, c_char};
-use std::path::PathBuf;

-use crate::init::CommunicatorInitStruct;
 use crate::worker_process::main_loop;
 use crate::worker_process::main_loop::CommunicatorWorkerProcessStruct;

-use pageserver_client_grpc::ShardStripeSize;
-
 /// Launch the communicator's tokio tasks, which do most of the work.
 ///
 /// The caller has initialized the process as a regular PostgreSQL background worker
-/// process. The shared memory segment used to communicate with the backends has been
-/// allocated and initialized earlier, at postmaster startup, in
-/// rcommunicator_shmem_init().
+/// process.
 ///
 /// Inputs:
 ///   `tenant_id` and `timeline_id` can be NULL, if we're been launched in "non-Neon" mode,
@@ -30,63 +23,27 @@ use pageserver_client_grpc::ShardStripeSize;
 /// This is called only once in the process, so the returned struct, and error message in
 /// case of failure, are simply leaked.
 #[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_process_launch(
-    cis: Box<CommunicatorInitStruct>,
+pub extern "C" fn communicator_worker_launch(
    tenant_id: *const c_char,
    timeline_id: *const c_char,
-    auth_token: *const c_char,
-    shard_map: *mut *mut c_char,
-    nshards: u32,
-    stripe_size: u32,
-    file_cache_path: *const c_char,
-    initial_file_cache_size: u64,
    error_p: *mut *const c_char,
-) -> Option<&'static CommunicatorWorkerProcessStruct<'static>> {
-    tracing::warn!("starting threads in rust code");
+) -> Option<&'static CommunicatorWorkerProcessStruct> {
    // Convert the arguments into more convenient Rust types
-    let tenant_id = {
-        let cstr = unsafe { CStr::from_ptr(tenant_id) };
-        cstr.to_str().expect("assume UTF-8")
-    };
-    let timeline_id = {
-        let cstr = unsafe { CStr::from_ptr(timeline_id) };
-        cstr.to_str().expect("assume UTF-8")
-    };
-    let auth_token = if auth_token.is_null() {
+    let tenant_id = if tenant_id.is_null() {
        None
    } else {
-        let cstr = unsafe { CStr::from_ptr(auth_token) };
+        let cstr = unsafe { CStr::from_ptr(tenant_id) };
        Some(cstr.to_str().expect("assume UTF-8"))
    };
-    let file_cache_path = {
-        if file_cache_path.is_null() {
-            None
-        } else {
-            let c_str = unsafe { CStr::from_ptr(file_cache_path) };
-            Some(PathBuf::from(c_str.to_str().unwrap()))
-        }
-    };
-    let shard_map = shard_map_to_hash(nshards, shard_map);
-    // FIXME: distinguish between unsharded, and sharded with 1 shard
-    // Also, we might go from unsharded to sharded while the system
-    // is running.
-    let stripe_size = if stripe_size > 0 && nshards > 1 {
-        Some(ShardStripeSize(stripe_size))
-    } else {
+    let timeline_id = if timeline_id.is_null() {
        None
+    } else {
+        let cstr = unsafe { CStr::from_ptr(timeline_id) };
+        Some(cstr.to_str().expect("assume UTF-8"))
    };

    // The `init` function does all the work.
-    let result = main_loop::init(
-        *cis,
-        tenant_id,
-        timeline_id,
-        auth_token,
-        shard_map,
-        stripe_size,
-        initial_file_cache_size,
-        file_cache_path,
-    );
+    let result = main_loop::init(tenant_id, timeline_id);

    // On failure, return the error message to the C caller in *error_p.
    match result {
@@ -101,66 +58,3 @@ pub extern "C" fn communicator_worker_process_launch(
        }
    }
 }
-
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_process_launch_legacy(error_p: *mut *const c_char) -> bool {
-    // The `init` function does all the work.
-    let result = main_loop::init_legacy();
-
-    // On failure, return the error message to the C caller in *error_p.
-    match result {
-        Ok(()) => true,
-        Err(errmsg) => {
-            let errmsg = CString::new(errmsg).expect("no nuls within error message");
-            let errmsg = Box::leak(errmsg.into_boxed_c_str());
-            let p: *const c_char = errmsg.as_ptr();
-
-            unsafe { *error_p = p };
-            false
-        }
-    }
-}
-
-/// Convert the "shard map" from an array of C strings, indexed by shard no to a rust HashMap
-fn shard_map_to_hash(
-    nshards: u32,
-    shard_map: *mut *mut c_char,
-) -> HashMap<utils::shard::ShardIndex, String> {
-    use utils::shard::*;
-
-    assert!(nshards <= u8::MAX as u32);
-
-    let mut result: HashMap<ShardIndex, String> = HashMap::new();
-    let mut p = shard_map;
-
-    for i in 0..nshards {
-        let c_str = unsafe { CStr::from_ptr(*p) };
-
-        p = unsafe { p.add(1) };
-
-        let s = c_str.to_str().unwrap();
-        let k = if nshards > 1 {
-            ShardIndex::new(ShardNumber(i as u8), ShardCount(nshards as u8))
-        } else {
-            ShardIndex::unsharded()
-        };
-        result.insert(k, s.into());
-    }
-    result
-}
-
-/// Inform the rust code about a configuration change
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_worker_config_reload(
-    proc_handle: &'static CommunicatorWorkerProcessStruct<'static>,
-    file_cache_size: u64,
-    shard_map: *mut *mut c_char,
-    nshards: u32,
-    stripe_size: u32,
-) {
-    proc_handle.cache.resize_file_cache(file_cache_size as u32);
-
-    let shard_map = shard_map_to_hash(nshards, shard_map);
-    let stripe_size = (nshards > 1).then_some(ShardStripeSize(stripe_size));
-    proc_handle.update_shard_map(shard_map, stripe_size);
-}
--- a/pgxn/neon/communicator_new.c
+++ b/pgxn/neon/communicator_new.c
--- a/pgxn/neon/communicator_new.h
+++ b/pgxn/neon/communicator_new.h
@@ -1,69 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * communicator_new.h
- *	  new implementation
- *
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *-------------------------------------------------------------------------
- */
-#ifndef COMMUNICATOR_NEW_H
-#define COMMUNICATOR_NEW_H
-
-#include "storage/buf_internals.h"
-
-#include "lfc_prewarm.h"
-#include "neon.h"
-#include "neon_pgversioncompat.h"
-#include "pagestore_client.h"
-
-/* initialization at postmaster startup */
-extern void CommunicatorNewShmemRequest(void);
-extern void CommunicatorNewShmemInit(void);
-
-/* initialization at backend startup */
-extern void communicator_new_init(void);
-
-/* Read requests */
-extern bool communicator_new_rel_exists(NRelFileInfo rinfo, ForkNumber forkNum);
-extern BlockNumber communicator_new_rel_nblocks(NRelFileInfo rinfo, ForkNumber forknum);
-extern int64 communicator_new_dbsize(Oid dbNode);
-extern void communicator_new_readv(NRelFileInfo rinfo, ForkNumber forkNum,
-								   BlockNumber base_blockno,
-								   void **buffers, BlockNumber nblocks);
-extern void communicator_new_read_at_lsn_uncached(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-												  void *buffer, XLogRecPtr request_lsn, XLogRecPtr not_modified_since);
-extern void communicator_new_prefetch_register_bufferv(NRelFileInfo rinfo, ForkNumber forkNum,
-													   BlockNumber blockno,
-													   BlockNumber nblocks);
-extern bool communicator_new_update_lwlsn_for_block_if_not_cached(NRelFileInfo rinfo, ForkNumber forkNum,
-																  BlockNumber blockno, XLogRecPtr lsn);
-extern int	communicator_new_read_slru_segment(
-											   SlruKind kind,
-											   uint32_t segno,
-											   neon_request_lsns * request_lsns,
-											   const char *path
-);
-
-/* Write requests, to keep the caches up-to-date */
-extern void communicator_new_write_page(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-										const void *buffer, XLogRecPtr lsn);
-extern void communicator_new_rel_extend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blockno,
-										const void *buffer, XLogRecPtr lsn);
-extern void communicator_new_rel_zeroextend(NRelFileInfo rinfo, ForkNumber forkNum,
-											BlockNumber blockno, BlockNumber nblocks,
-											XLogRecPtr lsn);
-extern void communicator_new_rel_create(NRelFileInfo rinfo, ForkNumber forkNum, XLogRecPtr lsn);
-extern void communicator_new_rel_truncate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks, XLogRecPtr lsn);
-extern void communicator_new_rel_unlink(NRelFileInfo rinfo, ForkNumber forkNum, XLogRecPtr lsn);
-extern void communicator_new_update_cached_rel_size(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks, XLogRecPtr lsn);
-
-/* other functions */
-extern int32 communicator_new_approximate_working_set_size_seconds(time_t duration, bool reset);
-extern struct LfcMetrics communicator_new_get_lfc_metrics_unsafe(void);
-extern FileCacheState *communicator_new_get_lfc_state(size_t max_entries);
-extern struct LfcStatsEntry *communicator_new_lfc_get_stats(size_t *num_entries);
-
-#endif							/* COMMUNICATOR_NEW_H */
--- a/pgxn/neon/communicator_process.c
+++ b/pgxn/neon/communicator_process.c
@@ -18,9 +18,6 @@
 #include <unistd.h>

 #include "miscadmin.h"
-#if PG_VERSION_NUM >= 150000
-#include "access/xlogrecovery.h"
-#endif
 #include "postmaster/bgworker.h"
 #include "postmaster/interrupt.h"
 #include "postmaster/postmaster.h"
@@ -32,18 +29,14 @@
 #include "tcop/tcopprot.h"
 #include "utils/timestamp.h"

-#include "communicator_new.h"
 #include "communicator_process.h"
 #include "file_cache.h"
 #include "neon.h"
 #include "neon_perf_counters.h"
-#include "pagestore_client.h"

 /* the rust bindings, generated by cbindgen */
 #include "communicator/communicator_bindings.h"

-struct CommunicatorInitStruct *cis;
-
 static void pump_logging(struct LoggingReceiver *logging);
 PGDLLEXPORT void communicator_new_bgworker_main(Datum main_arg);

@@ -77,13 +70,9 @@ pg_init_communicator_process(void)
 void
 communicator_new_bgworker_main(Datum main_arg)
 {
-	char	  **connstrings;
-	ShardMap	shard_map;
-	uint64		file_cache_size;
 	struct LoggingReceiver *logging;
 	const char *errmsg = NULL;
 	const struct CommunicatorWorkerProcessStruct *proc_handle;
-	bool		success;

 	/*
 	 * Pretend that this process is a WAL sender. That affects the shutdown
@@ -119,42 +108,12 @@ communicator_new_bgworker_main(Datum main_arg)

 	logging = communicator_worker_configure_logging();

-	if (cis != NULL)
-	{
-		/* lfc_size_limit is in MBs */
-		file_cache_size = lfc_size_limit * (1024 * 1024 / BLCKSZ);
-		if (file_cache_size < 100)
-			file_cache_size = 100;
-
-		if (!parse_shard_map(pageserver_grpc_urls, &shard_map))
-		{
-			/* shouldn't happen, as the GUC was verified already */
-			elog(FATAL, "could not parse neon.pageserver_grpcs_urls");
-		}
-		connstrings = palloc(shard_map.num_shards * sizeof(char *));
-		for (int i = 0; i < shard_map.num_shards; i++)
-			connstrings[i] = shard_map.connstring[i];
-		proc_handle = communicator_worker_process_launch(
-			cis,
-			neon_tenant,
-			neon_timeline,
-			neon_auth_token,
-			connstrings,
-			shard_map.num_shards,
-			neon_stripe_size,
-			lfc_path,
-			file_cache_size,
-			&errmsg);
-		pfree(connstrings);
-		cis = NULL;
-		success = proc_handle != NULL;
-	}
-	else
-	{
-		proc_handle = NULL;
-		success = communicator_worker_process_launch_legacy(&errmsg);
-	}
-	if (!success)
+	proc_handle = communicator_worker_launch(
+		neon_tenant[0] == '\0' ? NULL : neon_tenant,
+		neon_timeline[0] == '\0' ? NULL : neon_timeline,
+		&errmsg
+		);
+	if (proc_handle == NULL)
 	{
 		/*
 		 * Something went wrong. Before exiting, forward any log messages that
@@ -214,31 +173,6 @@ communicator_new_bgworker_main(Datum main_arg)
 		{
 			ConfigReloadPending = false;
 			ProcessConfigFile(PGC_SIGHUP);
-
-			if (proc_handle)
-			{
-				/* lfc_size_limit is in MBs */
-				file_cache_size = lfc_size_limit * (1024 * 1024 / BLCKSZ);
-				if (file_cache_size < 100)
-					file_cache_size = 100;
-
-				/* Reload pageserver URLs */
-				if (!parse_shard_map(pageserver_grpc_urls, &shard_map))
-				{
-					/* shouldn't happen, as the GUC was verified already */
-					elog(FATAL, "could not parse neon.pageserver_grpcs_urls");
-				}
-				connstrings = palloc(shard_map.num_shards * sizeof(char *));
-				for (int i = 0; i < shard_map.num_shards; i++)
-					connstrings[i] = shard_map.connstring[i];
-
-				communicator_worker_config_reload(proc_handle,
-												  file_cache_size,
-												  connstrings,
-												  shard_map.num_shards,
-												  neon_stripe_size);
-				pfree(connstrings);
-			}
 		}

 		duration = TimestampDifferenceMilliseconds(before, GetCurrentTimestamp());
@@ -337,49 +271,3 @@ callback_set_my_latch_unsafe(void)
 {
 	SetLatch(MyLatch);
 }
-
-/*
- * FIXME: The logic from neon_get_request_lsns() needs to go here, except for
- * the last-written LSN cache stuff, which is managed by the rust code now.
- */
-XLogRecPtr
-callback_get_request_lsn_unsafe(void)
-{
-	/*
-	 * NB: be very careful with what you do here! This is called from tokio
-	 * threads, so anything tha tries to take LWLocks is unsafe, for example.
-	 *
-	 * RecoveryInProgress() is OK
-	 */
-	if (RecoveryInProgress())
-	{
-		XLogRecPtr	replay_lsn = GetXLogReplayRecPtr(NULL);
-
-		return replay_lsn;
-	}
-	else
-	{
-		XLogRecPtr	flushlsn;
-
-#if PG_VERSION_NUM >= 150000
-		flushlsn = GetFlushRecPtr(NULL);
-#else
-		flushlsn = GetFlushRecPtr();
-#endif
-
-		return flushlsn;
-	}
-}
-
-/*
- * Get metrics, for the built-in metrics exporter that's part of the
- * communicator process.
- */
-struct LfcMetrics
-callback_get_lfc_metrics_unsafe(void)
-{
-	if (neon_use_communicator_worker)
-		return communicator_new_get_lfc_metrics_unsafe();
-	else
-		return lfc_get_metrics_unsafe();
-}
--- a/pgxn/neon/communicator_process.h
+++ b/pgxn/neon/communicator_process.h
@@ -12,9 +12,6 @@
 #ifndef COMMUNICATOR_PROCESS_H
 #define COMMUNICATOR_PROCESS_H

-extern struct CommunicatorInitStruct *cis;
-
-/* initialization early at postmaster startup */
 extern void pg_init_communicator_process(void);

 #endif			/* COMMUNICATOR_PROCESS_H */
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -137,6 +137,15 @@ typedef struct FileCacheEntry
 #define N_COND_VARS 	64
 #define CV_WAIT_TIMEOUT	10

+#define MAX_PREWARM_WORKERS 8
+
+typedef struct PrewarmWorkerState
+{
+	uint32		prewarmed_pages;
+	uint32		skipped_pages;
+	TimestampTz completed;
+} PrewarmWorkerState;
+
 typedef struct FileCacheControl
 {
 	uint64		generation;		/* generation is needed to handle correct hash
@@ -182,27 +191,47 @@ typedef struct FileCacheControl
 	 *   again.
 	 */
 	HyperLogLogState wss_estimation;
+
+	/* Prewarmer state */
+	PrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];
+	size_t n_prewarm_workers;
+	size_t n_prewarm_entries;
+	size_t total_prewarm_pages;
+	size_t prewarm_batch;
+	bool   prewarm_active;
+	bool   prewarm_canceled;
+	dsm_handle prewarm_lfc_state_handle;
 } FileCacheControl;

+#define FILE_CACHE_STATE_MAGIC 0xfcfcfcfc
+
+#define FILE_CACHE_STATE_BITMAP(fcs)	((uint8*)&(fcs)->chunks[(fcs)->n_chunks])
+#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks)	(sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * lfc_blocks_per_chunk)+7)/8)
+#define FILE_CACHE_STATE_SIZE(fcs)		(sizeof(FileCacheState) + (fcs->n_chunks)*sizeof(BufferTag) + (((fcs->n_chunks) << fcs->chunk_size_log)+7)/8)
+
 static HTAB *lfc_hash;
 static int	lfc_desc = -1;
 static LWLockId lfc_lock;
-int	lfc_max_size;
-int	lfc_size_limit;
+static int	lfc_max_size;
+static int	lfc_size_limit;
+static int	lfc_prewarm_limit;
+static int	lfc_prewarm_batch;
 static int	lfc_chunk_size_log = MAX_BLOCKS_PER_CHUNK_LOG;
 static int	lfc_blocks_per_chunk = MAX_BLOCKS_PER_CHUNK;
-char *lfc_path;
+static char *lfc_path;
 static uint64 lfc_generation;
 static FileCacheControl *lfc_ctl;
+static bool lfc_do_prewarm;

 bool lfc_store_prefetch_result;
 bool lfc_prewarm_update_ws_estimation;

-bool lfc_do_prewarm;
-bool lfc_prewarm_cancel;
+bool AmPrewarmWorker;

 #define LFC_ENABLED() (lfc_ctl->limit != 0)

+PGDLLEXPORT void lfc_prewarm_main(Datum main_arg);
+
 /*
 * Close LFC file if opened.
 * All backends should close their LFC files once LFC is disabled.
@@ -228,8 +257,6 @@ lfc_switch_off(void)
 {
 	int			fd;

-	Assert(!neon_use_communicator_worker);
-
 	if (LFC_ENABLED())
 	{
 		HASH_SEQ_STATUS status;
@@ -295,8 +322,6 @@ lfc_maybe_disabled(void)
 static bool
 lfc_ensure_opened(void)
 {
-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_generation != lfc_ctl->generation)
 	{
 		lfc_close_file();
@@ -322,9 +347,6 @@ LfcShmemInit(void)
 	bool		found;
 	static HASHCTL info;

-	if (neon_use_communicator_worker)
-		return;
-
 	if (lfc_max_size <= 0)
 		return;

@@ -514,6 +536,7 @@ lfc_init(void)
 	if (!process_shared_preload_libraries_in_progress)
 		neon_log(ERROR, "Neon module should be loaded via shared_preload_libraries");

+
 	DefineCustomBoolVariable("neon.store_prefetch_result_in_lfc",
 							"Immediately store received prefetch result in LFC",
 							NULL,
@@ -585,6 +608,32 @@ lfc_init(void)
 							lfc_check_chunk_size,
 							lfc_change_chunk_size,
 							NULL);
+
+	DefineCustomIntVariable("neon.file_cache_prewarm_limit",
+							"Maximal number of prewarmed chunks",
+							NULL,
+							&lfc_prewarm_limit,
+							INT_MAX,	/* no limit by default */
+							0,
+							INT_MAX,
+							PGC_SIGHUP,
+							0,
+							NULL,
+							NULL,
+							NULL);
+
+	DefineCustomIntVariable("neon.file_cache_prewarm_batch",
+							"Number of pages retrivied by prewarm from page server",
+							NULL,
+							&lfc_prewarm_batch,
+							64,
+							1,
+							INT_MAX,
+							PGC_SIGHUP,
+							0,
+							NULL,
+							NULL,
+							NULL);
 }

 /*
@@ -609,7 +658,7 @@ lfc_get_state(size_t max_entries)
 		uint8* bitmap;
 		size_t n_pages = 0;
 		size_t n_entries = Min(max_entries, lfc_ctl->used - lfc_ctl->pinned);
-		size_t state_size = FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_entries, lfc_blocks_per_chunk);
+		size_t state_size = FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_entries);
 		fcs = (FileCacheState*)palloc0(state_size);
 		SET_VARSIZE(fcs, state_size);
 		fcs->magic = FILE_CACHE_STATE_MAGIC;
@@ -654,6 +703,278 @@ lfc_get_state(size_t max_entries)
 	return fcs;
 }

+/*
+ * Prewarm LFC cache to the specified state. It uses lfc_prefetch function to load prewarmed page without hoilding shared buffer lock
+ * and avoid race conditions with other backends.
+ */
+void
+lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
+{
+	size_t fcs_chunk_size_log;
+	size_t n_entries;
+	size_t prewarm_batch = Min(lfc_prewarm_batch, readahead_buffer_size);
+	size_t fcs_size;
+	uint32_t max_prefetch_pages;
+	dsm_segment *seg;
+	BackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];
+
+
+	if (!lfc_ensure_opened())
+		return;
+
+	if (prewarm_batch == 0 || lfc_prewarm_limit == 0 || n_workers == 0)
+	{
+		elog(LOG, "LFC: prewarm is disabled");
+		return;
+	}
+
+	if (n_workers > MAX_PREWARM_WORKERS)
+	{
+		elog(ERROR, "LFC: Too much prewarm workers, maximum is %d", MAX_PREWARM_WORKERS);
+	}
+
+	if (fcs == NULL || fcs->n_chunks == 0)
+	{
+		elog(LOG, "LFC: nothing to prewarm");
+		return;
+	}
+
+	if (fcs->magic != FILE_CACHE_STATE_MAGIC)
+	{
+		elog(ERROR, "LFC: Invalid file cache state magic: %X", fcs->magic);
+	}
+
+	fcs_size = VARSIZE(fcs);
+	if (FILE_CACHE_STATE_SIZE(fcs) != fcs_size)
+	{
+		elog(ERROR, "LFC: Invalid file cache state size: %u vs. %u", (unsigned)FILE_CACHE_STATE_SIZE(fcs), VARSIZE(fcs));
+	}
+
+	fcs_chunk_size_log = fcs->chunk_size_log;
+	if (fcs_chunk_size_log > MAX_BLOCKS_PER_CHUNK_LOG)
+	{
+		elog(ERROR, "LFC: Invalid chunk size log: %u", fcs->chunk_size_log);
+	}
+
+	n_entries = Min(fcs->n_chunks, lfc_prewarm_limit);
+	Assert(n_entries != 0);
+
+	max_prefetch_pages = n_entries << fcs_chunk_size_log;
+	if (fcs->n_pages > max_prefetch_pages) {
+		elog(ERROR, "LFC: Number of pages in file cache state (%d) is more than the limit (%d)", fcs->n_pages, max_prefetch_pages);
+	}
+
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	/* Do not prewarm more entries than LFC limit */
+	if (lfc_ctl->limit <= lfc_ctl->size)
+	{
+		elog(LOG, "LFC: skip prewarm because LFC is already filled");
+		LWLockRelease(lfc_lock);
+		return;
+	}
+
+	if (lfc_ctl->prewarm_active)
+	{
+		LWLockRelease(lfc_lock);
+		elog(ERROR, "LFC: skip prewarm because another prewarm is still active");
+	}
+	lfc_ctl->n_prewarm_entries = n_entries;
+	lfc_ctl->n_prewarm_workers = n_workers;
+	lfc_ctl->prewarm_active = true;
+	lfc_ctl->prewarm_canceled = false;
+	lfc_ctl->prewarm_batch = prewarm_batch;
+	memset(lfc_ctl->prewarm_workers, 0, n_workers*sizeof(PrewarmWorkerState));
+
+	LWLockRelease(lfc_lock);
+
+	/* Calculate total number of pages to be prewarmed */
+	lfc_ctl->total_prewarm_pages = fcs->n_pages;
+
+	seg = dsm_create(fcs_size, 0);
+	memcpy(dsm_segment_address(seg), fcs, fcs_size);
+	lfc_ctl->prewarm_lfc_state_handle = dsm_segment_handle(seg);
+
+	/* Spawn background workers */
+	for (uint32 i = 0; i < n_workers; i++)
+	{
+		BackgroundWorker worker = {0};
+
+		worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
+		worker.bgw_start_time = BgWorkerStart_ConsistentState;
+		worker.bgw_restart_time = BGW_NEVER_RESTART;
+		strcpy(worker.bgw_library_name, "neon");
+		strcpy(worker.bgw_function_name, "lfc_prewarm_main");
+		snprintf(worker.bgw_name, BGW_MAXLEN, "LFC prewarm worker %d", i+1);
+		strcpy(worker.bgw_type, "LFC prewarm worker");
+		worker.bgw_main_arg = Int32GetDatum(i);
+		/* must set notify PID to wait for shutdown */
+		worker.bgw_notify_pid = MyProcPid;
+
+		if (!RegisterDynamicBackgroundWorker(&worker, &bgw_handle[i]))
+		{
+			ereport(LOG,
+					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+					 errmsg("LFC: registering dynamic bgworker prewarm failed"),
+					 errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
+			n_workers = i;
+			lfc_ctl->prewarm_canceled = true;
+			break;
+		}
+	}
+
+	for (uint32 i = 0; i < n_workers; i++)
+	{
+		bool interrupted;
+		do
+		{
+			interrupted = false;
+			PG_TRY();
+			{
+				BgwHandleStatus status = WaitForBackgroundWorkerShutdown(bgw_handle[i]);
+				if (status != BGWH_STOPPED && status != BGWH_POSTMASTER_DIED)
+				{
+					elog(LOG, "LFC: Unexpected status of prewarm worker termination: %d", status);
+				}
+			}
+			PG_CATCH();
+			{
+				elog(LOG, "LFC: cancel prewarm");
+				lfc_ctl->prewarm_canceled = true;
+				interrupted = true;
+			}
+			PG_END_TRY();
+		} while (interrupted);
+
+		if (!lfc_ctl->prewarm_workers[i].completed)
+		{
+			/* Background worker doesn't set completion time: it means that it was abnormally terminated */
+			elog(LOG, "LFC: prewarm worker %d failed", i+1);
+			/* Set completion time to prevent get_prewarm_info from considering this worker as active */
+			lfc_ctl->prewarm_workers[i].completed = GetCurrentTimestamp();
+		}
+	}
+	dsm_detach(seg);
+
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+	lfc_ctl->prewarm_active = false;
+	LWLockRelease(lfc_lock);
+}
+
+void
+lfc_prewarm_main(Datum main_arg)
+{
+	size_t snd_idx = 0, rcv_idx = 0;
+	size_t n_sent = 0, n_received = 0;
+	size_t fcs_chunk_size_log;
+	size_t max_prefetch_pages;
+	size_t prewarm_batch;
+	size_t n_workers;
+	dsm_segment *seg;
+	FileCacheState* fcs;
+	uint8* bitmap;
+	BufferTag tag;
+	PrewarmWorkerState* ws;
+	uint32 worker_id = DatumGetInt32(main_arg);
+
+	AmPrewarmWorker = true;
+
+	pqsignal(SIGTERM, die);
+	BackgroundWorkerUnblockSignals();
+
+	seg = dsm_attach(lfc_ctl->prewarm_lfc_state_handle);
+	if (seg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("could not map dynamic shared memory segment")));
+
+	fcs = (FileCacheState*) dsm_segment_address(seg);
+	prewarm_batch = lfc_ctl->prewarm_batch;
+	fcs_chunk_size_log = fcs->chunk_size_log;
+	n_workers = lfc_ctl->n_prewarm_workers;
+	max_prefetch_pages = lfc_ctl->n_prewarm_entries << fcs_chunk_size_log;
+	ws = &lfc_ctl->prewarm_workers[worker_id];
+	bitmap = FILE_CACHE_STATE_BITMAP(fcs);
+
+	/* enable prefetch in LFC */
+	lfc_store_prefetch_result = true;
+	lfc_do_prewarm = true; /* Flag for lfc_prefetch preventing replacement of existed entries if LFC cache is full */
+
+	elog(LOG, "LFC: worker %d start prewarming", worker_id);
+	while (!lfc_ctl->prewarm_canceled)
+	{
+		if (snd_idx < max_prefetch_pages)
+		{
+			if ((snd_idx >> fcs_chunk_size_log) % n_workers != worker_id)
+			{
+				/* If there are multiple workers, split chunks between them */
+				snd_idx += 1 << fcs_chunk_size_log;
+			}
+			else
+			{
+				if (BITMAP_ISSET(bitmap, snd_idx))
+				{
+					tag = fcs->chunks[snd_idx >> fcs_chunk_size_log];
+					tag.blockNum += snd_idx & ((1 << fcs_chunk_size_log) - 1);
+
+					if (!BufferTagIsValid(&tag)) {
+						elog(ERROR, "LFC: Invalid buffer tag: %u", tag.blockNum);
+					}
+
+					if (!lfc_cache_contains(BufTagGetNRelFileInfo(tag), tag.forkNum, tag.blockNum))
+					{
+						(void)communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
+						n_sent += 1;
+					}
+					else
+					{
+						ws->skipped_pages += 1;
+						BITMAP_CLR(bitmap, snd_idx);
+					}
+				}
+				snd_idx += 1;
+			}
+		}
+		if (n_sent >= n_received + prewarm_batch || snd_idx == max_prefetch_pages)
+		{
+			if (n_received == n_sent && snd_idx == max_prefetch_pages)
+			{
+				break;
+			}
+			if ((rcv_idx >> fcs_chunk_size_log) % n_workers != worker_id)
+			{
+				/* Skip chunks processed by other workers */
+				rcv_idx += 1 << fcs_chunk_size_log;
+				continue;
+			}
+
+			/* Locate next block to prefetch */
+			while (!BITMAP_ISSET(bitmap, rcv_idx))
+			{
+				rcv_idx += 1;
+			}
+			tag = fcs->chunks[rcv_idx >> fcs_chunk_size_log];
+			tag.blockNum += rcv_idx & ((1 << fcs_chunk_size_log) - 1);
+			if (communicator_prefetch_receive(tag))
+			{
+				ws->prewarmed_pages += 1;
+			}
+			else
+			{
+				ws->skipped_pages += 1;
+			}
+			rcv_idx += 1;
+			n_received += 1;
+		}
+	}
+	/* No need to perform prefetch cleanup here because prewarm worker will be terminated and
+	 * connection to PS dropped just after return from this function.
+	 */
+	Assert(n_sent == n_received || lfc_ctl->prewarm_canceled);
+	elog(LOG, "LFC: worker %d complete prewarming: loaded %ld pages", worker_id, (long)n_received);
+	lfc_ctl->prewarm_workers[worker_id].completed = GetCurrentTimestamp();
+}
+
 void
 lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
 {
@@ -661,8 +982,6 @@ lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks)
 	FileCacheEntry *entry;
 	uint32		hash;

-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;

@@ -708,8 +1027,6 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	bool		found = false;
 	uint32		hash;

-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return false;

@@ -745,8 +1062,6 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	uint32		hash;
 	int			i = 0;

-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return 0;

@@ -854,8 +1169,6 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	int			blocks_read = 0;
 	int			buf_offset = 0;

-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return -1;

@@ -1166,7 +1479,7 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
 		/* Can't add this chunk - we don't have the space for it */
 		hash_search_with_hash_value(lfc_hash, &entry->key, hash,
 									HASH_REMOVE, NULL);
-		lfc_prewarm_cancel = true; /* cancel prewarm if LFC limit is reached */
+		lfc_ctl->prewarm_canceled = true; /* cancel prewarm if LFC limit is reached */
 		return false;
 	}

@@ -1221,8 +1534,6 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,

 	int		chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);

-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return false;

@@ -1368,8 +1679,6 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	uint32		entry_offset;
 	int			buf_offset = 0;

-	Assert(!neon_use_communicator_worker);
-
 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;

@@ -1588,6 +1897,7 @@ lfc_get_stats(size_t *num_entries)
 	return entries;
 }

+
 /*
 * Function returning data from the local file cache
 * relation node/tablespace/database/blocknum and access_counter
@@ -1691,15 +2001,15 @@ lfc_approximate_working_set_size_seconds(time_t duration, bool reset)
 }

 /*
- * Get metrics, for the built-in metrics exporter that's part of the
- * communicator process.
+ * Get metrics, for the built-in metrics exporter that's part of the communicator
+ * process.
 *
 * NB: This is called from a Rust tokio task inside the communicator process.
 * Acquiring lwlocks, elog(), allocating memory or anything else non-trivial
 * is strictly prohibited here!
 */
 struct LfcMetrics
-lfc_get_metrics_unsafe(void)
+callback_get_lfc_metrics_unsafe(void)
 {
 	struct LfcMetrics result = {
 		.lfc_cache_size_limit = (int64) lfc_size_limit * 1024 * 1024,
@@ -1720,3 +2030,82 @@ lfc_get_metrics_unsafe(void)

 	return result;
 }
+
+
+PG_FUNCTION_INFO_V1(get_local_cache_state);
+
+Datum
+get_local_cache_state(PG_FUNCTION_ARGS)
+{
+	size_t max_entries = PG_ARGISNULL(0) ? lfc_prewarm_limit : PG_GETARG_INT32(0);
+	FileCacheState* fcs = lfc_get_state(max_entries);
+	if (fcs != NULL)
+		PG_RETURN_BYTEA_P((bytea*)fcs);
+	else
+		PG_RETURN_NULL();
+}
+
+PG_FUNCTION_INFO_V1(prewarm_local_cache);
+
+Datum
+prewarm_local_cache(PG_FUNCTION_ARGS)
+{
+	bytea* state = PG_GETARG_BYTEA_PP(0);
+	uint32 n_workers =  PG_GETARG_INT32(1);
+	FileCacheState* fcs = (FileCacheState*)state;
+
+	lfc_prewarm(fcs, n_workers);
+
+	PG_RETURN_NULL();
+}
+
+PG_FUNCTION_INFO_V1(get_prewarm_info);
+
+Datum
+get_prewarm_info(PG_FUNCTION_ARGS)
+{
+	Datum		values[4];
+	bool		nulls[4];
+	TupleDesc	tupdesc;
+	uint32 prewarmed_pages = 0;
+	uint32 skipped_pages = 0;
+	uint32 active_workers = 0;
+	uint32 total_pages;
+	size_t n_workers;
+
+	if (lfc_size_limit == 0)
+		PG_RETURN_NULL();
+
+	LWLockAcquire(lfc_lock, LW_SHARED);
+	if (!lfc_ctl || lfc_ctl->n_prewarm_workers == 0)
+	{
+		LWLockRelease(lfc_lock);
+		PG_RETURN_NULL();
+	}
+	n_workers = lfc_ctl->n_prewarm_workers;
+	total_pages = lfc_ctl->total_prewarm_pages;
+	for (size_t i = 0; i < n_workers; i++)
+	{
+		PrewarmWorkerState* ws = &lfc_ctl->prewarm_workers[i];
+		prewarmed_pages += ws->prewarmed_pages;
+		skipped_pages += ws->skipped_pages;
+		active_workers += ws->completed != 0;
+	}
+	LWLockRelease(lfc_lock);
+
+	tupdesc = CreateTemplateTupleDesc(4);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "total_pages", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prewarmed_pages", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "skipped_pages", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "active_workers", INT4OID, -1, 0);
+	tupdesc = BlessTupleDesc(tupdesc);
+
+	MemSet(nulls, 0, sizeof(nulls));
+
+	values[0] = Int32GetDatum(total_pages);
+	values[1] = Int32GetDatum(prewarmed_pages);
+	values[2] = Int32GetDatum(skipped_pages);
+	values[3] = Int32GetDatum(active_workers);
+
+	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
--- a/pgxn/neon/file_cache.h
+++ b/pgxn/neon/file_cache.h
@@ -11,19 +11,21 @@
 #ifndef FILE_CACHE_h
 #define FILE_CACHE_h

-#include "lfc_prewarm.h"
-#include "neon.h"
-
 #include "neon_pgversioncompat.h"

+typedef struct FileCacheState
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	uint32		magic;
+	uint32		n_chunks;
+	uint32		n_pages;
+	uint16		chunk_size_log;
+	BufferTag	chunks[FLEXIBLE_ARRAY_MEMBER];
+	/* followed by bitmap */
+} FileCacheState;
+
 /* GUCs */
 extern bool lfc_store_prefetch_result;
-extern int	lfc_max_size;
-extern int	lfc_size_limit;
-extern char *lfc_path;
-
-extern bool lfc_do_prewarm;
-extern bool lfc_prewarm_cancel;

 /* functions for local file cache */
 extern void lfc_invalidate(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber nblocks);
@@ -42,12 +44,16 @@ extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
 extern void lfc_init(void);
 extern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 						 const void* buffer, XLogRecPtr lsn);
-
 extern FileCacheState* lfc_get_state(size_t max_entries);
-extern LfcStatsEntry *lfc_get_stats(size_t *num_entries);
+extern void lfc_prewarm(FileCacheState* fcs, uint32 n_workers);

-struct LfcMetrics; /* defined in communicator_bindings.h */
-extern struct LfcMetrics lfc_get_metrics_unsafe(void);
+typedef struct LfcStatsEntry
+{
+	const char *metric_name;
+	bool		isnull;
+	uint64		value;
+} LfcStatsEntry;
+extern LfcStatsEntry *lfc_get_stats(size_t *num_entries);

 typedef struct
 {
@@ -63,6 +69,7 @@ extern LocalCachePagesRec *lfc_local_cache_pages(size_t *num_entries);

 extern int32 lfc_approximate_working_set_size_seconds(time_t duration, bool reset);

+
 static inline bool
 lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		 void *buffer)
--- a/pgxn/neon/lfc_prewarm.c
+++ b/pgxn/neon/lfc_prewarm.c
@@ -1,661 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * lfc_prewarm.c
- *		Functions related to LFC prewarming
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-
-#include "bitmap.h"
-#include "communicator.h"
-#include "communicator_new.h"
-#include "file_cache.h"
-#include "lfc_prewarm.h"
-#include "neon.h"
-#include "neon_utils.h"
-#include "pagestore_client.h"
-
-#include "funcapi.h"
-#include "miscadmin.h"
-#include "postmaster/bgworker.h"
-#include "storage/dsm.h"
-#include "tcop/tcopprot.h"
-#include "utils/timestamp.h"
-
-#define MAX_PREWARM_WORKERS 8
-
-typedef struct PrewarmWorkerState
-{
-	uint32		prewarmed_pages;
-	uint32		skipped_pages;
-	TimestampTz completed;
-} PrewarmWorkerState;
-
-typedef struct PrewarmControl
-{
-	/* -1 when not using workers, 0 when no prewarm has been performed */
-	size_t		n_prewarm_workers;
-	size_t		total_prewarm_pages;
-	bool		prewarm_active;
-	bool		prewarm_canceled;
-
-	/* These are used in the non-worker mode */
-	uint32		prewarmed_pages;
-	uint32		skipped_pages;
-	TimestampTz completed;
-
-	/* These are used with workers */
-	PrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];
-	dsm_handle	prewarm_lfc_state_handle;
-	size_t		prewarm_batch;
-	size_t		n_prewarm_entries;
-} PrewarmControl;
-
-static PrewarmControl *prewarm_ctl;
-
-static int	lfc_prewarm_limit;
-static int	lfc_prewarm_batch;
-
-static LWLockId prewarm_lock;
-
-bool AmPrewarmWorker;
-
-static void lfc_prewarm_with_workers(FileCacheState *fcs, uint32 n_workers);
-static void lfc_prewarm_with_async_requests(FileCacheState *fcs);
-PGDLLEXPORT void lfc_prewarm_main(Datum main_arg);
-
-void
-pg_init_prewarm(void)
-{
-	DefineCustomIntVariable("neon.file_cache_prewarm_limit",
-							"Maximal number of prewarmed chunks",
-							NULL,
-							&lfc_prewarm_limit,
-							INT_MAX,	/* no limit by default */
-							0,
-							INT_MAX,
-							PGC_SIGHUP,
-							0,
-							NULL,
-							NULL,
-							NULL);
-
-	DefineCustomIntVariable("neon.file_cache_prewarm_batch",
-							"Number of pages retrivied by prewarm from page server",
-							NULL,
-							&lfc_prewarm_batch,
-							64,
-							1,
-							INT_MAX,
-							PGC_SIGHUP,
-							0,
-							NULL,
-							NULL,
-							NULL);
-}
-
-static size_t
-PrewarmShmemSize(void)
-{
-	return sizeof(PrewarmControl);
-}
-
-void
-PrewarmShmemRequest(void)
-{
-	RequestAddinShmemSpace(PrewarmShmemSize());
-	RequestNamedLWLockTranche("prewarm_lock", 1);
-}
-
-void
-PrewarmShmemInit(void)
-{
-	bool		found;
-
-	prewarm_ctl = (PrewarmControl *) ShmemInitStruct("Prewarmer shmem state",
-								PrewarmShmemSize(),
-								&found);
-	if (!found)
-	{
-		/* it's zeroed already */
-
-		prewarm_lock = (LWLockId) GetNamedLWLockTranche("prewarm_lock");
-	}
-}
-
-static void
-validate_fcs(FileCacheState *fcs)
-{
-	size_t fcs_size;
-#if 0
-	size_t fcs_chunk_size_log;
-#endif
-
-	if (fcs->magic != FILE_CACHE_STATE_MAGIC)
-	{
-		elog(ERROR, "LFC: Invalid file cache state magic: %X", fcs->magic);
-	}
-
-	fcs_size = VARSIZE(fcs);
-	if (FILE_CACHE_STATE_SIZE(fcs) != fcs_size)
-	{
-		elog(ERROR, "LFC: Invalid file cache state size: %u vs. %u", (unsigned)FILE_CACHE_STATE_SIZE(fcs), VARSIZE(fcs));
-	}
-
-	/* FIXME */
-#if 0
-	fcs_chunk_size_log = fcs->chunk_size_log;
-	if (fcs_chunk_size_log > MAX_BLOCKS_PER_CHUNK_LOG)
-	{
-		elog(ERROR, "LFC: Invalid chunk size log: %u", fcs->chunk_size_log);
-	}
-#endif
-}
-
-/*
- * Prewarm LFC cache to the specified state. It uses lfc_prefetch function to
- * load prewarmed page without hoilding shared buffer lock and avoid race
- * conditions with other backends.
- */
-void
-lfc_prewarm_with_workers(FileCacheState *fcs, uint32 n_workers)
-{
-	size_t n_entries;
-	size_t prewarm_batch = Min(lfc_prewarm_batch, readahead_buffer_size);
-	size_t fcs_size = VARSIZE(fcs);
-	dsm_segment *seg;
-	BackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];
-
-	Assert(!neon_use_communicator_worker);
-
-	if (prewarm_batch == 0 || lfc_prewarm_limit == 0 || n_workers == 0)
-	{
-		elog(LOG, "LFC: prewarm is disabled");
-		return;
-	}
-
-	if (n_workers > MAX_PREWARM_WORKERS)
-	{
-		elog(ERROR, "LFC: too many prewarm workers, maximum is %d", MAX_PREWARM_WORKERS);
-	}
-
-	if (fcs == NULL || fcs->n_chunks == 0)
-	{
-		elog(LOG, "LFC: nothing to prewarm");
-		return;
-	}
-
-	n_entries = Min(fcs->n_chunks, lfc_prewarm_limit);
-	Assert(n_entries != 0);
-
-	LWLockAcquire(prewarm_lock, LW_EXCLUSIVE);
-
-	/* Do not prewarm more entries than LFC limit */
-	/* FIXME */
-#if 0
-	if (prewarm_ctl->limit <= prewarm_ctl->size)
-	{
-		elog(LOG, "LFC: skip prewarm because LFC is already filled");
-		LWLockRelease(prewarm_lock);
-		return;
-	}
-#endif
-	
-	if (prewarm_ctl->prewarm_active)
-	{
-		LWLockRelease(prewarm_lock);
-		elog(ERROR, "LFC: skip prewarm because another prewarm is still active");
-	}
-	prewarm_ctl->n_prewarm_entries = n_entries;
-	prewarm_ctl->n_prewarm_workers = n_workers;
-	prewarm_ctl->prewarm_active = true;
-	prewarm_ctl->prewarm_canceled = false;
-	prewarm_ctl->prewarm_batch = prewarm_batch;
-	memset(prewarm_ctl->prewarm_workers, 0, n_workers*sizeof(PrewarmWorkerState));
-
-	/* Calculate total number of pages to be prewarmed */
-	prewarm_ctl->total_prewarm_pages = fcs->n_pages;
-
-	LWLockRelease(prewarm_lock);
-
-	seg = dsm_create(fcs_size, 0);
-	memcpy(dsm_segment_address(seg), fcs, fcs_size);
-	prewarm_ctl->prewarm_lfc_state_handle = dsm_segment_handle(seg);
-
-	/* Spawn background workers */
-	for (uint32 i = 0; i < n_workers; i++)
-	{
-		BackgroundWorker worker = {0};
-
-		worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
-		worker.bgw_start_time = BgWorkerStart_ConsistentState;
-		worker.bgw_restart_time = BGW_NEVER_RESTART;
-		strcpy(worker.bgw_library_name, "neon");
-		strcpy(worker.bgw_function_name, "lfc_prewarm_main");
-		snprintf(worker.bgw_name, BGW_MAXLEN, "LFC prewarm worker %d", i+1);
-		strcpy(worker.bgw_type, "LFC prewarm worker");
-		worker.bgw_main_arg = Int32GetDatum(i);
-		/* must set notify PID to wait for shutdown */
-		worker.bgw_notify_pid = MyProcPid;
-
-		if (!RegisterDynamicBackgroundWorker(&worker, &bgw_handle[i]))
-		{
-			ereport(LOG,
-					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
-					 errmsg("LFC: registering dynamic bgworker prewarm failed"),
-					 errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
-			n_workers = i;
-			prewarm_ctl->prewarm_canceled = true;
-			break;
-		}
-	}
-
-	for (uint32 i = 0; i < n_workers; i++)
-	{
-		bool interrupted;
-		do
-		{
-			interrupted = false;
-			PG_TRY();
-			{
-				BgwHandleStatus status = WaitForBackgroundWorkerShutdown(bgw_handle[i]);
-				if (status != BGWH_STOPPED && status != BGWH_POSTMASTER_DIED)
-				{
-					elog(LOG, "LFC: Unexpected status of prewarm worker termination: %d", status);
-				}
-			}
-			PG_CATCH();
-			{
-				elog(LOG, "LFC: cancel prewarm");
-				prewarm_ctl->prewarm_canceled = true;
-				interrupted = true;
-			}
-			PG_END_TRY();
-		} while (interrupted);
-
-		if (!prewarm_ctl->prewarm_workers[i].completed)
-		{
-			/* Background worker doesn't set completion time: it means that it was abnormally terminated */
-			elog(LOG, "LFC: prewarm worker %d failed", i+1);
-			/* Set completion time to prevent get_prewarm_info from considering this worker as active */
-			prewarm_ctl->prewarm_workers[i].completed = GetCurrentTimestamp();
-		}
-	}
-	dsm_detach(seg);
-
-	LWLockAcquire(prewarm_lock, LW_EXCLUSIVE);
-	prewarm_ctl->prewarm_active = false;
-	LWLockRelease(prewarm_lock);
-}
-
-
-void
-lfc_prewarm_main(Datum main_arg)
-{
-	size_t snd_idx = 0, rcv_idx = 0;
-	size_t n_sent = 0, n_received = 0;
-	size_t fcs_chunk_size_log;
-	size_t max_prefetch_pages;
-	size_t prewarm_batch;
-	size_t n_workers;
-	dsm_segment *seg;
-	FileCacheState* fcs;
-	uint8* bitmap;
-	BufferTag tag;
-	PrewarmWorkerState* ws;
-	uint32 worker_id = DatumGetInt32(main_arg);
-
-	Assert(!neon_use_communicator_worker);
-
-	AmPrewarmWorker = true;
-
-	pqsignal(SIGTERM, die);
-	BackgroundWorkerUnblockSignals();
-
-	seg = dsm_attach(prewarm_ctl->prewarm_lfc_state_handle);
-	if (seg == NULL)
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("could not map dynamic shared memory segment")));
-
-	fcs = (FileCacheState*) dsm_segment_address(seg);
-	prewarm_batch = prewarm_ctl->prewarm_batch;
-	fcs_chunk_size_log = fcs->chunk_size_log;
-	n_workers = prewarm_ctl->n_prewarm_workers;
-	max_prefetch_pages = prewarm_ctl->n_prewarm_entries << fcs_chunk_size_log;
-	ws = &prewarm_ctl->prewarm_workers[worker_id];
-	bitmap = FILE_CACHE_STATE_BITMAP(fcs);
-
-	/* enable prefetch in LFC */
-	lfc_store_prefetch_result = true;
-	lfc_do_prewarm = true; /* Flag for lfc_prefetch preventing replacement of existed entries if LFC cache is full */
-
-	elog(LOG, "LFC: worker %d start prewarming", worker_id);
-	while (!prewarm_ctl->prewarm_canceled)
-	{
-		if (snd_idx < max_prefetch_pages)
-		{
-			if ((snd_idx >> fcs_chunk_size_log) % n_workers != worker_id)
-			{
-				/* If there are multiple workers, split chunks between them */
-				snd_idx += 1 << fcs_chunk_size_log;
-			}
-			else
-			{
-				if (BITMAP_ISSET(bitmap, snd_idx))
-				{
-					tag = fcs->chunks[snd_idx >> fcs_chunk_size_log];
-					tag.blockNum += snd_idx & ((1 << fcs_chunk_size_log) - 1);
-
-					if (!BufferTagIsValid(&tag))
-						elog(ERROR, "LFC: Invalid buffer tag: %u", tag.blockNum);
-
-					if (!lfc_cache_contains(BufTagGetNRelFileInfo(tag), tag.forkNum, tag.blockNum))
-					{
-						(void) communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
-						n_sent += 1;
-					}
-					else
-					{
-						ws->skipped_pages += 1;
-						BITMAP_CLR(bitmap, snd_idx);
-					}
-				}
-				snd_idx += 1;
-			}
-		}
-		if (n_sent >= n_received + prewarm_batch || snd_idx == max_prefetch_pages)
-		{
-			if (n_received == n_sent && snd_idx == max_prefetch_pages)
-			{
-				break;
-			}
-			if ((rcv_idx >> fcs_chunk_size_log) % n_workers != worker_id)
-			{
-				/* Skip chunks processed by other workers */
-				rcv_idx += 1 << fcs_chunk_size_log;
-				continue;
-			}
-
-			/* Locate next block to prefetch */
-			while (!BITMAP_ISSET(bitmap, rcv_idx))
-			{
-				rcv_idx += 1;
-			}
-			tag = fcs->chunks[rcv_idx >> fcs_chunk_size_log];
-			tag.blockNum += rcv_idx & ((1 << fcs_chunk_size_log) - 1);
-			if (communicator_prefetch_receive(tag))
-			{
-				ws->prewarmed_pages += 1;
-			}
-			else
-			{
-				ws->skipped_pages += 1;
-			}
-			rcv_idx += 1;
-			n_received += 1;
-		}
-	}
-	/* No need to perform prefetch cleanup here because prewarm worker will be terminated and
-	 * connection to PS dropped just after return from this function.
-	 */
-	Assert(n_sent == n_received || prewarm_ctl->prewarm_canceled);
-	elog(LOG, "LFC: worker %d complete prewarming: loaded %ld pages", worker_id, (long)n_received);
-	prewarm_ctl->prewarm_workers[worker_id].completed = GetCurrentTimestamp();
-}
-
-/*
- * Prewarm LFC cache to the specified state. Uses the new communicator
- *
- * FIXME: Is there a race condition because we're not holding Postgres
- * buffer manager locks?
- */
-static void
-lfc_prewarm_with_async_requests(FileCacheState *fcs)
-{
-	size_t n_entries;
-	uint8	   *bitmap;
-	uint64		bitno;
-	int			blocks_per_chunk;
-
-	Assert(neon_use_communicator_worker);
-
-	if (lfc_prewarm_limit == 0)
-	{
-		elog(LOG, "LFC: prewarm is disabled");
-		return;
-	}
-
-	if (fcs == NULL || fcs->n_chunks == 0)
-	{
-		elog(LOG, "LFC: nothing to prewarm");
-		return;
-	}
-
-	n_entries = Min(fcs->n_chunks, lfc_prewarm_limit);
-	Assert(n_entries != 0);
-
-	LWLockAcquire(prewarm_lock, LW_EXCLUSIVE);
-
-	/* Do not prewarm more entries than LFC limit */
-	/* FIXME */
-#if 0
-	if (prewarm_ctl->limit <= prewarm_ctl->size)
-	{
-		elog(LOG, "LFC: skip prewarm because LFC is already filled");
-		LWLockRelease(prewarm_lock);
-		return;
-	}
-#endif
-
-	if (prewarm_ctl->prewarm_active)
-	{
-		LWLockRelease(prewarm_lock);
-		elog(ERROR, "LFC: skip prewarm because another prewarm is still active");
-	}
-	prewarm_ctl->n_prewarm_entries = n_entries;
-	prewarm_ctl->n_prewarm_workers = -1;
-	prewarm_ctl->prewarm_active = true;
-	prewarm_ctl->prewarm_canceled = false;
-
-	/* Calculate total number of pages to be prewarmed */
-	prewarm_ctl->total_prewarm_pages = fcs->n_pages;
-
-	LWLockRelease(prewarm_lock);
-
-	elog(LOG, "LFC: start prewarming");
-	lfc_do_prewarm = true;
-	lfc_prewarm_cancel = false;
-
-	bitmap = FILE_CACHE_STATE_BITMAP(fcs);
-
-	blocks_per_chunk = 1 << fcs->chunk_size_log;
-
-	bitno = 0;
-	for (uint32 chunkno = 0; chunkno < fcs->n_chunks; chunkno++)
-	{
-		BufferTag *chunk_tag = &fcs->chunks[chunkno];
-		BlockNumber request_startblkno = InvalidBlockNumber;
-		BlockNumber request_endblkno;
-
-		if (!BufferTagIsValid(chunk_tag))
-			elog(ERROR, "LFC: Invalid buffer tag: %u", chunk_tag->blockNum);
-
-		if (lfc_prewarm_cancel)
-		{
-			prewarm_ctl->prewarm_canceled = true;
-			break;
-		}
-
-		/* take next chunk */
-		for (int j = 0; j < blocks_per_chunk; j++)
-		{
-			BlockNumber blkno = chunk_tag->blockNum + j;
-
-			if (BITMAP_ISSET(bitmap, bitno))
-			{
-				if (request_startblkno != InvalidBlockNumber)
-				{
-					if (request_endblkno == blkno)
-					{
-						/* append this block to the request */
-						request_endblkno++;
-					}
-					else
-					{
-						/* flush this request, and start new one */
-						communicator_new_prefetch_register_bufferv(
-							BufTagGetNRelFileInfo(*chunk_tag),
-							chunk_tag->forkNum,
-							request_startblkno,
-							request_endblkno - request_startblkno
-							);
-						request_startblkno = blkno;
-						request_endblkno = blkno + 1;
-					}
-				}
-				else
-				{
-					/* flush this request, if any, and start new one */
-					if (request_startblkno != InvalidBlockNumber)
-					{
-						communicator_new_prefetch_register_bufferv(
-							BufTagGetNRelFileInfo(*chunk_tag),
-							chunk_tag->forkNum,
-							request_startblkno,
-							request_endblkno - request_startblkno
-							);
-					}
-					request_startblkno = blkno;
-					request_endblkno = blkno + 1;
-				}
-				prewarm_ctl->prewarmed_pages += 1;
-			}
-			bitno++;
-		}
-
-		/* flush this request */
-		communicator_new_prefetch_register_bufferv(
-			BufTagGetNRelFileInfo(*chunk_tag),
-			chunk_tag->forkNum,
-			request_startblkno,
-			request_endblkno - request_startblkno
-			);
-		request_startblkno = request_endblkno = InvalidBlockNumber;
-	}
-
-	elog(LOG, "LFC: complete prewarming: loaded %lu pages", (unsigned long) prewarm_ctl->prewarmed_pages);
-	prewarm_ctl->completed = GetCurrentTimestamp();
-
-	LWLockAcquire(prewarm_lock, LW_EXCLUSIVE);
-	prewarm_ctl->prewarm_active = false;
-	LWLockRelease(prewarm_lock);
-}
-
-PG_FUNCTION_INFO_V1(get_local_cache_state);
-
-Datum
-get_local_cache_state(PG_FUNCTION_ARGS)
-{
-	size_t max_entries = PG_ARGISNULL(0) ? lfc_prewarm_limit : PG_GETARG_INT32(0);
-	FileCacheState* fcs;
-
-	if (neon_use_communicator_worker)
-		fcs = communicator_new_get_lfc_state(max_entries);
-	else
-		fcs = lfc_get_state(max_entries);
-
-	if (fcs != NULL)
-		PG_RETURN_BYTEA_P((bytea*)fcs);
-	else
-		PG_RETURN_NULL();
-}
-
-PG_FUNCTION_INFO_V1(prewarm_local_cache);
-
-Datum
-prewarm_local_cache(PG_FUNCTION_ARGS)
-{
-	bytea* state = PG_GETARG_BYTEA_PP(0);
-	uint32 n_workers =  PG_GETARG_INT32(1);
-	FileCacheState* fcs;
-
-	fcs = (FileCacheState *)state;
-	validate_fcs(fcs);
-
-	if (neon_use_communicator_worker)
-		lfc_prewarm_with_async_requests(fcs);
-	else
-		lfc_prewarm_with_workers(fcs, n_workers);
-
-	PG_RETURN_NULL();
-}
-
-PG_FUNCTION_INFO_V1(get_prewarm_info);
-
-Datum
-get_prewarm_info(PG_FUNCTION_ARGS)
-{
-	Datum		values[4];
-	bool		nulls[4];
-	TupleDesc	tupdesc;
-	uint32		prewarmed_pages = 0;
-	uint32		skipped_pages = 0;
-	uint32		active_workers = 0;
-	uint32		total_pages;
-
-	if (lfc_size_limit == 0)
-		PG_RETURN_NULL();
-
-	LWLockAcquire(prewarm_lock, LW_SHARED);
-	if (!prewarm_ctl || prewarm_ctl->n_prewarm_workers == 0)
-	{
-		LWLockRelease(prewarm_lock);
-		PG_RETURN_NULL();
-	}
-
-	if (prewarm_ctl->n_prewarm_workers == -1)
-	{
-		total_pages = prewarm_ctl->total_prewarm_pages;
-		prewarmed_pages = prewarm_ctl->prewarmed_pages;
-		skipped_pages = prewarm_ctl->prewarmed_pages;
-		active_workers = 1;
-	}
-	else
-	{
-		size_t		n_workers;
-
-		n_workers = prewarm_ctl->n_prewarm_workers;
-		total_pages = prewarm_ctl->total_prewarm_pages;
-		for (size_t i = 0; i < n_workers; i++)
-		{
-			PrewarmWorkerState *ws = &prewarm_ctl->prewarm_workers[i];
-
-			prewarmed_pages += ws->prewarmed_pages;
-			skipped_pages += ws->skipped_pages;
-			active_workers += ws->completed != 0;
-		}
-	}
-	LWLockRelease(prewarm_lock);
-
-	tupdesc = CreateTemplateTupleDesc(4);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "total_pages", INT4OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prewarmed_pages", INT4OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "skipped_pages", INT4OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "active_workers", INT4OID, -1, 0);
-	tupdesc = BlessTupleDesc(tupdesc);
-
-	MemSet(nulls, 0, sizeof(nulls));
-
-	values[0] = Int32GetDatum(total_pages);
-	values[1] = Int32GetDatum(prewarmed_pages);
-	values[2] = Int32GetDatum(skipped_pages);
-	values[3] = Int32GetDatum(active_workers);
-
-	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
-}
--- a/pgxn/neon/lfc_prewarm.h
+++ b/pgxn/neon/lfc_prewarm.h
@@ -1,39 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * lfc_prewarm.h
- *	  Local File Cache prewarmer
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *-------------------------------------------------------------------------
- */
-#ifndef LFC_PREWARM_H
-#define LFC_PREWARM_H
-
-#include "storage/buf_internals.h"
-
-typedef struct FileCacheState
-{
-	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	uint32		magic;
-	uint32		n_chunks;
-	uint32		n_pages;
-	uint16		chunk_size_log;
-	BufferTag	chunks[FLEXIBLE_ARRAY_MEMBER];
-	/* followed by bitmap */
-} FileCacheState;
-
-#define FILE_CACHE_STATE_MAGIC 0xfcfcfcfc
-
-#define FILE_CACHE_STATE_BITMAP(fcs)	((uint8*)&(fcs)->chunks[(fcs)->n_chunks])
-#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks, blocks_per_chunk)	(sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * blocks_per_chunk)+7)/8)
-#define FILE_CACHE_STATE_SIZE(fcs)		(sizeof(FileCacheState) + (fcs->n_chunks)*sizeof(BufferTag) + (((fcs->n_chunks) << fcs->chunk_size_log)+7)/8)
-
-extern void pg_init_prewarm(void);
-extern void PrewarmShmemRequest(void);
-extern void PrewarmShmemInit(void);
-
-#endif							/* LFC_PREWARM_H */
-
-
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -72,7 +72,6 @@ char	   *neon_branch_id;
 char	   *neon_endpoint_id;
 int32		max_cluster_size;
 char	   *pageserver_connstring;
-char	   *pageserver_grpc_urls;
 char	   *neon_auth_token;

 int			readahead_buffer_size = 128;
@@ -82,7 +81,7 @@ int         neon_protocol_version = 3;

 static int	neon_compute_mode = 0;
 static int	max_reconnect_attempts = 60;
-int		neon_stripe_size;
+static int	stripe_size;
 static int	max_sockets;

 static int pageserver_response_log_timeout = 10000;
@@ -93,6 +92,13 @@ static int	conf_refresh_reconnect_attempt_threshold = 16;
 // Hadron: timeout for refresh errors (1 minute)
 static uint64 	kRefreshErrorTimeoutUSec = 1 * USECS_PER_MINUTE;

+typedef struct
+{
+	char		connstring[MAX_SHARDS][MAX_PAGESERVER_CONNSTRING_SIZE];
+	size_t		num_shards;
+	size_t		stripe_size;
+} ShardMap;
+
 /*
 * PagestoreShmemState is kept in shared memory. It contains the connection
 * strings for each shard.
@@ -181,8 +187,6 @@ static void pageserver_disconnect_shard(shardno_t shard_no);
 // HADRON
 shardno_t get_num_shards(void);

-static void AssignShardMap(const char *newval);
-
 static bool
 PagestoreShmemIsValid(void)
 {
@@ -196,8 +200,8 @@ PagestoreShmemIsValid(void)
 * not valid, returns false. The contents of *result are undefined in
 * that case, and must not be relied on.
 */
-bool
-parse_shard_map(const char *connstr, ShardMap *result)
+static bool
+ParseShardMap(const char *connstr, ShardMap *result)
 {
 	const char *p;
 	int			nshards = 0;
@@ -242,31 +246,24 @@ parse_shard_map(const char *connstr, ShardMap *result)
 	if (result)
 	{
 		result->num_shards = nshards;
-		result->stripe_size = neon_stripe_size;
+		result->stripe_size = stripe_size;
 	}

 	return true;
 }

-/* GUC hooks for neon.pageserver_connstring */
 static bool
 CheckPageserverConnstring(char **newval, void **extra, GucSource source)
 {
 	char	   *p = *newval;

-	return parse_shard_map(p, NULL);
+	return ParseShardMap(p, NULL);
 }

 static void
 AssignPageserverConnstring(const char *newval, void *extra)
 {
-	/*
-	 * 'neon.pageserver_connstring' is ignored if the new communicator is used.
-	 * In that case, the shard map is loaded from 'neon.pageserver_grpc_urls'
-	 * instead, and that happens in the communicator process only.
-	 */
-	if (neon_use_communicator_worker)
-		return;
+	ShardMap	shard_map;

 	/*
 	 * Only postmaster updates the copy in shared memory.
@@ -274,29 +271,11 @@ AssignPageserverConnstring(const char *newval, void *extra)
 	if (!PagestoreShmemIsValid() || IsUnderPostmaster)
 		return;

-	AssignShardMap(newval);
-}
-
-
-/* GUC hooks for neon.pageserver_connstring */
-static bool
-CheckPageserverGrpcUrls(char **newval, void **extra, GucSource source)
-{
-	char	   *p = *newval;
-
-	return parse_shard_map(p, NULL);
-}
-
-static void
-AssignShardMap(const char *newval)
-{
-	ShardMap	shard_map;
-
-	if (!parse_shard_map(newval, &shard_map))
+	if (!ParseShardMap(newval, &shard_map))
 	{
 		/*
 		 * shouldn't happen, because we already checked the value in
-		 * CheckPageserverConnstring/CheckPageserverGrpcUrls
+		 * CheckPageserverConnstring
 		 */
 		elog(ERROR, "could not parse shard map");
 	}
@@ -418,10 +397,10 @@ get_shard_number(BufferTag *tag)

 #if PG_MAJORVERSION_NUM < 16
 	hash = murmurhash32(tag->rnode.relNode);
-	hash = hash_combine(hash, murmurhash32(tag->blockNum / neon_stripe_size));
+	hash = hash_combine(hash, murmurhash32(tag->blockNum / stripe_size));
 #else
 	hash = murmurhash32(tag->relNumber);
-	hash = hash_combine(hash, murmurhash32(tag->blockNum / neon_stripe_size));
+	hash = hash_combine(hash, murmurhash32(tag->blockNum / stripe_size));
 #endif

 	return hash % n_shards;
@@ -1499,15 +1478,6 @@ pg_init_libpagestore(void)
 							   0,	/* no flags required */
 							   CheckPageserverConnstring, AssignPageserverConnstring, NULL);

-	DefineCustomStringVariable("neon.pageserver_grpc_urls",
-							   "list of gRPC URLs for the page servers",
-							   NULL,
-							   &pageserver_grpc_urls,
-							   "",
-							   PGC_SIGHUP,
-							   0,	/* no flags required */
-							   CheckPageserverGrpcUrls, NULL, NULL);
-
 	DefineCustomStringVariable("neon.timeline_id",
 							   "Neon timeline_id the server is running on",
 							   NULL,
@@ -1554,7 +1524,7 @@ pg_init_libpagestore(void)
 	DefineCustomIntVariable("neon.stripe_size",
 							"sharding stripe size",
 							NULL,
-							&neon_stripe_size,
+							&stripe_size,
 							2048, 1, INT_MAX,
 							PGC_SIGHUP,
 							GUC_UNIT_BLOCKS,
@@ -1673,7 +1643,7 @@ pg_init_libpagestore(void)
 	if (neon_auth_token)
 		neon_log(LOG, "using storage auth token from NEON_AUTH_TOKEN environment variable");

-	if (pageserver_connstring[0] || pageserver_grpc_urls[0])
+	if (pageserver_connstring[0])
 	{
 		neon_log(PageStoreTrace, "set neon_smgr hook");
 		smgr_hook = smgr_neon;
--- a/pgxn/neon/neon.c
+++ b/pgxn/neon/neon.c
@@ -21,7 +21,6 @@
 #include "replication/logicallauncher.h"
 #include "replication/slot.h"
 #include "replication/walsender.h"
-#include "storage/ipc.h"
 #include "storage/proc.h"
 #include "storage/ipc.h"
 #include "funcapi.h"
@@ -32,7 +31,6 @@
 #include "utils/guc_tables.h"

 #include "communicator.h"
-#include "communicator_new.h"
 #include "communicator_process.h"
 #include "extension_server.h"
 #include "file_cache.h"
@@ -475,16 +473,6 @@ _PG_init(void)
 	load_file("$libdir/neon_rmgr", false);
 #endif

-	DefineCustomBoolVariable(
-							"neon.use_communicator_worker",
-							"Uses the communicator worker implementation",
-							NULL,
-							&neon_use_communicator_worker,
-							true,
-							PGC_POSTMASTER,
-							0,
-							NULL, NULL, NULL);
-
 	if (lakebase_mode) {
 		prev_emit_log_hook = emit_log_hook;
 		emit_log_hook = DatabricksSqlErrorHookImpl;
@@ -524,14 +512,12 @@ _PG_init(void)
 	pg_init_libpagestore();
 	relsize_hash_init();
 	lfc_init();
-	pg_init_prewarm();
 	pg_init_walproposer();
-	pg_init_lwlsncache();
+	init_lwlsncache();

 	pg_init_communicator_process();

 	pg_init_communicator();
-
 	Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;

 	InitUnstableExtensionsSupport();
@@ -737,10 +723,7 @@ approximate_working_set_size_seconds(PG_FUNCTION_ARGS)

 	duration = PG_ARGISNULL(0) ? (time_t) -1 : PG_GETARG_INT32(0);

-	if (neon_use_communicator_worker)
-		dc = communicator_new_approximate_working_set_size_seconds(duration, false);
-	else
-		dc = lfc_approximate_working_set_size_seconds(duration, false);
+	dc = lfc_approximate_working_set_size_seconds(duration, false);
 	if (dc < 0)
 		PG_RETURN_NULL();
 	else
@@ -753,10 +736,7 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 	bool		reset = PG_GETARG_BOOL(0);
 	int32		dc;

-	if (neon_use_communicator_worker)
-		dc = communicator_new_approximate_working_set_size_seconds(-1, reset);
-	else
-		dc = lfc_approximate_working_set_size_seconds(-1, reset);
+	dc = lfc_approximate_working_set_size_seconds(-1, reset);
 	if (dc < 0)
 		PG_RETURN_NULL();
 	else
@@ -774,10 +754,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
 	InitMaterializedSRF(fcinfo, 0);

 	/* lfc_get_stats() does all the heavy lifting */
-	if (neon_use_communicator_worker)
-		entries = communicator_new_lfc_get_stats(&num_entries);
-	else
-		entries = lfc_get_stats(&num_entries);
+	entries = lfc_get_stats(&num_entries);

 	/* Convert the LfcStatsEntrys to a result set */
 	for (size_t i = 0; i < num_entries; i++)
@@ -851,13 +828,11 @@ neon_shmem_request_hook(void)
 #endif

 	LfcShmemRequest();
-	PrewarmShmemRequest();
 	NeonPerfCountersShmemRequest();
 	PagestoreShmemRequest();
 	RelsizeCacheShmemRequest();
 	WalproposerShmemRequest();
 	LwLsnCacheShmemRequest();
-	CommunicatorNewShmemRequest();
 }


@@ -875,7 +850,6 @@ neon_shmem_startup_hook(void)
 	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

 	LfcShmemInit();
-	PrewarmShmemInit();
 	NeonPerfCountersShmemInit();
 	if (lakebase_mode) {
 		DatabricksMetricsShmemInit();
@@ -884,7 +858,6 @@ neon_shmem_startup_hook(void)
 	RelsizeCacheShmemInit();
 	WalproposerShmemInit();
 	LwLsnCacheShmemInit();
-	CommunicatorNewShmemInit();

 #if PG_MAJORVERSION_NUM >= 17
 	WAIT_EVENT_NEON_LFC_MAINTENANCE = WaitEventExtensionNew("Neon/FileCache_Maintenance");
--- a/pgxn/neon/neon.h
+++ b/pgxn/neon/neon.h
@@ -85,11 +85,5 @@ extern void WalproposerShmemInit(void);
 extern void LwLsnCacheShmemInit(void);
 extern void NeonPerfCountersShmemInit(void);

-typedef struct LfcStatsEntry
-{
-	const char *metric_name;
-	bool		isnull;
-	uint64		value;
-} LfcStatsEntry;

 #endif							/* NEON_H */
--- a/pgxn/neon/neon_lwlsncache.c
+++ b/pgxn/neon/neon_lwlsncache.c
@@ -85,54 +85,12 @@ static set_lwlsn_db_hook_type prev_set_lwlsn_db_hook = NULL;
 static void neon_set_max_lwlsn(XLogRecPtr lsn);

 void
-pg_init_lwlsncache(void)
+init_lwlsncache(void)
 {
 	if (!process_shared_preload_libraries_in_progress)
 		ereport(ERROR, errcode(ERRCODE_INTERNAL_ERROR), errmsg("Loading of shared preload libraries is not in progress. Exiting"));
 	
 	lwlc_register_gucs();
-}
-
-
-void
-LwLsnCacheShmemRequest(void)
-{
-	Size		requested_size;
-
-	if (neon_use_communicator_worker)
-		return;
-
-	requested_size = sizeof(LwLsnCacheCtl);
-	requested_size += hash_estimate_size(lwlsn_cache_size, sizeof(LastWrittenLsnCacheEntry));
-
-	RequestAddinShmemSpace(requested_size);
-}
-
-void
-LwLsnCacheShmemInit(void)
-{
-	static HASHCTL info;
-	bool		found;
-
-	if (neon_use_communicator_worker)
-		return;
-
-	Assert(lwlsn_cache_size > 0);
-
-	info.keysize = sizeof(BufferTag);
-	info.entrysize = sizeof(LastWrittenLsnCacheEntry);
-	lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
-										lwlsn_cache_size, lwlsn_cache_size,
-										&info,
-										HASH_ELEM | HASH_BLOBS);
-	LwLsnCache = ShmemInitStruct("neon/LwLsnCacheCtl", sizeof(LwLsnCacheCtl), &found);
-	// Now set the size in the struct
-	LwLsnCache->lastWrittenLsnCacheSize = lwlsn_cache_size;
-	if (found) {
-		return;
-	}
-	dlist_init(&LwLsnCache->lastWrittenLsnLRU);
-    LwLsnCache->maxLastWrittenLsn = GetRedoRecPtr();

 	prev_set_lwlsn_block_range_hook = set_lwlsn_block_range_hook;
 	set_lwlsn_block_range_hook = neon_set_lwlsn_block_range;
@@ -148,6 +106,41 @@ LwLsnCacheShmemInit(void)
 	set_lwlsn_db_hook = neon_set_lwlsn_db;
 }

+
+void
+LwLsnCacheShmemRequest(void)
+{
+	Size requested_size = sizeof(LwLsnCacheCtl);
+
+	requested_size += hash_estimate_size(lwlsn_cache_size, sizeof(LastWrittenLsnCacheEntry));
+
+	RequestAddinShmemSpace(requested_size);
+}
+
+void
+LwLsnCacheShmemInit(void)
+{
+	static HASHCTL info;
+	bool found;
+	if (lwlsn_cache_size > 0)
+	{
+		info.keysize = sizeof(BufferTag);
+		info.entrysize = sizeof(LastWrittenLsnCacheEntry);
+		lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
+			lwlsn_cache_size, lwlsn_cache_size,
+										&info,
+										HASH_ELEM | HASH_BLOBS);
+		LwLsnCache = ShmemInitStruct("neon/LwLsnCacheCtl", sizeof(LwLsnCacheCtl), &found);
+		// Now set the size in the struct
+		LwLsnCache->lastWrittenLsnCacheSize = lwlsn_cache_size;
+		if (found) {
+			return;
+		}
+	}
+	dlist_init(&LwLsnCache->lastWrittenLsnLRU);
+    LwLsnCache->maxLastWrittenLsn = GetRedoRecPtr();
+}
+
 /*
 * neon_get_lwlsn -- Returns maximal LSN of written page.
 * It returns an upper bound for the last written LSN of a given page,
@@ -162,7 +155,6 @@ neon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)
 	XLogRecPtr lsn;
 	LastWrittenLsnCacheEntry* entry;

-	Assert(!neon_use_communicator_worker);
 	Assert(LwLsnCache->lastWrittenLsnCacheSize != 0);

 	LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
@@ -215,10 +207,7 @@ neon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)
 	return lsn;
 }

-static void
-neon_set_max_lwlsn(XLogRecPtr lsn)
-{
-	Assert(!neon_use_communicator_worker);
+static void neon_set_max_lwlsn(XLogRecPtr lsn) {
 	LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
 	LwLsnCache->maxLastWrittenLsn = lsn;
 	LWLockRelease(LastWrittenLsnLock);
@@ -239,7 +228,6 @@ neon_get_lwlsn_v(NRelFileInfo relfilenode, ForkNumber forknum,
 	LastWrittenLsnCacheEntry* entry;
 	XLogRecPtr lsn;

-	Assert(!neon_use_communicator_worker);
 	Assert(LwLsnCache->lastWrittenLsnCacheSize != 0);
 	Assert(nblocks > 0);
 	Assert(PointerIsValid(lsns));
@@ -388,8 +376,6 @@ SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,
 XLogRecPtr
 neon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks)
 {
-	Assert(!neon_use_communicator_worker);
-
 	if (lsn == InvalidXLogRecPtr || n_blocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0)
 		return lsn;

@@ -426,8 +412,6 @@ neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
 	Oid dbOid = NInfoGetDbOid(relfilenode);
 	Oid relNumber = NInfoGetRelNumber(relfilenode);

-	Assert(!neon_use_communicator_worker);
-
 	if (lsns == NULL || nblocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0 ||
 		NInfoGetRelNumber(relfilenode) == InvalidOid)
 		return InvalidXLogRecPtr;
@@ -485,7 +469,6 @@ neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
 XLogRecPtr
 neon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)
 {
-	Assert(!neon_use_communicator_worker);
 	return neon_set_lwlsn_block_range(lsn, rlocator, forknum, blkno, 1);
 }

@@ -495,7 +478,6 @@ neon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum,
 XLogRecPtr
 neon_set_lwlsn_relation(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum)
 {
-	Assert(!neon_use_communicator_worker);
 	return neon_set_lwlsn_block(lsn, rlocator, forknum, REL_METADATA_PSEUDO_BLOCKNO);
 }

@@ -506,8 +488,6 @@ XLogRecPtr
 neon_set_lwlsn_db(XLogRecPtr lsn)
 {
 	NRelFileInfo dummyNode = {InvalidOid, InvalidOid, InvalidOid};
-
-	Assert(!neon_use_communicator_worker);
 	return neon_set_lwlsn_block(lsn, dummyNode, MAIN_FORKNUM, 0);
 }

--- a/pgxn/neon/neon_lwlsncache.h
+++ b/pgxn/neon/neon_lwlsncache.h
@@ -3,7 +3,7 @@

 #include "neon_pgversioncompat.h"

-extern void pg_init_lwlsncache(void);
+void init_lwlsncache(void);

 /* Hooks */
 XLogRecPtr neon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno);
@@ -14,4 +14,4 @@ XLogRecPtr neon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumbe
 XLogRecPtr neon_set_lwlsn_relation(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum);
 XLogRecPtr neon_set_lwlsn_db(XLogRecPtr lsn);

-#endif /* NEON_LWLSNCACHE_H */
+#endif /* NEON_LWLSNCACHE_H */
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -237,23 +237,13 @@ extern void prefetch_on_ps_disconnect(void);
 extern page_server_api *page_server;

 extern char *pageserver_connstring;
-extern char *pageserver_grpc_urls;
 extern int	flush_every_n_requests;
 extern int	readahead_buffer_size;
 extern char *neon_timeline;
 extern char *neon_tenant;
 extern int32 max_cluster_size;
 extern int  neon_protocol_version;
-extern int	neon_stripe_size;

-typedef struct
-{
-	char		connstring[MAX_SHARDS][MAX_PAGESERVER_CONNSTRING_SIZE];
-	size_t		num_shards;
-	size_t		stripe_size;
-} ShardMap;
-
-extern bool parse_shard_map(const char *connstr, ShardMap *result);
 extern shardno_t get_shard_number(BufferTag* tag);

 extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
@@ -300,7 +290,6 @@ extern int64 neon_dbsize(Oid dbNode);
 extern void neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum,
 								  BlockNumber blkno, neon_request_lsns *output,
 								  BlockNumber nblocks);
-extern XLogRecPtr neon_get_write_lsn(void);

 /* utils for neon relsize cache */
 extern void relsize_hash_init(void);
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -62,7 +62,6 @@

 #include "bitmap.h"
 #include "communicator.h"
-#include "communicator_new.h"
 #include "file_cache.h"
 #include "neon.h"
 #include "neon_lwlsncache.h"
@@ -84,7 +83,7 @@ static char *hexdump_page(char *page);
 		NInfoGetRelNumber(InfoFromSMgrRel(reln)) >= FirstNormalObjectId \
 )

-const int	SmgrTrace = DEBUG1;
+const int	SmgrTrace = DEBUG5;

 /* unlogged relation build states */
 typedef enum
@@ -302,7 +301,7 @@ neon_wallog_pagev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		 */
 		lsns[batch_size++] = lsn;

-		if (batch_size >= BLOCK_BATCH_SIZE && !neon_use_communicator_worker)
+		if (batch_size >= BLOCK_BATCH_SIZE)
 		{
 			neon_set_lwlsn_block_v(lsns, InfoFromSMgrRel(reln), forknum,
 									   batch_blockno,
@@ -312,7 +311,7 @@ neon_wallog_pagev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		}
 	}

-	if (batch_size != 0 && !neon_use_communicator_worker)
+	if (batch_size != 0)
 	{
 		neon_set_lwlsn_block_v(lsns, InfoFromSMgrRel(reln), forknum,
 								   batch_blockno,
@@ -437,17 +436,11 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
 						forknum, LSN_FORMAT_ARGS(lsn))));
 	}

-	if (!neon_use_communicator_worker)
-	{
-		/*
-		 * Remember the LSN on this page. When we read the page again, we must
-		 * read the same or newer version of it.
-		 *
-		 * (With the new communicator, the caller will make a write-request
-		 * for this page, which updates the last-written LSN too)
-		 */
-		neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forknum, blocknum);
-	}
+	/*
+	 * Remember the LSN on this page. When we read the page again, we must
+	 * read the same or newer version of it.
+	 */
+	neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forknum, blocknum);
 }

 /*
@@ -504,60 +497,6 @@ nm_adjust_lsn(XLogRecPtr lsn)
 	return lsn;
 }

-/*
- * Get a LSN to use to stamp an operation like relation create or truncate.
- * On operations on individual pages we use the LSN of the page, but when
- * e.g. smgrcreate() is called, we have to do something else.
- */
-XLogRecPtr
-neon_get_write_lsn(void)
-{
-	XLogRecPtr	lsn;
-
-	if (RecoveryInProgress())
-	{
-		/*
-		 * FIXME: v14 doesn't have GetCurrentReplayRecPtr(). Options:
-		 * - add it in our fork
-		 * - store a magic value that means that you must use
-		 *   current latest possible LSN at the time that the request
-		 *   on this thing is made again (or some other recent enough
-		 *   lsn).
-		 */
-#if PG_VERSION_NUM >= 150000
-		lsn = GetCurrentReplayRecPtr(NULL);
-#else
-		lsn = GetXLogReplayRecPtr(NULL); /* FIXME: this is wrong, see above */
-#endif
-	}
-	else
-		lsn = GetXLogInsertRecPtr();
-
-	/*
-	 * If the insert LSN points to just after page header, round it down to
-	 * the beginning of the page, because the page header might not have been
-	 * inserted to the WAL yet, and if we tried to flush it, the WAL flushing
-	 * code gets upset.
-	 */
-	{
-		int			segoff;
-
-		segoff = XLogSegmentOffset(lsn, wal_segment_size);
-		if (segoff == SizeOfXLogLongPHD)
-		{
-			lsn = lsn - segoff;
-		}
-		else
-		{
-			int			offset = lsn % XLOG_BLCKSZ;
-
-			if (offset == SizeOfXLogShortPHD)
-				lsn = lsn - offset;
-		}
-	}
-
-	return lsn;
-}

 /*
 * Return LSN for requesting pages and number of blocks from page server
@@ -570,7 +509,6 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 {
 	XLogRecPtr	last_written_lsns[PG_IOV_MAX];

-	Assert(!neon_use_communicator_worker);
 	Assert(nblocks <= PG_IOV_MAX);

 	neon_get_lwlsn_v(rinfo, forknum, blkno, (int) nblocks, last_written_lsns);
@@ -802,6 +740,11 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

+	if (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))
+	{
+		return true;
+	}
+
 	/*
 	 * \d+ on a view calls smgrexists with 0/0/0 relfilenode. The page server
 	 * will error out if you check that, because the whole dbdir for
@@ -825,20 +768,10 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
 		return false;
 	}

-	if (neon_use_communicator_worker)
-		return communicator_new_rel_exists(InfoFromSMgrRel(reln), forkNum);
-	else
-	{
-		if (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))
-		{
-			return true;
-		}
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum,
+						  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);

-		neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum,
-							  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);
-
-		return communicator_exists(InfoFromSMgrRel(reln), forkNum, &request_lsns);
-	}
+	return communicator_exists(InfoFromSMgrRel(reln), forkNum, &request_lsns);
 }

 /*
@@ -896,53 +829,16 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 	 * relation. Currently, we don't call SetLastWrittenLSN() when a new
 	 * relation created, so if we didn't remember the size in the relsize
 	 * cache, we might call smgrnblocks() on the newly-created relation before
-	 * the creation WAL record has been received by the page server.
-	 *
-	 * XXX: with the new communicator, similar considerations apply. However,
-	 * during replay, neon_get_write_lsn() returns the (end-)LSN of the record
-	 * that's being replayed, so we should not have the correctness issue
-	 * mentioned in previous paragraph.
+	 * the creation WAL record hass been received by the page server.
 	 */
-	if (neon_use_communicator_worker)
+	if (isRedo)
 	{
-		XLogRecPtr	lsn = neon_get_write_lsn();
-
-		if (isRedo)
-		{
-			/*
-			 * TODO: the protocol can check for existence and get the relsize
-			 * in one roundtrip. Add a similar call to the
-			 * backend<->communicator API. (The size is cached on the
-			 * rel_exists call, so this does only one roundtrip to the
-			 * pageserver, but two function calls and two cache lookups.)
-			 */
-			if (!communicator_new_rel_exists(InfoFromSMgrRel(reln), forkNum))
-			{
-				communicator_new_rel_create(InfoFromSMgrRel(reln), forkNum, lsn);
-				reln->smgr_cached_nblocks[forkNum] = 0;
-			}
-			else
-			{
-				BlockNumber nblocks;
-
-				nblocks = communicator_new_rel_nblocks(InfoFromSMgrRel(reln), forkNum);
-				reln->smgr_cached_nblocks[forkNum] = nblocks;
-			}
-		}
-		else
-			communicator_new_rel_create(InfoFromSMgrRel(reln), forkNum, lsn);
+		update_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
+		get_cached_relsize(InfoFromSMgrRel(reln), forkNum,
+						   &reln->smgr_cached_nblocks[forkNum]);
 	}
 	else
-	{
-		if (isRedo)
-		{
-			update_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
-			get_cached_relsize(InfoFromSMgrRel(reln), forkNum,
-							   &reln->smgr_cached_nblocks[forkNum]);
-		}
-		else
-			set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
-	}
+		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);

 	if (debug_compare_local)
 	{
@@ -978,17 +874,9 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
 	 * unlink, it won't do any harm if the file doesn't exist.
 	 */
 	mdunlink(rinfo, forkNum, isRedo);
-
 	if (!NRelFileInfoBackendIsTemp(rinfo))
 	{
-		if (neon_use_communicator_worker)
-		{
-			XLogRecPtr	lsn = neon_get_write_lsn();
-
-			communicator_new_rel_unlink(InfoFromNInfoB(rinfo), forkNum, lsn);
-		}
-		else
-			forget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);
+		forget_cached_relsize(InfoFromNInfoB(rinfo), forkNum);
 	}
 }

@@ -1011,7 +899,6 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 #endif
 {
 	XLogRecPtr	lsn;
-	bool		lsn_was_zero;
 	BlockNumber n_blocks = 0;

 	switch (reln->smgr_relpersistence)
@@ -1069,6 +956,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 		neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);

 	neon_wallog_page(reln, forkNum, blkno, buffer, false);
+	set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);

 	lsn = PageGetLSN((Page) buffer);
 	neon_log(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
@@ -1076,6 +964,14 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 		 forkNum, blkno,
 		 (uint32) (lsn >> 32), (uint32) lsn);

+	lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
+
+	if (debug_compare_local)
+	{
+		if (IS_LOCAL_REL(reln))
+			mdextend(reln, forkNum, blkno, buffer, skipFsync);
+	}
+
 	/*
 	 * smgr_extend is often called with an all-zeroes page, so
 	 * lsn==InvalidXLogRecPtr. An smgr_write() call will come for the buffer
@@ -1083,51 +979,20 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 	 * it is eventually evicted from the buffer cache. But we need a valid LSN
 	 * to the relation metadata update now.
 	 */
-	lsn_was_zero = (lsn == InvalidXLogRecPtr);
-	if (lsn_was_zero)
+	if (lsn == InvalidXLogRecPtr)
+	{
 		lsn = GetXLogInsertRecPtr();
-
-	if (neon_use_communicator_worker)
-	{
-		communicator_new_rel_extend(InfoFromSMgrRel(reln), forkNum, blkno, (const void *) buffer, lsn);
-
-		if (debug_compare_local)
-		{
-			if (IS_LOCAL_REL(reln))
-				mdextend(reln, forkNum, blkno, buffer, skipFsync);
-		}
-	}
-	else
-	{
-		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
-		lfc_write(InfoFromSMgrRel(reln), forkNum, blkno, buffer);
-
-		if (debug_compare_local)
-		{
-			if (IS_LOCAL_REL(reln))
-				mdextend(reln, forkNum, blkno, buffer, skipFsync);
-		}
-
-		/*
-		 * smgr_extend is often called with an all-zeroes page, so
-		 * lsn==InvalidXLogRecPtr. An smgr_write() call will come for the buffer
-		 * later, after it has been initialized with the real page contents, and
-		 * it is eventually evicted from the buffer cache. But we need a valid LSN
-		 * to the relation metadata update now.
-		 */
-		if (lsn_was_zero)
-			neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
-		neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
+		neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
 	}
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
 }

 #if PG_MAJORVERSION_NUM >= 16
 static void
-neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber start_block,
+neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 				int nblocks, bool skipFsync)
 {
 	const PGIOAlignedBlock buffer = {0};
-	BlockNumber blocknum = start_block;
 	int			remblocks = nblocks;
 	XLogRecPtr	lsn = 0;

@@ -1210,14 +1075,11 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber start_block,

 		lsn = XLogInsert(RM_XLOG_ID, XLOG_FPI);

-		if (!neon_use_communicator_worker)
+		for (int i = 0; i < count; i++)
 		{
-			for (int i = 0; i < count; i++)
-			{
-				lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
-				neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum,
-									 blocknum + i);
-			}
+			lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
+			neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum,
+									  blocknum + i);
 		}

 		blocknum += count;
@@ -1226,15 +1088,8 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber start_block,

 	Assert(lsn != 0);

-	if (neon_use_communicator_worker)
-	{
-		communicator_new_rel_zeroextend(InfoFromSMgrRel(reln), forkNum, start_block, nblocks, lsn);
-	}
-	else
-	{
-		neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
-		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
-	}
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
+	set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
 }
 #endif

@@ -1294,12 +1149,6 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_use_communicator_worker)
-	{
-		communicator_new_prefetch_register_bufferv(InfoFromSMgrRel(reln), forknum, blocknum, nblocks);
-		return false;
-	}
-
 	tag.spcOid = reln->smgr_rlocator.locator.spcOid;
 	tag.dbOid = reln->smgr_rlocator.locator.dbOid;
 	tag.relNumber = reln->smgr_rlocator.locator.relNumber;
@@ -1326,8 +1175,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		blocknum += iterblocks;
 	}

-	if (!neon_use_communicator_worker)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 	return false;
 }
@@ -1340,6 +1188,8 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 static bool
 neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
+	BufferTag	tag;
+
 	switch (reln->smgr_relpersistence)
 	{
 		case 0:					/* probably shouldn't happen, but ignore it */
@@ -1354,25 +1204,17 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_use_communicator_worker)
-	{
-		communicator_new_prefetch_register_bufferv(InfoFromSMgrRel(reln), forknum, blocknum, 1);
-	}
-	else
-	{
-		BufferTag	tag;
+	if (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))
+		return false;

-		if (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))
-			return false;
+	tag.forkNum = forknum;
+	tag.blockNum = blocknum;

-		tag.forkNum = forknum;
-		tag.blockNum = blocknum;
+	CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));

-		CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
-		communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
+	communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);

-		communicator_prefetch_pump_state();
-	}
+	communicator_prefetch_pump_state();

 	return false;
 }
@@ -1416,8 +1258,7 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
 	 */
 	neon_log(SmgrTrace, "writeback noop");

-	if (!neon_use_communicator_worker)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 	if (debug_compare_local)
 	{
@@ -1434,14 +1275,7 @@ void
 neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				 neon_request_lsns request_lsns, void *buffer)
 {
-	if (neon_use_communicator_worker)
-	{
-		// FIXME: request_lsns is ignored. That affects the neon_test_utils callers.
-		// Add the capability to specify the LSNs explicitly, for the sake of neon_test_utils ?
-		communicator_new_read_at_lsn_uncached(rinfo, forkNum, blkno, buffer, request_lsns.request_lsn, request_lsns.not_modified_since);
-	}
-	else
-		communicator_read_at_lsnv(rinfo, forkNum, blkno, &request_lsns, &buffer, 1, NULL);
+	communicator_read_at_lsnv(rinfo, forkNum, blkno, &request_lsns, &buffer, 1, NULL);
 }

 static void
@@ -1567,55 +1401,47 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_use_communicator_worker)
+	/* Try to read PS results if they are available */
+	communicator_prefetch_pump_state();
+
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1);
+
+	present = 0;
+	bufferp = buffer;
+	if (communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, &bufferp, &present))
 	{
-		communicator_new_readv(InfoFromSMgrRel(reln), forkNum, blkno,
-							   (void *) &buffer, 1);
+		/* Prefetch hit */
+		if (debug_compare_local >= DEBUG_COMPARE_LOCAL_PREFETCH)
+		{
+			compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
+		}
+		if (debug_compare_local <= DEBUG_COMPARE_LOCAL_PREFETCH)
+		{
+			return;
+		}
 	}
-	else
+
+	/* Try to read from local file cache */
+	if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
 	{
-		/* Try to read PS results if they are available */
-		communicator_prefetch_pump_state();
-
-		neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1);
-
-		present = 0;
-		bufferp = buffer;
-		if (communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, &bufferp, &present))
+		MyNeonCounters->file_cache_hits_total++;
+		if (debug_compare_local >= DEBUG_COMPARE_LOCAL_LFC)
 		{
-			/* Prefetch hit */
-			if (debug_compare_local >= DEBUG_COMPARE_LOCAL_PREFETCH)
-			{
-				compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
-			}
-			if (debug_compare_local <= DEBUG_COMPARE_LOCAL_PREFETCH)
-			{
-				return;
-			}
+			compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
 		}
-
-		/* Try to read from local file cache */
-		if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
+		if (debug_compare_local <= DEBUG_COMPARE_LOCAL_LFC)
 		{
-			MyNeonCounters->file_cache_hits_total++;
-			if (debug_compare_local >= DEBUG_COMPARE_LOCAL_LFC)
-			{
-				compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
-			}
-			if (debug_compare_local <= DEBUG_COMPARE_LOCAL_LFC)
-			{
-				return;
-			}
+			return;
 		}
-
-		neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
-
-		/*
-		 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
-		 */
-		communicator_prefetch_pump_state();
 	}

+	neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
+
+	/*
+	 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
+	 */
+	communicator_prefetch_pump_state();
+
 	if (debug_compare_local)
 	{
 		compare_with_local(reln, forkNum, blkno, buffer, request_lsns.request_lsn);
@@ -1678,67 +1504,59 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				 nblocks, PG_IOV_MAX);

 	/* Try to read PS results if they are available */
-	if (!neon_use_communicator_worker)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();
+
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum,
+						  request_lsns, nblocks);

 	memset(read_pages, 0, sizeof(read_pages));

-	if (neon_use_communicator_worker)
+	prefetch_result = communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forknum,
+													blocknum, request_lsns, nblocks,
+													buffers, read_pages);
+
+	if (debug_compare_local >= DEBUG_COMPARE_LOCAL_PREFETCH)
 	{
-		communicator_new_readv(InfoFromSMgrRel(reln), forknum, blocknum,
-							   buffers, nblocks);
+		compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
 	}
-	else
+	if (debug_compare_local <= DEBUG_COMPARE_LOCAL_PREFETCH && prefetch_result == nblocks)
 	{
-		neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum,
-							  request_lsns, nblocks);
-
-		prefetch_result = communicator_prefetch_lookupv(InfoFromSMgrRel(reln), forknum,
-														blocknum, request_lsns, nblocks,
-														buffers, read_pages);
-
-		if (debug_compare_local >= DEBUG_COMPARE_LOCAL_PREFETCH)
-		{
-			compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
-		}
-		if (debug_compare_local <= DEBUG_COMPARE_LOCAL_PREFETCH && prefetch_result == nblocks)
-		{
-			return;
-		}
-		if (debug_compare_local > DEBUG_COMPARE_LOCAL_PREFETCH)
-		{
-			memset(read_pages, 0, sizeof(read_pages));
-		}
-
-		/* Try to read from local file cache */
-		lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
-									  nblocks, read_pages);
-
-		if (lfc_result > 0)
-			MyNeonCounters->file_cache_hits_total += lfc_result;
-
-		if (debug_compare_local >= DEBUG_COMPARE_LOCAL_LFC)
-		{
-			compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
-		}
-		if (debug_compare_local <= DEBUG_COMPARE_LOCAL_LFC && prefetch_result + lfc_result == nblocks)
-		{
-			/* Read all blocks from LFC, so we're done */
-			return;
-		}
-		if (debug_compare_local > DEBUG_COMPARE_LOCAL_LFC)
-		{
-			memset(read_pages, 0, sizeof(read_pages));
-		}
-
-		communicator_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns,
-								  buffers, nblocks, read_pages);
-
-		/*
-		 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
-		 */
-		communicator_prefetch_pump_state();
+		return;
 	}
+	if (debug_compare_local > DEBUG_COMPARE_LOCAL_PREFETCH)
+	{
+		memset(read_pages, 0, sizeof(read_pages));
+	}
+
+
+	/* Try to read from local file cache */
+	lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
+								  nblocks, read_pages);
+
+	if (lfc_result > 0)
+		MyNeonCounters->file_cache_hits_total += lfc_result;
+
+	if (debug_compare_local >= DEBUG_COMPARE_LOCAL_LFC)
+	{
+		compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
+	}
+	if (debug_compare_local <= DEBUG_COMPARE_LOCAL_LFC && prefetch_result + lfc_result == nblocks)
+	{
+		/* Read all blocks from LFC, so we're done */
+		return;
+	}
+	if (debug_compare_local > DEBUG_COMPARE_LOCAL_LFC)
+	{
+		memset(read_pages, 0, sizeof(read_pages));
+	}
+
+	communicator_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns,
+							  buffers, nblocks, read_pages);
+
+	/*
+	 * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes.
+	 */
+	communicator_prefetch_pump_state();

 	if (debug_compare_local)
 	{
@@ -1839,16 +1657,9 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
 		 forknum, blocknum,
 		 (uint32) (lsn >> 32), (uint32) lsn);

-	if (neon_use_communicator_worker)
-	{
-		communicator_new_write_page(InfoFromSMgrRel(reln), forknum, blocknum, buffer, lsn);
-	}
-	else
-	{
-		lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);
+	lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer);

-		communicator_prefetch_pump_state();
-	}
+	communicator_prefetch_pump_state();

 	if (debug_compare_local)
 	{
@@ -1909,21 +1720,9 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,

 	neon_wallog_pagev(reln, forknum, blkno, nblocks, (const char **) buffers, false);

-	if (neon_use_communicator_worker)
-	{
-		for (int i = 0; i < nblocks; i++)
-		{
-			XLogRecPtr lsn = PageGetLSN((Page) buffers[i]);
+	lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);

-			communicator_new_write_page(InfoFromSMgrRel(reln), forknum, blkno + i, buffers[i], lsn);
-		}
-	}
-	else
-	{
-		lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks);
-
-		communicator_prefetch_pump_state();
-	}
+	communicator_prefetch_pump_state();

 	if (debug_compare_local)
 	{
@@ -1964,26 +1763,19 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_use_communicator_worker)
+	if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
 	{
-		n_blocks = communicator_new_rel_nblocks(InfoFromSMgrRel(reln), forknum);
+		neon_log(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
+			 RelFileInfoFmt(InfoFromSMgrRel(reln)),
+			 forknum, n_blocks);
+		return n_blocks;
 	}
-	else
-	{
-		if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
-		{
-			neon_log(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
-					 RelFileInfoFmt(InfoFromSMgrRel(reln)),
-					 forknum, n_blocks);
-			return n_blocks;
-		}

-		neon_get_request_lsns(InfoFromSMgrRel(reln), forknum,
-							  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);
+	neon_get_request_lsns(InfoFromSMgrRel(reln), forknum,
+						  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);

-		n_blocks = communicator_nblocks(InfoFromSMgrRel(reln), forknum, &request_lsns);
-		update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
-	}
+	n_blocks = communicator_nblocks(InfoFromSMgrRel(reln), forknum, &request_lsns);
+	update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);

 	neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
 			 RelFileInfoFmt(InfoFromSMgrRel(reln)),
@@ -2004,17 +1796,10 @@ neon_dbsize(Oid dbNode)
 	neon_request_lsns request_lsns;
 	NRelFileInfo dummy_node = {0};

-	if (neon_use_communicator_worker)
-	{
-		db_size = communicator_new_dbsize(dbNode);
-	}
-	else
-	{
-		neon_get_request_lsns(dummy_node, MAIN_FORKNUM,
-							  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);
+	neon_get_request_lsns(dummy_node, MAIN_FORKNUM,
+						  REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1);

-		db_size = communicator_dbsize(dbNode, &request_lsns);
-	}
+	db_size = communicator_dbsize(dbNode, &request_lsns);

 	neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
 			 dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);
@@ -2028,6 +1813,8 @@ neon_dbsize(Oid dbNode)
 static void
 neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
 {
+	XLogRecPtr	lsn;
+
 	switch (reln->smgr_relpersistence)
 	{
 		case 0:
@@ -2051,45 +1838,34 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, Blo
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}

-	if (neon_use_communicator_worker)
-	{
-		XLogRecPtr	lsn = neon_get_write_lsn();
+	set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);

-		communicator_new_rel_truncate(InfoFromSMgrRel(reln), forknum, nblocks, lsn);
-	}
-	else
-	{
-		XLogRecPtr	lsn;
+	/*
+	 * Truncating a relation drops all its buffers from the buffer cache
+	 * without calling smgrwrite() on them. But we must account for that in
+	 * our tracking of last-written-LSN all the same: any future smgrnblocks()
+	 * request must return the new size after the truncation. We don't know
+	 * what the LSN of the truncation record was, so be conservative and use
+	 * the most recently inserted WAL record's LSN.
+	 */
+	lsn = GetXLogInsertRecPtr();
+	lsn = nm_adjust_lsn(lsn);

-		set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
+	/*
+	 * Flush it, too. We don't actually care about it here, but let's uphold
+	 * the invariant that last-written LSN <= flush LSN.
+	 */
+	XLogFlush(lsn);

-		/*
-		 * Truncating a relation drops all its buffers from the buffer cache
-		 * without calling smgrwrite() on them. But we must account for that in
-		 * our tracking of last-written-LSN all the same: any future smgrnblocks()
-		 * request must return the new size after the truncation. We don't know
-		 * what the LSN of the truncation record was, so be conservative and use
-		 * the most recently inserted WAL record's LSN.
-		 */
-		lsn = GetXLogInsertRecPtr();
-		lsn = nm_adjust_lsn(lsn);
-
-		/*
-		 * Flush it, too. We don't actually care about it here, but let's uphold
-		 * the invariant that last-written LSN <= flush LSN.
-		 */
-		XLogFlush(lsn);
-
-		/*
-		 * Truncate may affect several chunks of relations. So we should either
-		 * update last written LSN for all of them, or update LSN for "dummy"
-		 * metadata block. Second approach seems more efficient. If the relation
-		 * is extended again later, the extension will update the last-written LSN
-		 * for the extended pages, so there's no harm in leaving behind obsolete
-		 * entries for the truncated chunks.
-		 */
-		neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forknum);
-	}
+	/*
+	 * Truncate may affect several chunks of relations. So we should either
+	 * update last written LSN for all of them, or update LSN for "dummy"
+	 * metadata block. Second approach seems more efficient. If the relation
+	 * is extended again later, the extension will update the last-written LSN
+	 * for the extended pages, so there's no harm in leaving behind obsolete
+	 * entries for the truncated chunks.
+	 */
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forknum);

 	if (debug_compare_local)
 	{
@@ -2132,8 +1908,7 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)

 	neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop");

-	if (!neon_use_communicator_worker)
-		communicator_prefetch_pump_state();
+	communicator_prefetch_pump_state();

 	if (debug_compare_local)
 	{
@@ -2319,15 +2094,12 @@ neon_end_unlogged_build(SMgrRelation reln)
 		nblocks = mdnblocks(reln, MAIN_FORKNUM);
 		recptr = GetXLogInsertRecPtr();

-		if (!neon_use_communicator_worker)
-		{
-			neon_set_lwlsn_block_range(recptr,
-									   InfoFromNInfoB(rinfob),
-									   MAIN_FORKNUM, 0, nblocks);
-			neon_set_lwlsn_relation(recptr,
-									InfoFromNInfoB(rinfob),
-									MAIN_FORKNUM);
-		}
+		neon_set_lwlsn_block_range(recptr,
+								   InfoFromNInfoB(rinfob),
+								   MAIN_FORKNUM, 0, nblocks);
+		neon_set_lwlsn_relation(recptr,
+								InfoFromNInfoB(rinfob),
+								MAIN_FORKNUM);

 		/* Remove local copy */
 		for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
@@ -2336,15 +2108,8 @@ neon_end_unlogged_build(SMgrRelation reln)
 				 RelFileInfoFmt(InfoFromNInfoB(rinfob)),
 				 forknum);

-			if (neon_use_communicator_worker)
-			{
-				communicator_new_update_cached_rel_size(InfoFromSMgrRel(reln), forknum, nblocks, recptr);
-			}
-			else
-			{
-				forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
-				lfc_invalidate(InfoFromNInfoB(rinfob), forknum, nblocks);
-			}
+			forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
+			lfc_invalidate(InfoFromNInfoB(rinfob), forknum, nblocks);

 			mdclose(reln, forknum);
 			if (!debug_compare_local)
@@ -2412,10 +2177,7 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
 	request_lsns.not_modified_since = not_modified_since;
 	request_lsns.effective_request_lsn = request_lsn;

-	if (neon_use_communicator_worker)
-		n_blocks = communicator_new_read_slru_segment(kind, (uint32_t)segno, &request_lsns, path);
-	else
-		n_blocks = communicator_read_slru_segment(kind, segno, &request_lsns, buffer);
+	n_blocks = communicator_read_slru_segment(kind, segno, &request_lsns, buffer);

 	return n_blocks;
 }
@@ -2452,8 +2214,7 @@ AtEOXact_neon(XactEvent event, void *arg)
 			}
 			break;
 	}
-	if (!neon_use_communicator_worker)
-		communicator_reconfigure_timeout_if_needed();
+	communicator_reconfigure_timeout_if_needed();
 }

 static const struct f_smgr neon_smgr =
@@ -2511,10 +2272,7 @@ smgr_init_neon(void)

 	smgr_init_standard();
 	neon_init();
-	if (neon_use_communicator_worker)
-		communicator_new_init();
-	else
-		communicator_init();
+	communicator_init();
 }


@@ -2526,16 +2284,6 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	/* This is only used in WAL replay */
 	Assert(RecoveryInProgress());

-	if (neon_use_communicator_worker)
-	{
-		relsize = communicator_new_rel_nblocks(rinfo, forknum);
-
-		if (blkno >= relsize)
-			communicator_new_rel_zeroextend(rinfo, forknum, relsize, (blkno - relsize) + 1, end_recptr);
-
-		return;
-	}
-
 	/* Extend the relation if we know its size */
 	if (get_cached_relsize(rinfo, forknum, &relsize))
 	{
@@ -2690,27 +2438,18 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
 	}

 	/*
-	 * We don't have the buffer in shared buffers. Check if it's in the LFC.
-	 * If it's not there either, update the lwLsn past this record.
+	 * we don't have the buffer in memory, update lwLsn past this record, also
+	 * evict page from file cache
 	 */
 	if (no_redo_needed)
 	{
-		bool		in_cache;
-
+		neon_set_lwlsn_block(end_recptr, rinfo, forknum, blkno);
 		/*
-		 * Redo changes if the page is present in the LFC.
+		 * Redo changes if page exists in LFC.
+		 * We should perform this check after assigning LwLSN to prevent
+		 * prefetching of some older version of the page by some other backend.
 		 */
-		if (neon_use_communicator_worker)
-		{
-			in_cache = communicator_new_update_lwlsn_for_block_if_not_cached(rinfo, forknum, blkno, end_recptr);
-		}
-		else
-		{
-			in_cache = lfc_cache_contains(rinfo, forknum, blkno);
-			neon_set_lwlsn_block(end_recptr, rinfo, forknum, blkno);
-		}
-
-		no_redo_needed = !in_cache;
+		no_redo_needed = !lfc_cache_contains(rinfo, forknum, blkno);
 	}

 	LWLockRelease(partitionLock);
--- a/pgxn/neon/relsize_cache.c
+++ b/pgxn/neon/relsize_cache.c
@@ -87,8 +87,6 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
 {
 	bool		found = false;

-	Assert(!neon_use_communicator_worker);
-
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
@@ -120,8 +118,6 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
 void
 set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
 {
-	Assert(!neon_use_communicator_worker);
-
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
@@ -170,8 +166,6 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
 void
 update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
 {
-	Assert(!neon_use_communicator_worker);
-
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
@@ -206,8 +200,6 @@ update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
 void
 forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
 {
-	Assert(!neon_use_communicator_worker);
-
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -458,7 +458,7 @@ pub(crate) enum LocalProxyConnError {
 impl ReportableError for HttpConnError {
    fn get_error_kind(&self) -> ErrorKind {
        match self {
-            HttpConnError::ConnectError(_) => ErrorKind::Compute,
+            HttpConnError::ConnectError(e) => e.get_error_kind(),
            HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,
            HttpConnError::PostgresConnectionError(p) => match p.as_db_error() {
                // user provided a wrong database name
--- a/proxy/src/serverless/rest.rs
+++ b/proxy/src/serverless/rest.rs
@@ -5,12 +5,17 @@ use std::sync::Arc;

 use bytes::Bytes;
 use http::Method;
-use http::header::{AUTHORIZATION, CONTENT_TYPE, HOST};
+use http::header::{
+    ACCESS_CONTROL_ALLOW_HEADERS, ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN,
+    ACCESS_CONTROL_EXPOSE_HEADERS, ACCESS_CONTROL_MAX_AGE, ACCESS_CONTROL_REQUEST_HEADERS, ALLOW,
+    AUTHORIZATION, CONTENT_TYPE, HOST, ORIGIN,
+};
 use http_body_util::combinators::BoxBody;
-use http_body_util::{BodyExt, Full};
+use http_body_util::{BodyExt, Empty, Full};
 use http_utils::error::ApiError;
 use hyper::body::Incoming;
-use hyper::http::{HeaderName, HeaderValue};
+use hyper::http::response::Builder;
+use hyper::http::{HeaderMap, HeaderName, HeaderValue};
 use hyper::{Request, Response, StatusCode};
 use indexmap::IndexMap;
 use moka::sync::Cache;
@@ -67,6 +72,15 @@ use crate::util::deserialize_json_string;

 static EMPTY_JSON_SCHEMA: &str = r#"{"schemas":[]}"#;
 const INTROSPECTION_SQL: &str = POSTGRESQL_INTROSPECTION_SQL;
+const HEADER_VALUE_ALLOW_ALL_ORIGINS: HeaderValue = HeaderValue::from_static("*");
+// CORS headers values
+const ACCESS_CONTROL_ALLOW_METHODS_VALUE: HeaderValue =
+    HeaderValue::from_static("GET, POST, PATCH, PUT, DELETE, OPTIONS");
+const ACCESS_CONTROL_MAX_AGE_VALUE: HeaderValue = HeaderValue::from_static("86400");
+const ACCESS_CONTROL_EXPOSE_HEADERS_VALUE: HeaderValue = HeaderValue::from_static(
+    "Content-Encoding, Content-Location, Content-Range, Content-Type, Date, Location, Server, Transfer-Encoding, Range-Unit",
+);
+const ACCESS_CONTROL_ALLOW_HEADERS_VALUE: HeaderValue = HeaderValue::from_static("Authorization");

 // A wrapper around the DbSchema that allows for self-referencing
 #[self_referencing]
@@ -137,6 +151,8 @@ pub struct ApiConfig {
    pub role_claim_key: String,
    #[serde(default, deserialize_with = "deserialize_comma_separated_option")]
    pub db_extra_search_path: Option<Vec<String>>,
+    #[serde(default, deserialize_with = "deserialize_comma_separated_option")]
+    pub server_cors_allowed_origins: Option<Vec<String>>,
 }

 // The DbSchemaCache is a cache of the ApiConfig and DbSchemaOwned for each endpoint
@@ -165,7 +181,13 @@ impl DbSchemaCache {
        }
    }

-    pub async fn get_cached_or_remote(
+    pub fn get_cached(
+        &self,
+        endpoint_id: &EndpointCacheKey,
+    ) -> Option<Arc<(ApiConfig, DbSchemaOwned)>> {
+        count_cache_outcome(CacheKind::Schema, self.0.get(endpoint_id))
+    }
+    pub async fn get_remote(
        &self,
        endpoint_id: &EndpointCacheKey,
        auth_header: &HeaderValue,
@@ -174,47 +196,42 @@ impl DbSchemaCache {
        ctx: &RequestContext,
        config: &'static ProxyConfig,
    ) -> Result<Arc<(ApiConfig, DbSchemaOwned)>, RestError> {
-        let cache_result = count_cache_outcome(CacheKind::Schema, self.0.get(endpoint_id));
-        match cache_result {
-            Some(v) => Ok(v),
-            None => {
-                info!("db_schema cache miss for endpoint: {:?}", endpoint_id);
-                let remote_value = self
-                    .get_remote(auth_header, connection_string, client, ctx, config)
-                    .await;
-                let (api_config, schema_owned) = match remote_value {
-                    Ok((api_config, schema_owned)) => (api_config, schema_owned),
-                    Err(e @ RestError::SchemaTooLarge) => {
-                        // for the case where the schema is too large, we cache an empty dummy value
-                        // all the other requests will fail without triggering the introspection query
-                        let schema_owned = serde_json::from_str::<DbSchemaOwned>(EMPTY_JSON_SCHEMA)
-                            .map_err(|e| JsonDeserialize { source: e })?;
+        info!("db_schema cache miss for endpoint: {:?}", endpoint_id);
+        let remote_value = self
+            .internal_get_remote(auth_header, connection_string, client, ctx, config)
+            .await;
+        let (api_config, schema_owned) = match remote_value {
+            Ok((api_config, schema_owned)) => (api_config, schema_owned),
+            Err(e @ RestError::SchemaTooLarge) => {
+                // for the case where the schema is too large, we cache an empty dummy value
+                // all the other requests will fail without triggering the introspection query
+                let schema_owned = serde_json::from_str::<DbSchemaOwned>(EMPTY_JSON_SCHEMA)
+                    .map_err(|e| JsonDeserialize { source: e })?;

-                        let api_config = ApiConfig {
-                            db_schemas: vec![],
-                            db_anon_role: None,
-                            db_max_rows: None,
-                            db_allowed_select_functions: vec![],
-                            role_claim_key: String::new(),
-                            db_extra_search_path: None,
-                        };
-                        let value = Arc::new((api_config, schema_owned));
-                        count_cache_insert(CacheKind::Schema);
-                        self.0.insert(endpoint_id.clone(), value);
-                        return Err(e);
-                    }
-                    Err(e) => {
-                        return Err(e);
-                    }
+                let api_config = ApiConfig {
+                    db_schemas: vec![],
+                    db_anon_role: None,
+                    db_max_rows: None,
+                    db_allowed_select_functions: vec![],
+                    role_claim_key: String::new(),
+                    db_extra_search_path: None,
+                    server_cors_allowed_origins: None,
                };
                let value = Arc::new((api_config, schema_owned));
                count_cache_insert(CacheKind::Schema);
-                self.0.insert(endpoint_id.clone(), value.clone());
-                Ok(value)
+                self.0.insert(endpoint_id.clone(), value);
+                return Err(e);
            }
-        }
+            Err(e) => {
+                return Err(e);
+            }
+        };
+        let value = Arc::new((api_config, schema_owned));
+        count_cache_insert(CacheKind::Schema);
+        self.0.insert(endpoint_id.clone(), value.clone());
+        Ok(value)
    }
-    pub async fn get_remote(
+    async fn internal_get_remote(
        &self,
        auth_header: &HeaderValue,
        connection_string: &str,
@@ -531,7 +548,7 @@ pub(crate) async fn handle(
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
    let result = handle_inner(cancel, config, &ctx, request, backend).await;

-    let mut response = match result {
+    let response = match result {
        Ok(r) => {
            ctx.set_success();

@@ -640,9 +657,6 @@ pub(crate) async fn handle(
        }
    };

-    response
-        .headers_mut()
-        .insert("Access-Control-Allow-Origin", HeaderValue::from_static("*"));
    Ok(response)
 }

@@ -722,6 +736,37 @@ async fn handle_inner(
    }
 }

+fn apply_common_cors_headers(
+    response: &mut Builder,
+    request_headers: &HeaderMap,
+    allowed_origins: Option<&Vec<String>>,
+) {
+    let request_origin = request_headers
+        .get(ORIGIN)
+        .map(|v| v.to_str().unwrap_or(""));
+
+    let response_allow_origin = match (request_origin, allowed_origins) {
+        (Some(or), Some(allowed_origins)) => {
+            if allowed_origins.iter().any(|o| o == or) {
+                Some(HeaderValue::from_str(or).unwrap_or(HEADER_VALUE_ALLOW_ALL_ORIGINS))
+            } else {
+                None
+            }
+        }
+        (Some(_), None) => Some(HEADER_VALUE_ALLOW_ALL_ORIGINS),
+        _ => None,
+    };
+    if let Some(h) = response.headers_mut() {
+        h.insert(
+            ACCESS_CONTROL_EXPOSE_HEADERS,
+            ACCESS_CONTROL_EXPOSE_HEADERS_VALUE,
+        );
+        if let Some(origin) = response_allow_origin {
+            h.insert(ACCESS_CONTROL_ALLOW_ORIGIN, origin);
+        }
+    }
+}
+
 #[allow(clippy::too_many_arguments)]
 async fn handle_rest_inner(
    config: &'static ProxyConfig,
@@ -733,12 +778,6 @@ async fn handle_rest_inner(
    jwt: String,
    backend: Arc<PoolingBackend>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, RestError> {
-    // validate the jwt token
-    let jwt_parsed = backend
-        .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)
-        .await
-        .map_err(HttpConnError::from)?;
-
    let db_schema_cache =
        config
            .rest_config
@@ -754,28 +793,83 @@ async fn handle_rest_inner(
            message: "Failed to get endpoint cache key".to_string(),
        }))?;

-    let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;
-
    let (parts, originial_body) = request.into_parts();

+    // try and get the cached entry for this endpoint
+    // it contains the api config and the introspected db schema
+    let cached_entry = db_schema_cache.get_cached(&endpoint_cache_key);
+
+    let allowed_origins = cached_entry
+        .as_ref()
+        .and_then(|arc| arc.0.server_cors_allowed_origins.as_ref());
+
+    let mut response = Response::builder();
+    apply_common_cors_headers(&mut response, &parts.headers, allowed_origins);
+
+    // handle the OPTIONS request
+    if parts.method == Method::OPTIONS {
+        let allowed_headers = parts
+            .headers
+            .get(ACCESS_CONTROL_REQUEST_HEADERS)
+            .and_then(|a| a.to_str().ok())
+            .filter(|v| !v.is_empty())
+            .map_or_else(
+                || "Authorization".to_string(),
+                |v| format!("{v}, Authorization"),
+            );
+        return response
+            .status(StatusCode::OK)
+            .header(
+                ACCESS_CONTROL_ALLOW_METHODS,
+                ACCESS_CONTROL_ALLOW_METHODS_VALUE,
+            )
+            .header(ACCESS_CONTROL_MAX_AGE, ACCESS_CONTROL_MAX_AGE_VALUE)
+            .header(
+                ACCESS_CONTROL_ALLOW_HEADERS,
+                HeaderValue::from_str(&allowed_headers)
+                    .unwrap_or(ACCESS_CONTROL_ALLOW_HEADERS_VALUE),
+            )
+            .header(ALLOW, ACCESS_CONTROL_ALLOW_METHODS_VALUE)
+            .body(Empty::new().map_err(|x| match x {}).boxed())
+            .map_err(|e| {
+                RestError::SubzeroCore(InternalError {
+                    message: e.to_string(),
+                })
+            });
+    }
+
+    // validate the jwt token
+    let jwt_parsed = backend
+        .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)
+        .await
+        .map_err(HttpConnError::from)?;
+
    let auth_header = parts
        .headers
        .get(AUTHORIZATION)
        .ok_or(RestError::SubzeroCore(InternalError {
            message: "Authorization header is required".to_string(),
        }))?;
+    let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;

-    let entry = db_schema_cache
-        .get_cached_or_remote(
-            &endpoint_cache_key,
-            auth_header,
-            connection_string,
-            &mut client,
-            ctx,
-            config,
-        )
-        .await?;
+    let entry = match cached_entry {
+        Some(e) => e,
+        None => {
+            // if not cached, get the remote entry (will run the introspection query)
+            db_schema_cache
+                .get_remote(
+                    &endpoint_cache_key,
+                    auth_header,
+                    connection_string,
+                    &mut client,
+                    ctx,
+                    config,
+                )
+                .await?
+        }
+    };
    let (api_config, db_schema_owned) = entry.as_ref();
+
    let db_schema = db_schema_owned.borrow_schema();

    let db_schemas = &api_config.db_schemas; // list of schemas available for the api
@@ -999,8 +1093,8 @@ async fn handle_rest_inner(
    let _metrics = client.metrics(ctx); // FIXME: is everything in the context set correctly?

    // send the request to the local proxy
-    let response = make_raw_local_proxy_request(&mut client, headers, req_body).await?;
-    let (parts, body) = response.into_parts();
+    let proxy_response = make_raw_local_proxy_request(&mut client, headers, req_body).await?;
+    let (response_parts, body) = proxy_response.into_parts();

    let max_response = config.http_config.max_response_size_bytes;
    let bytes = read_body_with_limit(body, max_response)
@@ -1009,7 +1103,7 @@ async fn handle_rest_inner(

    // if the response status is greater than 399, then it is an error
    // FIXME: check if there are other error codes or shapes of the response
-    if parts.status.as_u16() > 399 {
+    if response_parts.status.as_u16() > 399 {
        // turn this postgres error from the json into PostgresError
        let postgres_error = serde_json::from_slice(&bytes)
            .map_err(|e| RestError::SubzeroCore(JsonDeserialize { source: e }))?;
@@ -1175,7 +1269,7 @@ async fn handle_rest_inner(
        .boxed();

    // build the response
-    let mut response = Response::builder()
+    response = response
        .status(StatusCode::from_u16(status).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR))
        .header(CONTENT_TYPE, http_content_type);

--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -612,19 +612,25 @@ pub async fn handle_request(
        }
    }

+    let max_term = statuses
+        .iter()
+        .map(|(status, _)| status.acceptor_state.term)
+        .max()
+        .unwrap();
+
    // Find the most advanced safekeeper
    let (status, i) = statuses
        .into_iter()
        .max_by_key(|(status, _)| {
            (
                status.acceptor_state.epoch,
+                status.flush_lsn,
                /* BEGIN_HADRON */
                // We need to pull from the SK with the highest term.
                // This is because another compute may come online and vote the same highest term again on the other two SKs.
                // Then, there will be 2 computes running on the same term.
                status.acceptor_state.term,
                /* END_HADRON */
-                status.flush_lsn,
                status.commit_lsn,
            )
        })
@@ -634,6 +640,22 @@ pub async fn handle_request(
    assert!(status.tenant_id == request.tenant_id);
    assert!(status.timeline_id == request.timeline_id);

+    // TODO(diko): This is hadron only check to make sure that we pull the timeline
+    // from the safekeeper with the highest term during timeline restore.
+    // We could avoid returning the error by calling bump_term after pull_timeline.
+    // However, this is not a big deal because we retry the pull_timeline requests.
+    // The check should be removed together with removing custom hadron logic for
+    // safekeeper restore.
+    if wait_for_peer_timeline_status && status.acceptor_state.term != max_term {
+        return Err(ApiError::PreconditionFailed(
+            format!(
+                "choosen safekeeper {} has term {}, but the most advanced term is {}",
+                safekeeper_host, status.acceptor_state.term, max_term
+            )
+            .into(),
+        ));
+    }
+
    match pull_timeline(
        status,
        safekeeper_host,
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -195,12 +195,14 @@ impl StateSK {
        to: Configuration,
    ) -> Result<TimelineMembershipSwitchResponse> {
        let result = self.state_mut().membership_switch(to).await?;
+        let flush_lsn = self.flush_lsn();
+        let last_log_term = self.state().acceptor_state.get_last_log_term(flush_lsn);

        Ok(TimelineMembershipSwitchResponse {
            previous_conf: result.previous_conf,
            current_conf: result.current_conf,
-            last_log_term: self.state().acceptor_state.term,
-            flush_lsn: self.flush_lsn(),
+            last_log_term,
+            flush_lsn,
        })
    }

--- a/storage_controller/src/compute_hook.rs
+++ b/storage_controller/src/compute_hook.rs
@@ -510,7 +510,6 @@ impl ApiMethod for ComputeHookTenant {
                tracing::info!("Reconfiguring pageservers for endpoint {endpoint_name}");

                let shard_count = match shards.len() {
-                    0 => panic!("no shards"),
                    1 => ShardCount::unsharded(),
                    n => ShardCount(n.try_into().expect("too many shards")),
                };
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -644,6 +644,7 @@ async fn handle_tenant_timeline_safekeeper_migrate(
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    // TODO(diko): it's not PS operation, there should be a different permission scope.
    check_permissions(&req, Scope::PageServerApi)?;
    maybe_rate_limit(&req, tenant_id).await;

@@ -665,6 +666,23 @@ async fn handle_tenant_timeline_safekeeper_migrate(
    json_response(StatusCode::OK, ())
 }

+async fn handle_tenant_timeline_safekeeper_migrate_abort(
+    service: Arc<Service>,
+    req: Request<Body>,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+    // TODO(diko): it's not PS operation, there should be a different permission scope.
+    check_permissions(&req, Scope::PageServerApi)?;
+    maybe_rate_limit(&req, tenant_id).await;
+
+    service
+        .tenant_timeline_safekeeper_migrate_abort(tenant_id, timeline_id)
+        .await?;
+
+    json_response(StatusCode::OK, ())
+}
+
 async fn handle_tenant_timeline_lsn_lease(
    service: Arc<Service>,
    req: Request<Body>,
@@ -2611,6 +2629,16 @@ pub fn make_router(
                )
            },
        )
+        .post(
+            "/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate_abort",
+            |r| {
+                tenant_service_handler(
+                    r,
+                    handle_tenant_timeline_safekeeper_migrate_abort,
+                    RequestName("v1_tenant_timeline_safekeeper_migrate_abort"),
+                )
+            },
+        )
        // LSN lease passthrough to all shards
        .post(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/lsn_lease",
--- a/storage_controller/src/service/safekeeper_service.rs
+++ b/storage_controller/src/service/safekeeper_service.rs
@@ -24,12 +24,12 @@ use pageserver_api::controller_api::{
 };
 use pageserver_api::models::{SafekeeperInfo, SafekeepersInfo, TimelineInfo};
 use safekeeper_api::PgVersionId;
+use safekeeper_api::Term;
 use safekeeper_api::membership::{self, MemberSet, SafekeeperGeneration};
 use safekeeper_api::models::{
    PullTimelineRequest, TimelineLocateResponse, TimelineMembershipSwitchRequest,
    TimelineMembershipSwitchResponse,
 };
-use safekeeper_api::{INITIAL_TERM, Term};
 use safekeeper_client::mgmt_api;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
@@ -1230,10 +1230,7 @@ impl Service {
            }
            // It it is the same new_sk_set, we can continue the migration (retry).
        } else {
-            let prev_finished = timeline.cplane_notified_generation == timeline.generation
-                && timeline.sk_set_notified_generation == timeline.generation;
-
-            if !prev_finished {
+            if !is_migration_finished(&timeline) {
                // The previous migration is committed, but the finish step failed.
                // Safekeepers/cplane might not know about the last membership configuration.
                // Retry the finish step to ensure smooth migration.
@@ -1298,13 +1295,7 @@ impl Service {
            )
            .await?;

-        let mut sync_position = (INITIAL_TERM, Lsn::INVALID);
-        for res in results.into_iter().flatten() {
-            let sk_position = (res.last_log_term, res.flush_lsn);
-            if sync_position < sk_position {
-                sync_position = sk_position;
-            }
-        }
+        let sync_position = Self::get_sync_position(&results)?;

        tracing::info!(
            %generation,
@@ -1551,6 +1542,8 @@ impl Service {
        timeline_id: TimelineId,
        timeline: &TimelinePersistence,
    ) -> Result<(), ApiError> {
+        tracing::info!(generation=?timeline.generation, sk_set=?timeline.sk_set, new_sk_set=?timeline.new_sk_set, "retrying finish safekeeper migration");
+
        if timeline.new_sk_set.is_some() {
            // Logical error, should never happen.
            return Err(ApiError::InternalServerError(anyhow::anyhow!(
@@ -1598,4 +1591,152 @@ impl Service {

        Ok(())
    }
+
+    /// Get membership switch responses from all safekeepers and return the sync position.
+    ///
+    /// Sync position is a position equal or greater than the commit position.
+    /// It is guaranteed that all WAL entries with (last_log_term, flush_lsn)
+    /// greater than the sync position are not committed (= not on a quorum).
+    ///
+    /// Returns error if there is no quorum of successful responses.
+    fn get_sync_position(
+        responses: &[mgmt_api::Result<TimelineMembershipSwitchResponse>],
+    ) -> Result<(Term, Lsn), ApiError> {
+        let quorum_size = responses.len() / 2 + 1;
+
+        let mut wal_positions = responses
+            .iter()
+            .flatten()
+            .map(|res| (res.last_log_term, res.flush_lsn))
+            .collect::<Vec<_>>();
+
+        // Should be already checked if the responses are from tenant_timeline_set_membership_quorum.
+        if wal_positions.len() < quorum_size {
+            return Err(ApiError::InternalServerError(anyhow::anyhow!(
+                "not enough successful responses to get sync position: {}/{}",
+                wal_positions.len(),
+                quorum_size,
+            )));
+        }
+
+        wal_positions.sort();
+
+        Ok(wal_positions[quorum_size - 1])
+    }
+
+    /// Abort ongoing safekeeper migration.
+    pub(crate) async fn tenant_timeline_safekeeper_migrate_abort(
+        self: &Arc<Self>,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Result<(), ApiError> {
+        // TODO(diko): per-tenant lock is too wide. Consider introducing per-timeline locks.
+        let _tenant_lock = trace_shared_lock(
+            &self.tenant_op_locks,
+            tenant_id,
+            TenantOperations::TimelineSafekeeperMigrate,
+        )
+        .await;
+
+        // Fetch current timeline configuration from the configuration storage.
+        let timeline = self
+            .persistence
+            .get_timeline(tenant_id, timeline_id)
+            .await?;
+
+        let Some(timeline) = timeline else {
+            return Err(ApiError::NotFound(
+                anyhow::anyhow!(
+                    "timeline {tenant_id}/{timeline_id} doesn't exist in timelines table"
+                )
+                .into(),
+            ));
+        };
+
+        let mut generation = SafekeeperGeneration::new(timeline.generation as u32);
+
+        let Some(new_sk_set) = &timeline.new_sk_set else {
+            // No new_sk_set -> no active migration that we can abort.
+            tracing::info!("timeline has no active migration");
+
+            if !is_migration_finished(&timeline) {
+                // The last migration is committed, but the finish step failed.
+                // Safekeepers/cplane might not know about the last membership configuration.
+                // Retry the finish step to make the timeline state clean.
+                self.finish_safekeeper_migration_retry(tenant_id, timeline_id, &timeline)
+                    .await?;
+            }
+            return Ok(());
+        };
+
+        tracing::info!(sk_set=?timeline.sk_set, ?new_sk_set, ?generation, "aborting timeline migration");
+
+        let cur_safekeepers = self.get_safekeepers(&timeline.sk_set)?;
+        let new_safekeepers = self.get_safekeepers(new_sk_set)?;
+
+        let cur_sk_member_set =
+            Self::make_member_set(&cur_safekeepers).map_err(ApiError::InternalServerError)?;
+
+        // Increment current generation and remove new_sk_set from the timeline to abort the migration.
+        generation = generation.next();
+
+        let mconf = membership::Configuration {
+            generation,
+            members: cur_sk_member_set,
+            new_members: None,
+        };
+
+        // Exclude safekeepers which were added during the current migration.
+        let cur_ids: HashSet<NodeId> = cur_safekeepers.iter().map(|sk| sk.get_id()).collect();
+        let exclude_safekeepers = new_safekeepers
+            .into_iter()
+            .filter(|sk| !cur_ids.contains(&sk.get_id()))
+            .collect::<Vec<_>>();
+
+        let exclude_requests = exclude_safekeepers
+            .iter()
+            .map(|sk| TimelinePendingOpPersistence {
+                sk_id: sk.skp.id,
+                tenant_id: tenant_id.to_string(),
+                timeline_id: timeline_id.to_string(),
+                generation: generation.into_inner() as i32,
+                op_kind: SafekeeperTimelineOpKind::Exclude,
+            })
+            .collect::<Vec<_>>();
+
+        let cur_sk_set = cur_safekeepers
+            .iter()
+            .map(|sk| sk.get_id())
+            .collect::<Vec<_>>();
+
+        // Persist new mconf and exclude requests.
+        self.persistence
+            .update_timeline_membership(
+                tenant_id,
+                timeline_id,
+                generation,
+                &cur_sk_set,
+                None,
+                &exclude_requests,
+            )
+            .await?;
+
+        // At this point we have already commited the abort, but still need to notify
+        // cplane/safekeepers with the new mconf. That's what finish_safekeeper_migration does.
+        self.finish_safekeeper_migration(
+            tenant_id,
+            timeline_id,
+            &cur_safekeepers,
+            &mconf,
+            &exclude_safekeepers,
+        )
+        .await?;
+
+        Ok(())
+    }
+}
+
+fn is_migration_finished(timeline: &TimelinePersistence) -> bool {
+    timeline.cplane_notified_generation == timeline.generation
+        && timeline.sk_set_notified_generation == timeline.generation
 }
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -78,20 +78,26 @@ class EndpointHttpClient(requests.Session):
        json: dict[str, str] = res.json()
        return json

-    def prewarm_lfc(self, from_endpoint_id: str | None = None):
+    def prewarm_lfc(self, from_endpoint_id: str | None = None) -> dict[str, str]:
        """
        Prewarm LFC cache from given endpoint and wait till it finishes or errors
        """
        params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
        self.post(self.prewarm_url, params=params).raise_for_status()
-        self.prewarm_lfc_wait()
+        return self.prewarm_lfc_wait()

-    def prewarm_lfc_wait(self):
+    def cancel_prewarm_lfc(self):
+        """
+        Cancel LFC prewarm if any is ongoing
+        """
+        self.delete(self.prewarm_url).raise_for_status()
+
+    def prewarm_lfc_wait(self) -> dict[str, str]:
        """
        Wait till LFC prewarm returns with error or success.
        If prewarm was not requested before calling this function, it will error
        """
-        statuses = "failed", "completed", "skipped"
+        statuses = "failed", "completed", "skipped", "cancelled"

        def prewarmed():
            json = self.prewarm_lfc_status()
@@ -101,6 +107,7 @@ class EndpointHttpClient(requests.Session):
        wait_until(prewarmed, timeout=60)
        res = self.prewarm_lfc_status()
        assert res["status"] != "failed", res
+        return res

    def offload_lfc_status(self) -> dict[str, str]:
        res = self.get(self.offload_url)
@@ -108,29 +115,31 @@ class EndpointHttpClient(requests.Session):
        json: dict[str, str] = res.json()
        return json

-    def offload_lfc(self):
+    def offload_lfc(self) -> dict[str, str]:
        """
        Offload LFC cache to endpoint storage and wait till offload finishes or errors
        """
        self.post(self.offload_url).raise_for_status()
-        self.offload_lfc_wait()
+        return self.offload_lfc_wait()

-    def offload_lfc_wait(self):
+    def offload_lfc_wait(self) -> dict[str, str]:
        """
        Wait till LFC offload returns with error or success.
        If offload was not requested before calling this function, it will error
        """
+        statuses = "failed", "completed", "skipped"

        def offloaded():
            json = self.offload_lfc_status()
            status, err = json["status"], json.get("error")
-            assert status in ["failed", "completed"], f"{status}, {err=}"
+            assert status in statuses, f"{status}, {err=}"

        wait_until(offloaded, timeout=60)
        res = self.offload_lfc_status()
        assert res["status"] != "failed", res
+        return res

-    def promote(self, promote_spec: dict[str, Any], disconnect: bool = False):
+    def promote(self, promote_spec: dict[str, Any], disconnect: bool = False) -> dict[str, str]:
        url = f"http://localhost:{self.external_port}/promote"
        if disconnect:
            try:  # send first request to start promote and disconnect
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2313,6 +2313,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
        timeline_id: TimelineId,
        new_sk_set: list[int],
    ):
+        log.info(f"migrate_safekeepers({tenant_id}, {timeline_id}, {new_sk_set})")
        response = self.request(
            "POST",
            f"{self.api}/v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate",
@@ -2322,6 +2323,19 @@ class NeonStorageController(MetricsGetter, LogUtils):
        response.raise_for_status()
        log.info(f"migrate_safekeepers success: {response.json()}")

+    def abort_safekeeper_migration(
+        self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ):
+        response = self.request(
+            "POST",
+            f"{self.api}/v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate_abort",
+            headers=self.headers(TokenScope.PAGE_SERVER_API),
+        )
+        response.raise_for_status()
+        log.info(f"abort_safekeeper_migration success: {response.json()}")
+
    def locate(self, tenant_id: TenantId) -> list[dict[str, Any]]:
        """
        :return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr": str, "listen_http_port": int}
@@ -4777,18 +4791,7 @@ class Endpoint(PgProtocol, LogUtils):

        # set small 'max_replication_write_lag' to enable backpressure
        # and make tests more stable.
-        config_lines += ["max_replication_write_lag=15MB"]
-
-        # If gRPC is enabled, use the new communicator too.
-        #
-        # NB: the communicator is enabled by default, so force it to false otherwise.
-        #
-        # XXX: By checking for None, we enable the new communicator for all tests
-        # by default
-        if grpc or grpc is None:
-            config_lines += ["neon.use_communicator_worker=on"]
-        else:
-            config_lines += ["neon.use_communicator_worker=off"]
+        config_lines = ["max_replication_write_lag=15MB"] + config_lines

        # Delete file cache if it exists (and we're recreating the endpoint)
        if USE_LFC:
--- a/test_runner/fixtures/pageserver/allowed_errors.py
+++ b/test_runner/fixtures/pageserver/allowed_errors.py
@@ -90,8 +90,6 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
    # During shutdown, DownloadError::Cancelled may be logged as an error.  Cleaning this
    # up is tracked in https://github.com/neondatabase/neon/issues/6096
    ".*Cancelled, shutting down.*",
-    # gRPC request failures during shutdown.
-    ".*grpc:pageservice.*request failed with Unavailable: timeline is shutting down.*",
    # Open layers are only rolled at Lsn boundaries to avoid name clashses.
    # Hence, we can overshoot the soft limit set by checkpoint distance.
    # This is especially pronounced in tests that set small checkpoint
--- a/test_runner/regress/test_lfc_prewarm.py
+++ b/test_runner/regress/test_lfc_prewarm.py
@@ -1,6 +1,6 @@
 import random
-import threading
 from enum import StrEnum
+from threading import Thread
 from time import sleep
 from typing import Any

@@ -47,19 +47,23 @@ def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor)
        # With autoprewarm, we need to be sure LFC was offloaded after all writes
        # finish, so we sleep. Otherwise we'll have less prewarmed pages than we want
        sleep(AUTOOFFLOAD_INTERVAL_SECS)
-        client.offload_lfc_wait()
-        return
+        offload_res = client.offload_lfc_wait()
+        log.info(offload_res)
+        return offload_res

    if method == PrewarmMethod.COMPUTE_CTL:
        status = client.prewarm_lfc_status()
        assert status["status"] == "not_prewarmed"
        assert "error" not in status
-        client.offload_lfc()
+        offload_res = client.offload_lfc()
+        log.info(offload_res)
        assert client.prewarm_lfc_status()["status"] == "not_prewarmed"
+
        parsed = prom_parse(client)
        desired = {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0, OFFLOAD_ERR_LABEL: 0, PREWARM_ERR_LABEL: 0}
        assert parsed == desired, f"{parsed=} != {desired=}"
-        return
+
+        return offload_res

    raise AssertionError(f"{method} not in PrewarmMethod")

@@ -68,21 +72,30 @@ def prewarm_endpoint(
    method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor, lfc_state: str | None
 ):
    if method == PrewarmMethod.AUTOPREWARM:
-        client.prewarm_lfc_wait()
+        prewarm_res = client.prewarm_lfc_wait()
+        log.info(prewarm_res)
    elif method == PrewarmMethod.COMPUTE_CTL:
-        client.prewarm_lfc()
+        prewarm_res = client.prewarm_lfc()
+        log.info(prewarm_res)
+        return prewarm_res
    elif method == PrewarmMethod.POSTGRES:
        cur.execute("select neon.prewarm_local_cache(%s)", (lfc_state,))


-def check_prewarmed(
+def check_prewarmed_contains(
    method: PrewarmMethod, client: EndpointHttpClient, desired_status: dict[str, str | int]
 ):
    if method == PrewarmMethod.AUTOPREWARM:
-        assert client.prewarm_lfc_status() == desired_status
+        prewarm_status = client.prewarm_lfc_status()
+        for k in desired_status:
+            assert desired_status[k] == prewarm_status[k]
+
        assert prom_parse(client)[PREWARM_LABEL] == 1
    elif method == PrewarmMethod.COMPUTE_CTL:
-        assert client.prewarm_lfc_status() == desired_status
+        prewarm_status = client.prewarm_lfc_status()
+        for k in desired_status:
+            assert desired_status[k] == prewarm_status[k]
+
        desired = {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1, PREWARM_ERR_LABEL: 0, OFFLOAD_ERR_LABEL: 0}
        assert prom_parse(client) == desired

@@ -149,9 +162,6 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
    log.info(f"Used LFC size: {lfc_used_pages}")
    pg_cur.execute("select * from neon.get_prewarm_info()")
    total, prewarmed, skipped, _ = pg_cur.fetchall()[0]
-    log.info(f"Prewarm info: {total=} {prewarmed=} {skipped=}")
-    progress = (prewarmed + skipped) * 100 // total
-    log.info(f"Prewarm progress: {progress}%")
    assert lfc_used_pages > 10000
    assert total > 0
    assert prewarmed > 0
@@ -161,7 +171,54 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
    assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2

    desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
-    check_prewarmed(method, client, desired)
+    check_prewarmed_contains(method, client, desired)
+
+
+@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
+def test_lfc_prewarm_cancel(neon_simple_env: NeonEnv):
+    """
+    Test we can cancel LFC prewarm and prewarm successfully after
+    """
+    env = neon_simple_env
+    n_records = 1000000
+    cfg = [
+        "autovacuum = off",
+        "shared_buffers=1MB",
+        "neon.max_file_cache_size=1GB",
+        "neon.file_cache_size_limit=1GB",
+        "neon.file_cache_prewarm_limit=1000",
+    ]
+    endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
+
+    pg_conn = endpoint.connect()
+    pg_cur = pg_conn.cursor()
+    pg_cur.execute("create schema neon; create extension neon with schema neon")
+    pg_cur.execute("create database lfc")
+
+    lfc_conn = endpoint.connect(dbname="lfc")
+    lfc_cur = lfc_conn.cursor()
+    log.info(f"Inserting {n_records} rows")
+    lfc_cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
+    lfc_cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
+    log.info(f"Inserted {n_records} rows")
+
+    client = endpoint.http_client()
+    method = PrewarmMethod.COMPUTE_CTL
+    offload_lfc(method, client, pg_cur)
+
+    endpoint.stop()
+    endpoint.start()
+
+    thread = Thread(target=lambda: prewarm_endpoint(method, client, pg_cur, None))
+    thread.start()
+    # wait 2 seconds to ensure we cancel prewarm SQL query
+    sleep(2)
+    client.cancel_prewarm_lfc()
+    thread.join()
+    assert client.prewarm_lfc_status()["status"] == "cancelled"
+
+    prewarm_endpoint(method, client, pg_cur, None)
+    assert client.prewarm_lfc_status()["status"] == "completed"


@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
@@ -178,9 +235,8 @@ def test_lfc_prewarm_empty(neon_simple_env: NeonEnv):
    cur = conn.cursor()
    cur.execute("create schema neon; create extension neon with schema neon")
    method = PrewarmMethod.COMPUTE_CTL
-    offload_lfc(method, client, cur)
-    prewarm_endpoint(method, client, cur, None)
-    assert client.prewarm_lfc_status()["status"] == "skipped"
+    assert offload_lfc(method, client, cur)["status"] == "skipped"
+    assert prewarm_endpoint(method, client, cur, None)["status"] == "skipped"


 # autoprewarm isn't needed as we prewarm manually
@@ -251,11 +307,11 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet

    workload_threads = []
    for _ in range(n_threads):
-        t = threading.Thread(target=workload)
+        t = Thread(target=workload)
        workload_threads.append(t)
        t.start()

-    prewarm_thread = threading.Thread(target=prewarm)
+    prewarm_thread = Thread(target=prewarm)
    prewarm_thread.start()

    def prewarmed():
--- a/test_runner/regress/test_normal_work.py
+++ b/test_runner/regress/test_normal_work.py
@@ -17,9 +17,7 @@ def check_tenant(
    config_lines = [
        f"neon.safekeeper_proto_version = {safekeeper_proto_version}",
    ]
-    endpoint = env.endpoints.create_start(
-        "main", tenant_id=tenant_id, config_lines=config_lines, grpc=True
-    )
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, config_lines=config_lines)
    # we rely upon autocommit after each statement
    res_1 = endpoint.safe_psql_many(
        queries=[
--- a/test_runner/regress/test_replica_promotes.py
+++ b/test_runner/regress/test_replica_promotes.py
@@ -145,6 +145,7 @@ def test_replica_promote(neon_simple_env: NeonEnv, method: PromoteMethod):
        stop_and_check_lsn(secondary, None)

    if method == PromoteMethod.COMPUTE_CTL:
+        log.info("Restarting primary to check new config")
        secondary.stop()
        # In production, compute ultimately receives new compute spec from cplane.
        secondary.respec(mode="Primary")
--- a/test_runner/regress/test_safekeeper_migration.py
+++ b/test_runner/regress/test_safekeeper_migration.py
@@ -286,3 +286,265 @@ def test_sk_generation_aware_tombstones(neon_env_builder: NeonEnvBuilder):
    assert re.match(r".*Timeline .* deleted.*", exc.value.response.text)
    # The timeline should remain deleted.
    expect_deleted(second_sk)
+
+
+def test_safekeeper_migration_stale_timeline(neon_env_builder: NeonEnvBuilder):
+    """
+    Test that safekeeper migration handles stale timeline correctly by migrating to
+    a safekeeper with a stale timeline.
+    1. Check that we are waiting for the stale timeline to catch up with the commit lsn.
+       The migration might fail if there is no compute to advance the WAL.
+    2. Check that we rely on last_log_term (and not the current term) when waiting for the
+       sync_position on step 7.
+    3. Check that migration succeeds if the compute is running.
+    """
+    neon_env_builder.num_safekeepers = 2
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": True,
+        "timeline_safekeeper_count": 1,
+    }
+    env = neon_env_builder.init_start()
+    env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
+    env.storage_controller.allowed_errors.append(".*not enough successful .* to reach quorum.*")
+
+    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+
+    active_sk = env.get_safekeeper(mconf["sk_set"][0])
+    other_sk = [sk for sk in env.safekeepers if sk.id != active_sk.id][0]
+
+    ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
+    ep.start(safekeeper_generation=1, safekeepers=[active_sk.id])
+    ep.safe_psql("CREATE TABLE t(a int)")
+    ep.safe_psql("INSERT INTO t VALUES (0)")
+
+    # Pull the timeline to other_sk, so other_sk now has a "stale" timeline on it.
+    other_sk.pull_timeline([active_sk], env.initial_tenant, env.initial_timeline)
+
+    # Advance the WAL on active_sk.
+    ep.safe_psql("INSERT INTO t VALUES (1)")
+
+    # The test is more tricky if we have the same last_log_term but different term/flush_lsn.
+    # Stop the active_sk during the endpoint shutdown because otherwise compute_ctl runs
+    # sync_safekeepers and advances last_log_term on active_sk.
+    active_sk.stop()
+    ep.stop(mode="immediate")
+    active_sk.start()
+
+    active_sk_status = active_sk.http_client().timeline_status(
+        env.initial_tenant, env.initial_timeline
+    )
+    other_sk_status = other_sk.http_client().timeline_status(
+        env.initial_tenant, env.initial_timeline
+    )
+
+    # other_sk should have the same last_log_term, but a stale flush_lsn.
+    assert active_sk_status.last_log_term == other_sk_status.last_log_term
+    assert active_sk_status.flush_lsn > other_sk_status.flush_lsn
+
+    commit_lsn = active_sk_status.flush_lsn
+
+    # Bump the term on other_sk to make it higher than active_sk.
+    # This is to make sure we don't use current term instead of last_log_term in the algorithm.
+    other_sk.http_client().term_bump(
+        env.initial_tenant, env.initial_timeline, active_sk_status.term + 100
+    )
+
+    # TODO(diko): now it fails because the timeline on other_sk is stale and there is no compute
+    # to catch up it with active_sk. It might be fixed in https://databricks.atlassian.net/browse/LKB-946
+    # if we delete stale timelines before starting the migration.
+    # But the rest of the test is still valid: we should not lose committed WAL after the migration.
+    with pytest.raises(
+        StorageControllerApiException, match="not enough successful .* to reach quorum"
+    ):
+        env.storage_controller.migrate_safekeepers(
+            env.initial_tenant, env.initial_timeline, [other_sk.id]
+        )
+
+    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+    assert mconf["new_sk_set"] == [other_sk.id]
+    assert mconf["sk_set"] == [active_sk.id]
+    assert mconf["generation"] == 2
+
+    # Start the endpoint, so it advances the WAL on other_sk.
+    ep.start(safekeeper_generation=2, safekeepers=[active_sk.id, other_sk.id])
+    # Now the migration should succeed.
+    env.storage_controller.migrate_safekeepers(
+        env.initial_tenant, env.initial_timeline, [other_sk.id]
+    )
+
+    # Check that we didn't lose committed WAL.
+    assert (
+        other_sk.http_client().timeline_status(env.initial_tenant, env.initial_timeline).flush_lsn
+        >= commit_lsn
+    )
+    assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]
+
+
+def test_pull_from_most_advanced_sk(neon_env_builder: NeonEnvBuilder):
+    """
+    Test that we pull the timeline from the most advanced safekeeper during the
+    migration and do not lose committed WAL.
+    """
+    neon_env_builder.num_safekeepers = 4
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": True,
+        "timeline_safekeeper_count": 3,
+    }
+    env = neon_env_builder.init_start()
+    env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
+
+    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+
+    sk_set = mconf["sk_set"]
+    assert len(sk_set) == 3
+
+    other_sk = [sk.id for sk in env.safekeepers if sk.id not in sk_set][0]
+
+    ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
+    ep.start(safekeeper_generation=1, safekeepers=sk_set)
+    ep.safe_psql("CREATE TABLE t(a int)")
+    ep.safe_psql("INSERT INTO t VALUES (0)")
+
+    # Stop one sk, so we have a lagging WAL on it.
+    env.get_safekeeper(sk_set[0]).stop()
+    # Advance the WAL on the other sks.
+    ep.safe_psql("INSERT INTO t VALUES (1)")
+
+    # Stop other sks to make sure compute_ctl doesn't advance the last_log_term on them during shutdown.
+    for sk_id in sk_set[1:]:
+        env.get_safekeeper(sk_id).stop()
+    ep.stop(mode="immediate")
+    for sk_id in sk_set:
+        env.get_safekeeper(sk_id).start()
+
+    # Bump the term on the lagging sk to make sure we don't use it to choose the most advanced sk.
+    env.get_safekeeper(sk_set[0]).http_client().term_bump(
+        env.initial_tenant, env.initial_timeline, 100
+    )
+
+    def get_commit_lsn(sk_set: list[int]):
+        flush_lsns = []
+        last_log_terms = []
+        for sk_id in sk_set:
+            sk = env.get_safekeeper(sk_id)
+            status = sk.http_client().timeline_status(env.initial_tenant, env.initial_timeline)
+            flush_lsns.append(status.flush_lsn)
+            last_log_terms.append(status.last_log_term)
+
+        # In this test we assume that all sks have the same last_log_term.
+        assert len(set(last_log_terms)) == 1
+
+        flush_lsns.sort(reverse=True)
+        commit_lsn = flush_lsns[len(sk_set) // 2]
+
+        log.info(f"sk_set: {sk_set}, flush_lsns: {flush_lsns}, commit_lsn: {commit_lsn}")
+        return commit_lsn
+
+    commit_lsn_before_migration = get_commit_lsn(sk_set)
+
+    # Make two migrations, so the lagging sk stays in the sk_set, but other sks are replaced.
+    new_sk_set1 = [sk_set[0], sk_set[1], other_sk]  # remove sk_set[2], add other_sk
+    new_sk_set2 = [sk_set[0], other_sk, sk_set[2]]  # remove sk_set[1], add sk_set[2] back
+    env.storage_controller.migrate_safekeepers(
+        env.initial_tenant, env.initial_timeline, new_sk_set1
+    )
+    env.storage_controller.migrate_safekeepers(
+        env.initial_tenant, env.initial_timeline, new_sk_set2
+    )
+
+    commit_lsn_after_migration = get_commit_lsn(new_sk_set2)
+
+    # We should not lose committed WAL.
+    # If we have choosen the lagging sk to pull the timeline from, this might fail.
+    assert commit_lsn_before_migration <= commit_lsn_after_migration
+
+    ep.start(safekeeper_generation=5, safekeepers=new_sk_set2)
+    assert ep.safe_psql("SELECT * FROM t") == [(0,), (1,)]
+
+
+def test_abort_safekeeper_migration(neon_env_builder: NeonEnvBuilder):
+    """
+    Test that safekeeper migration can be aborted.
+    1. Insert failpoints and ensure the abort successfully reverts the timeline state.
+    2. Check that endpoint is operational after the abort.
+    """
+    neon_env_builder.num_safekeepers = 2
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": True,
+        "timeline_safekeeper_count": 1,
+    }
+    env = neon_env_builder.init_start()
+    env.pageserver.allowed_errors.extend(PAGESERVER_ALLOWED_ERRORS)
+
+    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+    assert len(mconf["sk_set"]) == 1
+    cur_sk = mconf["sk_set"][0]
+    cur_gen = 1
+
+    ep = env.endpoints.create("main", tenant_id=env.initial_tenant)
+    ep.start(safekeeper_generation=1, safekeepers=mconf["sk_set"])
+    ep.safe_psql("CREATE EXTENSION neon_test_utils;")
+    ep.safe_psql("CREATE TABLE t(a int)")
+    ep.safe_psql("INSERT INTO t VALUES (1)")
+
+    another_sk = [sk.id for sk in env.safekeepers if sk.id != cur_sk][0]
+
+    failpoints = [
+        "sk-migration-after-step-3",
+        "sk-migration-after-step-4",
+        "sk-migration-after-step-5",
+        "sk-migration-after-step-7",
+    ]
+
+    for fp in failpoints:
+        env.storage_controller.configure_failpoints((fp, "return(1)"))
+
+        with pytest.raises(StorageControllerApiException, match=f"failpoint {fp}"):
+            env.storage_controller.migrate_safekeepers(
+                env.initial_tenant, env.initial_timeline, [another_sk]
+            )
+        cur_gen += 1
+
+        env.storage_controller.configure_failpoints((fp, "off"))
+
+        # We should have a joint mconf after the failure.
+        mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+        assert mconf["generation"] == cur_gen
+        assert mconf["sk_set"] == [cur_sk]
+        assert mconf["new_sk_set"] == [another_sk]
+
+        env.storage_controller.abort_safekeeper_migration(env.initial_tenant, env.initial_timeline)
+        cur_gen += 1
+
+        # Abort should revert the timeline to the previous sk_set and increment the generation.
+        mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+        assert mconf["generation"] == cur_gen
+        assert mconf["sk_set"] == [cur_sk]
+        assert mconf["new_sk_set"] is None
+
+        assert ep.safe_psql("SHOW neon.safekeepers")[0][0].startswith(f"g#{cur_gen}:")
+        ep.safe_psql(f"INSERT INTO t VALUES ({cur_gen})")
+
+    # After step-8 the final mconf is committed and the migration is not abortable anymore.
+    # So the abort should not abort anything.
+    env.storage_controller.configure_failpoints(("sk-migration-after-step-8", "return(1)"))
+
+    with pytest.raises(StorageControllerApiException, match="failpoint sk-migration-after-step-8"):
+        env.storage_controller.migrate_safekeepers(
+            env.initial_tenant, env.initial_timeline, [another_sk]
+        )
+    cur_gen += 2
+
+    env.storage_controller.configure_failpoints((fp, "off"))
+
+    env.storage_controller.abort_safekeeper_migration(env.initial_tenant, env.initial_timeline)
+
+    # The migration is fully committed, no abort should have been performed.
+    mconf = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+    assert mconf["generation"] == cur_gen
+    assert mconf["sk_set"] == [another_sk]
+    assert mconf["new_sk_set"] is None
+
+    ep.safe_psql(f"INSERT INTO t VALUES ({cur_gen})")
+    ep.clear_buffers()
+    assert ep.safe_psql("SELECT * FROM t") == [(i + 1,) for i in range(cur_gen) if i % 2 == 0]
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
  "v17": [
    "17.5",
-    "fa1788475e3146cc9c7c6a1b74f48fd296898fcd"
+    "1e01fcea2a6b38180021aa83e0051d95286d9096"
  ],
  "v16": [
    "16.9",
-    "9b9cb4b3e33347aea8f61e606bb6569979516de5"
+    "a42351fcd41ea01edede1daed65f651e838988fc"
  ],
  "v15": [
    "15.13",
-    "aaaeff2550d5deba58847f112af9b98fa3a58b00"
+    "2aaab3bb4a13557aae05bb2ae0ef0a132d0c4f85"
  ],
  "v14": [
    "14.18",
-    "c9f9fdd0113b52c0bd535afdb09d3a543aeee25f"
+    "2155cb165d05f617eb2c8ad7e43367189b627703"
  ]
 }
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -28,8 +28,8 @@ chrono = { version = "0.4", default-features = false, features = ["clock", "serd
 clap = { version = "4", features = ["derive", "env", "string"] }
 clap_builder = { version = "4", default-features = false, features = ["color", "env", "help", "std", "string", "suggestions", "usage"] }
 const-oid = { version = "0.9", default-features = false, features = ["db", "std"] }
-criterion = { version = "0.5", features = ["html_reports"] }
 crossbeam-epoch = { version = "0.9" }
+crossbeam-utils = { version = "0.8" }
 crypto-bigint = { version = "0.5", features = ["generic-array", "zeroize"] }
 der = { version = "0.7", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] }
 deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] }
@@ -72,6 +72,7 @@ num-integer = { version = "0.1", features = ["i128"] }
 num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
 num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] }
 num-traits = { version = "0.2", features = ["i128", "libm"] }
+once_cell = { version = "1" }
 p256 = { version = "0.13", features = ["jwk"] }
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 portable-atomic = { version = "1", features = ["require-cas"] }
@@ -104,7 +105,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = ["loggin
 tokio-stream = { version = "0.1", features = ["net", "sync"] }
 tokio-util = { version = "0.7", features = ["codec", "compat", "io-util", "rt"] }
 toml_edit = { version = "0.22", features = ["serde"] }
-tonic = { version = "0.13", default-features = false, features = ["codegen", "gzip", "prost", "router", "tls-native-roots", "tls-ring", "transport", "zstd"] }
+tonic = { version = "0.13", default-features = false, features = ["codegen", "gzip", "prost", "router", "server", "tls-native-roots", "tls-ring", "zstd"] }
 tower = { version = "0.5", default-features = false, features = ["balance", "buffer", "limit", "log"] }
 tracing = { version = "0.1", features = ["log"] }
 tracing-core = { version = "0.1" }
@@ -142,6 +143,7 @@ num-integer = { version = "0.1", features = ["i128"] }
 num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
 num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] }
 num-traits = { version = "0.2", features = ["i128", "libm"] }
+once_cell = { version = "1" }
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 prettyplease = { version = "0.2", default-features = false, features = ["verbatim"] }
 proc-macro2 = { version = "1" }
Author	SHA1	Message	Date
dependabot[bot]	6eb7220c47	build(deps): bump tracing-subscriber Bumps the cargo group with 1 update in the / directory: [tracing-subscriber](https://github.com/tokio-rs/tracing). Updates `tracing-subscriber` from 0.3.19 to 0.3.20 - [Release notes](https://github.com/tokio-rs/tracing/releases) - [Commits](https://github.com/tokio-rs/tracing/compare/tracing-subscriber-0.3.19...tracing-subscriber-0.3.20) --- updated-dependencies: - dependency-name: tracing-subscriber dependency-version: 0.3.20 dependency-type: direct:production dependency-group: cargo ... Signed-off-by: dependabot[bot] <support@github.com>	2025-08-29 20:55:15 +00:00
Peter Bendel	77e22e4bf0	remove obsolete comment - this is a dummy commit (#12816 ) ## Problem we ran out of commit comment on same commit sha, [see](https://github.com/neondatabase/neon/actions/runs/17190868211/job/48766305883#step:10:591) ## Summary of changes Push another commit to neondatabase/neon.git to create a new commit sha on main branch	2025-08-25 07:36:41 +00:00
Ruslan Talpa	d96cea1917	[proxy] handle options request in rest broker (cors headers) (#12744 ) ## Problem rest broker needs to respond with the correct cors headers for the api to be usable from other domains ## Summary of changes added a code path in rest broker to handle the OPTIONS requests --------- Co-authored-by: Ruslan Talpa <ruslan.talpa@databricks.com>	2025-07-31 13:05:09 +00:00
Dmitrii Kovalkov	312a74f11f	storcon: implement safekeeper_migrate_abort handler (#12705 ) ## Problem Right now if we commit a joint configuration to DB, there is no way back. The only way to get the clean mconf is to continue the migration. The RFC also described an abort mechanism, which allows to abort current migration and revert mconf change. It might be needed if the migration is stuck and cannot have any progress, e.g. if the sk we are migrating to went down during the migration. This PR implements this abort algorithm. - Closes: https://databricks.atlassian.net/browse/LKB-899 - Closes: https://github.com/neondatabase/neon/issues/12549 ## Summary of changes - Implement `safekeeper_migrate_abort` handler with the algorithm described in RFC - Add `timeline-safekeeper-migrate-abort` subcommand to `storcon_cli` - Add test for the migration abort algorithm.	2025-07-31 12:40:32 +00:00
Mikhail	df4e37b7cc	Report timespans for promotion and prewarm (#12730 ) - Return sub-actions time spans for prewarm, prewarm offload, and promotion in http handlers. - Set `synchronous_standby_names=walproposer` for promoted endpoints. Otherwise, walproposer on promoted standby ignores reply from safekeeper and is stuck on lsn COMMIT eternally.	2025-07-31 11:51:19 +00:00
Heikki Linnakangas	b4a63e0a34	Fix how `neon.stripe_size` option is set in postgresql.conf file (#12776 ) Commit `1dce2a9e74` changed how the `neon.pageserver_connstring` setting is formed, but it messed up setting the `neon.stripe_size` setting so that it was set twice. That got mixed up during development of the patch, as commit `7fef4435c1` landed first and was merged incorrectly.	2025-07-31 11:46:57 +00:00
Erik Grinaker	f8fc0bf3c0	neon_local: use doc comments for help texts (#12270 ) Clap automatically uses doc comments as help/about texts. Doc comments are strictly better, since they're also used e.g. for IDE documentation, and are better formatted. This patch updates all `neon_local` commands to use doc comments (courtesy of GPT-o3).	2025-07-31 10:25:33 +00:00
Alexey Kondratov	8fe7596120	chore(compute_tools): Delete unused anon_ext_fn_reassign.sql (#12787 ) It's an anon v1 failed launch artifact, I suppose.	2025-07-31 10:11:30 +00:00
Krzysztof Szafrański	f3ee6e818d	[proxy] Correctly classify ConnectErrors (#12793 ) As is, e.g. quota errors on wake compute are logged as "compute" errors.	2025-07-31 09:53:48 +00:00
Dmitrii Kovalkov	edd60730c8	safekeeper: use last_log_term in mconf switch + choose most advanced sk in pull timeline (#12778 ) ## Problem I discovered two bugs corresponding to safekeeper migration, which together might lead to a data loss during the migration. The second bug is from a hadron patch and might lead to a data loss during the safekeeper restore in hadron as well. 1. `switch_membership` returns the current `term` instead of `last_log_term`. It is used to choose the `sync_position` in the algorithm, so we might choose the wrong one and break the correctness guarantees. 2. The current `term` is used to choose the most advanced SK in `pull_timeline` with higher priority than `flush_lsn`. It is incorrect because the most advanced safekeeper is the one with the highest `(last_log_term, flush_lsn)` pair. The compute might bump term on the least advanced sk, making it the best choice to pull from, and thus making committed log entries "uncommitted" after `pull_timeline` Part of https://databricks.atlassian.net/browse/LKB-1017 ## Summary of changes - Return `last_log_term` in `switch_membership` - Use `(last_log_term, flush_lsn)` as a primary key for choosing the most advanced sk in `pull_timeline` and deny pulling if the `max_term` is higher than on the most advanced sk (hadron only) - Write tests for both cases - Retry `sync_safekeepers` in `compute_ctl` - Take into the account the quorum size when calculating `sync_position`	2025-07-31 09:29:25 +00:00
Aleksandr Sarantsev	975b95f4cd	Introduce deletion API improvement RFC (#12484 ) ## Problem The deletion logic had become difficult to understand and maintain. ## Summary of changes - Added an RFC detailing proposed improvements to all deletion-related APIs. --------- Co-authored-by: Aleksandr Sarantsev <aleksandr.sarantsev@databricks.com>	2025-07-31 08:34:47 +00:00
Mikhail	01c39f378e	prewarm cancellation (#12785 ) Add DELETE /lfc/prewarm route which handles ongoing prewarm cancellation, update API spec, add prewarm Cancelled state Add offload Cancelled state when LFC is not initialized	2025-07-30 22:05:51 +00:00
Dimitri Fontaine	4d3b28bd2e	[Hadron] Always run databricks auth hook. (#12683 )	2025-07-30 21:34:30 +00:00