diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index 87e8df2ab6..c38af9cb80 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -33,7 +33,6 @@ use crate::{
     reltag::RelTag,
     shard::{ShardCount, ShardStripeSize, TenantShardId},
 };
-use anyhow::bail;
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 
 /// The state of a tenant in this pageserver.
@@ -1400,6 +1399,8 @@ pub enum PagestreamFeMessage {
     GetPage(PagestreamGetPageRequest),
     DbSize(PagestreamDbSizeRequest),
     GetSlruSegment(PagestreamGetSlruSegmentRequest),
+    #[cfg(feature = "testing")]
+    Test(PagestreamTestRequest),
 }
 
 // Wrapped in libpq CopyData
@@ -1411,6 +1412,22 @@ pub enum PagestreamBeMessage {
     Error(PagestreamErrorResponse),
     DbSize(PagestreamDbSizeResponse),
     GetSlruSegment(PagestreamGetSlruSegmentResponse),
+    #[cfg(feature = "testing")]
+    Test(PagestreamTestResponse),
+}
+
+// Keep in sync with `pagestore_client.h`
+#[repr(u8)]
+enum PagestreamFeMessageTag {
+    Exists = 0,
+    Nblocks = 1,
+    GetPage = 2,
+    DbSize = 3,
+    GetSlruSegment = 4,
+    /* future tags above this line */
+    /// For testing purposes, not available in production.
+    #[cfg(feature = "testing")]
+    Test = 99,
 }
 
 // Keep in sync with `pagestore_client.h`
@@ -1422,7 +1439,28 @@ enum PagestreamBeMessageTag {
     Error = 103,
     DbSize = 104,
     GetSlruSegment = 105,
+    /* future tags above this line */
+    /// For testing purposes, not available in production.
+    #[cfg(feature = "testing")]
+    Test = 199,
 }
+
+impl TryFrom<u8> for PagestreamFeMessageTag {
+    type Error = u8;
+    fn try_from(value: u8) -> Result<Self, u8> {
+        match value {
+            0 => Ok(PagestreamFeMessageTag::Exists),
+            1 => Ok(PagestreamFeMessageTag::Nblocks),
+            2 => Ok(PagestreamFeMessageTag::GetPage),
+            3 => Ok(PagestreamFeMessageTag::DbSize),
+            4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
+            #[cfg(feature = "testing")]
+            99 => Ok(PagestreamFeMessageTag::Test),
+            _ => Err(value),
+        }
+    }
+}
+
 impl TryFrom<u8> for PagestreamBeMessageTag {
     type Error = u8;
     fn try_from(value: u8) -> Result<Self, u8> {
@@ -1433,6 +1471,8 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
             103 => Ok(PagestreamBeMessageTag::Error),
             104 => Ok(PagestreamBeMessageTag::DbSize),
             105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
+            #[cfg(feature = "testing")]
+            199 => Ok(PagestreamBeMessageTag::Test),
             _ => Err(value),
         }
     }
@@ -1550,6 +1590,20 @@ pub struct PagestreamDbSizeResponse {
     pub db_size: i64,
 }
 
+#[cfg(feature = "testing")]
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct PagestreamTestRequest {
+    pub hdr: PagestreamRequest,
+    pub batch_key: u64,
+    pub message: String,
+}
+
+#[cfg(feature = "testing")]
+#[derive(Debug)]
+pub struct PagestreamTestResponse {
+    pub req: PagestreamTestRequest,
+}
+
 // This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
 // that require pageserver-internal types.  It is sufficient to get the total size.
 #[derive(Serialize, Deserialize, Debug)]
@@ -1569,7 +1623,7 @@ impl PagestreamFeMessage {
 
         match self {
             Self::Exists(req) => {
-                bytes.put_u8(0);
+                bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
                 bytes.put_u64(req.hdr.reqid);
                 bytes.put_u64(req.hdr.request_lsn.0);
                 bytes.put_u64(req.hdr.not_modified_since.0);
@@ -1580,7 +1634,7 @@ impl PagestreamFeMessage {
             }
 
             Self::Nblocks(req) => {
-                bytes.put_u8(1);
+                bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
                 bytes.put_u64(req.hdr.reqid);
                 bytes.put_u64(req.hdr.request_lsn.0);
                 bytes.put_u64(req.hdr.not_modified_since.0);
@@ -1591,7 +1645,7 @@ impl PagestreamFeMessage {
             }
 
             Self::GetPage(req) => {
-                bytes.put_u8(2);
+                bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
                 bytes.put_u64(req.hdr.reqid);
                 bytes.put_u64(req.hdr.request_lsn.0);
                 bytes.put_u64(req.hdr.not_modified_since.0);
@@ -1603,7 +1657,7 @@ impl PagestreamFeMessage {
             }
 
             Self::DbSize(req) => {
-                bytes.put_u8(3);
+                bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
                 bytes.put_u64(req.hdr.reqid);
                 bytes.put_u64(req.hdr.request_lsn.0);
                 bytes.put_u64(req.hdr.not_modified_since.0);
@@ -1611,13 +1665,24 @@ impl PagestreamFeMessage {
             }
 
             Self::GetSlruSegment(req) => {
-                bytes.put_u8(4);
+                bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
                 bytes.put_u64(req.hdr.reqid);
                 bytes.put_u64(req.hdr.request_lsn.0);
                 bytes.put_u64(req.hdr.not_modified_since.0);
                 bytes.put_u8(req.kind);
                 bytes.put_u32(req.segno);
             }
+            #[cfg(feature = "testing")]
+            Self::Test(req) => {
+                bytes.put_u8(PagestreamFeMessageTag::Test as u8);
+                bytes.put_u64(req.hdr.reqid);
+                bytes.put_u64(req.hdr.request_lsn.0);
+                bytes.put_u64(req.hdr.not_modified_since.0);
+                bytes.put_u64(req.batch_key);
+                let message = req.message.as_bytes();
+                bytes.put_u64(message.len() as u64);
+                bytes.put_slice(message);
+            }
         }
 
         bytes.into()
@@ -1645,56 +1710,66 @@ impl PagestreamFeMessage {
             ),
         };
 
-        match msg_tag {
-            0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
-                hdr: PagestreamRequest {
-                    reqid,
-                    request_lsn,
-                    not_modified_since,
-                },
-                rel: RelTag {
-                    spcnode: body.read_u32::<BigEndian>()?,
+        match PagestreamFeMessageTag::try_from(msg_tag)
+            .map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
+        {
+            PagestreamFeMessageTag::Exists => {
+                Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
+                    hdr: PagestreamRequest {
+                        reqid,
+                        request_lsn,
+                        not_modified_since,
+                    },
+                    rel: RelTag {
+                        spcnode: body.read_u32::<BigEndian>()?,
+                        dbnode: body.read_u32::<BigEndian>()?,
+                        relnode: body.read_u32::<BigEndian>()?,
+                        forknum: body.read_u8()?,
+                    },
+                }))
+            }
+            PagestreamFeMessageTag::Nblocks => {
+                Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
+                    hdr: PagestreamRequest {
+                        reqid,
+                        request_lsn,
+                        not_modified_since,
+                    },
+                    rel: RelTag {
+                        spcnode: body.read_u32::<BigEndian>()?,
+                        dbnode: body.read_u32::<BigEndian>()?,
+                        relnode: body.read_u32::<BigEndian>()?,
+                        forknum: body.read_u8()?,
+                    },
+                }))
+            }
+            PagestreamFeMessageTag::GetPage => {
+                Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
+                    hdr: PagestreamRequest {
+                        reqid,
+                        request_lsn,
+                        not_modified_since,
+                    },
+                    rel: RelTag {
+                        spcnode: body.read_u32::<BigEndian>()?,
+                        dbnode: body.read_u32::<BigEndian>()?,
+                        relnode: body.read_u32::<BigEndian>()?,
+                        forknum: body.read_u8()?,
+                    },
+                    blkno: body.read_u32::<BigEndian>()?,
+                }))
+            }
+            PagestreamFeMessageTag::DbSize => {
+                Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
+                    hdr: PagestreamRequest {
+                        reqid,
+                        request_lsn,
+                        not_modified_since,
+                    },
                     dbnode: body.read_u32::<BigEndian>()?,
-                    relnode: body.read_u32::<BigEndian>()?,
-                    forknum: body.read_u8()?,
-                },
-            })),
-            1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
-                hdr: PagestreamRequest {
-                    reqid,
-                    request_lsn,
-                    not_modified_since,
-                },
-                rel: RelTag {
-                    spcnode: body.read_u32::<BigEndian>()?,
-                    dbnode: body.read_u32::<BigEndian>()?,
-                    relnode: body.read_u32::<BigEndian>()?,
-                    forknum: body.read_u8()?,
-                },
-            })),
-            2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
-                hdr: PagestreamRequest {
-                    reqid,
-                    request_lsn,
-                    not_modified_since,
-                },
-                rel: RelTag {
-                    spcnode: body.read_u32::<BigEndian>()?,
-                    dbnode: body.read_u32::<BigEndian>()?,
-                    relnode: body.read_u32::<BigEndian>()?,
-                    forknum: body.read_u8()?,
-                },
-                blkno: body.read_u32::<BigEndian>()?,
-            })),
-            3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
-                hdr: PagestreamRequest {
-                    reqid,
-                    request_lsn,
-                    not_modified_since,
-                },
-                dbnode: body.read_u32::<BigEndian>()?,
-            })),
-            4 => Ok(PagestreamFeMessage::GetSlruSegment(
+                }))
+            }
+            PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
                 PagestreamGetSlruSegmentRequest {
                     hdr: PagestreamRequest {
                         reqid,
@@ -1705,7 +1780,21 @@ impl PagestreamFeMessage {
                     segno: body.read_u32::<BigEndian>()?,
                 },
             )),
-            _ => bail!("unknown smgr message tag: {:?}", msg_tag),
+            #[cfg(feature = "testing")]
+            PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
+                hdr: PagestreamRequest {
+                    reqid,
+                    request_lsn,
+                    not_modified_since,
+                },
+                batch_key: body.read_u64::<BigEndian>()?,
+                message: {
+                    let len = body.read_u64::<BigEndian>()?;
+                    let mut buf = vec![0; len as usize];
+                    body.read_exact(&mut buf)?;
+                    String::from_utf8(buf)?
+                },
+            })),
         }
     }
 }
@@ -1748,6 +1837,15 @@ impl PagestreamBeMessage {
                         bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
                         bytes.put(&resp.segment[..]);
                     }
+
+                    #[cfg(feature = "testing")]
+                    Self::Test(resp) => {
+                        bytes.put_u8(Tag::Test as u8);
+                        bytes.put_u64(resp.req.batch_key);
+                        let message = resp.req.message.as_bytes();
+                        bytes.put_u64(message.len() as u64);
+                        bytes.put_slice(message);
+                    }
                 }
             }
             PagestreamProtocolVersion::V3 => {
@@ -1816,6 +1914,18 @@ impl PagestreamBeMessage {
                         bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
                         bytes.put(&resp.segment[..]);
                     }
+
+                    #[cfg(feature = "testing")]
+                    Self::Test(resp) => {
+                        bytes.put_u8(Tag::Test as u8);
+                        bytes.put_u64(resp.req.hdr.reqid);
+                        bytes.put_u64(resp.req.hdr.request_lsn.0);
+                        bytes.put_u64(resp.req.hdr.not_modified_since.0);
+                        bytes.put_u64(resp.req.batch_key);
+                        let message = resp.req.message.as_bytes();
+                        bytes.put_u64(message.len() as u64);
+                        bytes.put_slice(message);
+                    }
                 }
             }
         }
@@ -1958,6 +2068,28 @@ impl PagestreamBeMessage {
                         segment: segment.into(),
                     })
                 }
+                #[cfg(feature = "testing")]
+                Tag::Test => {
+                    let reqid = buf.read_u64::<BigEndian>()?;
+                    let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
+                    let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
+                    let batch_key = buf.read_u64::<BigEndian>()?;
+                    let len = buf.read_u64::<BigEndian>()?;
+                    let mut msg = vec![0; len as usize];
+                    buf.read_exact(&mut msg)?;
+                    let message = String::from_utf8(msg)?;
+                    Self::Test(PagestreamTestResponse {
+                        req: PagestreamTestRequest {
+                            hdr: PagestreamRequest {
+                                reqid,
+                                request_lsn,
+                                not_modified_since,
+                            },
+                            batch_key,
+                            message,
+                        },
+                    })
+                }
             };
         let remaining = buf.into_inner();
         if !remaining.is_empty() {
@@ -1977,6 +2109,8 @@ impl PagestreamBeMessage {
             Self::Error(_) => "Error",
             Self::DbSize(_) => "DbSize",
             Self::GetSlruSegment(_) => "GetSlruSegment",
+            #[cfg(feature = "testing")]
+            Self::Test(_) => "Test",
         }
     }
 }
diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml
index 9195951191..9c835c956b 100644
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -8,7 +8,7 @@ license.workspace = true
 default = []
 # Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
 # which adds some runtime cost to run tests on outage conditions
-testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing"]
+testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "pageserver_client/testing"]
 
 [dependencies]
 anyhow.workspace = true
@@ -114,3 +114,7 @@ harness = false
 [[bench]]
 name = "upload_queue"
 harness = false
+
+[[bin]]
+name = "test_helper_slow_client_reads"
+required-features = [ "testing" ]
diff --git a/pageserver/client/Cargo.toml b/pageserver/client/Cargo.toml
index d9b36bf3d4..f582d307a7 100644
--- a/pageserver/client/Cargo.toml
+++ b/pageserver/client/Cargo.toml
@@ -4,6 +4,9 @@ version = "0.1.0"
 edition.workspace = true
 license.workspace = true
 
+[features]
+testing = [ "pageserver_api/testing" ]
+
 [dependencies]
 pageserver_api.workspace = true
 thiserror.workspace = true
diff --git a/pageserver/client/src/page_service.rs b/pageserver/client/src/page_service.rs
index 207ec4166c..27280912b4 100644
--- a/pageserver/client/src/page_service.rs
+++ b/pageserver/client/src/page_service.rs
@@ -1,6 +1,9 @@
-use std::pin::Pin;
+use std::sync::{Arc, Mutex};
 
-use futures::SinkExt;
+use futures::{
+    stream::{SplitSink, SplitStream},
+    SinkExt, StreamExt,
+};
 use pageserver_api::{
     models::{
         PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
@@ -10,7 +13,6 @@ use pageserver_api::{
 };
 use tokio::task::JoinHandle;
 use tokio_postgres::CopyOutStream;
-use tokio_stream::StreamExt;
 use tokio_util::sync::CancellationToken;
 use utils::{
     id::{TenantId, TimelineId},
@@ -62,15 +64,28 @@ impl Client {
             .client
             .copy_both_simple(&format!("pagestream_v3 {tenant_id} {timeline_id}"))
             .await?;
+        let (sink, stream) = copy_both.split(); // TODO: actually support splitting of the CopyBothDuplex so the lock inside this split adaptor goes away.
         let Client {
             cancel_on_client_drop,
             conn_task,
             client: _,
         } = self;
+        let shared = Arc::new(Mutex::new(PagestreamShared::ConnTaskRunning(
+            ConnTaskRunning {
+                cancel_on_client_drop,
+                conn_task,
+            },
+        )));
         Ok(PagestreamClient {
-            copy_both: Box::pin(copy_both),
-            conn_task,
-            cancel_on_client_drop,
+            sink: PagestreamSender {
+                shared: shared.clone(),
+                sink,
+            },
+            stream: PagestreamReceiver {
+                shared: shared.clone(),
+                stream,
+            },
+            shared,
         })
     }
 
@@ -97,7 +112,28 @@ impl Client {
 
 /// Create using [`Client::pagestream`].
 pub struct PagestreamClient {
-    copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
+    shared: Arc<Mutex<PagestreamShared>>,
+    sink: PagestreamSender,
+    stream: PagestreamReceiver,
+}
+
+pub struct PagestreamSender {
+    #[allow(dead_code)]
+    shared: Arc<Mutex<PagestreamShared>>,
+    sink: SplitSink<tokio_postgres::CopyBothDuplex<bytes::Bytes>, bytes::Bytes>,
+}
+
+pub struct PagestreamReceiver {
+    #[allow(dead_code)]
+    shared: Arc<Mutex<PagestreamShared>>,
+    stream: SplitStream<tokio_postgres::CopyBothDuplex<bytes::Bytes>>,
+}
+
+enum PagestreamShared {
+    ConnTaskRunning(ConnTaskRunning),
+    ConnTaskCancelledJoinHandleReturnedOrDropped,
+}
+struct ConnTaskRunning {
     cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
     conn_task: JoinHandle<()>,
 }
@@ -110,11 +146,11 @@ pub struct RelTagBlockNo {
 impl PagestreamClient {
     pub async fn shutdown(self) {
         let Self {
-            copy_both,
-            cancel_on_client_drop: cancel_conn_task,
-            conn_task,
-        } = self;
-        // The `copy_both` contains internal channel sender, the receiver of which is polled by `conn_task`.
+            shared,
+            sink,
+            stream,
+        } = { self };
+        // The `copy_both`, now split into `sink` and `stream`, contains an internal channel sender, the receiver of which is polled by `conn_task`.
         // When `conn_task` observes the sender has been dropped, it sends a `FeMessage::CopyFail` into the connection.
         // (see https://github.com/neondatabase/rust-postgres/blob/2005bf79573b8add5cf205b52a2b208e356cc8b0/tokio-postgres/src/copy_both.rs#L56).
         //
@@ -131,27 +167,77 @@ impl PagestreamClient {
         //
         // NB: page_service doesn't have a use case to exit the `pagestream` mode currently.
         // => https://github.com/neondatabase/neon/issues/6390
-        let _ = cancel_conn_task.unwrap();
+        let ConnTaskRunning {
+            cancel_on_client_drop,
+            conn_task,
+        } = {
+            let mut guard = shared.lock().unwrap();
+            match std::mem::replace(
+                &mut *guard,
+                PagestreamShared::ConnTaskCancelledJoinHandleReturnedOrDropped,
+            ) {
+                PagestreamShared::ConnTaskRunning(conn_task_running) => conn_task_running,
+                PagestreamShared::ConnTaskCancelledJoinHandleReturnedOrDropped => unreachable!(),
+            }
+        };
+        let _ = cancel_on_client_drop.unwrap();
         conn_task.await.unwrap();
-        drop(copy_both);
+
+        // Now drop the split copy_both.
+        drop(sink);
+        drop(stream);
+    }
+
+    pub fn split(self) -> (PagestreamSender, PagestreamReceiver) {
+        let Self {
+            shared: _,
+            sink,
+            stream,
+        } = self;
+        (sink, stream)
     }
 
     pub async fn getpage(
         &mut self,
         req: PagestreamGetPageRequest,
     ) -> anyhow::Result<PagestreamGetPageResponse> {
-        let req = PagestreamFeMessage::GetPage(req);
-        let req: bytes::Bytes = req.serialize();
-        // let mut req = tokio_util::io::ReaderStream::new(&req);
-        let mut req = tokio_stream::once(Ok(req));
+        self.getpage_send(req).await?;
+        self.getpage_recv().await
+    }
 
-        self.copy_both.send_all(&mut req).await?;
+    pub async fn getpage_send(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
+        self.sink.getpage_send(req).await
+    }
 
-        let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
+    pub async fn getpage_recv(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
+        self.stream.getpage_recv().await
+    }
+}
+
+impl PagestreamSender {
+    // TODO: maybe make this impl Sink instead for better composability?
+    pub async fn send(&mut self, msg: PagestreamFeMessage) -> anyhow::Result<()> {
+        let msg = msg.serialize();
+        self.sink.send_all(&mut tokio_stream::once(Ok(msg))).await?;
+        Ok(())
+    }
+
+    pub async fn getpage_send(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
+        self.send(PagestreamFeMessage::GetPage(req)).await
+    }
+}
+
+impl PagestreamReceiver {
+    // TODO: maybe make this impl Stream instead for better composability?
+    pub async fn recv(&mut self) -> anyhow::Result<PagestreamBeMessage> {
+        let next: Option<Result<bytes::Bytes, _>> = self.stream.next().await;
         let next: bytes::Bytes = next.unwrap()?;
+        PagestreamBeMessage::deserialize(next)
+    }
 
-        let msg = PagestreamBeMessage::deserialize(next)?;
-        match msg {
+    pub async fn getpage_recv(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
+        let next: PagestreamBeMessage = self.recv().await?;
+        match next {
             PagestreamBeMessage::GetPage(p) => Ok(p),
             PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
             PagestreamBeMessage::Exists(_)
@@ -160,7 +246,14 @@ impl PagestreamClient {
             | PagestreamBeMessage::GetSlruSegment(_) => {
                 anyhow::bail!(
                     "unexpected be message kind in response to getpage request: {}",
-                    msg.kind()
+                    next.kind()
+                )
+            }
+            #[cfg(feature = "testing")]
+            PagestreamBeMessage::Test(_) => {
+                anyhow::bail!(
+                    "unexpected be message kind in response to getpage request: {}",
+                    next.kind()
                 )
             }
         }
diff --git a/pageserver/src/bin/test_helper_slow_client_reads.rs b/pageserver/src/bin/test_helper_slow_client_reads.rs
new file mode 100644
index 0000000000..c1ce332b6c
--- /dev/null
+++ b/pageserver/src/bin/test_helper_slow_client_reads.rs
@@ -0,0 +1,74 @@
+//! Test helper: fills a pagestream connection with `Test` requests without ever reading
+//! the responses, then reports readiness on stdout and waits for a byte on stdin.
+use std::{
+    io::{stdin, stdout, Read, Write},
+    time::Duration,
+};
+
+use clap::Parser;
+use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest};
+use utils::{
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
+};
+
+// Positional CLI arguments. Plain `//` comment on purpose: clap turns `///` docs into help text.
+#[derive(clap::Parser)]
+struct Args {
+    connstr: String,
+    tenant_id: TenantId,
+    timeline_id: TimelineId,
+}
+
+/// Opens a pagestream, sends `Test` requests until a send stalls for 10 seconds,
+/// writes a single `R` to stdout, then blocks until one byte can be read from stdin.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let Args {
+        connstr,
+        tenant_id,
+        timeline_id,
+    } = Args::parse();
+    let client = pageserver_client::page_service::Client::new(connstr).await?;
+    let client = client.pagestream(tenant_id, timeline_id).await?;
+    // The receiver is bound (not dropped) but never read from: responses are never consumed.
+    let (mut sender, _receiver) = client.split();
+
+    eprintln!("filling the pipe");
+    let mut msg = 0;
+    loop {
+        msg += 1;
+        let fut = sender.send(pageserver_api::models::PagestreamFeMessage::Test(
+            PagestreamTestRequest {
+                hdr: PagestreamRequest {
+                    reqid: 0,
+                    request_lsn: Lsn(23),
+                    not_modified_since: Lsn(23),
+                },
+                batch_key: 42,
+                message: format!("message {}", msg),
+            },
+        ));
+        // A send that does not complete within the timeout is taken to mean the pipe is full.
+        let Ok(res) = tokio::time::timeout(Duration::from_secs(10), fut).await else {
+            eprintln!("pipe seems full");
+            break;
+        };
+        let _: () = res?;
+    }
+
+    // Report readiness. `write_all` retries interrupted/short writes and surfaces a
+    // failure as an error, where a bare `write` plus `assert_eq!` would panic instead.
+    let mut out = stdout();
+    out.write_all(b"R")?;
+    out.flush()?;
+
+    eprintln!("waiting for signal to tell us to exit");
+
+    let mut buf = [0u8; 1];
+    stdin().read_exact(&mut buf)?;
+
+    eprintln!("termination signal received, exiting");
+
+    anyhow::Ok(())
+}
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index 5b1cbbad63..3c4830e3cd 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1463,6 +1463,8 @@ pub enum SmgrQueryType {
     GetPageAtLsn,
     GetDbSize,
     GetSlruSegment,
+    #[cfg(feature = "testing")]
+    Test,
 }
 
 pub(crate) struct SmgrQueryTimePerTimeline {
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index da4180a927..b14a44f9e3 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -555,37 +555,52 @@ struct BatchedGetPageRequest {
     timer: SmgrOpTimer,
 }
 
+#[cfg(feature = "testing")]
+struct BatchedTestRequest {
+    req: models::PagestreamTestRequest,
+    timer: SmgrOpTimer,
+}
+
+/// NB: we only hold [`timeline::handle::WeakHandle`] inside this enum,
+/// so that we don't keep the [`Timeline::gate`] open while the batch
+/// is being built up inside the [`spsc_fold`] (pagestream pipelining).
 enum BatchedFeMessage {
     Exists {
         span: Span,
         timer: SmgrOpTimer,
-        shard: timeline::handle::Handle<TenantManagerTypes>,
+        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
         req: models::PagestreamExistsRequest,
     },
     Nblocks {
         span: Span,
         timer: SmgrOpTimer,
-        shard: timeline::handle::Handle<TenantManagerTypes>,
+        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
         req: models::PagestreamNblocksRequest,
     },
     GetPage {
         span: Span,
-        shard: timeline::handle::Handle<TenantManagerTypes>,
+        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
         effective_request_lsn: Lsn,
         pages: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
     },
     DbSize {
         span: Span,
         timer: SmgrOpTimer,
-        shard: timeline::handle::Handle<TenantManagerTypes>,
+        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
         req: models::PagestreamDbSizeRequest,
     },
     GetSlruSegment {
         span: Span,
         timer: SmgrOpTimer,
-        shard: timeline::handle::Handle<TenantManagerTypes>,
+        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
         req: models::PagestreamGetSlruSegmentRequest,
     },
+    #[cfg(feature = "testing")]
+    Test {
+        span: Span,
+        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
+        requests: Vec<BatchedTestRequest>,
+    },
     RespondError {
         span: Span,
         error: BatchedPageStreamError,
@@ -606,6 +621,12 @@ impl BatchedFeMessage {
                     page.timer.observe_execution_start(at);
                 }
             }
+            #[cfg(feature = "testing")]
+            BatchedFeMessage::Test { requests, .. } => {
+                for req in requests {
+                    req.timer.observe_execution_start(at);
+                }
+            }
             BatchedFeMessage::RespondError { .. } => {}
         }
     }
@@ -735,7 +756,7 @@ impl PageServerHandler {
                 BatchedFeMessage::Exists {
                     span,
                     timer,
-                    shard,
+                    shard: shard.downgrade(),
                     req,
                 }
             }
@@ -754,7 +775,7 @@ impl PageServerHandler {
                 BatchedFeMessage::Nblocks {
                     span,
                     timer,
-                    shard,
+                    shard: shard.downgrade(),
                     req,
                 }
             }
@@ -773,7 +794,7 @@ impl PageServerHandler {
                 BatchedFeMessage::DbSize {
                     span,
                     timer,
-                    shard,
+                    shard: shard.downgrade(),
                     req,
                 }
             }
@@ -792,7 +813,7 @@ impl PageServerHandler {
                 BatchedFeMessage::GetSlruSegment {
                     span,
                     timer,
-                    shard,
+                    shard: shard.downgrade(),
                     req,
                 }
             }
@@ -844,6 +865,7 @@ impl PageServerHandler {
                 )
                 .await?;
 
+                // We're holding the Handle
                 let effective_request_lsn = match Self::wait_or_get_last_lsn(
                     &shard,
                     req.hdr.request_lsn,
@@ -861,11 +883,27 @@ impl PageServerHandler {
                 };
                 BatchedFeMessage::GetPage {
                     span,
-                    shard,
+                    shard: shard.downgrade(),
                     effective_request_lsn,
                     pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }],
                 }
             }
+            #[cfg(feature = "testing")]
+            PagestreamFeMessage::Test(req) => {
+                let span = tracing::info_span!(parent: parent_span, "handle_test_request");
+                let shard = timeline_handles
+                    .get(tenant_id, timeline_id, ShardSelector::Zero)
+                    .instrument(span.clone()) // sets `shard_id` field
+                    .await?;
+                let timer =
+                    record_op_start_and_throttle(&shard, metrics::SmgrQueryType::Test, received_at)
+                        .await?;
+                BatchedFeMessage::Test {
+                    span,
+                    shard: shard.downgrade(),
+                    requests: vec![BatchedTestRequest { req, timer }],
+                }
+            }
         };
         Ok(Some(batched_msg))
     }
@@ -907,9 +945,7 @@ impl PageServerHandler {
                     assert_eq!(accum_pages.len(), max_batch_size.get());
                     return false;
                 }
-                if (accum_shard.tenant_shard_id, accum_shard.timeline_id)
-                    != (this_shard.tenant_shard_id, this_shard.timeline_id)
-                {
+                if !accum_shard.is_same_handle_as(&this_shard) {
                     trace!(%accum_lsn, %this_lsn, "stopping batching because timeline object mismatch");
                     // TODO: we _could_ batch & execute each shard seperately (and in parallel).
                     // But the current logic for keeping responses in order does not support that.
@@ -928,6 +964,44 @@ impl PageServerHandler {
                 accum_pages.extend(this_pages);
                 Ok(())
             }
+            #[cfg(feature = "testing")]
+            (
+                Ok(BatchedFeMessage::Test {
+                    shard: accum_shard,
+                    requests: accum_requests,
+                    ..
+                }),
+                BatchedFeMessage::Test {
+                    shard: this_shard,
+                    requests: this_requests,
+                    ..
+                },
+            ) if (|| {
+                assert!(this_requests.len() == 1);
+                if accum_requests.len() >= max_batch_size.get() {
+                    trace!(%max_batch_size, "stopping batching because of batch size");
+                    assert_eq!(accum_requests.len(), max_batch_size.get());
+                    return false;
+                }
+                if !accum_shard.is_same_handle_as(&this_shard) {
+                    trace!("stopping batching because timeline object mismatch");
+                    // TODO: we _could_ batch & execute each shard separately (and in parallel).
+                    // But the current logic for keeping responses in order does not support that.
+                    return false;
+                }
+                let this_batch_key = this_requests[0].req.batch_key;
+                let accum_batch_key = accum_requests[0].req.batch_key;
+                if this_requests[0].req.batch_key != accum_requests[0].req.batch_key {
+                    trace!(%accum_batch_key, %this_batch_key, "stopping batching because batch key changed");
+                    return false;
+                }
+                true
+            })() =>
+            {
+                // ok to batch
+                accum_requests.extend(this_requests);
+                Ok(())
+            }
             // something batched already but this message is unbatchable
             (_, this_msg) => {
                 // by default, don't continue batching
@@ -969,7 +1043,7 @@ impl PageServerHandler {
                 fail::fail_point!("ps::handle-pagerequest-message::exists");
                 (
                     vec![self
-                        .handle_get_rel_exists_request(&shard, &req, ctx)
+                        .handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx)
                         .instrument(span.clone())
                         .await
                         .map(|msg| (msg, timer))
@@ -986,7 +1060,7 @@ impl PageServerHandler {
                 fail::fail_point!("ps::handle-pagerequest-message::nblocks");
                 (
                     vec![self
-                        .handle_get_nblocks_request(&shard, &req, ctx)
+                        .handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx)
                         .instrument(span.clone())
                         .await
                         .map(|msg| (msg, timer))
@@ -1007,7 +1081,7 @@ impl PageServerHandler {
                         trace!(npages, "handling getpage request");
                         let res = self
                             .handle_get_page_at_lsn_request_batched(
-                                &shard,
+                                &*shard.upgrade()?,
                                 effective_request_lsn,
                                 pages,
                                 ctx,
@@ -1029,7 +1103,7 @@ impl PageServerHandler {
                 fail::fail_point!("ps::handle-pagerequest-message::dbsize");
                 (
                     vec![self
-                        .handle_db_size_request(&shard, &req, ctx)
+                        .handle_db_size_request(&*shard.upgrade()?, &req, ctx)
                         .instrument(span.clone())
                         .await
                         .map(|msg| (msg, timer))
@@ -1046,7 +1120,7 @@ impl PageServerHandler {
                 fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
                 (
                     vec![self
-                        .handle_get_slru_segment_request(&shard, &req, ctx)
+                        .handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx)
                         .instrument(span.clone())
                         .await
                         .map(|msg| (msg, timer))
@@ -1054,6 +1128,27 @@ impl PageServerHandler {
                     span,
                 )
             }
+            #[cfg(feature = "testing")]
+            BatchedFeMessage::Test {
+                span,
+                shard,
+                requests,
+            } => {
+                fail::fail_point!("ps::handle-pagerequest-message::test");
+                (
+                    {
+                        let npages = requests.len();
+                        trace!(npages, "handling getpage request");
+                        let res = self
+                            .handle_test_request_batch(&*shard.upgrade()?, requests, ctx)
+                            .instrument(span.clone())
+                            .await;
+                        assert_eq!(res.len(), npages);
+                        res
+                    },
+                    span,
+                )
+            }
             BatchedFeMessage::RespondError { span, error } => {
                 // We've already decided to respond with an error, so we don't need to
                 // call the handler.
@@ -1791,6 +1886,51 @@ impl PageServerHandler {
         ))
     }
 
+    // NB: this impl mimics what we do for batched getpage requests.
+    #[cfg(feature = "testing")]
+    #[instrument(skip_all, fields(shard_id))]
+    async fn handle_test_request_batch(
+        &mut self,
+        timeline: &Timeline,
+        requests: Vec<BatchedTestRequest>,
+        _ctx: &RequestContext,
+    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
+        // real requests would do something with the timeline
+        let mut results = Vec::with_capacity(requests.len());
+        for _req in requests.iter() {
+            tokio::task::yield_now().await;
+
+            results.push({
+                if timeline.cancel.is_cancelled() {
+                    Err(PageReconstructError::Cancelled)
+                } else {
+                    Ok(())
+                }
+            });
+        }
+
+        // TODO: avoid creating the new Vec here
+        Vec::from_iter(
+            requests
+                .into_iter()
+                .zip(results.into_iter())
+                .map(|(req, res)| {
+                    res.map(|()| {
+                        (
+                            PagestreamBeMessage::Test(models::PagestreamTestResponse {
+                                req: req.req.clone(),
+                            }),
+                            req.timer,
+                        )
+                    })
+                    .map_err(|e| BatchedPageStreamError {
+                        err: PageStreamError::from(e),
+                        req: req.req.hdr,
+                    })
+                }),
+        )
+    }
+
     /// Note on "fullbackup":
     /// Full basebackups should only be used for debugging purposes.
     /// Originally, it was introduced to enable breaking storage format changes,
@@ -2406,6 +2546,14 @@ impl From<GetActiveTimelineError> for QueryError {
     }
 }
 
+impl From<crate::tenant::timeline::handle::HandleUpgradeError> for QueryError {
+    fn from(e: crate::tenant::timeline::handle::HandleUpgradeError) -> Self {
+        match e {
+            crate::tenant::timeline::handle::HandleUpgradeError::ShutDown => QueryError::Shutdown,
+        }
+    }
+}
+
 fn set_tracing_field_shard_id(timeline: &Timeline) {
     debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();
     tracing::Span::current().record(
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 47c4a8637d..a006647785 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -382,6 +382,12 @@ pub(crate) struct RemoteTimelineClient {
     cancel: CancellationToken,
 }
 
+impl Drop for RemoteTimelineClient {
+    fn drop(&mut self) {
+        debug!("dropping RemoteTimelineClient");
+    }
+}
+
 impl RemoteTimelineClient {
     ///
     /// Create a remote storage client for given timeline
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index f24611e1d8..2ba71416b8 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -76,6 +76,7 @@ use std::{pin::pin, sync::OnceLock};
 
 use crate::{
     aux_file::AuxFileSizeEstimator,
+    page_service::TenantManagerTypes,
     tenant::{
         config::AttachmentMode,
         layer_map::{LayerMap, SearchResult},
@@ -431,7 +432,7 @@ pub struct Timeline {
 
     pub(crate) l0_flush_global_state: L0FlushGlobalState,
 
-    pub(crate) handles: handle::PerTimelineState<crate::page_service::TenantManagerTypes>,
+    pub(crate) handles: handle::PerTimelineState<TenantManagerTypes>,
 
     pub(crate) attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
 
@@ -4625,6 +4626,10 @@ impl Drop for Timeline {
                 }
             }
         }
+        info!(
+            "Timeline {} for tenant {} is being dropped",
+            self.timeline_id, self.tenant_shard_id.tenant_id
+        );
     }
 }
 
diff --git a/pageserver/src/tenant/timeline/handle.rs b/pageserver/src/tenant/timeline/handle.rs
index e82559b8b3..35d8c75ce1 100644
--- a/pageserver/src/tenant/timeline/handle.rs
+++ b/pageserver/src/tenant/timeline/handle.rs
@@ -32,54 +32,151 @@
 //!
 //! # Design
 //!
+//! ## Data Structures
+//!
 //! There are three user-facing data structures:
 //! - `PerTimelineState`: a struct embedded into each Timeline struct. Lifetime == Timeline lifetime.
 //! - `Cache`: a struct private to each connection handler; Lifetime == connection lifetime.
 //! - `Handle`: a smart pointer that holds the Timeline gate open and derefs to `&Timeline`.
-//!   Lifetime: for a single request dispatch on the Timeline (i.e., one getpage request)
+//! - `WeakHandle`: downgrade of a `Handle` that does not keep the gate open, but allows
+//!   trying to upgrade back to a `Handle`, guaranteeing it's the same `Timeline` *object*.
 //!
-//! The `Handle` is just a wrapper around an `Arc<HandleInner>`.
+//! Internally, there is 0 or 1 `HandleInner` per `(Cache,Timeline)`.
+//! Since Cache:Connection is 1:1, there is 0 or 1 `HandleInner` per `(Connection,Timeline)`.
 //!
-//! There is one long-lived `Arc<HandleInner>`, which is stored in the `PerTimelineState`.
-//! The `Cache` stores a `Weak<HandleInner>` for each cached Timeline.
+//! The `HandleInner` is allocated as an `Arc<Mutex<HandleInner>>` and
+//! referenced weakly and strongly from various places which we are now illustrating.
+//! For brevity, we will omit the `Arc<Mutex<>>` part in the following and instead
+//! use `strong ref` and `weak ref` when referring to the `Arc<Mutex<HandleInner>>`
+//! or `Weak<Mutex<HandleInner>>`, respectively.
+//!
+//! - The `Handle` is a strong ref.
+//! - The `WeakHandle` is a weak ref.
+//! - The `PerTimelineState` contains a `HashMap<CacheId, strong ref>`.
+//! - The `Cache` is a `HashMap<unique identifier for the shard, weak ref>`.
+//!
+//! Lifetimes:
+//! - `WeakHandle` and `Handle`: single pagestream request.
+//! - `Cache`: single page service connection.
+//! - `PerTimelineState`: lifetime of the Timeline object (i.e., until `Timeline::shutdown`).
+//!
+//! ## Request Handling Flow (= filling and using the `Cache`)
 //!
 //! To dispatch a request, the page service connection calls `Cache::get`.
 //!
 //! A cache miss means we consult the tenant manager for shard routing,
-//! resulting in an `Arc<Timeline>`. We enter its gate _once_ and construct an
-//! `Arc<HandleInner>`. We store a `Weak<HandleInner>` in the cache
-//! and the `Arc<HandleInner>` in the `PerTimelineState`.
+//! resulting in an `Arc<Timeline>`. We enter its gate _once_ and store it in the
+//! `Arc<Mutex<HandleInner>>`. A weak ref is stored in the `Cache`
+//! and a strong ref in the `PerTimelineState`.
+//! A strong ref is returned wrapped in a `Handle`.
 //!
 //! For subsequent requests, `Cache::get` will perform a "fast path" shard routing
-//! and find the `Weak<HandleInner>` in the cache.
-//! We upgrade the `Weak<HandleInner>` to an `Arc<HandleInner>` and wrap it in the user-facing `Handle` type.
+//! and find the weak ref in the cache.
+//! We upgrade the weak ref to a strong ref and return it wrapped in a `Handle`.
 //!
-//! The request handler dispatches the request to the right `<Handle as Deref<Target = Timeline>>::$request_method`.
+//! The pagestream processing is pipelined and involves a batching step.
+//! While a request is batching, the `Handle` is downgraded to a `WeakHandle`.
+//! When the batch is ready to be executed, the `WeakHandle` is upgraded back to a `Handle`
+//! and the request handler dispatches the request to the right `<Handle as Deref<Target = Timeline>>::$request_method`.
 //! It then drops the `Handle`, which drops the `Arc<HandleInner>`.
 //!
-//! # Memory Management / How The Reference Cycle Is Broken
+//! # Performance
 //!
-//! The attentive reader may have noticed the strong reference cycle
-//! from `Arc<HandleInner>` to `PerTimelineState` to `Arc<Timeline>`.
+//! Remember from the introductory section:
 //!
-//! This cycle is intentional: while it exists, the `Cache` can upgrade its
-//! `Weak<HandleInner>` to an `Arc<HandleInner>` in a single atomic operation.
+//! > However, we want to avoid the overhead of entering the gate for every
+//! > method invocation.
+//!
+//! Why do we want to avoid that?
+//! Because the gate is a shared location in memory and entering it involves
+//! bumping refcounts, which leads to cache contention if done frequently
+//! from multiple cores in parallel.
+//!
+//! So, we only acquire the `GateGuard` once on `Cache` miss, and wrap it in an `Arc`.
+//! That `Arc` is private to the `HandleInner` and hence to the connection.
+//! (Review the "Data Structures" section if that is unclear to you.)
+//!
+//! A `WeakHandle` is a weak ref to the `HandleInner`.
+//! When upgrading a `WeakHandle`, we upgrade to a strong ref to the `HandleInner` and
+//! further acquire an additional strong ref to the `Arc<GateGuard>` inside it.
+//! Again, this manipulation of ref counts is cheap because the `Arc` is private to the connection.
+//!
+//! When downgrading a `Handle` to a `WeakHandle`, we drop the `Arc<GateGuard>`.
+//! Again, this is cheap because the `Arc` is private to the connection.
+//!
+//! In addition to the GateGuard, we need to provide a `Deref<Target=Timeline>` impl.
+//! For this, `Handle` needs infallible access to an `Arc<Timeline>`.
+//! We could clone the `Arc<Timeline>` when upgrading a `WeakHandle`, but that would cause contention
+//! on the shared memory location that tracks the refcount of the `Arc<Timeline>`.
+//! Instead, we wrap the `Arc<Timeline>` into another `Arc`,
+//! so that we can clone it cheaply when upgrading a `WeakHandle`.
+//!
+//! # Shutdown
+//!
+//! The attentive reader may have noticed the following reference cycle around the `Arc<Timeline>`:
+//!
+//! ```text
+//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Timeline
+//! ```
+//!
+//! Further, there is this cycle:
+//!
+//! ```text
+//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> GateGuard --keepalive--> Timeline
+//! ```
+//!
+//! The former cycle is a memory leak if not broken.
+//! The latter cycle further prevents the Timeline from shutting down
+//! because we certainly won't drop the Timeline while the GateGuard is alive.
+//! Preventing shutdown is the whole point of this handle/cache system,
+//! but when the Timeline needs to shut down, we need to break the cycle.
 //!
 //! The cycle is broken by either
-//! - `PerTimelineState::shutdown` or
-//! - dropping the `Cache`.
+//! - Timeline shutdown (=> `PerTimelineState::shutdown`)
+//! - Connection shutdown (=> dropping the `Cache`).
 //!
-//! Concurrently existing `Handle`s will extend the existence of the cycle.
+//! Both transition the `HandleInner` from [`HandleInner::KeepingTimelineGateOpen`] to
+//! [`HandleInner::ShutDown`], which drops the only long-lived strong ref to the
+//! `Arc<GateGuard>`.
+//!
+//! `PerTimelineState::shutdown` drops all the `HandleInners` it contains,
+//! thereby breaking the cycle.
+//! It also initiates draining of already existing `Handle`s by
+//! poisoning things so that no new `HandleInner`s can be added
+//! to the `PerTimelineState`, which will make subsequent `Cache::get` fail.
+//!
+//! Concurrently existing / already upgraded `Handle`s will extend the
+//! lifetime of the `Arc<Mutex<HandleInner>>` and hence cycles.
 //! However, since `Handle`s are short-lived and new `Handle`s are not
-//! handed out after either `PerTimelineState::shutdown` or `Cache` drop,
-//! that extension of the cycle is bounded.
+//! handed out from `Cache::get` or `WeakHandle::upgrade` after
+//! `PerTimelineState::shutdown`, that extension of the cycle is bounded.
+//!
+//! Concurrently existing `WeakHandle`s will fail to `upgrade()`:
+//! while they will succeed in upgrading `Weak<Mutex<HandleInner>>`,
+//! they will find the inner in the `HandleInner::ShutDown` state, where the
+//! `Arc<GateGuard>` and Timeline have already been dropped.
+//!
+//! Dropping the `Cache` undoes the registration of this `Cache`'s
+//! `HandleInner`s from all the `PerTimelineState`s, i.e., it
+//! removes the strong ref to each of its `HandleInner`s
+//! from all the `PerTimelineState`.
+//!
+//! # Locking Rules
+//!
+//! To prevent deadlocks we:
+//!
+//! 1. Only ever hold one of the locks at a time.
+//! 2. Don't add more than one Drop impl that locks on the
+//!    cycles above.
+//!
+//! As per (2), that impl is in `Drop for Cache`.
 //!
 //! # Fast Path for Shard Routing
 //!
 //! The `Cache` has a fast path for shard routing to avoid calling into
 //! the tenant manager for every request.
 //!
-//! The `Cache` maintains a hash map of `ShardTimelineId` to `Weak<HandleInner>`.
+//! The `Cache` maintains a hash map of `ShardTimelineId` to `WeakHandle`s.
 //!
 //! The current implementation uses the first entry in the hash map
 //! to determine the `ShardParameters` and derive the correct
@@ -87,18 +184,18 @@
 //!
 //! It then looks up the hash map for that `ShardTimelineId := {ShardIndex,TimelineId}`.
 //!
-//! If the lookup is successful and the `Weak<HandleInner>` can be upgraded,
+//! If the lookup is successful and the `WeakHandle` can be upgraded,
 //! it's a hit.
 //!
 //! ## Cache invalidation
 //!
-//! The insight is that cache invalidation is sufficient and most efficiently done lazily.
+//! The insight is that cache invalidation is sufficient and most efficient if done lazily.
 //! The only reasons why an entry in the cache can become stale are:
 //! 1. The `PerTimelineState` / Timeline is shutting down e.g. because the shard is
 //!    being detached, timeline or shard deleted, or pageserver is shutting down.
 //! 2. We're doing a shard split and new traffic should be routed to the child shards.
 //!
-//! Regarding (1), we will eventually fail to upgrade the `Weak<HandleInner>` once the
+//! Regarding (1), we will eventually fail to upgrade the `WeakHandle` once the
 //! timeline has shut down, and when that happens, we remove the entry from the cache.
 //!
 //! Regarding (2), the insight is that it is toally fine to keep dispatching requests
@@ -107,8 +204,6 @@
 
 use std::collections::hash_map;
 use std::collections::HashMap;
-use std::sync::atomic::AtomicBool;
-use std::sync::atomic::Ordering;
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::Weak;
@@ -152,7 +247,7 @@ pub(crate) struct Cache<T: Types> {
     map: Map<T>,
 }
 
-type Map<T> = HashMap<ShardTimelineId, Weak<HandleInner<T>>>;
+type Map<T> = HashMap<ShardTimelineId, WeakHandle<T>>;
 
 impl<T: Types> Default for Cache<T> {
     fn default() -> Self {
@@ -170,12 +265,22 @@ pub(crate) struct ShardTimelineId {
 }
 
 /// See module-level comment.
-pub(crate) struct Handle<T: Types>(Arc<HandleInner<T>>);
-struct HandleInner<T: Types> {
-    shut_down: AtomicBool,
-    timeline: T::Timeline,
-    // The timeline's gate held open.
-    _gate_guard: utils::sync::gate::GateGuard,
+pub(crate) struct Handle<T: Types> {
+    timeline: Arc<T::Timeline>,
+    #[allow(dead_code)] // the field exists to keep the gate open
+    gate_guard: Arc<utils::sync::gate::GateGuard>,
+    inner: Arc<Mutex<HandleInner<T>>>,
+}
+pub(crate) struct WeakHandle<T: Types> {
+    inner: Weak<Mutex<HandleInner<T>>>,
+}
+enum HandleInner<T: Types> {
+    KeepingTimelineGateOpen {
+        #[allow(dead_code)]
+        gate_guard: Arc<utils::sync::gate::GateGuard>,
+        timeline: Arc<T::Timeline>,
+    },
+    ShutDown,
 }
 
 /// Embedded in each [`Types::Timeline`] as the anchor for the only long-lived strong ref to `HandleInner`.
@@ -183,7 +288,8 @@ struct HandleInner<T: Types> {
 /// See module-level comment for details.
 pub struct PerTimelineState<T: Types> {
     // None = shutting down
-    handles: Mutex<Option<HashMap<CacheId, Arc<HandleInner<T>>>>>,
+    #[allow(clippy::type_complexity)]
+    handles: Mutex<Option<HashMap<CacheId, Arc<Mutex<HandleInner<T>>>>>>,
 }
 
 impl<T: Types> Default for PerTimelineState<T> {
@@ -243,49 +349,24 @@ impl<T: Types> Cache<T> {
         shard_selector: ShardSelector,
         tenant_manager: &T::TenantManager,
     ) -> Result<Handle<T>, GetError<T>> {
-        // terminates because each iteration removes an element from the map
-        loop {
-            let handle = self
-                .get_impl(timeline_id, shard_selector, tenant_manager)
-                .await?;
-            if handle.0.shut_down.load(Ordering::Relaxed) {
-                let removed = self
-                    .map
-                    .remove(&handle.0.timeline.shard_timeline_id())
-                    .expect("invariant of get_impl is that the returned handle is in the map");
-                assert!(
-                    Weak::ptr_eq(&removed, &Arc::downgrade(&handle.0)),
-                    "shard_timeline_id() incorrect?"
-                );
-            } else {
-                return Ok(handle);
-            }
-        }
-    }
-
-    #[instrument(level = "trace", skip_all)]
-    async fn get_impl(
-        &mut self,
-        timeline_id: TimelineId,
-        shard_selector: ShardSelector,
-        tenant_manager: &T::TenantManager,
-    ) -> Result<Handle<T>, GetError<T>> {
-        let miss: ShardSelector = {
+        // terminates because every iteration that loops again removes an element from the map
+        let miss: ShardSelector = loop {
             let routing_state = self.shard_routing(timeline_id, shard_selector);
             match routing_state {
                 RoutingResult::FastPath(handle) => return Ok(handle),
                 RoutingResult::SlowPath(key) => match self.map.get(&key) {
                     Some(cached) => match cached.upgrade() {
-                        Some(upgraded) => return Ok(Handle(upgraded)),
-                        None => {
+                        Ok(upgraded) => return Ok(upgraded),
+                        Err(HandleUpgradeError::ShutDown) => {
+                            // TODO: dedup with shard_routing()
                             trace!("handle cache stale");
                             self.map.remove(&key).unwrap();
-                            ShardSelector::Known(key.shard_index)
+                            continue;
                         }
                     },
-                    None => ShardSelector::Known(key.shard_index),
+                    None => break ShardSelector::Known(key.shard_index),
                 },
-                RoutingResult::NeedConsultTenantManager => shard_selector,
+                RoutingResult::NeedConsultTenantManager => break shard_selector,
             }
         };
         self.get_miss(timeline_id, miss, tenant_manager).await
@@ -302,7 +383,7 @@ impl<T: Types> Cache<T> {
             let Some((first_key, first_handle)) = self.map.iter().next() else {
                 return RoutingResult::NeedConsultTenantManager;
             };
-            let Some(first_handle) = first_handle.upgrade() else {
+            let Ok(first_handle) = first_handle.upgrade() else {
                 // TODO: dedup with get()
                 trace!("handle cache stale");
                 let first_key_owned = *first_key;
@@ -310,7 +391,7 @@ impl<T: Types> Cache<T> {
                 continue;
             };
 
-            let first_handle_shard_identity = first_handle.timeline.get_shard_identity();
+            let first_handle_shard_identity = first_handle.get_shard_identity();
             let make_shard_index = |shard_num: ShardNumber| ShardIndex {
                 shard_number: shard_num,
                 shard_count: first_handle_shard_identity.count,
@@ -329,11 +410,11 @@ impl<T: Types> Cache<T> {
             };
             let first_handle_shard_timeline_id = ShardTimelineId {
                 shard_index: first_handle_shard_identity.shard_index(),
-                timeline_id: first_handle.timeline.shard_timeline_id().timeline_id,
+                timeline_id: first_handle.shard_timeline_id().timeline_id,
             };
 
             if need_shard_timeline_id == first_handle_shard_timeline_id {
-                return RoutingResult::FastPath(Handle(first_handle));
+                return RoutingResult::FastPath(first_handle);
             } else {
                 return RoutingResult::SlowPath(need_shard_timeline_id);
             }
@@ -357,23 +438,30 @@ impl<T: Types> Cache<T> {
                     ShardSelector::Known(idx) => assert_eq!(idx, &key.shard_index),
                 }
 
-                let gate_guard = match timeline.gate().enter() {
-                    Ok(guard) => guard,
-                    Err(_) => {
-                        return Err(GetError::TimelineGateClosed);
-                    }
-                };
                 trace!("creating new HandleInner");
-                let handle = Arc::new(
-                    // TODO: global metric that keeps track of the number of live HandlerTimeline instances
-                    // so we can identify reference cycle bugs.
-                    HandleInner {
-                        shut_down: AtomicBool::new(false),
-                        _gate_guard: gate_guard,
-                        timeline: timeline.clone(),
-                    },
-                );
-                let handle = {
+                let handle_inner_arc = Arc::new(Mutex::new(HandleInner::KeepingTimelineGateOpen {
+                    gate_guard: Arc::new(
+                        // this enter() is expensive in production code because
+                        // it hits the global Arc<Timeline>::gate refcounts
+                        match timeline.gate().enter() {
+                            Ok(guard) => guard,
+                            Err(_) => {
+                                return Err(GetError::TimelineGateClosed);
+                            }
+                        },
+                    ),
+                    // this clone is expensive in production code because
+                    // it hits the global Arc<Timeline>::clone refcounts
+                    timeline: Arc::new(timeline.clone()),
+                }));
+                let handle_weak = WeakHandle {
+                    inner: Arc::downgrade(&handle_inner_arc),
+                };
+                let handle = handle_weak
+                    .upgrade()
+                    .ok()
+                    .expect("we just created it and it's not linked anywhere yet");
+                {
                     let mut lock_guard = timeline
                         .per_timeline_state()
                         .handles
@@ -381,7 +469,8 @@ impl<T: Types> Cache<T> {
                         .expect("mutex poisoned");
                     match &mut *lock_guard {
                         Some(per_timeline_state) => {
-                            let replaced = per_timeline_state.insert(self.id, Arc::clone(&handle));
+                            let replaced =
+                                per_timeline_state.insert(self.id, Arc::clone(&handle_inner_arc));
                             assert!(replaced.is_none(), "some earlier code left a stale handle");
                             match self.map.entry(key) {
                                 hash_map::Entry::Occupied(_o) => {
@@ -392,8 +481,7 @@ impl<T: Types> Cache<T> {
                                     unreachable!()
                                 }
                                 hash_map::Entry::Vacant(v) => {
-                                    v.insert(Arc::downgrade(&handle));
-                                    handle
+                                    v.insert(handle_weak);
                                 }
                             }
                         }
@@ -401,14 +489,62 @@ impl<T: Types> Cache<T> {
                             return Err(GetError::PerTimelineStateShutDown);
                         }
                     }
-                };
-                Ok(Handle(handle))
+                }
+                Ok(handle)
             }
             Err(e) => Err(GetError::TenantManager(e)),
         }
     }
 }
 
+pub(crate) enum HandleUpgradeError {
+    ShutDown,
+}
+
+impl<T: Types> WeakHandle<T> {
+    pub(crate) fn upgrade(&self) -> Result<Handle<T>, HandleUpgradeError> {
+        let Some(inner) = Weak::upgrade(&self.inner) else {
+            return Err(HandleUpgradeError::ShutDown);
+        };
+        let lock_guard = inner.lock().expect("poisoned");
+        match &*lock_guard {
+            HandleInner::KeepingTimelineGateOpen {
+                timeline,
+                gate_guard,
+            } => {
+                let gate_guard = Arc::clone(gate_guard);
+                let timeline = Arc::clone(timeline);
+                drop(lock_guard);
+                Ok(Handle {
+                    timeline,
+                    gate_guard,
+                    inner,
+                })
+            }
+            HandleInner::ShutDown => Err(HandleUpgradeError::ShutDown),
+        }
+    }
+
+    pub(crate) fn is_same_handle_as(&self, other: &WeakHandle<T>) -> bool {
+        Weak::ptr_eq(&self.inner, &other.inner)
+    }
+}
+
+impl<T: Types> std::ops::Deref for Handle<T> {
+    type Target = T::Timeline;
+    fn deref(&self) -> &Self::Target {
+        &self.timeline
+    }
+}
+
+impl<T: Types> Handle<T> {
+    pub(crate) fn downgrade(&self) -> WeakHandle<T> {
+        WeakHandle {
+            inner: Arc::downgrade(&self.inner),
+        }
+    }
+}
+
 impl<T: Types> PerTimelineState<T> {
     /// After this method returns, [`Cache::get`] will never again return a [`Handle`]
     /// to the [`Types::Timeline`] that embeds this per-timeline state.
@@ -430,43 +566,54 @@ impl<T: Types> PerTimelineState<T> {
             trace!("already shut down");
             return;
         };
-        for handle in handles.values() {
+        for handle_inner_arc in handles.values() {
             // Make hits fail.
-            handle.shut_down.store(true, Ordering::Relaxed);
+            let mut lock_guard = handle_inner_arc.lock().expect("poisoned");
+            lock_guard.shutdown();
         }
         drop(handles);
     }
 }
 
-impl<T: Types> std::ops::Deref for Handle<T> {
-    type Target = T::Timeline;
-    fn deref(&self) -> &Self::Target {
-        &self.0.timeline
-    }
-}
-
-#[cfg(test)]
-impl<T: Types> Drop for HandleInner<T> {
-    fn drop(&mut self) {
-        trace!("HandleInner dropped");
-    }
-}
-
 // When dropping a [`Cache`], prune its handles in the [`PerTimelineState`] to break the reference cycle.
 impl<T: Types> Drop for Cache<T> {
     fn drop(&mut self) {
-        for (_, weak) in self.map.drain() {
-            if let Some(strong) = weak.upgrade() {
-                // handle is still being kept alive in PerTimelineState
-                let timeline = strong.timeline.per_timeline_state();
-                let mut handles = timeline.handles.lock().expect("mutex poisoned");
-                if let Some(handles) = &mut *handles {
-                    let Some(removed) = handles.remove(&self.id) else {
-                        // There could have been a shutdown inbetween us upgrading the weak and locking the mutex.
-                        continue;
-                    };
-                    assert!(Arc::ptr_eq(&removed, &strong));
-                }
+        for (
+            _,
+            WeakHandle {
+                inner: handle_inner_weak,
+            },
+        ) in self.map.drain()
+        {
+            let Some(handle_inner_arc) = handle_inner_weak.upgrade() else {
+                continue;
+            };
+            let handle_timeline = handle_inner_arc
+                // locking rules: drop lock before acquiring other lock below
+                .lock()
+                .expect("poisoned")
+                .shutdown();
+            let per_timeline_state = handle_timeline.per_timeline_state();
+            let mut handles_lock_guard = per_timeline_state.handles.lock().expect("mutex poisoned");
+            let Some(handles) = &mut *handles_lock_guard else {
+                continue;
+            };
+            let Some(removed_handle_inner_arc) = handles.remove(&self.id) else {
+                // There could have been a shutdown in between us upgrading the weak and locking the mutex.
+                continue;
+            };
+            drop(handles_lock_guard); // locking rules: release the handles lock as soon as we are done with it
+            assert!(Arc::ptr_eq(&removed_handle_inner_arc, &handle_inner_arc,));
+        }
+    }
+}
+
+impl<T: Types> HandleInner<T> {
+    fn shutdown(&mut self) -> Arc<T::Timeline> {
+        match std::mem::replace(self, HandleInner::ShutDown) {
+            HandleInner::KeepingTimelineGateOpen { timeline, .. } => timeline,
+            HandleInner::ShutDown => {
+                unreachable!("handles are only shut down once in their lifetime");
             }
         }
     }
@@ -474,6 +621,8 @@ impl<T: Types> Drop for Cache<T> {
 
 #[cfg(test)]
 mod tests {
+    use std::sync::Weak;
+
     use pageserver_api::{
         key::{rel_block_to_key, Key, DBDIR_KEY},
         models::ShardParameters,
@@ -583,39 +732,13 @@ mod tests {
         //
         // fill the cache
         //
-        assert_eq!(
-            (Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
-            (2, 1),
-            "strong: shard0, mgr; weak: myself"
-        );
-
         let handle: Handle<_> = cache
             .get(timeline_id, ShardSelector::Page(key), &mgr)
             .await
             .expect("we have the timeline");
-        let handle_inner_weak = Arc::downgrade(&handle.0);
         assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));
-        assert_eq!(
-            (
-                Weak::strong_count(&handle_inner_weak),
-                Weak::weak_count(&handle_inner_weak)
-            ),
-            (2, 2),
-            "strong: handle, per_timeline_state, weak: handle_inner_weak, cache"
-        );
         assert_eq!(cache.map.len(), 1);
-
-        assert_eq!(
-            (Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
-            (3, 1),
-            "strong: handleinner(per_timeline_state), shard0, mgr; weak: myself"
-        );
         drop(handle);
-        assert_eq!(
-            (Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
-            (3, 1),
-            "strong: handleinner(per_timeline_state), shard0, mgr; weak: myself"
-        );
 
         //
         // demonstrate that Handle holds up gate closure
@@ -640,21 +763,11 @@ mod tests {
         // SHUTDOWN
         shard0.per_timeline_state.shutdown(); // keeping handle alive across shutdown
 
-        assert_eq!(
-            1,
-            Weak::strong_count(&handle_inner_weak),
-            "through local var handle"
-        );
         assert_eq!(
             cache.map.len(),
             1,
             "this is an implementation detail but worth pointing out: we can't clear the cache from shutdown(), it's cleared on first access after"
         );
-        assert_eq!(
-            (Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
-            (3, 1),
-            "strong: handleinner(via handle), shard0, mgr; weak: myself"
-        );
 
         // this handle is perfectly usable
         handle.getpage();
@@ -678,16 +791,6 @@ mod tests {
         }
 
         drop(handle);
-        assert_eq!(
-            0,
-            Weak::strong_count(&handle_inner_weak),
-            "the HandleInner destructor already ran"
-        );
-        assert_eq!(
-            (Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
-            (2, 1),
-            "strong: shard0, mgr; weak: myself"
-        );
 
         // closing gate succeeds after dropping handle
         tokio::select! {
@@ -706,10 +809,8 @@ mod tests {
         assert_eq!(cache.map.len(), 0);
 
         // ensure all refs to shard0 are gone and we're not leaking anything
-        let myself = Weak::clone(&shard0.myself);
         drop(shard0);
         drop(mgr);
-        assert_eq!(Weak::strong_count(&myself), 0);
     }
 
     #[tokio::test]
@@ -948,15 +1049,11 @@ mod tests {
                 handle
             };
             handle.getpage();
-            used_handles.push(Arc::downgrade(&handle.0));
+            used_handles.push(Arc::downgrade(&handle.timeline));
         }
 
-        // No handles exist, thus gates are closed and don't require shutdown
-        assert!(used_handles
-            .iter()
-            .all(|weak| Weak::strong_count(weak) == 0));
-
-        // ... thus the gate should close immediately, even without shutdown
+        // No handles exist, thus gates are closed and don't require shutdown.
+        // Thus the gate should close immediately, even without shutdown.
         tokio::select! {
             _ = shard0.gate.close() => { }
             _ = tokio::time::sleep(FOREVER) => {
@@ -964,4 +1061,75 @@ mod tests {
             }
         }
     }
+
+    #[tokio::test(start_paused = true)]
+    async fn test_weak_handles() {
+        crate::tenant::harness::setup_logging();
+        let timeline_id = TimelineId::generate();
+        let shard0 = Arc::new_cyclic(|myself| StubTimeline {
+            gate: Default::default(),
+            id: timeline_id,
+            shard: ShardIdentity::unsharded(),
+            per_timeline_state: PerTimelineState::default(),
+            myself: myself.clone(),
+        });
+        let mgr = StubManager {
+            shards: vec![shard0.clone()],
+        };
+
+        let refcount_start = Arc::strong_count(&shard0);
+
+        let key = DBDIR_KEY;
+
+        let mut cache = Cache::<TestTypes>::default();
+
+        let handle = cache
+            .get(timeline_id, ShardSelector::Page(key), &mgr)
+            .await
+            .expect("we have the timeline");
+        assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));
+
+        let weak_handle = handle.downgrade();
+
+        drop(handle);
+
+        let upgraded_handle = weak_handle.upgrade().ok().expect("we can upgrade it");
+
+        // Start shutdown
+        shard0.per_timeline_state.shutdown();
+
+        // Upgrades during shutdown don't work, even if upgraded_handle exists.
+        weak_handle
+            .upgrade()
+            .err()
+            .expect("can't upgrade weak handle as soon as shutdown started");
+
+        // But upgraded_handle is still alive, so the gate won't close.
+        tokio::select! {
+            _ = shard0.gate.close() => {
+                panic!("handle is keeping gate open");
+            }
+            _ = tokio::time::sleep(FOREVER) => { }
+        }
+
+        // Drop the last handle.
+        drop(upgraded_handle);
+
+        // The gate should close now, despite there still being a weak_handle.
+        tokio::select! {
+            _ = shard0.gate.close() => { }
+            _ = tokio::time::sleep(FOREVER) => {
+                panic!("only strong handle is dropped and we shut down per-timeline-state")
+            }
+        }
+
+        // The weak handle still can't be upgraded.
+        weak_handle
+            .upgrade()
+            .err()
+            .expect("still shouldn't be able to upgrade the weak handle");
+
+        // The timeline's strong refcount should be back to what it was before the cache handed out any handle.
+        assert_eq!(Arc::strong_count(&shard0), refcount_start);
+    }
 }
diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h
index b751235595..7b748d7252 100644
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -34,6 +34,8 @@ typedef enum
 	T_NeonGetPageRequest,
 	T_NeonDbSizeRequest,
 	T_NeonGetSlruSegmentRequest,
+	/* future tags above this line */
+	T_NeonTestRequest = 99, /* only in cfg(feature = "testing") */
 
 	/* pagestore -> pagestore_client */
 	T_NeonExistsResponse = 100,
@@ -42,6 +44,8 @@ typedef enum
 	T_NeonErrorResponse,
 	T_NeonDbSizeResponse,
 	T_NeonGetSlruSegmentResponse,
+	/* future tags above this line */
+	T_NeonTestResponse = 199, /* only in cfg(feature = "testing") */
 } NeonMessageTag;
 
 typedef uint64 NeonRequestId;
diff --git a/test_runner/regress/test_page_service_batching_regressions.py b/test_runner/regress/test_page_service_batching_regressions.py
new file mode 100644
index 0000000000..fa85e1210b
--- /dev/null
+++ b/test_runner/regress/test_page_service_batching_regressions.py
@@ -0,0 +1,60 @@
+# NB: there are benchmarks that double as tests inside the `performance` directory.
+
+import subprocess
+from pathlib import Path
+
+import pytest
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder
+
+
+@pytest.mark.timeout(30)  # test takes <20s if pageserver impl is correct
+@pytest.mark.parametrize("kind", ["pageserver-stop", "tenant-detach"])
+def test_slow_flush(neon_env_builder: NeonEnvBuilder, neon_binpath: Path, kind: str):
+    def patch_pageserver_toml(config):
+        config["page_service_pipelining"] = {
+            "mode": "pipelined",
+            "max_batch_size": 32,
+            "execution": "concurrent-futures",
+        }
+
+    neon_env_builder.pageserver_config_override = patch_pageserver_toml
+    env = neon_env_builder.init_start()
+
+    log.info("make flush appear slow")
+
+    log.info("sending requests until pageserver accepts no more")
+    # TODO: extract this into a helper, like subprocess_capture,
+    # so that we capture the stderr from the helper somewhere.
+    child = subprocess.Popen(
+        [
+            neon_binpath / "test_helper_slow_client_reads",
+            env.pageserver.connstr(),
+            str(env.initial_tenant),
+            str(env.initial_timeline),
+        ],
+        bufsize=0,  # unbuffered
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+    )
+    assert child.stdout is not None
+    buf = child.stdout.read(1)
+    if len(buf) != 1:
+        raise Exception("unexpected EOF")
+    if buf != b"R":
+        raise Exception(f"unexpected data: {buf!r}")
+    log.info("helper reports pageserver accepts no more requests")
+    log.info(
+        "assuming pageserver connection handle is in a state where TCP has backpressured pageserver=>client response flush() into userspace"
+    )
+
+    if kind == "pageserver-stop":
+        log.info("try to shut down the pageserver cleanly")
+        env.pageserver.stop()
+    elif kind == "tenant-detach":
+        log.info("try to shut down the tenant")
+        env.pageserver.tenant_detach(env.initial_tenant)
+    else:
+        raise ValueError(f"unexpected kind: {kind}")
+
+    log.info("shutdown did not time out, test passed")