pageserver: fix handling getpage with multiple shards on one node

Previously, we would wait for the LSN to be visible on whichever timeline we happened to load at the start of the connection, then proceed to look up the correct timeline for the key and do the read. If the timeline holding the key was behind the timeline we used for the LSN wait, then we might serve an apparently successful read result that actually contains data from before the requested LSN. With this change, the LSN wait is performed on the timeline that actually holds the key, after the shard lookup, so the wait and the read always use the same timeline.
2026-01-10 23:12:54 +00:00 · 2023-12-29 15:13:00 +00:00
parent ef7c9c2ccc
commit 34ebfbdd6f
1 changed files with 21 additions and 20 deletions
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -802,7 +802,7 @@ impl PageServerHandler {
        }))
    }

-    async fn handle_get_page_at_lsn_request(
+    async fn do_handle_get_page_at_lsn_request(
        &self,
        timeline: &Timeline,
        req: &PagestreamGetPageRequest,
@@ -812,20 +812,25 @@ impl PageServerHandler {
        let lsn =
            Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
                .await?;
-        /*
-        // Add a 1s delay to some requests. The delay helps the requests to
-        // hit the race condition from github issue #1047 more easily.
-        use rand::Rng;
-        if rand::thread_rng().gen::<u8>() < 5 {
-            std::thread::sleep(std::time::Duration::from_millis(1000));
-        }
-        */
+        let page = timeline
+            .get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), req.latest, ctx)
+            .await?;

+        Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
+            page,
+        }))
+    }
+
+    async fn handle_get_page_at_lsn_request(
+        &self,
+        timeline: &Timeline,
+        req: &PagestreamGetPageRequest,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<PagestreamBeMessage> {
        let key = rel_block_to_key(req.rel, req.blkno);
-        let page = if timeline.get_shard_identity().is_key_local(&key) {
-            timeline
-                .get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), req.latest, ctx)
-                .await?
+        if timeline.get_shard_identity().is_key_local(&key) {
+            self.do_handle_get_page_at_lsn_request(timeline, req, ctx)
+                .await
        } else {
            // The Tenant shard we looked up at connection start does not hold this particular
            // key: look for other shards in this tenant.  This scenario occurs if a pageserver
@@ -860,14 +865,10 @@ impl PageServerHandler {
            // Take a GateGuard for the duration of this request.  If we were using our main Timeline object,
            // the GateGuard was already held over the whole connection.
            let _timeline_guard = timeline.gate.enter().map_err(|_| QueryError::Shutdown)?;
-            timeline
-                .get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), req.latest, ctx)
-                .await?
-        };

-        Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
-            page,
-        }))
+            self.do_handle_get_page_at_lsn_request(&timeline, req, ctx)
+                .await
+        }
    }

    #[allow(clippy::too_many_arguments)]