From f7ec7668a2a452f23f197ee2670a4d799be20ed4 Mon Sep 17 00:00:00 2001
From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com>
Date: Thu, 5 Jun 2025 16:04:37 +0400
Subject: [PATCH] pageserver, tests: prepare test_basebackup_cache for
 --timelines-onto-safekeepers (#12143)

## Problem
- `test_basebackup_cache` fails in
https://github.com/neondatabase/neon/pull/11712 because after the
timelines on safekeepers are managed by storage controller, they do
contain proper start_lsn and the compute_ctl tool sends the first
basebackup request with this LSN.
- `Failed to prepare basebackup` log messages during timeline
initialization, because the timeline is not yet in the global timeline
map.

- Relates to https://github.com/neondatabase/cloud/issues/29353

## Summary of changes
- Account for `timeline_onto_safekeepers` storcon's option in the test.
- Do not trigger basebackup prepare during the timeline initialization.
---
 pageserver/src/tenant/timeline.rs      |  7 ++++++
 test_runner/regress/test_basebackup.py | 33 +++++++++++++++++++-------
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 6798606141..3522af2de0 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2506,6 +2506,13 @@ impl Timeline {
             // Preparing basebackup doesn't make sense for shards other than shard zero.
             return;
         }
+        if !self.is_active() {
+            // May happen during initial timeline creation.
+            // Such timeline is not in the global timeline map yet,
+            // so basebackup cache will not be able to find it.
+            // TODO(diko): We can prepare such timelines in finish_creation().
+            return;
+        }
 
         let res = self
             .basebackup_prepare_sender
diff --git a/test_runner/regress/test_basebackup.py b/test_runner/regress/test_basebackup.py
index b083c394c7..2d42be4051 100644
--- a/test_runner/regress/test_basebackup.py
+++ b/test_runner/regress/test_basebackup.py
@@ -26,6 +26,10 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
     ps = env.pageserver
     ps_http = ps.http_client()
 
+    storcon_managed_timelines = (env.storage_controller_config or {}).get(
+        "timelines_onto_safekeepers", False
+    )
+
     # 1. Check that we always hit the cache after compute restart.
     for i in range(3):
         ep.start()
@@ -33,15 +37,26 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
 
         def check_metrics(i=i):
             metrics = ps_http.get_metrics()
-            # Never miss.
-            # The first time compute_ctl sends `get_basebackup` with lsn=None, we do not cache such requests.
-            # All other requests should be a hit
-            assert (
-                metrics.query_one(
-                    "pageserver_basebackup_cache_read_total", {"result": "miss"}
-                ).value
-                == 0
-            )
+            if storcon_managed_timelines:
+                # We do not cache the initial basebackup yet,
+                # so the first compute startup should be a miss.
+                assert (
+                    metrics.query_one(
+                        "pageserver_basebackup_cache_read_total", {"result": "miss"}
+                    ).value
+                    == 1
+                )
+            else:
+                # If the timeline is not initialized on safekeeprs,
+                # the compute_ctl sends `get_basebackup` with lsn=None for the first startup.
+                # We do not use cache for such requests, so it's niether a hit nor a miss.
+                assert (
+                    metrics.query_one(
+                        "pageserver_basebackup_cache_read_total", {"result": "miss"}
+                    ).value
+                    == 0
+                )
+
             # All but the first requests are hits.
             assert (
                 metrics.query_one("pageserver_basebackup_cache_read_total", {"result": "hit"}).value