tests: start primary compute on not-readonly branches (#12408)

## Problem

https://github.com/neondatabase/neon/pull/11712 changed how computes are
started in the test: the lsn is specified, making them read-only static
replicas. Lsn is `last_record_lsn` from pageserver. It works fine with
read-only branches (because their `last_record_lsn` is equal to
`start_lsn` and always valid). But with writable timelines, the
`last_record_lsn` on the pageserver might be stale.

Particularly in this test, after the `detach_branch` operation, the
tenant is reset on the pageserver. This leads to `last_record_lsn` going
back to `disk_consistent_lsn`, so basically rolling back some recent
writes.

If we start a primary compute, it will start at safekeepers' commit Lsn,
which is the correct one, and will wait till pageserver catches up with
this Lsn after reset.

- Closes: https://github.com/neondatabase/neon/issues/12365

## Summary of changes
- Start `primary` compute for writable timelines.
This commit is contained in:
Dmitrii Kovalkov
2025-07-02 09:41:17 +04:00
committed by GitHub
parent 5ec8881c0b
commit 8e7ce42229

View File

@@ -324,7 +324,7 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder):
# it is to be in line with the deletion timestamp.. well, almost.
when = original_ancestor[2][:26]
when_ts = datetime.datetime.fromisoformat(when).replace(tzinfo=datetime.UTC)
now = datetime.datetime.utcnow().replace(tzinfo=datetime.UTC)
now = datetime.datetime.now(datetime.UTC)
assert when_ts < now
assert len(lineage.get("reparenting_history", [])) == 0
elif expected_ancestor == timeline_id:
@@ -458,19 +458,20 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots
env.pageserver.quiesce_tenants()
# checking the ancestor after is much faster than waiting for the endpoint not start
# checking the ancestor after is much faster than waiting for the endpoint to start
expected_result = [
("main", env.initial_timeline, None, 24576, 1),
("after", after, env.initial_timeline, 24576, 1),
("snapshot_branchpoint_old", snapshot_branchpoint_old, env.initial_timeline, 8192, 1),
("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 16384, 1),
("branch_to_detach", branch_to_detach, None, 16384, 1),
("earlier", earlier, env.initial_timeline, 0, 1),
# (branch_name, queried_timeline, expected_ancestor, rows, starts, read_only)
("main", env.initial_timeline, None, 24576, 1, False),
("after", after, env.initial_timeline, 24576, 1, False),
("snapshot_branchpoint_old", snapshot_branchpoint_old, env.initial_timeline, 8192, 1, True),
("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 16384, 1, False),
("branch_to_detach", branch_to_detach, None, 16384, 1, False),
("earlier", earlier, env.initial_timeline, 0, 1, False),
]
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
for branch_name, queried_timeline, expected_ancestor, _, _ in expected_result:
for branch_name, queried_timeline, expected_ancestor, _, _, _ in expected_result:
details = client.timeline_detail(env.initial_tenant, queried_timeline)
ancestor_timeline_id = details["ancestor_timeline_id"]
if expected_ancestor is None:
@@ -508,13 +509,17 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots
assert len(lineage.get("original_ancestor", [])) == 0
assert len(lineage.get("reparenting_history", [])) == 0
for branch_name, queried_timeline, _, rows, starts in expected_result:
details = client.timeline_detail(env.initial_tenant, queried_timeline)
log.info(f"reading data from branch {branch_name}")
# specifying the lsn makes the endpoint read-only and not connect to safekeepers
for branch_name, queried_timeline, _, rows, starts, read_only in expected_result:
last_record_lsn = None
if read_only:
# specifying the lsn makes the endpoint read-only and not connect to safekeepers
details = client.timeline_detail(env.initial_tenant, queried_timeline)
last_record_lsn = Lsn(details["last_record_lsn"])
log.info(f"reading data from branch {branch_name} at {last_record_lsn}")
with env.endpoints.create(
branch_name,
lsn=Lsn(details["last_record_lsn"]),
lsn=last_record_lsn,
) as ep:
ep.start(safekeeper_generation=1)
assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows