mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-01 04:20:39 +00:00
tests: flush wal before waiting for last record lsn (#11726)
## Problem Compute may flush WAL on page boundaries, leaving some records partially flushed for a long time. It leads to `wait_for_last_flush_lsn` stuck waiting for this partial LSN. - Closes: https://github.com/neondatabase/cloud/issues/27876 ## Summary of changes - Flush WAL via CHECKPOINT after requesting current_wal_lsn to make sure that the record we point to is flushed in full - Use proper endpoint in `test_timeline_detach_with_aux_files_with_detach_v1`
This commit is contained in:
@@ -5477,6 +5477,13 @@ def wait_for_last_flush_lsn(
|
||||
|
||||
if last_flush_lsn is None:
|
||||
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
# The last_flush_lsn may not correspond to a record boundary.
|
||||
# For example, if the compute flushed WAL on a page boundary,
|
||||
# the remaining part of the record might not be flushed for a long time.
|
||||
# This would prevent the pageserver from reaching last_flush_lsn promptly.
|
||||
# To ensure the rest of the record reaches the pageserver quickly,
|
||||
# we forcibly flush the WAL by using CHECKPOINT.
|
||||
endpoint.safe_psql("CHECKPOINT")
|
||||
|
||||
results = []
|
||||
for tenant_shard_id, pageserver in shards:
|
||||
|
||||
@@ -1822,7 +1822,7 @@ def test_timeline_detach_with_aux_files_with_detach_v1(
|
||||
endpoint2.safe_psql(
|
||||
"SELECT pg_create_logical_replication_slot('test_slot_restore', 'pgoutput')"
|
||||
)
|
||||
lsn3 = wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, branch_timeline_id)
|
||||
lsn3 = wait_for_last_flush_lsn(env, endpoint2, env.initial_tenant, branch_timeline_id)
|
||||
assert set(http.list_aux_files(env.initial_tenant, branch_timeline_id, lsn1).keys()) == set([])
|
||||
assert set(http.list_aux_files(env.initial_tenant, branch_timeline_id, lsn3).keys()) == set(
|
||||
["pg_replslot/test_slot_restore/state"]
|
||||
@@ -1839,7 +1839,7 @@ def test_timeline_detach_with_aux_files_with_detach_v1(
|
||||
assert all_reparented == set([])
|
||||
|
||||
# We need to ensure all safekeeper data are ingested before checking aux files: the API does not wait for LSN.
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, branch_timeline_id)
|
||||
wait_for_last_flush_lsn(env, endpoint2, env.initial_tenant, branch_timeline_id)
|
||||
assert set(http.list_aux_files(env.initial_tenant, env.initial_timeline, lsn2).keys()) == set(
|
||||
["pg_replslot/test_slot_parent_1/state", "pg_replslot/test_slot_parent_2/state"]
|
||||
), "main branch unaffected"
|
||||
|
||||
Reference in New Issue
Block a user