mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-09 06:22:57 +00:00
test: allow requests to any pageserver to get cancelled (#8413)
Fix flakiness in `test_sharded_timeline_detach_ancestor`, which does not reproduce on a fast enough runner, by allowing the "request was dropped before completing" error on all pageservers. Previously it was only allowed on half of the pageservers. Failure evidence: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-8352/9972357040/index.html#suites/a1c2be32556270764423c495fad75d47/7cca3e3d94fe12f2
This commit is contained in:
@@ -702,20 +702,16 @@ def test_sharded_timeline_detach_ancestor(neon_env_builder: NeonEnvBuilder):
|
||||
# make another of the nodes get stuck, then restart
|
||||
|
||||
stuck = pageservers[int(shards[0]["node_id"])]
|
||||
stuck.allowed_errors.append(".*: request was dropped before completing")
|
||||
env.storage_controller.allowed_errors.append(".*: request was dropped before completing")
|
||||
log.info(f"stuck pageserver is id={stuck.id}")
|
||||
stuck_http = stuck.http_client()
|
||||
stuck_http.configure_failpoints(
|
||||
("timeline-detach-ancestor::before_starting_after_locking_pausable", "pause")
|
||||
)
|
||||
|
||||
restarted = pageservers[int(shards[1]["node_id"])]
|
||||
restarted.allowed_errors.extend(
|
||||
[
|
||||
".*: request was dropped before completing",
|
||||
".*: Cancelled request finished with an error: ShuttingDown",
|
||||
]
|
||||
)
|
||||
log.info(f"restarted pageserver is id={restarted.id}")
|
||||
# this might be hit; see `restart_restarted`
|
||||
restarted.allowed_errors.append(".*: Cancelled request finished with an error: ShuttingDown")
|
||||
assert restarted.id != stuck.id
|
||||
restarted_http = restarted.http_client()
|
||||
restarted_http.configure_failpoints(
|
||||
@@ -724,6 +720,14 @@ def test_sharded_timeline_detach_ancestor(neon_env_builder: NeonEnvBuilder):
|
||||
]
|
||||
)
|
||||
|
||||
for info in shards:
|
||||
pageserver = pageservers[int(info["node_id"])]
|
||||
# the first request can cause these errors, but it does not do so repeatedly
|
||||
pageserver.allowed_errors.append(".*: request was dropped before completing")
|
||||
|
||||
# first request again
|
||||
env.storage_controller.allowed_errors.append(".*: request was dropped before completing")
|
||||
|
||||
target = env.storage_controller.pageserver_api()
|
||||
|
||||
with pytest.raises(ReadTimeout):
|
||||
|
||||
Reference in New Issue
Block a user