storage_controller: rename failpoint and make it pausable

The same failpoint is used by a new test in a follow-up commit,
and that test needs a pausable failpoint.
This commit is contained in:
Vlad Lazar
2025-01-07 16:30:32 +01:00
parent be38123e62
commit d3fa0f6b9e
2 changed files with 11 additions and 3 deletions

View File

@@ -2406,7 +2406,14 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
env.storage_controller.tenant_create(tid)
env.storage_controller.reconcile_until_idle()
env.storage_controller.configure_failpoints(("sleep-on-reconcile-epilogue", "return(10000)"))
env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause"))
def unpause_failpoint():
time.sleep(2)
env.storage_controller.configure_failpoints(("reconciler-epilogue", "off"))
thread = threading.Thread(target=unpause_failpoint)
thread.start()
# Make a change to the tenant config to trigger a slow reconcile
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
@@ -2421,6 +2428,8 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
observed_state = env.storage_controller.step_down()
log.info(f"Storage controller stepped down with {observed_state=}")
thread.join()
# Validate that we waited for the slow reconcile to complete
# and updated the observed state in the storcon before stepping down.
node_id = str(env.pageserver.id)