mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-14 17:02:56 +00:00
storage_controller: rename failpoint and make it pausable
The same failpoint is used by a new test in a follow-up commit, and that test needs the failpoint to be pausable.
This commit is contained in:
@@ -14,7 +14,6 @@ use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::backoff::exponential_backoff;
|
||||
use utils::failpoint_support;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
@@ -824,7 +823,7 @@ impl Reconciler {
|
||||
.handle_detach(self.tenant_shard_id, self.shard.stripe_size);
|
||||
}
|
||||
|
||||
failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue");
|
||||
pausable_failpoint!("reconciler-epilogue");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2406,7 +2406,14 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
|
||||
env.storage_controller.tenant_create(tid)
|
||||
|
||||
env.storage_controller.reconcile_until_idle()
|
||||
env.storage_controller.configure_failpoints(("sleep-on-reconcile-epilogue", "return(10000)"))
|
||||
env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause"))
|
||||
|
||||
def unpause_failpoint():
|
||||
time.sleep(2)
|
||||
env.storage_controller.configure_failpoints(("reconciler-epilogue", "off"))
|
||||
|
||||
thread = threading.Thread(target=unpause_failpoint)
|
||||
thread.start()
|
||||
|
||||
# Make a change to the tenant config to trigger a slow reconcile
|
||||
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
|
||||
@@ -2421,6 +2428,8 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
|
||||
observed_state = env.storage_controller.step_down()
|
||||
log.info(f"Storage controller stepped down with {observed_state=}")
|
||||
|
||||
thread.join()
|
||||
|
||||
# Validate that we waited for the slow reconcile to complete
|
||||
# and updated the observed state in the storcon before stepping down.
|
||||
node_id = str(env.pageserver.id)
|
||||
|
||||
Reference in New Issue
Block a user