tests: perform graceful rolling restarts in storcon scale test (#8173)

## Problem
Scale test doesn't exercise drain & fill.

## Summary of changes
Make scale test exercise drain & fill
This commit is contained in:
Vlad Lazar
2024-07-04 06:04:19 +01:00
committed by GitHub
parent 778787d8e9
commit bbb2fa7cdd
3 changed files with 171 additions and 59 deletions

View File

@@ -2113,6 +2113,21 @@ class NeonStorageController(MetricsGetter, LogUtils):
self.running = False
return self
@staticmethod
def retryable_node_operation(op, ps_id, max_attempts, backoff):
while max_attempts > 0:
try:
op(ps_id)
return
except StorageControllerApiException as e:
max_attempts -= 1
log.info(f"Operation failed ({max_attempts} attempts left): {e}")
if max_attempts == 0:
raise e
time.sleep(backoff)
@staticmethod
def raise_api_exception(res: requests.Response):
try:
@@ -2453,6 +2468,38 @@ class NeonStorageController(MetricsGetter, LogUtils):
)
log.info("storage controller passed consistency check")
def poll_node_status(
self, node_id: int, desired_scheduling_policy: str, max_attempts: int, backoff: int
):
"""
Poll the node status until it reaches 'desired_scheduling_policy' or 'max_attempts' have been exhausted
"""
log.info(f"Polling {node_id} for {desired_scheduling_policy} scheduling policy")
while max_attempts > 0:
try:
status = self.node_status(node_id)
policy = status["scheduling"]
if policy == desired_scheduling_policy:
return
else:
max_attempts -= 1
log.info(f"Status call returned {policy=} ({max_attempts} attempts left)")
if max_attempts == 0:
raise AssertionError(
f"Status for {node_id=} did not reach {desired_scheduling_policy=}"
)
time.sleep(backoff)
except StorageControllerApiException as e:
max_attempts -= 1
log.info(f"Status call failed ({max_attempts} retries left): {e}")
if max_attempts == 0:
raise e
time.sleep(backoff)
def configure_failpoints(self, config_strings: Tuple[str, str] | List[Tuple[str, str]]):
if isinstance(config_strings, tuple):
pairs = [config_strings]