test: bring back consistency checks

This commit is contained in:
Vlad Lazar
2024-06-20 10:33:44 +01:00
parent 48615a4f21
commit 82af640484

View File

@@ -138,6 +138,9 @@ def test_storage_controller_many_tenants(
# of shards are hitting the delayed path.
env.storage_controller.allowed_errors.append(".*Many shards are waiting to reconcile")
# TODO: explain why scheduling errors while draining a pageserver are expected in this test and safe to allow-list
env.storage_controller.allowed_errors.append(".*Scheduling error when draining pageserver.*")
for ps in env.pageservers:
# This can happen because when we do a loop over all pageservers and mark them offline/active,
# reconcilers might get cancelled, and the next reconcile can follow a not-so-elegant path of
@@ -297,8 +300,8 @@ def test_storage_controller_many_tenants(
ps.restart()
poll_node_status(env, ps.id, "Active", max_attempts=24, backoff=1)
# env.storage_controller.reconcile_until_idle()
# env.storage_controller.consistency_check()
env.storage_controller.reconcile_until_idle()
env.storage_controller.consistency_check()
retryable_node_operation(
lambda ps_id: env.storage_controller.node_fill(ps_id), ps.id, max_attempts=3, backoff=2
@@ -312,7 +315,7 @@ def test_storage_controller_many_tenants(
# Consistency check is safe here: restarting pageservers should not have caused any Reconcilers to spawn,
# as they were not offline long enough to trigger any scheduling changes.
# env.storage_controller.consistency_check()
env.storage_controller.consistency_check()
check_memory()
# Stop the storage controller before tearing down fixtures, because it otherwise might log