mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-17 13:10:38 +00:00
tests: parametrize test_graceful_cluster_restart on AZ count (#10427)
## Problem In https://github.com/neondatabase/neon/pull/10411 the fill logic changes such that it benefits us to test it both with and without AZs set up. I didn't extend the test inline in that PR because there were overlapping test changes in flight to add a `num_az` parameter. ## Summary of changes - Parameterise the test on AZ count (1 or 2) - When AZ count is 2, use a different balance check that just asserts the _tenants_ are balanced (since AZ affinity is chosen on a per-tenant basis)
This commit is contained in:
@@ -2139,12 +2139,18 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
|
||||
workload.validate()
|
||||
|
||||
|
||||
def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
|
||||
@pytest.mark.parametrize("num_azs", [1, 2])
|
||||
def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder, num_azs: int):
|
||||
"""
|
||||
Graceful restart of storage controller clusters use the drain and
|
||||
fill hooks in order to migrate attachments away from pageservers before
|
||||
restarting. In practice, Ansible will drive this process.
|
||||
|
||||
Test is parametrized on the number of AZs to exercise the AZ-driven behavior
|
||||
of reliably moving shards back to their home AZ, and the behavior for AZ-agnostic
|
||||
tenants where we fill based on a target shard count.
|
||||
"""
|
||||
neon_env_builder.num_azs = num_azs
|
||||
neon_env_builder.num_pageservers = 2
|
||||
env = neon_env_builder.init_configs()
|
||||
env.start()
|
||||
@@ -2174,8 +2180,15 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder):
|
||||
min_shard_count = min(shard_counts.values())
|
||||
max_shard_count = max(shard_counts.values())
|
||||
|
||||
flake_factor = 5 / 100
|
||||
assert max_shard_count - min_shard_count <= int(total_shards * flake_factor)
|
||||
if num_azs == 1:
|
||||
# AZ-agnostic case: we expect all nodes to have the same number of shards, within some bound
|
||||
flake_factor = 5 / 100
|
||||
assert max_shard_count - min_shard_count <= int(total_shards * flake_factor)
|
||||
else:
|
||||
# AZ-driven case: we expect tenants to have been round-robin allocated to AZs,
|
||||
# and after the restart they should all be back in their home AZ, so difference
|
||||
# should be at most a single shard's tenants
|
||||
assert max_shard_count - min_shard_count <= shard_count_per_tenant
|
||||
|
||||
# Perform a graceful rolling restart
|
||||
for ps in env.pageservers:
|
||||
|
||||
Reference in New Issue
Block a user