tests: use approximate equality in test_get_tenant_size_with_multiple_branches (#5411)

## Problem

This test has been flaky for a long time.

As far as I can tell, the test was simply wrong to expect postgres
activity to result in deterministic sizes: making the match fuzzy is not
a hack, it's just matching the reality that postgres doesn't promise to
write exactly the same number of pages every time it runs a given query.

## Summary of changes

Equality checks now tolerate a difference of up to 4 pages. This is large
enough to cover the deltas we've seen in practice.

Closes: https://github.com/neondatabase/neon/issues/2962
This commit is contained in:
John Spray
2023-09-29 09:15:43 +01:00
committed by GitHub
parent 1881373ec4
commit 6a1903987a

View File

@@ -15,7 +15,7 @@ from fixtures.pageserver.utils import (
timeline_delete_wait_completed,
wait_until_tenant_active,
)
from fixtures.pg_version import PgVersion, xfail_on_postgres
from fixtures.pg_version import PgVersion
from fixtures.types import Lsn, TenantId, TimelineId
@@ -532,7 +532,24 @@ def test_single_branch_get_tenant_size_grows(
assert size_after == prev, "size after restarting pageserver should not have changed"
@xfail_on_postgres(PgVersion.V15, reason="Test significantly more flaky on Postgres 15")
def assert_size_approx_equal(size_a, size_b):
    """
    Assert that two tenant sizes match to within a few pages.

    Size checks in these tests observe pageserver space consumption, which
    sits many layers below the user input, so the exact figure shifts
    slightly with postgres behavior. Instead of assuming postgres is
    deterministic and failing intermittently, sizes for the same data are
    allowed to differ by a small number of pages.
    """
    # Empirical values taken from past equality failures: the sizes differed
    # by multiples of 8272 bytes, usually 1-3 of them. Allowing 4 leaves
    # headroom for outliers beyond that observed range.
    page_delta = 8272
    tolerated_pages = 4
    assert size_a == pytest.approx(size_b, abs=tolerated_pages * page_delta)
def test_get_tenant_size_with_multiple_branches(
neon_env_builder: NeonEnvBuilder, test_output_dir: Path
):
@@ -573,7 +590,7 @@ def test_get_tenant_size_with_multiple_branches(
)
size_after_first_branch = http_client.tenant_size(tenant_id)
assert size_after_first_branch == size_at_branch
assert_size_approx_equal(size_after_first_branch, size_at_branch)
first_branch_endpoint = env.endpoints.create_start("first-branch", tenant_id=tenant_id)
@@ -599,7 +616,7 @@ def test_get_tenant_size_with_multiple_branches(
"second-branch", main_branch_name, tenant_id
)
size_after_second_branch = http_client.tenant_size(tenant_id)
assert size_after_second_branch == size_after_continuing_on_main
assert_size_approx_equal(size_after_second_branch, size_after_continuing_on_main)
second_branch_endpoint = env.endpoints.create_start("second-branch", tenant_id=tenant_id)
@@ -635,7 +652,7 @@ def test_get_tenant_size_with_multiple_branches(
# tenant_size but so far this has been reliable, even though at least gc
# and tenant_size race for the same locks
size_after = http_client.tenant_size(tenant_id)
assert size_after == size_after_thinning_branch
assert_size_approx_equal(size_after, size_after_thinning_branch)
size_debug_file_before = open(test_output_dir / "size_debug_before.html", "w")
size_debug = http_client.tenant_size_debug(tenant_id)