tests: stabilize compat tests (#7227)

This test had two flaky failure modes:
- pageserver log error for timeline not found: this resulted from
changes for DR when timeline destroy/create was added, but endpoint was
left running during that operation.
- storage controller log error because the test was running for long
enough that a background reconcile happened at almost the exact moment
of test teardown, and our test fixtures tear down the pageservers before
the controller.

Closes: #7224
This commit is contained in:
John Spray
2024-03-25 14:35:24 +00:00
committed by GitHub
parent a6c1fdcaf6
commit 2713142308
2 changed files with 13 additions and 2 deletions

View File

@@ -267,9 +267,10 @@ def test_forward_compatibility(
def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
ep = env.endpoints.create_start("main")
connstr = ep.connstr()
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
connstr = ep.connstr()
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
)
@@ -286,6 +287,9 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
timeline_id = env.initial_timeline
pg_version = env.pg_version
# Stop endpoint while we recreate timeline
ep.stop()
try:
pageserver_http.timeline_preserve_initdb_archive(tenant_id, timeline_id)
except PageserverApiException as e:
@@ -310,6 +314,9 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
existing_initdb_timeline_id=timeline_id,
)
# Timeline exists again: restart the endpoint
ep.start()
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
)