From 119b86480f9186545f37047801b9a2a97066865e Mon Sep 17 00:00:00 2001 From: Alek Westover Date: Thu, 10 Aug 2023 08:24:43 -0400 Subject: [PATCH] test: make pg_regress less flaky, hopefully (#4903) `pg_regress` is flaky: https://github.com/neondatabase/neon/issues/559 Consolidated `CHECKPOINT` into `check_restored_datadir_content` and added a wait via `wait_for_last_flush_lsn`. Some recently introduced flakiness was fixed with #4948. --------- Co-authored-by: Joonas Koivunen --- test_runner/fixtures/neon_fixtures.py | 11 +++++++++-- test_runner/regress/test_pg_regress.py | 9 --------- test_runner/regress/test_subxacts.py | 4 ---- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 7ba979745b..901049188e 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2817,8 +2817,15 @@ def check_restored_datadir_content( endpoint: Endpoint, ): # Get the timeline ID. We need it for the 'basebackup' command - timeline = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0]) + timeline_id = TimelineId(endpoint.safe_psql("SHOW neon.timeline_id")[0][0]) + # many tests already checkpoint, but do it just in case + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CHECKPOINT") + + # wait for pageserver to catch up + wait_for_last_flush_lsn(env, endpoint, endpoint.tenant_id, timeline_id) # stop postgres to ensure that files won't change endpoint.stop() @@ -2833,7 +2840,7 @@ def check_restored_datadir_content( {psql_path} \ --no-psqlrc \ postgres://localhost:{env.pageserver.service_port.pg} \ - -c 'basebackup {endpoint.tenant_id} {timeline}' \ + -c 'basebackup {endpoint.tenant_id} {timeline_id}' \ | tar -x -C {restored_dir_path} """ diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index ee600db2c6..f26d04e2f3 100644 --- a/test_runner/regress/test_pg_regress.py +++ 
b/test_runner/regress/test_pg_regress.py @@ -56,10 +56,6 @@ def test_pg_regress( with capsys.disabled(): pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) - # checkpoint one more time to ensure that the lsn we get is the latest one - endpoint.safe_psql("CHECKPOINT") - - # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, endpoint) @@ -166,9 +162,4 @@ def test_sql_regress( with capsys.disabled(): pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) - # checkpoint one more time to ensure that the lsn we get is the latest one - endpoint.safe_psql("CHECKPOINT") - endpoint.safe_psql("select pg_current_wal_insert_lsn()")[0][0] - - # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, endpoint) diff --git a/test_runner/regress/test_subxacts.py b/test_runner/regress/test_subxacts.py index 494820ef8e..eb96a8faa4 100644 --- a/test_runner/regress/test_subxacts.py +++ b/test_runner/regress/test_subxacts.py @@ -33,8 +33,4 @@ def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): cur.execute(f"insert into t1 values ({i}, {j})") cur.execute("commit") - # force wal flush - cur.execute("checkpoint") - - # Check that we can restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, endpoint)