From 2fd8e24c8ff300dc9e640c8765a0311307871e7d Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Fri, 2 Feb 2024 12:32:40 -0900 Subject: [PATCH] Switch sleeps to wait_until (#6575) ## Problem I didn't know about `wait_until` and was relying on `sleep` to wait for stuff. This caused some tests to be flaky. https://github.com/neondatabase/neon/issues/6561 ## Summary of changes Switch to `wait_until`, this should make it tests less flaky --- test_runner/fixtures/neon_fixtures.py | 14 ++++++++++++++ test_runner/regress/test_migrations.py | 12 ++++++++---- test_runner/regress/test_neon_superuser.py | 19 ++++++++++--------- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 1e15ebe5a0..5ce2fca820 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3130,6 +3130,20 @@ class Endpoint(PgProtocol): log.info(json.dumps(dict(data_dict, **kwargs))) json.dump(dict(data_dict, **kwargs), file, indent=4) + # Please note: if you didn't respec this endpoint to have the `migrations` + # feature, this function will probably fail because neon_migration.migration_id + # won't exist. This is temporary - soon we'll get rid of the feature flag and + # migrations will be enabled for everyone. + def wait_for_migrations(self): + with self.cursor() as cur: + + def check_migrations_done(): + cur.execute("SELECT id FROM neon_migration.migration_id") + migration_id = cur.fetchall()[0][0] + assert migration_id != 0 + + wait_until(20, 0.5, check_migrations_done) + # Mock the extension part of spec passed from control plane for local testing # endpooint.rs adds content of this file as a part of the spec.json def create_remote_extension_spec(self, spec: dict[str, Any]): diff --git a/test_runner/regress/test_migrations.py b/test_runner/regress/test_migrations.py index dee22f9b48..30dd54a8c1 100644 --- a/test_runner/regress/test_migrations.py +++ b/test_runner/regress/test_migrations.py @@ -13,12 +13,14 @@ def test_migrations(neon_simple_env: NeonEnv): endpoint.respec(skip_pg_catalog_updates=False, features=["migrations"]) endpoint.start() - time.sleep(1) # Sleep to let migrations run + endpoint.wait_for_migrations() + + num_migrations = 3 with endpoint.cursor() as cur: cur.execute("SELECT id FROM neon_migration.migration_id") migration_id = cur.fetchall() - assert migration_id[0][0] == 3 + assert migration_id[0][0] == num_migrations with open(log_path, "r") as log_file: logs = log_file.read() @@ -26,11 +28,13 @@ def test_migrations(neon_simple_env: NeonEnv): endpoint.stop() endpoint.start() - time.sleep(1) # Sleep to let migrations run + # We don't have a good way of knowing that the migrations code path finished executing + # in compute_ctl in the case that no migrations are being run + time.sleep(1) with endpoint.cursor() as cur: cur.execute("SELECT id FROM neon_migration.migration_id") migration_id = cur.fetchall() - assert migration_id[0][0] == 3 + assert migration_id[0][0] == num_migrations with open(log_path, "r") as log_file: logs = log_file.read() diff --git a/test_runner/regress/test_neon_superuser.py b/test_runner/regress/test_neon_superuser.py index 8b9eb1d9c4..eff2cadabf 100644 --- a/test_runner/regress/test_neon_superuser.py +++ b/test_runner/regress/test_neon_superuser.py @@ -1,8 +1,7 @@ -import time - from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv from fixtures.pg_version import PgVersion +from fixtures.utils import wait_until def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion): @@ -19,7 +18,8 @@ def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion): sub.respec(skip_pg_catalog_updates=False, features=["migrations"]) sub.start() - time.sleep(1) # Sleep to let migrations run + pub.wait_for_migrations() + sub.wait_for_migrations() with pub.cursor() as cur: cur.execute( @@ -68,10 +68,11 @@ def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion): with pub.cursor(dbname="neondb", user="mr_whiskers", password="cat") as pcur: pcur.execute("INSERT INTO t VALUES (30), (40)") - time.sleep(1) # Give the change time to propagate + def check_that_changes_propagated(): + cur.execute("SELECT * FROM t") + res = cur.fetchall() + log.info(res) + assert len(res) == 4 + assert [r[0] for r in res] == [10, 20, 30, 40] - cur.execute("SELECT * FROM t") - res = cur.fetchall() - log.info(res) - assert len(res) == 4 - assert [r[0] for r in res] == [10, 20, 30, 40] + wait_until(10, 0.5, check_that_changes_propagated)