From d43d77389e3d38408ec74d7f30b243d1c181569b Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Wed, 1 May 2024 21:36:50 -0700 Subject: [PATCH] Add retry loops and bump test timeout in test_pageserver_connection_stress (#7281) --- test_runner/regress/test_bad_connection.py | 23 ++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/test_runner/regress/test_bad_connection.py b/test_runner/regress/test_bad_connection.py index c808fa0f54..82a3a05c2b 100644 --- a/test_runner/regress/test_bad_connection.py +++ b/test_runner/regress/test_bad_connection.py @@ -1,10 +1,13 @@ import random import time +import psycopg2.errors +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder +@pytest.mark.timeout(600) def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.pageserver.allowed_errors.append(".*simulated connection error.*") @@ -20,12 +23,20 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder): pg_conn = endpoint.connect() cur = pg_conn.cursor() + def execute_retry_on_timeout(query): + while True: + try: + cur.execute(query) + return + except psycopg2.errors.QueryCanceled: + log.info(f"Query '{query}' timed out - retrying") + # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. - cur.execute("CREATE TABLE foo (t text)") - cur.execute( + execute_retry_on_timeout("CREATE TABLE foo (t text)") + execute_retry_on_timeout( """ INSERT INTO foo SELECT 'long string to consume some space' || g @@ -34,7 +45,7 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder): ) # Verify that the table is larger than shared_buffers - cur.execute( + execute_retry_on_timeout( """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size from pg_settings where name = 'shared_buffers' @@ -45,16 +56,16 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder): log.info(f"shared_buffers is {row[0]}, table size {row[1]}") assert int(row[0]) < int(row[1]) - cur.execute("SELECT count(*) FROM foo") + execute_retry_on_timeout("SELECT count(*) FROM foo") assert cur.fetchone() == (100000,) end_time = time.time() + 30 times_executed = 0 while time.time() < end_time: if random.random() < 0.5: - cur.execute("INSERT INTO foo VALUES ('stas'), ('heikki')") + execute_retry_on_timeout("INSERT INTO foo VALUES ('stas'), ('heikki')") else: - cur.execute("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10") + execute_retry_on_timeout("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10") cur.fetchall() times_executed += 1 log.info(f"Workload executed {times_executed} times")