Add retry loops and bump test timeout in test_pageserver_connection_stress (#7281)

This commit is contained in:
Sasha Krassovsky
2024-05-01 21:36:50 -07:00
committed by GitHub
parent 5558457c84
commit d43d77389e

View File

@@ -1,10 +1,13 @@
import random
import time
import psycopg2.errors
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
@pytest.mark.timeout(600)
def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.append(".*simulated connection error.*")
@@ -20,12 +23,20 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
pg_conn = endpoint.connect()
cur = pg_conn.cursor()
def execute_retry_on_timeout(query):
while True:
try:
cur.execute(query)
return
except psycopg2.errors.QueryCanceled:
log.info(f"Query '{query}' timed out - retrying")
# Create table, and insert some rows. Make it big enough that it doesn't fit in
# shared_buffers, otherwise the SELECT after restart will just return answer
# from shared_buffers without hitting the page server, which defeats the point
# of this test.
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
execute_retry_on_timeout("CREATE TABLE foo (t text)")
execute_retry_on_timeout(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
@@ -34,7 +45,7 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
)
# Verify that the table is larger than shared_buffers
cur.execute(
execute_retry_on_timeout(
"""
select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size
from pg_settings where name = 'shared_buffers'
@@ -45,16 +56,16 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder):
log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
assert int(row[0]) < int(row[1])
cur.execute("SELECT count(*) FROM foo")
execute_retry_on_timeout("SELECT count(*) FROM foo")
assert cur.fetchone() == (100000,)
end_time = time.time() + 30
times_executed = 0
while time.time() < end_time:
if random.random() < 0.5:
cur.execute("INSERT INTO foo VALUES ('stas'), ('heikki')")
execute_retry_on_timeout("INSERT INTO foo VALUES ('stas'), ('heikki')")
else:
cur.execute("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10")
execute_retry_on_timeout("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10")
cur.fetchall()
times_executed += 1
log.info(f"Workload executed {times_executed} times")