Files
neon/test_runner/regress/test_pageserver_reconnect.py
Konstantin Knizhnik 4a0c2aebe0 Add test for proper handling of connection failure to avoid 'cannot wait on socket event without a socket' error (#8231)
## Problem

See https://github.com/neondatabase/cloud/issues/14289
and PR #8210 

## Summary of changes

Add a test for the problems fixed in #8210.

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? If so, did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-07-02 21:45:42 +03:00

67 lines
2.4 KiB
Python

import threading
import time
from contextlib import closing
import psycopg2.errors
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin
# Test updating neon.pageserver_connstring setting on the fly.
#
# This merely changes some whitespace in the connection string, so
# this doesn't prove that the new string actually takes effect. But at
# least the code gets exercised.
def test_pageserver_reconnect(neon_simple_env: NeonEnv, pg_bin: PgBin):
    """Rewrite neon.pageserver_connstring over and over under a pgbench load.

    A background thread initializes and then runs pgbench against the
    endpoint. Meanwhile the main thread issues ALTER SYSTEM followed by
    pg_reload_conf() ``n_reconnects`` times, alternating between the original
    connection string and the same string with one trailing space.
    """
    branch_name = "test_pageserver_restarts"
    n_reconnects = 1000
    timeout = 0.01
    scale = 10

    env = neon_simple_env
    env.neon_cli.create_branch(branch_name)
    endpoint = env.endpoints.create_start(branch_name)

    def run_pgbench(connstr: str):
        log.info(f"Start a pgbench workload on pg {connstr}")
        commands = (
            # Populate the pgbench tables first ...
            ["pgbench", "-i", f"-s{scale}", connstr],
            # ... then run the workload; the -T value is the iteration count
            # times the per-iteration sleep of the loop below.
            ["pgbench", f"-T{int(n_reconnects*timeout)}", connstr],
        )
        for command in commands:
            pg_bin.run_capture(command)

    worker = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
    worker.start()

    with closing(endpoint.connect()) as conn, conn.cursor() as cur:
        cur.execute("SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'")
        rows = cur.fetchall()
        connstring = rows[0][0]

        for attempt in range(n_reconnects):
            time.sleep(timeout)
            # Odd iterations append a single space, even iterations restore
            # the original value, so the setting changes on every reload.
            padding = " " if attempt % 2 else ""
            cur.execute(
                "alter system set neon.pageserver_connstring=%s",
                (connstring + padding,),
            )
            cur.execute("select pg_reload_conf()")

    # Wait for the pgbench workload to finish before the test returns.
    worker.join()


# Test handling errors during page server reconnect
def test_pageserver_reconnect_failure(neon_simple_env: NeonEnv):
    """Check that a failed pageserver reconnect is reported cleanly.

    The test appends an invalid parameter to neon.pageserver_connstring,
    reloads the configuration and runs a query under a 2 second
    statement_timeout. The query is allowed to be cancelled; what must not
    happen is the 'cannot wait on socket event without a socket' error
    appearing in the endpoint log.
    """
    env = neon_simple_env
    env.neon_cli.create_branch("test_pageserver_reconnect")
    endpoint = env.endpoints.create_start("test_pageserver_reconnect")

    # Close the cursor and connection even when a statement or the assertion
    # below fails, instead of leaving them open until interpreter shutdown.
    with closing(endpoint.connect()) as con, con.cursor() as cur:
        # Bound the time the query below may spend waiting on the broken
        # pageserver connection.
        cur.execute("set statement_timeout='2s'")
        cur.execute("SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'")
        connstring = cur.fetchall()[0][0]

        # Pass the value as a bound parameter (same approach as
        # test_pageserver_reconnect) rather than pasting it between SQL
        # quotes, so quoting is handled by the driver.
        cur.execute(
            "alter system set neon.pageserver_connstring=%s",
            (f"{connstring}?some_invalid_param=xyz",),
        )
        cur.execute("select pg_reload_conf()")

        try:
            cur.execute("select count(*) from pg_class")
        except psycopg2.errors.QueryCanceled:
            # Hitting the statement timeout is an acceptable outcome here.
            log.info("Connection to PS failed")

        # PostgreSQL separates the severity from the message text with two
        # spaces ("ERROR:  ..."), so match any run of whitespace; a pattern
        # with exactly one space could never match and the check would pass
        # vacuously.
        # NOTE(review): assumes log_contains() treats its argument as a
        # regular expression (the original pattern already ended in ".*").
        assert not endpoint.log_contains(
            r"ERROR:\s+cannot wait on socket event without a socket.*"
        )