From 4ab2b8f5758fb718adfe023c35dced5f645d84ef Mon Sep 17 00:00:00 2001
From: Matthias van de Meent
Date: Fri, 28 Jun 2024 16:42:53 +0200
Subject: [PATCH] Implement testing, and add a call to cleanup that I'd missed.

---
Review note: the line breaks and indentation in this patch were
reconstructed from a whitespace-collapsed copy. Check the context lines
of the libpagestore.c hunk against the tree before applying; the blob
id on the new test file's index line predates the edits to that file.

 pgxn/neon/libpagestore.c                      |  1 +
 .../test_pageserver_broken_handling.py        | 70 +++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 test_runner/regress/test_pageserver_broken_handling.py

diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index 9327e5f00d..392200736e 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -643,6 +643,7 @@ pageserver_connect(shardno_t shard_no, int elevel)
 			case PGRES_BAD_RESPONSE:
 			case PGRES_NONFATAL_ERROR:
 			case PGRES_FATAL_ERROR:
+				CLEANUP_AND_DISCONNECT(shard);
 				neon_shard_log(shard_no, elevel,
 							   "could not complete handshake: PageServer returned error: %s",
 							   PQresultErrorMessage(result));
diff --git a/test_runner/regress/test_pageserver_broken_handling.py b/test_runner/regress/test_pageserver_broken_handling.py
new file mode 100644
index 0000000000..6cc35d7521
--- /dev/null
+++ b/test_runner/regress/test_pageserver_broken_handling.py
@@ -0,0 +1,70 @@
+"""
+Test that we can handle broken pageservers correctly
+"""
+
+from contextlib import closing
+
+import pytest
+from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonPageserver
+from psycopg2.errors import QueryCanceled
+
+
+def test_pageserver_breaks_while_running(neon_simple_env: NeonEnv):
+    """
+    Detach the tenant from the pageserver while the endpoint is running and
+    check that a query needing the pageserver is cancelled by
+    statement_timeout, and that the failed handshake is logged.
+    """
+    env = neon_simple_env
+    ps = env.pageserver
+    ps_http = ps.http_client()
+    ps_http.is_testing_enabled_or_skip()
+
+    env.neon_cli.create_branch("test_config", "empty")
+
+    # We don't want to have any racy behaviour with autovacuum IOs
+    ep = env.endpoints.create_start(
+        "test_config",
+        config_lines=[
+            "autovacuum = off",
+            "shared_buffers = 128MB",
+        ],
+    )
+
+    # tenant is still attached, no errors from PS
+    with closing(ep.connect()) as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                """
+                CREATE TABLE test1 AS
+                    SELECT id, sha256(id::text::bytea) payload
+                    FROM generate_series(1, 1024::bigint) p(id);
+                """
+            )
+
+    ps_http.tenant_detach(ep.tenant_id)
+
+    # create a new connection to PS, this will cause errors.
+    with closing(ep.connect()) as conn:
+        with conn.cursor() as cur:
+            # statement_timeout is the setting that makes PostgreSQL cancel
+            # the statement, which psycopg2 surfaces as QueryCanceled.
+            cur.execute(
+                """
+                SET statement_timeout = '1s';
+                """
+            )
+            with pytest.raises(QueryCanceled):
+                # definitely uncached relation
+                cur.execute(
+                    """
+                    SELECT count(*) FROM pg_rewrite;
+                    """
+                )
+
+    ep.stop()
+    # Fail the test when the handshake error is missing from the log
+    # (assumes log_contains returns a falsy value when nothing matches).
+    assert ep.log_contains(
+        """could not complete handshake: PageServer returned error: """
+    )