Implement testing, and add a call to cleanup that I'd missed.

This commit is contained in:
Matthias van de Meent
2024-06-28 16:42:53 +02:00
committed by MMeent
parent 088a743d39
commit 4ab2b8f575
2 changed files with 62 additions and 0 deletions

View File

@@ -643,6 +643,7 @@ pageserver_connect(shardno_t shard_no, int elevel)
case PGRES_BAD_RESPONSE:
case PGRES_NONFATAL_ERROR:
case PGRES_FATAL_ERROR:
CLEANUP_AND_DISCONNECT(shard);
neon_shard_log(shard_no, elevel,
"could not complete handshake: PageServer returned error: %s",
PQresultErrorMessage(result));

View File

@@ -0,0 +1,61 @@
from contextlib import closing
import pytest
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonPageserver
from psycopg2.errors import QueryCanceled
"""
Test that we can handle broken pageservers correctly
"""
def test_pageserver_breaks_while_running(neon_simple_env: NeonEnv):
    """
    Check that compute copes with a pageserver that starts rejecting it.

    Scenario:
      1. Start an endpoint on a fresh branch and create a table while the
         tenant is still attached to the pageserver.
      2. Detach the tenant from the pageserver through its HTTP API.
      3. From a new connection, run a query against a relation that has not
         been read yet, with a 1 second statement timeout; the query is
         expected to be cancelled by that timeout.
      4. Stop the endpoint and verify that its log contains the
         "could not complete handshake" message.
    """
    env = neon_simple_env
    ps = env.pageserver
    ps_http = ps.http_client()
    ps_http.is_testing_enabled_or_skip()

    env.neon_cli.create_branch("test_config", "empty")

    # We don't want to have any racy behaviour with autovacuum IOs
    ep = env.endpoints.create_start(
        "test_config",
        config_lines=[
            "autovacuum = off",
            "shared_buffers = 128MB",
        ],
    )

    # tenant is still attached, no errors from PS
    with closing(ep.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute(
                """
                CREATE TABLE test1 AS
                    SELECT id, sha256(id::text::bytea) payload
                    FROM generate_series(1, 1024::bigint) p(id);
                """
            )

    ps_http.tenant_detach(ep.tenant_id)

    # create a new connection to PS, this will cause errors.
    with closing(ep.connect()) as conn:
        with conn.cursor() as cur:
            # The PostgreSQL setting is `statement_timeout` (there is no
            # `query_timeout`), and a value with a unit must be quoted.
            # Exceeding it is what raises QueryCanceled below.
            cur.execute(
                """
                SET statement_timeout = '1s';
                """
            )
            with pytest.raises(QueryCanceled):
                # definitely uncached relation
                cur.execute(
                    """
                    SELECT count(*) FROM pg_rewrite;
                    """
                )

    ep.stop()

    # Assert on the result so a missing log line actually fails the test.
    # NOTE(review): assumes log_contains returns a falsy value (e.g. None)
    # when no line matches -- confirm against the fixture implementation.
    assert ep.log_contains(
        """could not complete handshake: PageServer returned error: """
    )