mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 22:12:56 +00:00
pageserver: cancellation handling in writes to postgres client socket (#5503)
## Problem Writes to the postgres client socket from the page server were not wrapped in cancellation handling, so a stuck client connection could prevent tenant shutdowwn. ## Summary of changes All the places we call flush() to write to the socket, we should be respecting the cancellation token for the task. In this PR, I explicitly pass around a CancellationToken rather than doing inline `task_mgr::shutdown_token` calls, to avoid coupling it to the global task_mgr state and make it easier to refactor later. I have some follow-on commits that add a Shutdown variant to QueryError and use it more extensively, but that's pure refactor so will keep separate from this bug fix PR. Closes: https://github.com/neondatabase/neon/issues/5341
This commit is contained in:
@@ -17,6 +17,8 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB
|
||||
n_restarts = 10
|
||||
scale = 10
|
||||
|
||||
env.pageserver.allowed_errors.append(".*query handler.*failed.*Shutting down")
|
||||
|
||||
def run_pgbench(connstr: str):
|
||||
log.info(f"Start a pgbench workload on pg {connstr}")
|
||||
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
|
||||
|
||||
@@ -752,6 +752,9 @@ def test_ignore_while_attaching(
|
||||
env.pageserver.allowed_errors.append(
|
||||
f".*Tenant {tenant_id} will not become active\\. Current state: Stopping.*"
|
||||
)
|
||||
# An endpoint is starting up concurrently with our detach, it can
|
||||
# experience RPC failure due to shutdown.
|
||||
env.pageserver.allowed_errors.append(".*query handler.*failed.*Shutting down")
|
||||
|
||||
data_id = 1
|
||||
data_secret = "very secret secret"
|
||||
|
||||
Reference in New Issue
Block a user