Tone down safekeeper and pageserver walreceiver errors (#3227)

Closes https://github.com/neondatabase/neon/issues/3114

Adds more typization into errors that appear during protocol messages (`FeMessage`), postgres and walreceiver connections.

Socket IO errors are now better detected and logged with lesser (INFO, DEBUG) error level, without traces that they were logged before, when they were wrapped in anyhow context.
This commit is contained in:
Kirill Bulatov
2023-01-03 22:42:04 +02:00
committed by GitHub
parent e9583db73b
commit 10dae79c6d
18 changed files with 635 additions and 334 deletions

View File

@@ -1903,13 +1903,15 @@ class NeonPageserver(PgProtocol):
".*wal receiver task finished with an error: walreceiver connection handling failure.*",
".*Shutdown task error: walreceiver connection handling failure.*",
".*wal_connection_manager.*tcp connect error: Connection refused.*",
".*query handler for .* failed: Connection reset by peer.*",
".*serving compute connection task.*exited with error: Broken pipe.*",
".*Connection aborted: error communicating with the server: Broken pipe.*",
".*Connection aborted: error communicating with the server: Transport endpoint is not connected.*",
".*Connection aborted: error communicating with the server: Connection reset by peer.*",
".*query handler for .* failed: Socket IO error: Connection reset by peer.*",
".*serving compute connection task.*exited with error: Postgres connection error.*",
".*serving compute connection task.*exited with error: Connection reset by peer.*",
".*serving compute connection task.*exited with error: Postgres query error.*",
".*Connection aborted: connection error: error communicating with the server: Broken pipe.*",
".*Connection aborted: connection error: error communicating with the server: Transport endpoint is not connected.*",
".*Connection aborted: connection error: error communicating with the server: Connection reset by peer.*",
".*kill_and_wait_impl.*: wait successful.*",
".*end streaming to Some.*",
".*Replication stream finished: db error: ERROR: Socket IO error: end streaming to Some.*",
".*query handler for 'pagestream.*failed: Broken pipe.*", # pageserver notices compute shut down
# safekeeper connection can fail with this, in the window between timeline creation
# and streaming start

View File

@@ -1105,7 +1105,6 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
env.pageserver.allowed_errors.extend(
[
".*Failed to process query for timeline .*: Timeline .* was not found in global map.*",
".*end streaming to Some.*",
]
)