mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-15 20:20:38 +00:00
pageserver: simpler, stricter config error handling (#8177)
## Problem Tenant attachment has error paths for failures to write local configuration, but these types of local storage I/O errors should be considered fatal for the process. Related thread on an earlier PR that touched this code: https://github.com/neondatabase/neon/pull/7947#discussion_r1655134114 ## Summary of changes - Make errors writing tenant config fatal (abort process) - When reading tenant config, make all I/O errors except ENOENT fatal - Replace use of bare anyhow errors with `LoadConfigError`
This commit is contained in:
@@ -41,18 +41,35 @@ def test_tenant_creation_fails(neon_simple_env: NeonEnv):
|
||||
neon_simple_env.storage_controller.allowed_errors.extend(error_regexes)
|
||||
|
||||
pageserver_http = neon_simple_env.pageserver.http_client()
|
||||
|
||||
# Failure to write a config to local disk makes the pageserver assume that local disk is bad and abort the process
|
||||
pageserver_http.configure_failpoints(("tenant-config-before-write", "return"))
|
||||
with pytest.raises(Exception, match="tenant-config-before-write"):
|
||||
|
||||
# Storage controller will see a torn TCP connection when the crash point is reached, and follow an unclean 500 error path
|
||||
neon_simple_env.storage_controller.allowed_errors.extend(
|
||||
[
|
||||
".*Reconcile not done yet while creating tenant.*",
|
||||
".*Reconcile error: receive body: error sending request.*",
|
||||
".*Error processing HTTP request: InternalServerError.*",
|
||||
]
|
||||
)
|
||||
|
||||
with pytest.raises(Exception, match="error sending request"):
|
||||
_ = neon_simple_env.neon_cli.create_tenant()
|
||||
|
||||
# Any files left behind on disk during failed creation do not prevent
|
||||
# a retry from succeeding. Restart pageserver with no failpoints.
|
||||
neon_simple_env.pageserver.running = False
|
||||
neon_simple_env.pageserver.start()
|
||||
|
||||
# The failed creation should not be present in list of tenants, as when we start up we'll see
|
||||
# an empty tenant dir with no config in it.
|
||||
neon_simple_env.pageserver.allowed_errors.append(".*Failed to load tenant config.*")
|
||||
new_tenants = sorted(
|
||||
map(lambda t: t.split()[0], neon_simple_env.neon_cli.list_tenants().stdout.splitlines())
|
||||
)
|
||||
assert initial_tenants == new_tenants, "should not create new tenants"
|
||||
|
||||
# Any files left behind on disk during failed creation do not prevent
|
||||
# a retry from succeeding.
|
||||
pageserver_http.configure_failpoints(("tenant-config-before-write", "off"))
|
||||
neon_simple_env.neon_cli.create_tenant()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user