mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-29 19:10:38 +00:00
pageserver: downgrade stale generation messages to INFO (#8256)
## Problem

When generations were new, these messages were an important way of noticing if something unexpected was going on. We found some real issues when investigating tests that unexpectedly tripped them. As time has gone on, this code has become pretty battle-tested, and as we do more live migrations, etc., it's fairly normal to see the occasional message from a node with a stale generation. At this point the cognitive load on developers to selectively allow-list these logs outweighs the benefit of having them at warn severity.

Closes: https://github.com/neondatabase/neon/issues/8080

## Summary of changes

- Downgrade "Dropped remote consistent LSN updates" and "Dropping stale deletions" messages to INFO
- Remove all the allow-list entries for these logs.
This commit is contained in:
@@ -60,11 +60,6 @@ def test_storage_controller_smoke(
|
||||
neon_env_builder.num_pageservers = 3
|
||||
env = neon_env_builder.init_configs()
|
||||
|
||||
for pageserver in env.pageservers:
|
||||
# This test detaches tenants during migration, which can race with deletion queue operations,
|
||||
# during detach we only do an advisory flush, we don't wait for it.
|
||||
pageserver.allowed_errors.extend([".*Dropped remote consistent LSN updates.*"])
|
||||
|
||||
# Start services by hand so that we can skip a pageserver (this will start + register later)
|
||||
env.broker.try_start()
|
||||
env.storage_controller.start()
|
||||
@@ -484,9 +479,6 @@ def test_storage_controller_compute_hook(
|
||||
# Start running
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# We will to an unclean migration, which will result in deletion queue warnings
|
||||
env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates for tenant.*")
|
||||
|
||||
# Initial notification from tenant creation
|
||||
assert len(notifications) == 1
|
||||
expect: Dict[str, Union[List[Dict[str, int]], str, None, int]] = {
|
||||
@@ -1054,13 +1046,6 @@ def test_storage_controller_heartbeats(
|
||||
online_node_ids = set(range(1, len(env.pageservers) + 1)) - offline_node_ids
|
||||
|
||||
for node_id in offline_node_ids:
|
||||
env.get_pageserver(node_id).allowed_errors.append(
|
||||
# In the case of the failpoint failure, the impacted pageserver
|
||||
# still believes it has the tenant attached since location
|
||||
# config calls into it will fail due to being marked offline.
|
||||
".*Dropped remote consistent LSN updates.*",
|
||||
)
|
||||
|
||||
if len(offline_node_ids) > 1:
|
||||
env.get_pageserver(node_id).allowed_errors.append(
|
||||
".*Scheduling error when marking pageserver.*offline.*",
|
||||
|
||||
Reference in New Issue
Block a user