mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-27 16:12:56 +00:00
## Problem We don't notify cplane about safekeeper membership changes yet. Without the notification, the compute needs to know all the safekeepers in the cluster to be able to talk to them. Change notifications make that unnecessary. - Closes: https://github.com/neondatabase/neon/issues/12188 ## Summary of changes - Implement the `notify_safekeepers` method in `ComputeHook`. - Notify cplane about safekeepers in the `safekeeper_migrate` handler. - Update the test to make sure notifications work. ## Out of scope - There is a `cplane_notified_generation` field in the `timelines` table in the storage controller's (storcon's) database. It's not needed now, so it's not updated in this PR. We can probably remove it. - e2e tests to make sure it works with a production cplane.
77 lines
2.7 KiB
Python
77 lines
2.7 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING
|
|
|
|
if TYPE_CHECKING:
|
|
from fixtures.neon_fixtures import NeonEnvBuilder
|
|
|
|
|
|
def test_safekeeper_migration_simple(neon_env_builder: NeonEnvBuilder):
    """
    Basic end-to-end check of safekeeper migration.

    The environment has 3 safekeepers, but the timeline is configured to use
    only one of them. For each safekeeper in turn the test:
    1. Migrates the timeline to that safekeeper.
    2. Stops the other safekeepers and validates that an insert succeeds.
    3. Starts the other safekeepers again before moving to the next one.
    Afterwards it validates that the table contains all inserted values, that
    the membership generation has the expected value, and that the data is
    still readable after an endpoint restart.
    """
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.storage_controller_config = {
        "timelines_onto_safekeepers": True,
        "timeline_safekeeper_count": 1,
    }
    env = neon_env_builder.init_start()

    # TODO(diko): pageserver spams with various errors during safekeeper migration.
    # Fix the code so it handles the migration better.
    tolerated_pageserver_errors = [
        ".*Timeline .* was cancelled and cannot be used anymore.*",
        ".*Timeline .* has been deleted.*",
        ".*wal receiver task finished with an error.*",
    ]
    env.pageserver.allowed_errors.extend(tolerated_pageserver_errors)

    endpoint = env.endpoints.create("main", tenant_id=env.initial_tenant)

    # Before any migration: a single safekeeper, no pending new set, generation 1.
    membership = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
    assert membership["new_sk_set"] is None
    assert len(membership["sk_set"]) == 1
    assert membership["generation"] == 1

    endpoint.start(safekeeper_generation=1, safekeepers=membership["sk_set"])
    endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
    endpoint.safe_psql("CREATE TABLE t(a int)")

    # Safekeeper ids are 1-based; env.safekeepers is indexed from 0.
    sk_ids = list(range(1, 4))
    # One row is inserted per migration target, holding that target's id.
    expected_rows = [(sk_id,) for sk_id in sk_ids]

    for target_sk in sk_ids:
        env.storage_controller.migrate_safekeepers(
            env.initial_tenant, env.initial_timeline, [target_sk]
        )

        idle_sks = [sk_id for sk_id in sk_ids if sk_id != target_sk]

        # With every other safekeeper down, the insert can only succeed
        # through the migration target.
        for sk_id in idle_sks:
            env.safekeepers[sk_id - 1].stop()

        endpoint.safe_psql(f"INSERT INTO t VALUES ({target_sk})")

        for sk_id in idle_sks:
            env.safekeepers[sk_id - 1].start()

    endpoint.clear_buffers()

    assert endpoint.safe_psql("SELECT * FROM t") == expected_rows

    # 1 initial generation + 2 migrations on each loop iteration.
    final_generation = 1 + 2 * len(sk_ids)

    membership = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
    assert membership["generation"] == final_generation

    # The compute must have been told about the final generation as well.
    safekeepers_setting = endpoint.safe_psql("SHOW neon.safekeepers")[0][0]
    assert safekeepers_setting.startswith(f"g#{final_generation}:")

    # Restart and check again to make sure data is persistent.
    # Safekeeper 3 is the last migration target of the loop above.
    endpoint.stop()
    endpoint.start(safekeeper_generation=1, safekeepers=[3])

    assert endpoint.safe_psql("SELECT * FROM t") == expected_rows
|