safekeeper: exclude API (#10757)

## Problem

https://github.com/neondatabase/neon/pull/10241 added configuration
switch endpoint, but it didn't delete timeline if node was excluded.

## Summary of changes

Add separate /exclude API endpoint which similarly accepts membership
configuration where sk is supposed by be excluded. Implementation
deletes the timeline locally.

Some more small related tweaks:
- make mconf switch API PUT instead of POST as it is idempotent;
- return 409 if switch was refused instead of 200 with requested &
current;
- remove unused was_active flag from delete response;
- remove meaningless _force suffix from delete functions names;
- reuse timeline.rs delete_dir function in timelines_global_map instead
of its own copy.

part of https://github.com/neondatabase/neon/issues/9965
This commit is contained in:
Arseny Sher
2025-02-26 22:26:33 +03:00
committed by GitHub
parent c1a040447d
commit 643a48210f
6 changed files with 212 additions and 75 deletions

View File

@@ -273,10 +273,22 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter):
assert isinstance(res_json, dict)
return res_json
def timeline_exclude(
self, tenant_id: TenantId, timeline_id: TimelineId, to: Configuration
) -> dict[str, Any]:
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/exclude",
data=to.to_json(),
)
res.raise_for_status()
res_json = res.json()
assert isinstance(res_json, dict)
return res_json
def membership_switch(
self, tenant_id: TenantId, timeline_id: TimelineId, to: Configuration
) -> TimelineMembershipSwitchResponse:
res = self.post(
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/membership",
data=to.to_json(),
)

View File

@@ -1686,7 +1686,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("auth_enabled", [False, True])
def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
def test_delete(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
@@ -2215,13 +2215,21 @@ def test_membership_api(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
# These are expected after timeline deletion on safekeepers.
env.pageserver.allowed_errors.extend(
[
".*Timeline .* was not found in global map.*",
".*Timeline .* was cancelled and cannot be used anymore.*",
]
)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
sk = env.safekeepers[0]
http_cli = sk.http_client()
sk_id_1 = SafekeeperId(env.safekeepers[0].id, "localhost", sk.port.pg_tenant_only)
sk_id_1 = SafekeeperId(sk.id, "localhost", sk.port.pg_tenant_only)
sk_id_2 = SafekeeperId(11, "localhost", 5434) # just a mock
# Request to switch before timeline creation should fail.
@@ -2249,19 +2257,28 @@ def test_membership_api(neon_env_builder: NeonEnvBuilder):
log.info(f"conf after restart: {after_restart}")
assert after_restart.generation == 4
# Switch into disjoint conf.
non_joint = Configuration(generation=5, members=[sk_id_2], new_members=None)
# Switch into non joint conf of which sk is not a member, must fail.
non_joint_not_member = Configuration(generation=5, members=[sk_id_2], new_members=None)
with pytest.raises(requests.exceptions.HTTPError):
resp = http_cli.membership_switch(tenant_id, timeline_id, non_joint_not_member)
# Switch into good non joint conf.
non_joint = Configuration(generation=6, members=[sk_id_1], new_members=None)
resp = http_cli.membership_switch(tenant_id, timeline_id, non_joint)
log.info(f"non joint switch resp: {resp}")
assert resp.previous_conf.generation == 4
assert resp.current_conf.generation == 5
assert resp.current_conf.generation == 6
# Switch request to lower conf should be ignored.
lower_conf = Configuration(generation=3, members=[], new_members=None)
resp = http_cli.membership_switch(tenant_id, timeline_id, lower_conf)
log.info(f"lower switch resp: {resp}")
assert resp.previous_conf.generation == 5
assert resp.current_conf.generation == 5
# Switch request to lower conf should be rejected.
lower_conf = Configuration(generation=3, members=[sk_id_1], new_members=None)
with pytest.raises(requests.exceptions.HTTPError):
http_cli.membership_switch(tenant_id, timeline_id, lower_conf)
# Now, exclude sk from the membership, timeline should be deleted.
excluded_conf = Configuration(generation=7, members=[sk_id_2], new_members=None)
http_cli.timeline_exclude(tenant_id, timeline_id, excluded_conf)
with pytest.raises(requests.exceptions.HTTPError):
http_cli.timeline_status(tenant_id, timeline_id)
# In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries