mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-01 20:40:37 +00:00
safekeeper: add term_bump endpoint.
When walproposer observes a higher term, it now restarts instead of crashing the whole compute with PANIC; this avoids a compute crash after a term_bump call. After a successful election we still check the last_log_term of the highest given vote to ensure the basebackup is good, and PANIC otherwise. It will be used for migration per 035-safekeeper-dynamic-membership-change.md and https://github.com/neondatabase/docs/pull/21 ref https://github.com/neondatabase/neon/issues/8700
This commit is contained in:
@@ -50,6 +50,19 @@ class SafekeeperMetrics(Metrics):
|
||||
).value
|
||||
|
||||
|
||||
@dataclass
class TermBumpResponse:
    """Parsed JSON reply of the safekeeper ``term_bump`` HTTP endpoint."""

    # Value of the "previous_term" key in the reply
    # (presumably the term before the bump was applied -- confirm against the handler).
    previous_term: int
    # Value of the "current_term" key in the reply
    # (presumably the term after the bump was applied -- confirm against the handler).
    current_term: int

    @classmethod
    def from_json(cls, d: Dict[str, Any]) -> "TermBumpResponse":
        """Build a response object from a decoded JSON dictionary.

        Args:
            d: Mapping that must contain the "previous_term" and
                "current_term" keys.

        Returns:
            An instance of ``cls`` populated from ``d``.

        Raises:
            KeyError: if either required key is absent from ``d``.
        """
        # Construct through ``cls`` rather than the hard-coded class name so
        # that subclasses calling from_json() get an instance of their own type.
        return cls(
            previous_term=d["previous_term"],
            current_term=d["current_term"],
        )
|
||||
|
||||
|
||||
class SafekeeperHttpClient(requests.Session, MetricsGetter):
|
||||
HTTPError = requests.HTTPError
|
||||
|
||||
@@ -252,6 +265,22 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter):
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
def term_bump(
    self,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    term: Optional[int],
) -> TermBumpResponse:
    """POST to the safekeeper's ``term_bump`` endpoint for one timeline.

    Args:
        tenant_id: Tenant that owns the timeline.
        timeline_id: Timeline whose term should be bumped.
        term: Target term. When ``None`` the "term" key is left out of the
            request body and the choice of the new term is left to the
            server (presumably current term + 1 -- confirm against the
            safekeeper handler).

    Returns:
        The JSON reply parsed into a ``TermBumpResponse``.

    Raises:
        requests.HTTPError: if the safekeeper answers with an error status.
    """
    url = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/term_bump"
    # Send an empty JSON object unless an explicit target term was requested.
    payload = {} if term is None else {"term": term}
    response = self.post(url, json=payload)
    response.raise_for_status()
    return TermBumpResponse.from_json(response.json())
|
||||
|
||||
def record_safekeeper_info(self, tenant_id: TenantId, timeline_id: TimelineId, body):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/record_safekeeper_info/{tenant_id}/{timeline_id}",
|
||||
|
||||
@@ -2194,6 +2194,43 @@ def test_patch_control_file(neon_env_builder: NeonEnvBuilder):
|
||||
assert res["timelines"][0]["control_file"]["timeline_start_lsn"] == "0/1"
|
||||
|
||||
|
||||
def test_term_bump(neon_env_builder: NeonEnvBuilder):
    """Exercise the safekeeper term_bump endpoint on a single-safekeeper env.

    Covers three cases: bumping to an explicit higher term, bumping without a
    target term, and a no-op attempt to bump to a lower term. Finally checks
    that the compute endpoint still accepts writes afterwards.
    """
    neon_env_builder.num_safekeepers = 1
    env = neon_env_builder.init_start()

    tenant_id, timeline_id = env.initial_tenant, env.initial_timeline

    endpoint = env.endpoints.create_start("main")
    # A first write makes the safekeeper create state for the timeline.
    endpoint.safe_psql("create table t(key int, value text)")

    sk_http = env.safekeepers[0].http_client()

    # Case 1: an explicit target above the current term is honoured.
    bump_to = sk_http.timeline_status(tenant_id, timeline_id).term + 3
    res = sk_http.term_bump(tenant_id, timeline_id, bump_to)
    log.info(f"bump to {bump_to} res: {res}")
    assert res.current_term >= bump_to

    # Case 2: no explicit target -> the term must still strictly grow.
    res = sk_http.term_bump(tenant_id, timeline_id, None)
    log.info(f"bump to None res: {res}")
    assert res.current_term > bump_to
    assert res.current_term > res.previous_term

    # Case 3: a target below the current term must leave the term untouched.
    res = sk_http.term_bump(tenant_id, timeline_id, 2)
    log.info(f"bump to 2 res: {res}")
    assert res.current_term > bump_to
    assert res.current_term == res.previous_term

    # The bumps must not kill the endpoint: the last WAL flush was its own,
    # so its basebackup is still good and it can keep writing.
    endpoint.safe_psql("insert into t values (1, 'payload')")
|
||||
|
||||
|
||||
# Test disables periodic pushes from safekeeper to the broker and checks that
|
||||
# pageserver can still discover safekeepers with discovery requests.
|
||||
def test_broker_discovery(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
Reference in New Issue
Block a user