mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-29 19:10:38 +00:00
timeline creation: reflect failures due to ancestor LSN issues in status code (#4600)
Before, it was a `500` and control plane would retry, wereas it actually should have stopped retrying. (Stacked on top of https://github.com/neondatabase/neon/pull/4597 ) fixes https://github.com/neondatabase/neon/issues/4595 part of https://github.com/neondatabase/cloud/issues/5626 --------- Co-authored-by: Shany Pozin <shany@neon.tech>
This commit is contained in:
committed by
GitHub
parent
26828560a8
commit
24eaa3b7ca
@@ -21,6 +21,18 @@ class PageserverApiException(Exception):
|
||||
self.status_code = status_code
|
||||
|
||||
|
||||
class TimelineCreate406(PageserverApiException):
|
||||
def __init__(self, res: requests.Response):
|
||||
assert res.status_code == 406
|
||||
super().__init__(res.json()["msg"], res.status_code)
|
||||
|
||||
|
||||
class TimelineCreate409(PageserverApiException):
|
||||
def __init__(self, res: requests.Response):
|
||||
assert res.status_code == 409
|
||||
super().__init__("", res.status_code)
|
||||
|
||||
|
||||
@dataclass
|
||||
class InMemoryLayerInfo:
|
||||
kind: str
|
||||
@@ -309,9 +321,12 @@ class PageserverHttpClient(requests.Session):
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline", json=body, **kwargs
|
||||
)
|
||||
self.verbose_error(res)
|
||||
if res.status_code == 409:
|
||||
raise Exception(f"could not create timeline: already exists for id {new_timeline_id}")
|
||||
raise TimelineCreate409(res)
|
||||
if res.status_code == 406:
|
||||
raise TimelineCreate406(res)
|
||||
|
||||
self.verbose_error(res)
|
||||
|
||||
res_json = res.json()
|
||||
assert isinstance(res_json, dict)
|
||||
|
||||
@@ -4,7 +4,8 @@ import time
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.types import Lsn
|
||||
from fixtures.pageserver.http import TimelineCreate406
|
||||
from fixtures.types import Lsn, TimelineId
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
@@ -173,5 +174,12 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
|
||||
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn)
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
new_timeline_id = TimelineId.generate()
|
||||
log.info(
|
||||
f"Expecting failure for branch behind gc'ing LSN, new_timeline_id={new_timeline_id}"
|
||||
)
|
||||
pageserver_http_client.timeline_create(env.pg_version, tenant, new_timeline_id, b0, lsn)
|
||||
|
||||
thread.join()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.pageserver.http import TimelineCreate406
|
||||
from fixtures.types import Lsn, TimelineId
|
||||
from fixtures.utils import print_gc_result, query_scalar
|
||||
|
||||
@@ -17,7 +18,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
env.neon_cli.create_branch("test_branch_behind")
|
||||
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
|
||||
endpoint_main = env.endpoints.create_start("test_branch_behind")
|
||||
log.info("postgres is running on 'test_branch_behind' branch")
|
||||
|
||||
@@ -89,6 +90,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100
|
||||
|
||||
# Check bad lsn's for branching
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# branch at segment boundary
|
||||
env.neon_cli.create_branch(
|
||||
@@ -97,27 +99,52 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
|
||||
endpoint = env.endpoints.create_start("test_branch_segment_boundary")
|
||||
assert endpoint.safe_psql("SELECT 1")[0][0] == 1
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
# branch at pre-initdb lsn (from main branch)
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42"))
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
new_timeline_id = TimelineId.generate()
|
||||
log.info(f"Expecting failure for branch pre-initdb LSN, new_timeline_id={new_timeline_id}")
|
||||
pageserver_http.timeline_create(
|
||||
env.pg_version, env.initial_tenant, new_timeline_id, env.initial_timeline, Lsn("0/42")
|
||||
)
|
||||
|
||||
# branch at pre-ancestor lsn
|
||||
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn=Lsn("0/42")
|
||||
)
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
new_timeline_id = TimelineId.generate()
|
||||
log.info(
|
||||
f"Expecting failure for branch pre-ancestor LSN, new_timeline_id={new_timeline_id}"
|
||||
)
|
||||
pageserver_http.timeline_create(
|
||||
env.pg_version,
|
||||
env.initial_tenant,
|
||||
new_timeline_id,
|
||||
branch_behind_timeline_id,
|
||||
Lsn("0/42"),
|
||||
)
|
||||
|
||||
# check that we cannot create branch based on garbage collected data
|
||||
with env.pageserver.http_client() as pageserver_http:
|
||||
pageserver_http.timeline_checkpoint(env.initial_tenant, timeline)
|
||||
gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0)
|
||||
print_gc_result(gc_result)
|
||||
|
||||
pageserver_http.timeline_checkpoint(env.initial_tenant, timeline)
|
||||
gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0)
|
||||
print_gc_result(gc_result)
|
||||
with pytest.raises(Exception, match="invalid branch start lsn: .*"):
|
||||
# this gced_lsn is pretty random, so if gc is disabled this woudln't fail
|
||||
env.neon_cli.create_branch(
|
||||
"test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn
|
||||
)
|
||||
# retry the same with the HTTP API, so that we can inspect the status code
|
||||
with pytest.raises(TimelineCreate406):
|
||||
new_timeline_id = TimelineId.generate()
|
||||
log.info(f"Expecting failure for branch behind gc'd LSN, new_timeline_id={new_timeline_id}")
|
||||
pageserver_http.timeline_create(
|
||||
env.pg_version, env.initial_tenant, new_timeline_id, branch_behind_timeline_id, gced_lsn
|
||||
)
|
||||
|
||||
# check that after gc everything is still there
|
||||
assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100
|
||||
|
||||
Reference in New Issue
Block a user