diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 33a2e32141..290492203a 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -722,6 +722,12 @@ paths: application/json: schema: $ref: "#/components/schemas/ForbiddenError" + "406": + description: Permanently unsatisfiable request, don't retry. + content: + application/json: + schema: + $ref: "#/components/schemas/Error" "409": description: Timeline already exists, creation skipped content: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 08d3fcf8df..58dcbb2aac 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -338,6 +338,11 @@ async fn timeline_create_handler( Err(tenant::CreateTimelineError::AlreadyExists) => { json_response(StatusCode::CONFLICT, ()) } + Err(tenant::CreateTimelineError::AncestorLsn(err)) => { + json_response(StatusCode::NOT_ACCEPTABLE, HttpErrorBody::from_msg( + format!("{err:#}") + )) + } Err(tenant::CreateTimelineError::Other(err)) => Err(ApiError::InternalServerError(err)), } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 339291149f..3fa2a4bab4 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -506,6 +506,8 @@ pub enum CreateTimelineError { #[error("a timeline with the given ID already exists")] AlreadyExists, #[error(transparent)] + AncestorLsn(anyhow::Error), + #[error(transparent)] Other(#[from] anyhow::Error), } @@ -1432,7 +1434,7 @@ impl Tenant { let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn(); if ancestor_ancestor_lsn > *lsn { // can we safely just branch from the ancestor instead? - return Err(CreateTimelineError::Other(anyhow::anyhow!( + return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!( "invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}", lsn, ancestor_timeline_id, @@ -2715,7 +2717,7 @@ impl Tenant { dst_id: TimelineId, start_lsn: Option, ctx: &RequestContext, - ) -> anyhow::Result> { + ) -> Result, CreateTimelineError> { let tl = self .branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx) .await?; @@ -2732,7 +2734,7 @@ impl Tenant { dst_id: TimelineId, start_lsn: Option, ctx: &RequestContext, - ) -> anyhow::Result> { + ) -> Result, CreateTimelineError> { self.branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx) .await } @@ -2743,7 +2745,7 @@ impl Tenant { dst_id: TimelineId, start_lsn: Option, _ctx: &RequestContext, - ) -> anyhow::Result> { + ) -> Result, CreateTimelineError> { let src_id = src_timeline.timeline_id; // If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN @@ -2783,16 +2785,17 @@ impl Tenant { .context(format!( "invalid branch start lsn: less than latest GC cutoff {}", *latest_gc_cutoff_lsn, - ))?; + )) + .map_err(CreateTimelineError::AncestorLsn)?; // and then the planned GC cutoff { let gc_info = src_timeline.gc_info.read().unwrap(); let cutoff = min(gc_info.pitr_cutoff, gc_info.horizon_cutoff); if start_lsn < cutoff { - bail!(format!( + return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!( "invalid branch start lsn: less than planned GC cutoff {cutoff}" - )); + ))); } } @@ -3825,6 +3828,9 @@ mod tests { { Ok(_) => panic!("branching should have failed"), Err(err) => { + let CreateTimelineError::AncestorLsn(err) = err else { + panic!("wrong error type") + }; assert!(err.to_string().contains("invalid branch start lsn")); assert!(err .source() @@ -3854,6 +3860,9 @@ mod tests { { Ok(_) => panic!("branching should have failed"), Err(err) => { + let CreateTimelineError::AncestorLsn(err) = err else { + panic!("wrong error type"); + }; assert!(&err.to_string().contains("invalid branch start lsn")); assert!(&err .source() diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 5c4f5177d0..824d11cb17 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -21,6 +21,18 @@ class PageserverApiException(Exception): self.status_code = status_code +class TimelineCreate406(PageserverApiException): + def __init__(self, res: requests.Response): + assert res.status_code == 406 + super().__init__(res.json()["msg"], res.status_code) + + +class TimelineCreate409(PageserverApiException): + def __init__(self, res: requests.Response): + assert res.status_code == 409 + super().__init__("", res.status_code) + + @dataclass class InMemoryLayerInfo: kind: str @@ -309,9 +321,12 @@ class PageserverHttpClient(requests.Session): res = self.post( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline", json=body, **kwargs ) - self.verbose_error(res) if res.status_code == 409: - raise Exception(f"could not create timeline: already exists for id {new_timeline_id}") + raise TimelineCreate409(res) + if res.status_code == 406: + raise TimelineCreate406(res) + + self.verbose_error(res) res_json = res.json() assert isinstance(res_json, dict) diff --git a/test_runner/regress/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py index 4a03421fcf..53e67b1592 100644 --- a/test_runner/regress/test_branch_and_gc.py +++ b/test_runner/regress/test_branch_and_gc.py @@ -4,7 +4,8 @@ import time import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv -from fixtures.types import Lsn +from fixtures.pageserver.http import TimelineCreate406 +from fixtures.types import Lsn, TimelineId from fixtures.utils import query_scalar @@ -173,5 +174,12 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): # The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC. with pytest.raises(Exception, match="invalid branch start lsn: .*"): env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn) + # retry the same with the HTTP API, so that we can inspect the status code + with pytest.raises(TimelineCreate406): + new_timeline_id = TimelineId.generate() + log.info( + f"Expecting failure for branch behind gc'ing LSN, new_timeline_id={new_timeline_id}" + ) + pageserver_http_client.timeline_create(env.pg_version, tenant, new_timeline_id, b0, lsn) thread.join() diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index 3f7d49ab03..a19b2862f8 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -1,6 +1,7 @@ import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.pageserver.http import TimelineCreate406 from fixtures.types import Lsn, TimelineId from fixtures.utils import print_gc_result, query_scalar @@ -17,7 +18,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*") # Branch at the point where only 100 rows were inserted - env.neon_cli.create_branch("test_branch_behind") + branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind") endpoint_main = env.endpoints.create_start("test_branch_behind") log.info("postgres is running on 'test_branch_behind' branch") @@ -89,6 +90,7 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100 # Check bad lsn's for branching + pageserver_http = env.pageserver.http_client() # branch at segment boundary env.neon_cli.create_branch( @@ -97,27 +99,52 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): endpoint = env.endpoints.create_start("test_branch_segment_boundary") assert endpoint.safe_psql("SELECT 1")[0][0] == 1 - # branch at pre-initdb lsn + # branch at pre-initdb lsn (from main branch) with pytest.raises(Exception, match="invalid branch start lsn: .*"): env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn=Lsn("0/42")) + # retry the same with the HTTP API, so that we can inspect the status code + with pytest.raises(TimelineCreate406): + new_timeline_id = TimelineId.generate() + log.info(f"Expecting failure for branch pre-initdb LSN, new_timeline_id={new_timeline_id}") + pageserver_http.timeline_create( + env.pg_version, env.initial_tenant, new_timeline_id, env.initial_timeline, Lsn("0/42") + ) # branch at pre-ancestor lsn with pytest.raises(Exception, match="less than timeline ancestor lsn"): env.neon_cli.create_branch( "test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn=Lsn("0/42") ) + # retry the same with the HTTP API, so that we can inspect the status code + with pytest.raises(TimelineCreate406): + new_timeline_id = TimelineId.generate() + log.info( + f"Expecting failure for branch pre-ancestor LSN, new_timeline_id={new_timeline_id}" + ) + pageserver_http.timeline_create( + env.pg_version, + env.initial_tenant, + new_timeline_id, + branch_behind_timeline_id, + Lsn("0/42"), + ) # check that we cannot create branch based on garbage collected data - with env.pageserver.http_client() as pageserver_http: - pageserver_http.timeline_checkpoint(env.initial_tenant, timeline) - gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0) - print_gc_result(gc_result) - + pageserver_http.timeline_checkpoint(env.initial_tenant, timeline) + gc_result = pageserver_http.timeline_gc(env.initial_tenant, timeline, 0) + print_gc_result(gc_result) with pytest.raises(Exception, match="invalid branch start lsn: .*"): # this gced_lsn is pretty random, so if gc is disabled this woudln't fail env.neon_cli.create_branch( "test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn ) + # retry the same with the HTTP API, so that we can inspect the status code + with pytest.raises(TimelineCreate406): + new_timeline_id = TimelineId.generate() + log.info(f"Expecting failure for branch behind gc'd LSN, new_timeline_id={new_timeline_id}") + pageserver_http.timeline_create( + env.pg_version, env.initial_tenant, new_timeline_id, branch_behind_timeline_id, gced_lsn + ) # check that after gc everything is still there assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100