diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 50614653be..33a2e32141 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -186,10 +186,8 @@ paths: schema: $ref: "#/components/schemas/Error" delete: - description: "Attempts to delete specified timeline. On 500 errors should be retried" + description: "Attempts to delete specified timeline. 500 and 409 errors should be retried" responses: - "200": - description: Ok "400": description: Error when no tenant id found in path or no timeline id content: @@ -214,6 +212,12 @@ paths: application/json: schema: $ref: "#/components/schemas/NotFoundError" + "409": + description: Deletion is already in progress, continue polling + content: + application/json: + schema: + $ref: "#/components/schemas/ConflictError" "412": description: Tenant is missing, or timeline has children content: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 8d3bb5552b..0e12cc2b1e 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -187,6 +187,7 @@ impl From for ApiError { format!("Cannot delete timeline which has child timelines: {children:?}") .into_boxed_str(), ), + a @ AlreadyInProgress => ApiError::Conflict(a.to_string()), Other(e) => ApiError::InternalServerError(e), } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 0e8d6b1287..ce1e5bf51f 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -440,8 +440,13 @@ pub enum GetTimelineError { pub enum DeleteTimelineError { #[error("NotFound")] NotFound, + #[error("HasChildren")] HasChildren(Vec), + + #[error("Timeline deletion is already in progress")] + AlreadyInProgress, + #[error(transparent)] Other(#[from] anyhow::Error), } @@ -1755,14 +1760,11 @@ impl Tenant { timeline = Arc::clone(timeline_entry.get()); // Prevent two tasks from trying to delete the timeline at the same time. - delete_lock_guard = - DeletionGuard(Arc::clone(&timeline.delete_lock).try_lock_owned().map_err( - |_| { - DeleteTimelineError::Other(anyhow::anyhow!( - "timeline deletion is already in progress" - )) - }, - )?); + delete_lock_guard = DeletionGuard( + Arc::clone(&timeline.delete_lock) + .try_lock_owned() + .map_err(|_| DeleteTimelineError::AlreadyInProgress)?, + ); // If another task finished the deletion just before we acquired the lock, // return success. diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py index ddd9ffd755..7c3424cf32 100644 --- a/test_runner/regress/test_timeline_delete.py +++ b/test_runner/regress/test_timeline_delete.py @@ -463,10 +463,10 @@ def test_concurrent_timeline_delete_stuck_on( # make the second call and assert behavior log.info("second call start") - error_msg_re = "timeline deletion is already in progress" + error_msg_re = "Timeline deletion is already in progress" with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err: ps_http.timeline_delete(env.initial_tenant, child_timeline_id) - assert second_call_err.value.status_code == 500 + assert second_call_err.value.status_code == 409 env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*") # the second call will try to transition the timeline into Stopping state as well env.pageserver.allowed_errors.append( @@ -518,9 +518,9 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder): ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2) env.pageserver.allowed_errors.append( - f".*{child_timeline_id}.*timeline deletion is already in progress.*" + f".*{child_timeline_id}.*Timeline deletion is already in progress.*" ) - with pytest.raises(PageserverApiException, match="timeline deletion is already in progress"): + with pytest.raises(PageserverApiException, match="Timeline deletion is already in progress"): ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2) # make sure the timeout was due to the failpoint