From 8840f3858c829ccfd855c278dde6fdff6c60ff77 Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Tue, 5 Nov 2024 13:16:55 +0100
Subject: [PATCH] pageserver: return 503 during tenant shutdown (#9635)

## Problem

Tenant operations may return `409 Conflict` if the tenant is shutting
down. This status code is not retried by the control plane, causing
user-facing errors during pageserver restarts.

Operations should instead return `503 Service Unavailable`, which may be
retried for idempotent operations.

## Summary of changes

Convert
`GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping)` to
`ApiError::ShuttingDown` rather than `ApiError::Conflict`. This error is
returned by `Tenant::wait_to_become_active` in most (all?)
tenant/timeline-related HTTP routes.
---
 pageserver/src/http/routes.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index ef8efd3f27..72eb3e7ade 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -37,6 +37,7 @@ use pageserver_api::models::TenantShardLocation;
 use pageserver_api::models::TenantShardSplitRequest;
 use pageserver_api::models::TenantShardSplitResponse;
 use pageserver_api::models::TenantSorting;
+use pageserver_api::models::TenantState;
 use pageserver_api::models::TimelineArchivalConfigRequest;
 use pageserver_api::models::TimelineCreateRequestMode;
 use pageserver_api::models::TimelinesInfoAndOffloaded;
@@ -295,6 +296,9 @@ impl From<GetActiveTenantError> for ApiError {
             GetActiveTenantError::Broken(reason) => {
                 ApiError::InternalServerError(anyhow!("tenant is broken: {}", reason))
             }
+            GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {
+                ApiError::ShuttingDown
+            }
             GetActiveTenantError::WillNotBecomeActive(_) => ApiError::Conflict(format!("{}", e)),
             GetActiveTenantError::Cancelled => ApiError::ShuttingDown,
             GetActiveTenantError::NotFound(gte) => gte.into(),