From 1874f9427a1bf9ab9e4b22dbf37beeabe0530636 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 27 Oct 2023 16:54:53 +0100 Subject: [PATCH] pageserver: return 503 for GET on InProgress tenant --- pageserver/src/http/routes.rs | 1 + pageserver/src/page_service.rs | 3 ++ pageserver/src/tenant/mgr.rs | 53 ++++++++++++++++++++++------------ 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 591e20ae9a..988b148a1f 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -222,6 +222,7 @@ impl From for ApiError { // (We can produce this variant only in `mgr::get_tenant(..., active=true)` calls). ApiError::ResourceUnavailable("Tenant not yet active".into()) } + GetTenantError::MapState(e) => ApiError::ResourceUnavailable(format!("{e}").into()), } } } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 4d265a8c6f..ef564ce710 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1323,6 +1323,9 @@ async fn get_active_tenant_with_timeout( Err(GetTenantError::Broken(_)) => { unreachable!("we're calling get_tenant with active_only=false") } + Err(GetTenantError::MapState(_)) => { + unreachable!("TenantManager is initialized before page service starts") + } }; let wait_time = Duration::from_secs(30); match tokio::time::timeout(wait_time, tenant.wait_to_become_active()).await { diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 7dfac5eead..318f35075c 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -748,7 +748,7 @@ pub(crate) async fn upsert_location( // existng tenant. { let locked = TENANTS.read().unwrap(); - let peek_slot = tenant_map_peek_slot(&locked, &tenant_id)?; + let peek_slot = tenant_map_peek_slot(&locked, &tenant_id, false)?; match (&new_location_config.mode, peek_slot) { (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => { if attach_conf.generation == tenant.generation { @@ -874,6 +874,10 @@ pub(crate) enum GetTenantError { /// is a stuck error state #[error("Tenant is broken: {0}")] Broken(String), + + // Initializing or shutting down: cannot authoritatively say whether we have this tenant + #[error("Tenant map is not available: {0}")] + MapState(#[from] TenantMapError), } /// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query. @@ -884,24 +888,26 @@ pub(crate) fn get_tenant( tenant_id: TenantId, active_only: bool, ) -> Result, GetTenantError> { - let m = TENANTS.read().unwrap(); - let tenant = m - .get(&tenant_id) - .ok_or(GetTenantError::NotFound(tenant_id))?; + let locked = TENANTS.read().unwrap(); + let peek_slot = tenant_map_peek_slot(&locked, &tenant_id, true)?; - match tenant.current_state() { - TenantState::Broken { - reason, - backtrace: _, - } if active_only => Err(GetTenantError::Broken(reason)), - TenantState::Active => Ok(Arc::clone(tenant)), - _ => { - if active_only { - Err(GetTenantError::NotActive(tenant_id)) - } else { - Ok(Arc::clone(tenant)) + match peek_slot { + Some(TenantSlot::Attached(tenant)) => match tenant.current_state() { + TenantState::Broken { + reason, + backtrace: _, + } if active_only => Err(GetTenantError::Broken(reason)), + TenantState::Active => Ok(Arc::clone(tenant)), + _ => { + if active_only { + Err(GetTenantError::NotActive(tenant_id)) + } else { + Ok(Arc::clone(tenant)) + } } - } + }, + Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_id)), + None | Some(TenantSlot::Secondary) => Err(GetTenantError::NotFound(tenant_id)), } } @@ -1372,13 +1378,24 @@ impl Drop for SlotGuard { } } +/// `allow_shutdown=true` is appropriate for read-only APIs that should stay working while +/// the pageserver is shutting down. Otherwise, set it to false to refuse attempts to +/// operate on a slot while we are shutting down. +/// fn tenant_map_peek_slot<'a>( tenants: &'a std::sync::RwLockReadGuard<'a, TenantsMap>, tenant_id: &TenantId, + allow_shutdown: bool, ) -> Result, TenantMapError> { let m = match tenants.deref() { TenantsMap::Initializing => return Err(TenantMapError::StillInitializing), - TenantsMap::ShuttingDown(_) => return Err(TenantMapError::ShuttingDown), + TenantsMap::ShuttingDown(m) => { + if allow_shutdown { + return Err(TenantMapError::ShuttingDown); + } else { + m + } + } TenantsMap::Open(m) => m, };