From 264b0ada9fe60ce781006ac48bd0f6ba2e546443 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 24 Nov 2022 00:59:20 +0200 Subject: [PATCH] Handle concurrent detach and attach more gracefully. If tenant detach is requested while the tenant is still in Attaching state, we set the state to Paused, but when the attach completed, it changed it to Active again, and worse, it started the background jobs. To fix, rewrite the set_state() function so that when you activate a tenant that is already in Paused state, it stays in Paused state and we don't start the background loops. --- pageserver/src/tenant.rs | 140 ++++++++++++++++++++++++----------- pageserver/src/tenant_mgr.rs | 4 +- 2 files changed, 98 insertions(+), 46 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 72e1697182..08cd0f0df6 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -551,7 +551,7 @@ impl Tenant { match tenant_clone.attach().await { Ok(_) => {} Err(e) => { - tenant_clone.set_state(TenantState::Broken); + tenant_clone.set_broken(); error!("error attaching tenant: {:?}", e); } } @@ -644,11 +644,9 @@ impl Tenant { anyhow::bail!("failpoint attach-beore-activate"); }); - // FIXME: Check if the state has changed to Stopping while we were downloading stuff - // We're ready for business. - // Change to active state under the hood spawns background loops + // Start background operations and open the tenant for business. // The loops will shut themselves down when they notice that the tenant is inactive. - self.activate(); + self.activate()?; info!("Done"); @@ -787,7 +785,7 @@ impl Tenant { match tenant_clone.load().await { Ok(()) => {} Err(err) => { - tenant_clone.set_state(TenantState::Broken); + tenant_clone.set_broken(); error!("could not load tenant {tenant_id}: {err:?}"); } } @@ -930,10 +928,9 @@ impl Tenant { .await?; } - // We're ready for business. - // Change to active state under the hood spawns background loops + // Start background operations and open the tenant for business. // The loops will shut themselves down when they notice that the tenant is inactive. - self.activate(); + self.activate()?; info!("Done"); @@ -1273,47 +1270,102 @@ impl Tenant { self.current_state() == TenantState::Active } - /// Changes tenant status to active, if it was not broken before. - /// Otherwise, ignores the state change, logging an error. - fn activate(&self) { - self.set_state(TenantState::Active); - } + /// Changes tenant status to active, unless shutdown was already requested. + fn activate(&self) -> anyhow::Result<()> { + let mut result = Ok(()); + self.state.send_modify(|current_state| { + match *current_state { + TenantState::Active => { + // activate() was called on an already Active tenant. Shouldn't happen. + result = Err(anyhow::anyhow!("Tenant is already active")); + } + TenantState::Broken => { + // This shouldn't happen either + result = Err(anyhow::anyhow!("Could not activate tenant because it is in broken state")); + } + TenantState::Paused => { + // The tenant was detached, or system shutdown was requested, while we were + // loading or attaching the tenant. + info!("Tenant is already in Paused state, skipping activation"); + } + TenantState::Loading | TenantState::Attaching => { + *current_state = TenantState::Active; - pub fn set_state(&self, new_state: TenantState) { - match (self.current_state(), new_state) { - (equal_state_1, equal_state_2) if equal_state_1 == equal_state_2 => { - debug!("Ignoring new state, equal to the existing one: {equal_state_2:?}"); - } - (TenantState::Broken, _) => { - error!("Ignoring state update {new_state:?} for broken tenant"); - } - (_, new_state) => { - self.state.send_replace(new_state); + let timelines_accessor = self.timelines.lock().unwrap(); + let not_broken_timelines = timelines_accessor + .values() + .filter(|timeline| timeline.current_state() != TimelineState::Broken); - let timelines_accessor = self.timelines.lock().unwrap(); - let not_broken_timelines = timelines_accessor - .values() - .filter(|timeline| timeline.current_state() != TimelineState::Broken); - match new_state { - TenantState::Active => { - // Spawn gc and compaction loops. The loops will shut themselves - // down when they notice that the tenant is inactive. - crate::tenant_tasks::start_background_loops(self.tenant_id); + // Spawn gc and compaction loops. The loops will shut themselves + // down when they notice that the tenant is inactive. + crate::tenant_tasks::start_background_loops(self.tenant_id); - for timeline in not_broken_timelines { - timeline.set_state(TimelineState::Active); - } + for timeline in not_broken_timelines { + timeline.set_state(TimelineState::Active); } - TenantState::Paused | TenantState::Broken => { - for timeline in not_broken_timelines { - timeline.set_state(TimelineState::Suspended); - } - } - TenantState::Loading => { /* Should we do something here? */ } - TenantState::Attaching => { /* Should we do something here? */ } } } - } + }); + result + } + + + /// Change tenant status to paused, to mark that it is being shut down + pub fn set_paused(&self) { + self.state.send_modify(|current_state| { + match *current_state { + TenantState::Active | TenantState::Loading | TenantState::Attaching => { + *current_state = TenantState::Paused; + + // FIXME: If the tenant is still Loading or Attaching, new timelines + // might be created after this. That's harmless, as the Timelines + // won't be accessible to anyone, when the Tenant is in Paused + // state. + let timelines_accessor = self.timelines.lock().unwrap(); + let not_broken_timelines = timelines_accessor + .values() + .filter(|timeline| timeline.current_state() != TimelineState::Broken); + for timeline in not_broken_timelines { + timeline.set_state(TimelineState::Suspended); + } + } + TenantState::Broken => { + info!("Cannot set tenant to Paused state, it is already in Broken state"); + } + TenantState::Paused => { + // The tenant was detached, or system shutdown was requested, while we were + // loading or attaching the tenant. + info!("Tenant is already in Paused state"); + } + } + }); + } + + pub fn set_broken(&self) { + self.state.send_modify(|current_state| { + match *current_state { + TenantState::Active => { + // Broken tenants can currently only used for fatal errors that happen + // while loading or attaching a tenant. A tenant that has already been + // activated should never be marked as broken. We cope with it the best + // we can, but it shouldn't happen. + *current_state = TenantState::Broken; + warn!("Changing Active tenant to Broken state"); + } + TenantState::Broken => { + // This shouldn't happen either + warn!("Tenant is already broken"); + } + TenantState::Paused => { + // This shouldn't happen either + *current_state = TenantState::Broken; + warn!("Marking Paused tenant as Broken"); + } + TenantState::Loading | TenantState::Attaching => { + *current_state = TenantState::Broken; + } + } + }); } pub fn subscribe_for_state_updates(&self) -> watch::Receiver { diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index ed3ecfd429..c85df05494 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -168,7 +168,7 @@ pub async fn shutdown_all_tenants() { for (_, tenant) in m.drain() { if tenant.is_active() { // updates tenant state, forbidding new GC and compaction iterations from starting - tenant.set_state(TenantState::Paused); + tenant.set_paused(); tenants_to_shut_down.push(tenant) } } @@ -294,7 +294,7 @@ pub async fn detach_tenant( None => anyhow::bail!("Tenant not found for id {tenant_id}"), }; - tenant.set_state(TenantState::Paused); + tenant.set_paused(); // shutdown all tenant and timeline tasks: gc, compaction, page service) task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;