diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 9c94b53318..9d1ad8a022 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -23,7 +23,7 @@ pub enum TenantState { Active, /// A tenant is recognized by pageserver, but it is being detached or the /// system is being shut down. - Paused, + Stopping, /// A tenant is recognized by the pageserver, but can no longer be used for /// any operations, because it failed to be activated. Broken, @@ -35,7 +35,7 @@ impl TenantState { Self::Loading => true, Self::Attaching => true, Self::Active => false, - Self::Paused => false, + Self::Stopping => false, Self::Broken => false, } } @@ -53,7 +53,7 @@ pub enum TimelineState { Suspended, /// A timeline is recognized by pageserver, but not yet ready to operate and not allowed to /// automatically become Active after certain events: only a management call can change this status. - Paused, + Stopping, /// A timeline is recognized by the pageserver, but can no longer be used for /// any operations, because it failed to be activated. Broken, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7a5de28eff..981c049111 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1248,7 +1248,7 @@ impl Tenant { /// Removes timeline-related in-memory data pub async fn delete_timeline(&self, timeline_id: TimelineId) -> anyhow::Result<()> { - // Transition the timeline into TimelineState::Paused. + // Transition the timeline into TimelineState::Stopping. // This should prevent new operations from starting. let timeline = { let mut timelines = self.timelines.lock().unwrap(); @@ -1269,14 +1269,14 @@ impl Tenant { }; let timeline = Arc::clone(timeline_entry.get()); - timeline.set_state(TimelineState::Paused); + timeline.set_state(TimelineState::Stopping); drop(timelines); timeline }; info!("waiting for layer_removal_cs.lock()"); - // No timeout here, GC & Compaction should be responsive to the `TimelineState::Paused` change. + // No timeout here, GC & Compaction should be responsive to the `TimelineState::Stopping` change. let layer_removal_guard = timeline.layer_removal_cs.lock().await; info!("got layer_removal_cs.lock(), deleting layer files"); @@ -1301,7 +1301,7 @@ impl Tenant { let children_exist = timelines .iter() .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id)); - // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Paused`. + // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`. // We already deleted the layer files, so it's probably best to panic. // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart) if children_exist { @@ -1355,10 +1355,10 @@ impl Tenant { "Could not activate tenant because it is in broken state" )); } - TenantState::Paused => { + TenantState::Stopping => { // The tenant was detached, or system shutdown was requested, while we were // loading or attaching the tenant. - info!("Tenant is already in Paused state, skipping activation"); + info!("Tenant is already in Stopping state, skipping activation"); } TenantState::Loading | TenantState::Attaching => { *current_state = TenantState::Active; @@ -1384,16 +1384,16 @@ impl Tenant { result } - /// Change tenant status to paused, to mark that it is being shut down - pub fn set_paused(&self) { + /// Change tenant status to Stopping, to mark that it is being shut down + pub fn set_stopping(&self) { self.state.send_modify(|current_state| { match *current_state { TenantState::Active | TenantState::Loading | TenantState::Attaching => { - *current_state = TenantState::Paused; + *current_state = TenantState::Stopping; // FIXME: If the tenant is still Loading or Attaching, new timelines // might be created after this. That's harmless, as the Timelines - // won't be accessible to anyone, when the Tenant is in Paused + // won't be accessible to anyone, when the Tenant is in Stopping // state. let timelines_accessor = self.timelines.lock().unwrap(); let not_broken_timelines = timelines_accessor @@ -1404,12 +1404,12 @@ impl Tenant { } } TenantState::Broken => { - info!("Cannot set tenant to Paused state, it is already in Broken state"); + info!("Cannot set tenant to Stopping state, it is already in Broken state"); } - TenantState::Paused => { + TenantState::Stopping => { // The tenant was detached, or system shutdown was requested, while we were // loading or attaching the tenant. - info!("Tenant is already in Paused state"); + info!("Tenant is already in Stopping state"); } } }); @@ -1430,10 +1430,10 @@ impl Tenant { // This shouldn't happen either warn!("Tenant is already broken"); } - TenantState::Paused => { + TenantState::Stopping => { // This shouldn't happen either *current_state = TenantState::Broken; - warn!("Marking Paused tenant as Broken"); + warn!("Marking Stopping tenant as Broken"); } TenantState::Loading | TenantState::Attaching => { *current_state = TenantState::Broken; @@ -1458,7 +1458,7 @@ impl Tenant { TenantState::Active { .. } => { return Ok(()); } - TenantState::Broken | TenantState::Paused => { + TenantState::Broken | TenantState::Stopping => { // There's no chance the tenant can transition back into ::Active anyhow::bail!( "Tenant {} will not become active. Current state: {:?}", diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 8ad2b4cd93..3dd6b1f9d6 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -555,8 +555,8 @@ impl Timeline { let _layer_removal_cs = self.layer_removal_cs.lock().await; // Is the timeline being deleted? let state = *self.state.borrow(); - if state == TimelineState::Paused { - anyhow::bail!("timeline is paused: {:?}", state); + if state == TimelineState::Stopping { + anyhow::bail!("timeline is Stopping"); } let target_file_size = self.get_checkpoint_distance(); @@ -668,8 +668,8 @@ impl Timeline { (TimelineState::Broken, _) => { error!("Ignoring state update {new_state:?} for broken tenant"); } - (TimelineState::Paused, TimelineState::Active) => { - debug!("Not activating a paused timeline"); + (TimelineState::Stopping, TimelineState::Active) => { + debug!("Not activating a Stopping timeline"); } (_, new_state) => { self.state.send_replace(new_state); @@ -1251,7 +1251,7 @@ impl Timeline { match new_state { // we're running this job for active timelines only TimelineState::Active => continue, - TimelineState::Broken | TimelineState::Paused | TimelineState::Suspended => return Some(new_state), + TimelineState::Broken | TimelineState::Stopping | TimelineState::Suspended => return Some(new_state), } } Err(_sender_dropped_error) => return None, @@ -2393,8 +2393,8 @@ impl Timeline { let _layer_removal_cs = self.layer_removal_cs.lock().await; // Is the timeline being deleted? let state = *self.state.borrow(); - if state == TimelineState::Paused { - anyhow::bail!("timeline is paused: {:?}", state); + if state == TimelineState::Stopping { + anyhow::bail!("timeline is Stopping"); } let (horizon_cutoff, pitr_cutoff, retain_lsns) = { diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index cbdbe01ddc..70de713a26 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -170,7 +170,7 @@ pub async fn shutdown_all_tenants() { for (_, tenant) in m.drain() { if tenant.is_active() { // updates tenant state, forbidding new GC and compaction iterations from starting - tenant.set_paused(); + tenant.set_stopping(); tenants_to_shut_down.push(tenant) } } @@ -310,7 +310,7 @@ pub async fn detach_tenant( None => anyhow::bail!("Tenant not found for id {tenant_id}"), }; - tenant.set_paused(); + tenant.set_stopping(); // shutdown all tenant and timeline tasks: gc, compaction, page service) task_mgr::shutdown_tasks(None, Some(tenant_id), None).await; diff --git a/pageserver/src/walreceiver/connection_manager.rs b/pageserver/src/walreceiver/connection_manager.rs index 9a1b55ac1c..c598f20b10 100644 --- a/pageserver/src/walreceiver/connection_manager.rs +++ b/pageserver/src/walreceiver/connection_manager.rs @@ -214,7 +214,7 @@ async fn connection_manager_loop_step( match new_state { // we're already active as walreceiver, no need to reactivate TimelineState::Active => continue, - TimelineState::Broken | TimelineState::Paused | TimelineState::Suspended => return ControlFlow::Continue(new_state), + TimelineState::Broken | TimelineState::Stopping | TimelineState::Suspended => return ControlFlow::Continue(new_state), } } Err(_sender_dropped_error) => return ControlFlow::Break(()), diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 1787591bb1..7fc2a7c24b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1765,8 +1765,8 @@ class NeonPageserver(PgProtocol): # FIXME: we shouldn't be considering it an error: https://github.com/neondatabase/neon/issues/2946 ".*could not flush frozen layer.*queue is in state Stopped", # when schedule layer upload fails because queued got closed before compaction got killed ".*wait for layer upload ops to complete.*", # .*Caused by:.*wait_completion aborted because upload queue was stopped - ".*gc_loop.*Gc failed, retrying in.*timeline is paused: Paused", # When gc checks timeline state after acquiring layer_removal_cs - ".*compaction_loop.*Compaction failed, retrying in.*timeline is paused: Paused", # When compaction checks timeline state after acquiring layer_removal_cs + ".*gc_loop.*Gc failed, retrying in.*timeline is Stopping", # When gc checks timeline state after acquiring layer_removal_cs + ".*compaction_loop.*Compaction failed, retrying in.*timeline is Stopping", # When compaction checks timeline state after acquiring layer_removal_cs ".*query handler for 'pagestream.*failed: Timeline .* was not found", # postgres reconnects while timeline_delete doesn't hold the tenant's timelines.lock() ]