diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index e667645c0a..000945b3cb 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; use strum_macros; use utils::{ - completion, generation::Generation, history_buffer::HistoryBufferWithDropCounter, id::{NodeId, TenantId, TimelineId}, @@ -78,12 +77,7 @@ pub enum TenantState { /// system is being shut down. /// /// Transitions out of this state are possible through `set_broken()`. - Stopping { - // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field, - // otherwise it will not be skipped during deserialization - #[serde(skip)] - progress: completion::Barrier, - }, + Stopping, /// The tenant is recognized by the pageserver, but can no longer be used for /// any operations. /// @@ -993,13 +987,7 @@ mod tests { "Activating", ), (line!(), TenantState::Active, "Active"), - ( - line!(), - TenantState::Stopping { - progress: utils::completion::Barrier::default(), - }, - "Stopping", - ), + (line!(), TenantState::Stopping {}, "Stopping"), ( line!(), TenantState::Broken { diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 62da6b618e..8f3d979e71 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -352,14 +352,14 @@ impl Debug for DeleteTimelineError { } pub enum SetStoppingError { - AlreadyStopping(completion::Barrier), + AlreadyStopping, Broken, } impl Debug for SetStoppingError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::AlreadyStopping(_) => f.debug_tuple("AlreadyStopping").finish(), + Self::AlreadyStopping => f.debug_tuple("AlreadyStopping").finish(), Self::Broken => write!(f, "Broken"), } } @@ -415,6 +415,11 @@ enum CreateTimelineCause { Delete, } +#[derive(Debug)] +enum ShutdownError { + AlreadyStopping, +} + impl Tenant { /// Yet another 
helper for timeline initialization. /// @@ -1803,16 +1808,7 @@ impl Tenant { /// - detach + ignore (freeze_and_flush == false) /// /// This will attempt to shutdown even if tenant is broken. - /// - /// `shutdown_progress` is a [`completion::Barrier`] for the shutdown initiated by this call. - /// If the tenant is already shutting down, we return a clone of the first shutdown call's - /// `Barrier` as an `Err`. This not-first caller can use the returned barrier to join with - /// the ongoing shutdown. - async fn shutdown( - &self, - shutdown_progress: completion::Barrier, - freeze_and_flush: bool, - ) -> Result<(), completion::Barrier> { + async fn shutdown(&self, freeze_and_flush: bool) -> Result<(), ShutdownError> { span::debug_assert_current_span_has_tenant_id(); // Set tenant (and its timlines) to Stoppping state. // @@ -1832,15 +1828,17 @@ // It's mesed up. // we just ignore the failure to stop - match self.set_stopping(shutdown_progress, false, false).await { + match self.set_stopping(false, false).await { Ok(()) => {} Err(SetStoppingError::Broken) => { // assume that this is acceptable } - Err(SetStoppingError::AlreadyStopping(other)) => { - // give caller the option to wait for this this shutdown - info!("Tenant::shutdown: AlreadyStopping"); - return Err(other); + Err(SetStoppingError::AlreadyStopping) => { + // This should not happen: individual tenant shutdowns are guarded by + // [`TenantSlot::InProgress`], and when we shutdown all tenants during + // process shutdown, we just wait for those InProgress ones to finish. + error!("Called Tenant::shutdown while already stopping"); + return Err(ShutdownError::AlreadyStopping); } }; @@ -1881,7 +1879,6 @@ impl Tenant { /// `allow_transition_from_attaching` is needed for the special case of attaching deleted tenant. 
async fn set_stopping( &self, - progress: completion::Barrier, allow_transition_from_loading: bool, allow_transition_from_attaching: bool, ) -> Result<(), SetStoppingError> { @@ -1898,7 +1895,7 @@ impl Tenant { false } TenantState::Loading => allow_transition_from_loading, - TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true, + TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping => true, }) .await .expect("cannot drop self.state while on a &self method"); @@ -1913,21 +1910,21 @@ impl Tenant { if !allow_transition_from_attaching { unreachable!("2we ensured above that we're done with activation, and, there is no re-activation") }; - *current_state = TenantState::Stopping { progress }; + *current_state = TenantState::Stopping; true } TenantState::Loading => { if !allow_transition_from_loading { unreachable!("3we ensured above that we're done with activation, and, there is no re-activation") }; - *current_state = TenantState::Stopping { progress }; + *current_state = TenantState::Stopping; true } TenantState::Active => { // FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines // are created after the transition to Stopping. That's harmless, as the Timelines // won't be accessible to anyone afterwards, because the Tenant is in Stopping state. - *current_state = TenantState::Stopping { progress }; + *current_state = TenantState::Stopping; // Continue stopping outside the closure. We need to grab timelines.lock() // and we plan to turn it into a tokio::sync::Mutex in a future patch. 
true @@ -1939,9 +1936,9 @@ impl Tenant { err = Some(SetStoppingError::Broken); false } - TenantState::Stopping { progress } => { + TenantState::Stopping => { info!("Tenant is already in Stopping state"); - err = Some(SetStoppingError::AlreadyStopping(progress.clone())); + err = Some(SetStoppingError::AlreadyStopping); false } }); @@ -4128,7 +4125,7 @@ mod tests { make_some_layers(tline.as_ref(), Lsn(0x8000), &ctx).await?; // so that all uploads finish & we can call harness.load() below again tenant - .shutdown(Default::default(), true) + .shutdown(true) .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id)) .await .ok() @@ -4169,7 +4166,7 @@ mod tests { // so that all uploads finish & we can call harness.load() below again tenant - .shutdown(Default::default(), true) + .shutdown(true) .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id)) .await .ok() @@ -4231,7 +4228,7 @@ mod tests { drop(tline); // so that all uploads finish & we can call harness.try_load() below again tenant - .shutdown(Default::default(), true) + .shutdown(true) .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id)) .await .ok() diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 7344dd1d92..fa63c83c17 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -9,7 +9,7 @@ use tokio_util::sync::CancellationToken; use tracing::{error, instrument, warn, Instrument, Span}; use utils::{ - backoff, completion, crashsafe, fs_ext, + backoff, crashsafe, fs_ext, id::{TenantId, TimelineId}, }; @@ -391,10 +391,8 @@ impl DeleteTenantFlow { init_order: Option, ctx: &RequestContext, ) -> Result<(), DeleteTenantError> { - let (_, progress) = completion::channel(); - tenant - .set_stopping(progress, false, true) + .set_stopping(false, true) .await .expect("cant be stopping or broken"); @@ -434,16 +432,13 @@ impl DeleteTenantFlow { Err(anyhow::anyhow!("failpoint: tenant-delete-before-shutdown"))? 
}); - // make pageserver shutdown not to wait for our completion - let (_, progress) = completion::channel(); - // It would be good to only set stopping here and continue shutdown in the background, but shutdown is not idempotent. // i e it is an error to do: // tenant.set_stopping // tenant.shutdown // Its also bad that we're holding tenants.read here. // TODO relax set_stopping to be idempotent? - if tenant.shutdown(progress, false).await.is_err() { + if tenant.shutdown(false).await.is_err() { return Err(DeleteTenantError::Other(anyhow::anyhow!( "tenant shutdown is already in progress" ))); diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index d6e8513444..7dfac5eead 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -27,7 +27,9 @@ use crate::deletion_queue::DeletionQueueClient; use crate::task_mgr::{self, TaskKind}; use crate::tenant::config::{AttachmentMode, LocationConf, LocationMode, TenantConfOpt}; use crate::tenant::delete::DeleteTenantFlow; -use crate::tenant::{create_tenant_files, AttachedTenantConf, SpawnMode, Tenant, TenantState}; +use crate::tenant::{ + create_tenant_files, AttachedTenantConf, ShutdownError, SpawnMode, Tenant, TenantState, +}; use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX}; use utils::crashsafe::path_with_suffix_extension; @@ -532,8 +534,6 @@ pub(crate) async fn shutdown_all_tenants() { } async fn shutdown_all_tenants0(tenants: &std::sync::RwLock) { - use utils::completion; - // Under write lock (prevent any new tenants being created), extract the list // of tenants to shut down. 
let (in_progress_ops, tenants_to_shut_down) = { @@ -597,14 +597,15 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock) { async move { let freeze_and_flush = true; - let res = { - let (_guard, shutdown_progress) = completion::channel(); - tenant.shutdown(shutdown_progress, freeze_and_flush).await - }; - - if let Err(other_progress) = res { - // join the another shutdown in progress - other_progress.wait().await; + let res = { tenant.shutdown(freeze_and_flush).await }; + if let Err(e) = res { + match e { + ShutdownError::AlreadyStopping => { + // TODO: to ensure this can _never_ happen, we need to get rid of + // the horrible DeleteTenantFlow::should_resume_deletion + tracing::warn!(%tenant_id, "Tenant already stopping during shutdown"); + } + } } // we cannot afford per tenant logging here, because if s3 is degraded, we are @@ -785,8 +786,6 @@ pub(crate) async fn upsert_location( // for Attached->Attached transitions in the same generation. By this point, // if we see an attached tenant we know it will be discarded and should be // shut down. 
- let (_guard, progress) = utils::completion::channel(); - match tenant.get_attach_mode() { AttachmentMode::Single | AttachmentMode::Multi => { // Before we leave our state as the presumed holder of the latest generation, @@ -799,11 +798,11 @@ pub(crate) async fn upsert_location( }; info!("Shutting down attached tenant"); - match tenant.shutdown(progress, false).await { + match tenant.shutdown(false).await { Ok(()) => {} - Err(barrier) => { - info!("Shutdown already in progress, waiting for it to complete"); - barrier.wait().await; + Err(ShutdownError::AlreadyStopping) => { + // This shouldn't have happened, we are guarded by TenantSlot::InProgress + warn!("Shutdown unexpectedly already in progress"); } } } @@ -1472,8 +1471,6 @@ async fn remove_tenant_from_memory( where F: std::future::Future>, { - use utils::completion; - let mut tenant_guard = tenant_map_acquire_slot_impl(&tenant_id, tenants, Some(true))?; let tenant_slot = tenant_guard.take_value(); @@ -1484,9 +1481,6 @@ where _ => None, }; - // allow pageserver shutdown to await for our completion - let (_guard, progress) = completion::channel(); - // If the tenant was attached, shut it down gracefully. For secondary // locations this part is not necessary match &attached_tenant { @@ -1496,11 +1490,12 @@ where // shutdown is sure to transition tenant to stopping, and wait for all tasks to complete, so // that we can continue safely to cleanup. - match attached_tenant.shutdown(progress, freeze_and_flush).await { + match attached_tenant.shutdown(freeze_and_flush).await { Ok(()) => {} - Err(_other) => { - // if pageserver shutdown or other detach/ignore is already ongoing, we don't want to - // wait for it but return an error right away because these are distinct requests. + Err(ShutdownError::AlreadyStopping) => { + // This is unexpected: fail whatever operation has encountered this + // buggy state. 
+ warn!(%tenant_id, "Tenant already stopping while trying to shut down"); return Err(TenantStateError::IsStopping(tenant_id)); } }