diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 5ce09500ee..8766de1629 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -280,7 +280,6 @@ impl From for ApiError { use crate::tenant::delete::DeleteTenantError::*; match value { Get(g) => ApiError::from(g), - e @ AlreadyInProgress => ApiError::Conflict(e.to_string()), Timeline(t) => ApiError::from(t), NotAttached => ApiError::NotFound(anyhow::anyhow!("Tenant is not attached").into()), SlotError(e) => e.into(), diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 82afeca338..b37bb702b5 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -257,8 +257,6 @@ pub struct Tenant { eviction_task_tenant_state: tokio::sync::Mutex, - pub(crate) delete_progress: Arc>, - // Cancellation token fires when we have entered shutdown(). This is a parent of // Timelines' cancellation token. pub(crate) cancel: CancellationToken, @@ -634,9 +632,9 @@ impl Tenant { } }; - info!("pending_deletion {}", pending_deletion.is_some()); + info!("pending_deletion {}", pending_deletion); - if let Some(deletion) = pending_deletion { + if pending_deletion { // as we are no longer loading, signal completion by dropping // the completion while we resume deletion drop(_completion); @@ -653,7 +651,6 @@ impl Tenant { } match DeleteTenantFlow::resume_from_attach( - deletion, &tenant_clone, preload, tenants, @@ -2372,7 +2369,6 @@ impl Tenant { cached_logical_sizes: tokio::sync::Mutex::new(HashMap::new()), cached_synthetic_tenant_size: Arc::new(AtomicU64::new(0)), eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()), - delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTenantFlow::default())), cancel: CancellationToken::default(), gate: Gate::new(format!("Tenant<{tenant_id}>")), } diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 066f239ff0..91d4a62329 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -4,7 +4,6 @@ use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::models::TenantState; use remote_storage::{GenericRemoteStorage, RemotePath}; -use tokio::sync::OwnedMutexGuard; use tokio_util::sync::CancellationToken; use tracing::{error, instrument, warn, Instrument, Span}; @@ -39,9 +38,6 @@ pub(crate) enum DeleteTenantError { #[error("Invalid state {0}. Expected Active or Broken")] InvalidState(TenantState), - #[error("Tenant deletion is already in progress")] - AlreadyInProgress, - #[error("Tenant map slot error {0}")] SlotError(#[from] TenantSlotError), @@ -55,8 +51,6 @@ pub(crate) enum DeleteTenantError { Other(#[from] anyhow::Error), } -type DeletionGuard = tokio::sync::OwnedMutexGuard; - fn remote_tenant_delete_mark_path( conf: &PageServerConf, tenant_id: &TenantId, @@ -287,14 +281,14 @@ impl DeleteTenantFlow { ) -> Result<(), DeleteTenantError> { span::debug_assert_current_span_has_tenant_id(); - let mut guard = Self::prepare(&tenant).await?; + Self::prepare(&tenant).await?; - if let Err(e) = Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant).await { + if let Err(e) = Self::run_inner(conf, remote_storage.as_ref(), &tenant).await { tenant.set_broken(format!("{e:#}")).await; return Err(e); } - Self::schedule_background(guard, conf, remote_storage, tenants, tenant); + Self::schedule_background(conf, remote_storage, tenants, tenant); Ok(()) } @@ -304,13 +298,10 @@ impl DeleteTenantFlow { // will result in an error, but here we need to be able to retry shutdown when tenant deletion is retried. // So the solution is to set tenant state to broken. async fn run_inner( - guard: &mut OwnedMutexGuard, conf: &'static PageServerConf, remote_storage: Option<&GenericRemoteStorage>, tenant: &Tenant, ) -> Result<(), DeleteTenantError> { - guard.mark_in_progress()?; - fail::fail_point!("tenant-delete-before-create-remote-mark", |_| { Err(anyhow::anyhow!( "failpoint: tenant-delete-before-create-remote-mark" @@ -345,46 +336,25 @@ impl DeleteTenantFlow { Ok(()) } - fn mark_in_progress(&mut self) -> anyhow::Result<()> { - match self { - Self::Finished => anyhow::bail!("Bug. Is in finished state"), - Self::InProgress { .. } => { /* We're in a retry */ } - Self::NotStarted => { /* Fresh start */ } - } - - *self = Self::InProgress; - - Ok(()) - } - pub(crate) async fn should_resume_deletion( conf: &'static PageServerConf, remote_mark_exists: bool, tenant: &Tenant, - ) -> Result, DeleteTenantError> { - let acquire = |t: &Tenant| { - Some( - Arc::clone(&t.delete_progress) - .try_lock_owned() - .expect("we're the only owner during init"), - ) - }; - + ) -> Result { if remote_mark_exists { - return Ok(acquire(tenant)); + return Ok(true); } let tenant_id = tenant.tenant_id; // Check local mark first, if its there there is no need to go to s3 to check whether remote one exists. if conf.tenant_deleted_mark_file_path(&tenant_id).exists() { - Ok(acquire(tenant)) + Ok(true) } else { - Ok(None) + Ok(false) } } pub(crate) async fn resume_from_attach( - guard: DeletionGuard, tenant: &Arc, preload: Option, tenants: &'static std::sync::RwLock, @@ -403,19 +373,10 @@ impl DeleteTenantFlow { .await .context("attach")?; - Self::background( - guard, - tenant.conf, - tenant.remote_storage.clone(), - tenants, - tenant, - ) - .await + Self::background(tenant.conf, tenant.remote_storage.clone(), tenants, tenant).await } - async fn prepare( - tenant: &Arc, - ) -> Result, DeleteTenantError> { + async fn prepare(tenant: &Arc) -> Result<(), DeleteTenantError> { // FIXME: unsure about active only. Our init jobs may not be cancellable properly, // so at least for now allow deletions only for active tenants. TODO recheck // Broken and Stopping is needed for retries. @@ -426,10 +387,6 @@ impl DeleteTenantFlow { return Err(DeleteTenantError::InvalidState(tenant.current_state())); } - let guard = Arc::clone(&tenant.delete_progress) - .try_lock_owned() - .map_err(|_| DeleteTenantError::AlreadyInProgress)?; - fail::fail_point!("tenant-delete-before-shutdown", |_| { Err(anyhow::anyhow!("failpoint: tenant-delete-before-shutdown"))? }); @@ -449,11 +406,10 @@ impl DeleteTenantFlow { ))); } - Ok(guard) + Ok(()) } fn schedule_background( - guard: OwnedMutexGuard, conf: &'static PageServerConf, remote_storage: Option, tenants: &'static std::sync::RwLock, @@ -469,9 +425,7 @@ impl DeleteTenantFlow { "tenant_delete", false, async move { - if let Err(err) = - Self::background(guard, conf, remote_storage, tenants, &tenant).await - { + if let Err(err) = Self::background(conf, remote_storage, tenants, &tenant).await { error!("Error: {err:#}"); tenant.set_broken(format!("{err:#}")).await; }; @@ -486,7 +440,6 @@ impl DeleteTenantFlow { } async fn background( - mut guard: OwnedMutexGuard, conf: &PageServerConf, remote_storage: Option, tenants: &'static std::sync::RwLock, @@ -550,8 +503,6 @@ impl DeleteTenantFlow { .set(locked.len() as u64); } - *guard = Self::Finished; - Ok(()) } }