From d8a6942e0e15a8019a957aba2407f777ad9f86a6 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 12 Feb 2024 13:47:48 +0000 Subject: [PATCH] Refactor detach methods into TenantManager --- pageserver/src/http/routes.rs | 33 ++++--- pageserver/src/tenant/mgr.rs | 167 +++++++++++++++++----------------- 2 files changed, 104 insertions(+), 96 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index af9a3c7301..2bed1cb90a 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -853,14 +853,16 @@ async fn tenant_detach_handler( let state = get_state(&request); let conf = state.conf; - mgr::detach_tenant( - conf, - tenant_shard_id, - detach_ignored.unwrap_or(false), - &state.deletion_queue_client, - ) - .instrument(info_span!("tenant_detach", %tenant_id, shard_id=%tenant_shard_id.shard_slug())) - .await?; + state + .tenant_manager + .detach_tenant( + conf, + tenant_shard_id, + detach_ignored.unwrap_or(false), + &state.deletion_queue_client, + ) + .instrument(info_span!("tenant_detach", %tenant_id, shard_id=%tenant_shard_id.shard_slug())) + .await?; json_response(StatusCode::OK, ()) } @@ -1381,13 +1383,14 @@ async fn put_tenant_location_config_handler( // The `Detached` state is special, it doesn't upsert a tenant, it removes // its local disk content and drops it from memory. if let LocationConfigMode::Detached = request_data.config.mode { - if let Err(e) = - mgr::detach_tenant(conf, tenant_shard_id, true, &state.deletion_queue_client) - .instrument(info_span!("tenant_detach", - tenant_id = %tenant_shard_id.tenant_id, - shard_id = %tenant_shard_id.shard_slug() - )) - .await + if let Err(e) = state + .tenant_manager + .detach_tenant(conf, tenant_shard_id, true, &state.deletion_queue_client) + .instrument(info_span!("tenant_detach", + tenant_id = %tenant_shard_id.tenant_id, + shard_id = %tenant_shard_id.shard_slug() + )) + .await { match e { TenantStateError::SlotError(TenantSlotError::NotFound(_)) => { diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index dd8ba3efba..e7d48cb178 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -1661,6 +1661,92 @@ impl TenantManager { Ok(()) } + + pub(crate) async fn detach_tenant( + &self, + conf: &'static PageServerConf, + tenant_shard_id: TenantShardId, + detach_ignored: bool, + deletion_queue_client: &DeletionQueueClient, + ) -> Result<(), TenantStateError> { + let tmp_path = self + .detach_tenant0( + conf, + &TENANTS, + tenant_shard_id, + detach_ignored, + deletion_queue_client, + ) + .await?; + // Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory. + // After a tenant is detached, there are no more task_mgr tasks for that tenant_id. + let task_tenant_id = None; + task_mgr::spawn( + task_mgr::BACKGROUND_RUNTIME.handle(), + TaskKind::MgmtRequest, + task_tenant_id, + None, + "tenant_files_delete", + false, + async move { + fs::remove_dir_all(tmp_path.as_path()) + .await + .with_context(|| format!("tenant directory {:?} deletion", tmp_path)) + }, + ); + Ok(()) + } + + async fn detach_tenant0( + &self, + conf: &'static PageServerConf, + tenants: &std::sync::RwLock, + tenant_shard_id: TenantShardId, + detach_ignored: bool, + deletion_queue_client: &DeletionQueueClient, + ) -> Result { + let tenant_dir_rename_operation = |tenant_id_to_clean: TenantShardId| async move { + let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean); + safe_rename_tenant_dir(&local_tenant_directory) + .await + .with_context(|| { + format!("local tenant directory {local_tenant_directory:?} rename") + }) + }; + + let removal_result = remove_tenant_from_memory( + tenants, + tenant_shard_id, + tenant_dir_rename_operation(tenant_shard_id), + ) + .await; + + // Flush pending deletions, so that they have a good chance of passing validation + // before this tenant is potentially re-attached elsewhere. + deletion_queue_client.flush_advisory(); + + // Ignored tenants are not present in memory and will bail the removal from memory operation. + // Before returning the error, check for ignored tenant removal case — we only need to clean its local files then. + if detach_ignored + && matches!( + removal_result, + Err(TenantStateError::SlotError(TenantSlotError::NotFound(_))) + ) + { + let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); + if tenant_ignore_mark.exists() { + info!("Detaching an ignored tenant"); + let tmp_path = tenant_dir_rename_operation(tenant_shard_id) + .await + .with_context(|| { + format!("Ignored tenant {tenant_shard_id} local directory rename") + })?; + return Ok(tmp_path); + } + } + + removal_result + } } #[derive(Debug, thiserror::Error)] @@ -1862,87 +1948,6 @@ pub(crate) enum TenantStateError { Other(#[from] anyhow::Error), } -pub(crate) async fn detach_tenant( - conf: &'static PageServerConf, - tenant_shard_id: TenantShardId, - detach_ignored: bool, - deletion_queue_client: &DeletionQueueClient, -) -> Result<(), TenantStateError> { - let tmp_path = detach_tenant0( - conf, - &TENANTS, - tenant_shard_id, - detach_ignored, - deletion_queue_client, - ) - .await?; - // Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory. - // After a tenant is detached, there are no more task_mgr tasks for that tenant_id. - let task_tenant_id = None; - task_mgr::spawn( - task_mgr::BACKGROUND_RUNTIME.handle(), - TaskKind::MgmtRequest, - task_tenant_id, - None, - "tenant_files_delete", - false, - async move { - fs::remove_dir_all(tmp_path.as_path()) - .await - .with_context(|| format!("tenant directory {:?} deletion", tmp_path)) - }, - ); - Ok(()) -} - -async fn detach_tenant0( - conf: &'static PageServerConf, - tenants: &std::sync::RwLock, - tenant_shard_id: TenantShardId, - detach_ignored: bool, - deletion_queue_client: &DeletionQueueClient, -) -> Result { - let tenant_dir_rename_operation = |tenant_id_to_clean: TenantShardId| async move { - let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean); - safe_rename_tenant_dir(&local_tenant_directory) - .await - .with_context(|| format!("local tenant directory {local_tenant_directory:?} rename")) - }; - - let removal_result = remove_tenant_from_memory( - tenants, - tenant_shard_id, - tenant_dir_rename_operation(tenant_shard_id), - ) - .await; - - // Flush pending deletions, so that they have a good chance of passing validation - // before this tenant is potentially re-attached elsewhere. - deletion_queue_client.flush_advisory(); - - // Ignored tenants are not present in memory and will bail the removal from memory operation. - // Before returning the error, check for ignored tenant removal case — we only need to clean its local files then. - if detach_ignored - && matches!( - removal_result, - Err(TenantStateError::SlotError(TenantSlotError::NotFound(_))) - ) - { - let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_shard_id); - if tenant_ignore_mark.exists() { - info!("Detaching an ignored tenant"); - let tmp_path = tenant_dir_rename_operation(tenant_shard_id) - .await - .with_context(|| { - format!("Ignored tenant {tenant_shard_id} local directory rename") - })?; - return Ok(tmp_path); - } - } - - removal_result -} - pub(crate) async fn load_tenant( conf: &'static PageServerConf, tenant_id: TenantId,