From fd22fc5b7d29214e8544c65062494c2ad03d744d Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 4 Jun 2024 16:16:50 +0100 Subject: [PATCH] pageserver: include heatmap in tenant deletion (#7928) ## Problem This was an oversight when adding heatmaps: because they are at the top level of the tenant, they aren't included in the catch-all list & delete that happens for timeline paths. This doesn't break anything, but it leaves behind a few kilobytes of garbage in the S3 bucket after a tenant is deleted, generating work for the scrubber. ## Summary of changes - During deletion, explicitly remove the heatmap file - In test_tenant_delete_smoke, upload a heatmap so that the test would fail its "remote storage empty after delete" check if we didn't delete it. --- pageserver/src/tenant/delete.rs | 20 ++++++++++++++++++++ test_runner/regress/test_tenant_delete.py | 3 +++ 2 files changed, 23 insertions(+) diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 7c6640eaac..8b36aa15e5 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -16,6 +16,7 @@ use crate::{ task_mgr::{self, TaskKind}, tenant::{ mgr::{TenantSlot, TenantsMapRemoveResult}, + remote_timeline_client::remote_heatmap_path, timeline::ShutdownMode, }, }; @@ -531,6 +532,25 @@ impl DeleteTenantFlow { } } + // Remove top-level tenant objects that don't belong to a timeline, such as heatmap + let heatmap_path = remote_heatmap_path(&tenant.tenant_shard_id()); + if let Some(Err(e)) = backoff::retry( + || async { + remote_storage + .delete(&heatmap_path, &task_mgr::shutdown_token()) + .await + }, + TimeoutOrCancel::caused_by_cancel, + FAILED_UPLOAD_WARN_THRESHOLD, + FAILED_REMOTE_OP_RETRIES, + "remove_remote_tenant_heatmap", + &task_mgr::shutdown_token(), + ) + .await + { + tracing::warn!("Failed to delete heatmap at {heatmap_path}: {e}"); + } + let timelines_path = conf.timelines_path(&tenant.tenant_shard_id); // May not exist if we fail in cleanup_remaining_fs_traces after removing it if timelines_path.exists() { diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py index 3fc44de6fa..e120aa1a7c 100644 --- a/test_runner/regress/test_tenant_delete.py +++ b/test_runner/regress/test_tenant_delete.py @@ -88,6 +88,9 @@ def test_tenant_delete_smoke( parent = timeline + # Upload a heatmap so that we exercise deletion of that too + ps_http.tenant_heatmap_upload(tenant_id) + iterations = poll_for_remote_storage_iterations(remote_storage_kind) assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 2