From 349b37501050052432c284210a4eff687e5b8335 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 19 Feb 2024 14:01:36 +0000 Subject: [PATCH] pageserver: remove heatmap file during tenant delete (#6806) ## Problem Secondary mode locations keep a local copy of the heatmap, which needs cleaning up during deletion. Closes: https://github.com/neondatabase/neon/issues/6802 ## Summary of changes - Extend test_live_migration to reproduce the issue - Remove heatmap-v1.json during tenant deletion --- pageserver/src/tenant/delete.rs | 2 ++ test_runner/regress/test_pageserver_secondary.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index b64be8dcc5..3d138da7af 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -246,6 +246,8 @@ async fn cleanup_remaining_fs_traces( rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?; + rm(conf.tenant_heatmap_path(tenant_shard_id), false).await?; + fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| { Err(anyhow::anyhow!( "failpoint: tenant-delete-before-remove-tenant-dir" diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index aec989252c..cbff01dc2a 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -7,6 +7,7 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, S3Scrubber from fixtures.pageserver.utils import ( assert_prefix_empty, + poll_for_remote_storage_iterations, tenant_delete_wait_completed, ) from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind @@ -224,9 +225,8 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder): Test the sequence of location states that are used in a live migration. """ neon_env_builder.num_pageservers = 2 - neon_env_builder.enable_pageserver_remote_storage( - remote_storage_kind=RemoteStorageKind.MOCK_S3, - ) + remote_storage_kind = RemoteStorageKind.MOCK_S3 + neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind=remote_storage_kind) env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF) tenant_id = env.initial_tenant @@ -342,6 +342,12 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder): workload.churn_rows(64, pageserver_b.id) workload.validate(pageserver_b.id) + del workload + + # Check that deletion works properly on a tenant that was live-migrated + # (reproduce https://github.com/neondatabase/neon/issues/6802) + iterations = poll_for_remote_storage_iterations(remote_storage_kind) + tenant_delete_wait_completed(pageserver_b.http_client(), tenant_id, iterations) def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder):