diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs
index 4f34f3c113..1f03ed495a 100644
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -238,6 +238,30 @@ async fn cleanup_remaining_fs_traces(
     Ok(())
 }
 
+pub(crate) async fn remote_delete_mark_exists(
+    conf: &PageServerConf,
+    tenant_id: &TenantId,
+    remote_storage: &GenericRemoteStorage,
+) -> anyhow::Result<bool> {
+    // If remote storage is there we rely on it
+    let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_id).context("path")?;
+
+    let result = backoff::retry(
+        || async { remote_storage.download(&remote_mark_path).await },
+        |e| matches!(e, DownloadError::NotFound),
+        SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
+        SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
+        "fetch_tenant_deletion_mark",
+    )
+    .await;
+
+    match result {
+        Ok(_) => Ok(true),
+        Err(DownloadError::NotFound) => Ok(false),
+        Err(e) => Err(anyhow::anyhow!(e)).context("remote_delete_mark_exists")?,
+    }
+}
+
 /// Orchestrates tenant shut down of all tasks, removes its in-memory structures,
 /// and deletes its data from both disk and s3.
 /// The sequence of steps:
@@ -372,22 +396,10 @@ impl DeleteTenantFlow {
             None => return Ok(None),
         };
 
-        // If remote storage is there we rely on it
-        let remote_mark_path = remote_tenant_delete_mark_path(conf, &tenant_id)?;
-
-        let result = backoff::retry(
-            || async { remote_storage.download(&remote_mark_path).await },
-            |e| matches!(e, DownloadError::NotFound),
-            SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
-            SHOULD_RESUME_DELETION_FETCH_MARK_ATTEMPTS,
-            "fetch_tenant_deletion_mark",
-        )
-        .await;
-
-        match result {
-            Ok(_) => Ok(acquire(tenant)),
-            Err(DownloadError::NotFound) => Ok(None),
-            Err(e) => Err(anyhow::anyhow!(e)).context("should_resume_deletion")?,
+        if remote_delete_mark_exists(conf, &tenant_id, remote_storage).await? {
+            Ok(acquire(tenant))
+        } else {
+            Ok(None)
         }
     }
 
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index ae6d237066..bb8a0d7089 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -27,7 +27,7 @@ use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME};
 use utils::fs_ext::PathExt;
 use utils::id::{TenantId, TimelineId};
 
-use super::delete::DeleteTenantError;
+use super::delete::{remote_delete_mark_exists, DeleteTenantError};
 use super::timeline::delete::DeleteTimelineFlow;
 
 /// The tenants known to the pageserver.
@@ -591,6 +591,12 @@ pub async fn attach_tenant(
     remote_storage: GenericRemoteStorage,
     ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
+    // Temporary solution; the proper fix is to resume the deletion, but that needs more plumbing around Tenant::load/Tenant::attach.
+    // Corresponding issue: https://github.com/neondatabase/neon/issues/5006
+    if remote_delete_mark_exists(conf, &tenant_id, &remote_storage).await? {
+        return Err(anyhow::anyhow!("Tenant is marked as deleted on remote storage").into());
+    }
+
     tenant_map_insert(tenant_id, || {
         let tenant_dir = create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Attach)?;
         // TODO: tenant directory remains on disk if we bail out from here on.
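The new `remote_delete_mark_exists` helper maps a successful download of the deletion mark to `true`, `DownloadError::NotFound` to `false`, and anything else to a hard error, retrying transient failures. A minimal test-side sketch of the same decision logic, assuming boto3 and a hypothetical `tenants/<tenant_id>/deleted` key layout (the real key comes from `remote_tenant_delete_mark_path` and is not shown in this diff):

```python
import boto3
from botocore.exceptions import ClientError


def tenant_delete_mark_exists(bucket: str, tenant_id: str) -> bool:
    """Sketch of the pageserver-side check: a 404 means "no mark",
    any other failure propagates. The key layout is an assumption."""
    client = boto3.client("s3")
    try:
        # HEAD is enough: we only care whether the mark object exists.
        client.head_object(Bucket=bucket, Key=f"tenants/{tenant_id}/deleted")
        return True
    except ClientError as e:
        # boto3 surfaces a missing object on HEAD as error code "404".
        if e.response["Error"]["Code"] == "404":
            return False
        raise
```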
diff --git a/test_runner/fixtures/pageserver/utils.py b/test_runner/fixtures/pageserver/utils.py
index a2a49b8a6e..b61878c2a6 100644
--- a/test_runner/fixtures/pageserver/utils.py
+++ b/test_runner/fixtures/pageserver/utils.py
@@ -1,6 +1,8 @@
 import time
 from typing import TYPE_CHECKING, Any, Dict, Optional
 
+from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef
+
 from fixtures.log_helper import log
 from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
 from fixtures.remote_storage import RemoteStorageKind, S3Storage
@@ -230,6 +232,24 @@ if TYPE_CHECKING:
 
 
 def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
+    response = list_prefix(neon_env_builder, prefix)
+    objects = response.get("Contents")
+    assert (
+        response["KeyCount"] == 0
+    ), f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
+
+
+def assert_prefix_not_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None):
+    response = list_prefix(neon_env_builder, prefix)
+    assert response["KeyCount"] != 0, f"remote dir with prefix {prefix} is empty: {response}"
+
+
+def list_prefix(
+    neon_env_builder: "NeonEnvBuilder", prefix: Optional[str] = None
+) -> ListObjectsV2OutputTypeDef:
+    """
+    Note that this function takes prefix_in_bucket into account.
+    """
     # For local_fs we need to properly handle empty directories, which we currently dont, so for simplicity stick to s3 api.
     assert neon_env_builder.remote_storage_kind in (
         RemoteStorageKind.MOCK_S3,
@@ -239,15 +259,21 @@ def assert_prefix_empty(neon_env_builder: "NeonEnvBuilder", prefix: Optional[str
     assert isinstance(neon_env_builder.remote_storage, S3Storage)
     assert neon_env_builder.remote_storage_client is not None
 
+    prefix_in_bucket = neon_env_builder.remote_storage.prefix_in_bucket or ""
+    if not prefix:
+        prefix = prefix_in_bucket
+    else:
+        # real s3 tests have a unique per-test prefix
+        # mock_s3 tests use a special pageserver prefix for pageserver stuff
+        prefix = "/".join((prefix_in_bucket, prefix))
+
     # Note that this doesnt use pagination, so list is not guaranteed to be exhaustive.
     response = neon_env_builder.remote_storage_client.list_objects_v2(
+        Delimiter="/",
         Bucket=neon_env_builder.remote_storage.bucket_name,
-        Prefix=prefix or neon_env_builder.remote_storage.prefix_in_bucket or "",
+        Prefix=prefix,
     )
-    objects = response.get("Contents")
-    assert (
-        response["KeyCount"] == 0
-    ), f"remote dir with prefix {prefix} is not empty after deletion: {objects}"
+    return response
 
 
 def wait_tenant_status_404(
diff --git a/test_runner/fixtures/remote_storage.py b/test_runner/fixtures/remote_storage.py
index ada2d42347..320e658639 100644
--- a/test_runner/fixtures/remote_storage.py
+++ b/test_runner/fixtures/remote_storage.py
@@ -92,8 +92,11 @@ def available_s3_storages() -> List[RemoteStorageKind]:
 class LocalFsStorage:
     root: Path
 
+    def tenant_path(self, tenant_id: TenantId) -> Path:
+        return self.root / "tenants" / str(tenant_id)
+
     def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
-        return self.root / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
+        return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)
 
     def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
         return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index f0f6cc743c..24d64f373b 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -1,5 +1,7 @@
 import enum
 import os
+import shutil
+from pathlib import Path
 
 import pytest
 from fixtures.log_helper import log
@@ -13,13 +15,18 @@ from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     MANY_SMALL_LAYERS_TENANT_CONFIG,
     assert_prefix_empty,
+    assert_prefix_not_empty,
     poll_for_remote_storage_iterations,
     tenant_delete_wait_completed,
     wait_tenant_status_404,
     wait_until_tenant_active,
     wait_until_tenant_state,
 )
-from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
+from fixtures.remote_storage import (
+    RemoteStorageKind,
+    available_remote_storages,
+    available_s3_storages,
+)
 from fixtures.types import TenantId
 from fixtures.utils import run_pg_bench_small
 
@@ -64,6 +71,17 @@ def test_tenant_delete_smoke(
         run_pg_bench_small(pg_bin, endpoint.connstr())
         wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
 
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
     parent = timeline
 
     iterations = poll_for_remote_storage_iterations(remote_storage_kind)
@@ -73,7 +91,7 @@
     tenant_path = env.tenant_dir(tenant_id=tenant_id)
     assert not tenant_path.exists()
 
-    if remote_storage_kind in [RemoteStorageKind.MOCK_S3, RemoteStorageKind.REAL_S3]:
+    if remote_storage_kind in available_s3_storages():
         assert_prefix_empty(
             neon_env_builder,
             prefix="/".join(
@@ -189,6 +207,17 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
     else:
         last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
 
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
     ps_http.configure_failpoints((failpoint, "return"))
 
     iterations = poll_for_remote_storage_iterations(remote_storage_kind)
@@ -241,8 +270,12 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
     tenant_delete_wait_completed(ps_http, tenant_id, iterations=iterations)
 
-    # Check remote is impty
-    if remote_storage_kind is RemoteStorageKind.MOCK_S3:
+    tenant_dir = env.tenant_dir(tenant_id)
+    # Check local is empty
+    assert not tenant_dir.exists()
+
+    # Check remote is empty
+    if remote_storage_kind in available_s3_storages():
         assert_prefix_empty(
             neon_env_builder,
             prefix="/".join(
                 (
@@ -253,10 +286,118 @@
             ),
         )
 
-    tenant_dir = env.tenant_dir(tenant_id)
-    # Check local is empty
-    assert not tenant_dir.exists()
+
+# TODO resume deletion (https://github.com/neondatabase/neon/issues/5006)
+@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
+def test_deleted_tenant_ignored_on_attach(
+    neon_env_builder: NeonEnvBuilder,
+    remote_storage_kind: RemoteStorageKind,
+    pg_bin: PgBin,
+):
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_deleted_tenant_ignored_on_attach",
+    )
+
+    env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
+
+    tenant_id = env.initial_tenant
+
+    ps_http = env.pageserver.http_client()
+    # create two timelines
+    for timeline in ["first", "second"]:
+        timeline_id = env.neon_cli.create_timeline(timeline, tenant_id=tenant_id)
+        with env.endpoints.create_start(timeline, tenant_id=tenant_id) as endpoint:
+            run_pg_bench_small(pg_bin, endpoint.connstr())
+            wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
+
+    # sanity check: the data should be there
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
+    # failpoint before we remove index_part from s3
+    failpoint = "timeline-delete-before-index-delete"
+    ps_http.configure_failpoints((failpoint, "return"))
+
+    env.pageserver.allowed_errors.extend(
+        (
+            # allow errors caused by failpoints
+            f".*failpoint: {failpoint}",
+            # appears when the flush loop was stopped during a deletion attempt and the pageserver is then stopped
+            ".*freeze_and_flush_on_shutdown.*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            # the error from the http response is also logged
+            ".*InternalServerError\\(Tenant is marked as deleted on remote storage.*",
+            '.*shutdown_pageserver{exit_code=0}: stopping left-over name="remote upload".*',
+        )
+    )
+
+    iterations = poll_for_remote_storage_iterations(remote_storage_kind)
+
+    ps_http.tenant_delete(tenant_id)
+
+    tenant_info = wait_until_tenant_state(
+        pageserver_http=ps_http,
+        tenant_id=tenant_id,
+        expected_state="Broken",
+        iterations=iterations,
+    )
+
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
+
+    reason = tenant_info["state"]["data"]["reason"]
+    # the failpoint may not be the only error in the stack
+    assert reason.endswith(f"failpoint: {failpoint}"), reason
+
+    # now we stop the pageserver and remove local tenant state
+    env.endpoints.stop_all()
+    env.pageserver.stop()
+
+    dir_to_clear = Path(env.repo_dir) / "tenants"
+    shutil.rmtree(dir_to_clear)
+    os.mkdir(dir_to_clear)
+
+    env.pageserver.start()
+
+    # now we call attach
+    with pytest.raises(
+        PageserverApiException, match="Tenant is marked as deleted on remote storage"
+    ):
+        ps_http.tenant_attach(tenant_id=tenant_id)
+
+    # deletion should be resumed here (not implemented yet):
+    # wait_tenant_status_404(ps_http, tenant_id, iterations)
+
+    # we shouldn't have created the tenant dir on disk
+    tenant_path = env.tenant_dir(tenant_id=tenant_id)
+    assert not tenant_path.exists()
+
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(tenant_id),
+                )
+            ),
+        )
 
 
 # TODO test concurrent deletions with "hang" failpoint
-# TODO test tenant delete continues after attach
diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py
index a48c2186de..7d2d3304e2 100644
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -18,6 +18,7 @@ from fixtures.neon_fixtures import (
 from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     assert_prefix_empty,
+    assert_prefix_not_empty,
     poll_for_remote_storage_iterations,
     timeline_delete_wait_completed,
     wait_for_last_record_lsn,
@@ -30,6 +31,7 @@ from fixtures.remote_storage import (
     LocalFsStorage,
     RemoteStorageKind,
     available_remote_storages,
+    available_s3_storages,
 )
 from fixtures.types import Lsn, TenantId, TimelineId
 from fixtures.utils import query_scalar, wait_until
@@ -212,6 +214,19 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
     else:
         last_flush_lsn_upload(env, endpoint, env.initial_tenant, timeline_id)
 
+    if remote_storage_kind in available_s3_storages():
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(env.initial_tenant),
+                    "timelines",
+                    str(timeline_id),
+                )
+            ),
+        )
+
     env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
     # It appears when we stopped flush loop during deletion and then pageserver is stopped
     env.pageserver.allowed_errors.append(
@@ -298,7 +313,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
         ps_http, env.initial_tenant, timeline_id, iterations=iterations
     )
 
-    # Check remote is impty
+    # Check remote is empty
     if remote_storage_kind is RemoteStorageKind.MOCK_S3:
         assert_prefix_empty(
             neon_env_builder,
@@ -739,6 +754,19 @@ def test_timeline_delete_works_for_remote_smoke(
 
         timeline_ids.append(timeline_id)
 
+    for timeline_id in timeline_ids:
+        assert_prefix_not_empty(
+            neon_env_builder,
+            prefix="/".join(
+                (
+                    "tenants",
+                    str(env.initial_tenant),
+                    "timelines",
+                    str(timeline_id),
+                )
+            ),
+        )
+
     for timeline_id in reversed(timeline_ids):
         # note that we need to finish previous deletion before scheduling next one
         # otherwise we can get an "HasChildren" error if deletion is not fast enough (real_s3)
@@ -758,11 +786,7 @@ def test_timeline_delete_works_for_remote_smoke(
 
     # for some reason the check above doesnt immediately take effect for the below.
     # Assume it is mock server inconsistency and check twice.
-    wait_until(
-        2,
-        0.5,
-        lambda: assert_prefix_empty(neon_env_builder),
-    )
+    wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder))
 
 
 def test_delete_orphaned_objects(
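For reference, the `list_prefix`/`assert_prefix_empty`/`assert_prefix_not_empty` fixtures added above all reduce to a single non-paginated `ListObjectsV2` call. A standalone sketch of that pattern, assuming boto3 and placeholder bucket/prefix names:

```python
import boto3


def prefix_key_count(bucket: str, prefix: str) -> int:
    """One non-paginated ListObjectsV2 call, like list_prefix above:
    KeyCount covers at most one page (up to 1000 keys), so the result
    is not guaranteed to be exhaustive for large prefixes."""
    client = boto3.client("s3")
    response = client.list_objects_v2(Bucket=bucket, Prefix=prefix, Delimiter="/")
    return response["KeyCount"]


# Usage mirroring the assertions above (bucket name is a placeholder):
# assert prefix_key_count("some-bucket", f"tenants/{tenant_id}") == 0  # prefix empty
# assert prefix_key_count("some-bucket", f"tenants/{tenant_id}") != 0  # prefix not empty
```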