From 60ced06586a6811470c16c6386daba79ffaeda13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Sat, 13 Jan 2024 09:15:58 +0100 Subject: [PATCH] Fix timeline creation and tenant deletion race (#6310) Fixes the race condition between timeline creation and tenant deletion outlined in #6255. Related: #5914, which is a similar race condition about the uninit marker file. Fixes #6255 --- pageserver/src/tenant.rs | 40 ++++---- pageserver/src/tenant/delete.rs | 1 + test_runner/regress/test_tenant_delete.py | 111 +++++++++++++++++++++- 3 files changed, 127 insertions(+), 25 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 2bc6058edb..0e09f2cbf1 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3197,8 +3197,17 @@ impl Tenant { "{INITDB_PATH}.upload-{timeline_id}.{TEMP_FILE_SUFFIX}" )); + scopeguard::defer! { + if let Err(e) = fs::remove_file(&temp_path) { + error!("Failed to remove temporary initdb archive '{temp_path}': {e}"); + } + } + let (pgdata_zstd, tar_zst_size) = import_datadir::create_tar_zst(pgdata_path, &temp_path).await?; + + pausable_failpoint!("before-initdb-upload"); + backoff::retry( || async { self::remote_timeline_client::upload_initdb_dir( @@ -3219,18 +3228,6 @@ impl Tenant { ) .await?; - tokio::fs::remove_file(&temp_path) - .await - .or_else(|e| { - if e.kind() == std::io::ErrorKind::NotFound { - // If something else already removed the file, ignore the error - Ok(()) - } else { - Err(e) - } - }) - .with_context(|| format!("tempfile removal {temp_path}"))?; - Ok(()) } @@ -3283,23 +3280,18 @@ impl Tenant { ) .await .context("download initdb tar")?; + + scopeguard::defer! { + if let Err(e) = fs::remove_file(&initdb_tar_zst_path) { + error!("Failed to remove temporary initdb archive '{initdb_tar_zst_path}': {e}"); + } + } + let buf_read = BufReader::with_capacity(remote_timeline_client::BUFFER_SIZE, initdb_tar_zst); import_datadir::extract_tar_zst(&pgdata_path, buf_read) .await .context("extract initdb tar")?; - - tokio::fs::remove_file(&initdb_tar_zst_path) - .await - .or_else(|e| { - if e.kind() == std::io::ErrorKind::NotFound { - // If something else already removed the file, ignore the error - Ok(()) - } else { - Err(e) - } - }) - .with_context(|| format!("tempfile removal {initdb_tar_zst_path}"))?; } else { // Init temporarily repo to get bootstrap data, this creates a directory in the `initdb_path` path run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?; diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs index 2f606ed822..ecffd4e6c1 100644 --- a/pageserver/src/tenant/delete.rs +++ b/pageserver/src/tenant/delete.rs @@ -542,6 +542,7 @@ impl DeleteTenantFlow { ) .await?; + pausable_failpoint!("tenant-delete-before-cleanup-remaining-fs-traces-pausable"); fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| { Err(anyhow::anyhow!( "failpoint: tenant-delete-before-cleanup-remaining-fs-traces" diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py index fece876459..7c52fb3071 100644 --- a/test_runner/regress/test_tenant_delete.py +++ b/test_runner/regress/test_tenant_delete.py @@ -24,8 +24,9 @@ from fixtures.pageserver.utils import ( wait_until_tenant_state, ) from fixtures.remote_storage import RemoteStorageKind, available_s3_storages, s3_storage -from fixtures.types import TenantId +from fixtures.types import TenantId, TimelineId from fixtures.utils import run_pg_bench_small, wait_until +from requests.exceptions import ReadTimeout def test_tenant_delete_smoke( @@ -553,3 +554,111 @@ def test_tenant_delete_concurrent( # Zero tenants remain (we deleted the default tenant) assert ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0 + + +def test_tenant_delete_races_timeline_creation( + neon_env_builder: NeonEnvBuilder, + pg_bin: PgBin, +): + """ + Validate that timeline creation executed in parallel with deletion works correctly. + + This is a reproducer for https://github.com/neondatabase/neon/issues/6255 + """ + # The remote storage kind doesn't really matter but we use it for iterations calculation below + # (and there is no way to reconstruct the used remote storage kind) + remote_storage_kind = RemoteStorageKind.MOCK_S3 + neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind) + env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG) + ps_http = env.pageserver.http_client() + tenant_id = env.initial_tenant + + # Sometimes it ends with "InternalServerError(Cancelled", sometimes with "InternalServerError(Operation was cancelled" + CANCELLED_ERROR = ( + ".*POST.*Cancelled request finished with an error: InternalServerError\\(.*ancelled" + ) + + env.pageserver.allowed_errors.extend( + [ + # lucky race with stopping from flushing a layer we fail to schedule any uploads + ".*layer flush task.+: could not flush frozen layer: update_metadata_file", + # We need the http connection close for successful reproduction + ".*POST.*/timeline.* request was dropped before completing", + # Timeline creation runs into this error + CANCELLED_ERROR, + ] + ) + + BEFORE_INITDB_UPLOAD_FAILPOINT = "before-initdb-upload" + DELETE_BEFORE_CLEANUP_FAILPOINT = "tenant-delete-before-cleanup-remaining-fs-traces-pausable" + + # Wait just before the initdb upload + ps_http.configure_failpoints((BEFORE_INITDB_UPLOAD_FAILPOINT, "pause")) + + def timeline_create(): + try: + ps_http.timeline_create(env.pg_version, tenant_id, TimelineId.generate(), timeout=1) + raise RuntimeError("creation succeeded even though it shouldn't") + except ReadTimeout: + pass + + Thread(target=timeline_create).start() + + def hit_initdb_upload_failpoint(): + assert env.pageserver.log_contains(f"at failpoint {BEFORE_INITDB_UPLOAD_FAILPOINT}") + + wait_until(100, 0.1, hit_initdb_upload_failpoint) + + def creation_connection_timed_out(): + assert env.pageserver.log_contains( + "POST.*/timeline.* request was dropped before completing" + ) + + # Wait so that we hit the timeout and the connection is dropped + # (But timeline creation still continues) + wait_until(100, 0.1, creation_connection_timed_out) + + ps_http.configure_failpoints((DELETE_BEFORE_CLEANUP_FAILPOINT, "pause")) + + def tenant_delete(): + ps_http.tenant_delete(tenant_id) + + Thread(target=tenant_delete).start() + + def deletion_arrived(): + assert env.pageserver.log_contains( + f"cfg failpoint: {DELETE_BEFORE_CLEANUP_FAILPOINT} pause" + ) + + wait_until(100, 0.1, deletion_arrived) + + ps_http.configure_failpoints((DELETE_BEFORE_CLEANUP_FAILPOINT, "off")) + + # Disable the failpoint and wait for deletion to finish + ps_http.configure_failpoints((BEFORE_INITDB_UPLOAD_FAILPOINT, "off")) + + iterations = poll_for_remote_storage_iterations(remote_storage_kind) + try: + tenant_delete_wait_completed(ps_http, tenant_id, iterations) + except PageserverApiException: + pass + + # Physical deletion should have happened + assert_prefix_empty( + neon_env_builder, + prefix="/".join( + ( + "tenants", + str(tenant_id), + ) + ), + ) + + # Ensure that creation cancelled and deletion didn't end up in broken state or encountered the leftover temp file + assert env.pageserver.log_contains(CANCELLED_ERROR) + assert not env.pageserver.log_contains( + ".*ERROR.*delete_tenant.*Timelines directory is not empty after all timelines deletion" + ) + + # Zero tenants remain (we deleted the default tenant) + assert ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0