diff --git a/pageserver/src/control_plane_client.rs b/pageserver/src/control_plane_client.rs index 3375392373..d37dc694dd 100644 --- a/pageserver/src/control_plane_client.rs +++ b/pageserver/src/control_plane_client.rs @@ -133,6 +133,8 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient { node_id: self.node_id, }; + fail::fail_point!("control-plane-client-re-attach"); + let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?; tracing::info!( "Received re-attach response with {} tenants", @@ -168,6 +170,8 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient { .collect(), }; + fail::fail_point!("control-plane-client-validate"); + let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?; Ok(response diff --git a/pageserver/src/deletion_queue/validator.rs b/pageserver/src/deletion_queue/validator.rs index c44564acfb..a2cbfb9dc7 100644 --- a/pageserver/src/deletion_queue/validator.rs +++ b/pageserver/src/deletion_queue/validator.rs @@ -220,6 +220,8 @@ where warn!("Dropping stale deletions for tenant {tenant_id} in generation {:?}, objects may be leaked", tenant.generation); metrics::DELETION_QUEUE.keys_dropped.inc_by(tenant.len() as u64); mutated = true; + } else { + metrics::DELETION_QUEUE.keys_validated.inc_by(tenant.len() as u64); } this_list_valid }); diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 92ce654b7e..1dc85fd76f 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -967,6 +967,7 @@ pub(crate) struct DeletionQueueMetrics { pub(crate) keys_submitted: IntCounter, pub(crate) keys_dropped: IntCounter, pub(crate) keys_executed: IntCounter, + pub(crate) keys_validated: IntCounter, pub(crate) dropped_lsn_updates: IntCounter, pub(crate) unexpected_errors: IntCounter, pub(crate) remote_errors: IntCounterVec, @@ -988,7 +989,13 @@ pub(crate) static DELETION_QUEUE: Lazy = Lazy::new(|| { keys_executed: register_int_counter!( "pageserver_deletion_queue_executed_total", - "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed." + "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion" + ) + .expect("failed to define a metric"), + + keys_validated: register_int_counter!( + "pageserver_deletion_queue_validated_total", + "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage." ) .expect("failed to define a metric"), diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 81d38ac934..abc2c79ac9 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -116,6 +116,10 @@ def get_deletion_queue_submitted(ps_http) -> int: return get_metric_or_0(ps_http, "pageserver_deletion_queue_submitted_total") +def get_deletion_queue_validated(ps_http) -> int: + return get_metric_or_0(ps_http, "pageserver_deletion_queue_validated_total") + + def get_deletion_queue_dropped(ps_http) -> int: return get_metric_or_0(ps_http, "pageserver_deletion_queue_dropped_total") @@ -273,12 +277,15 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder): @pytest.mark.parametrize("keep_attachment", [True, False]) +@pytest.mark.parametrize("validate_before", [True, False]) def test_deletion_queue_recovery( - neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, keep_attachment: bool + neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, keep_attachment: bool, validate_before: bool ): """ :param keep_attachment: If true, we re-attach after restart. Else, we act as if some other node took the attachment while we were restarting. + :param validate_before: If true, we wait for deletions to be validated before restart. This + makes them elegible to be executed after restart, if the same node keeps the attachment. """ neon_env_builder.enable_generations = True neon_env_builder.enable_pageserver_remote_storage( @@ -288,12 +295,20 @@ def test_deletion_queue_recovery( ps_http = env.pageserver.http_client() - # Prevent deletion lists from being executed, to build up some backlog of deletions - ps_http.configure_failpoints( - [ - ("deletion-queue-before-execute", "return"), - ] - ) + failpoints = [ + # Prevent deletion lists from being executed, to build up some backlog of deletions + ("deletion-queue-before-execute", "return"), + ] + + if not validate_before: + failpoints.append( + # Prevent deletion lists from being validated, we will test that they are + # dropped properly during recovery. 'pause' is okay here because we kill + # the pageserver with immediate=true + ("control-plane-client-validate", "pause") + ) + + ps_http.configure_failpoints(failpoints) generate_uploads_and_deletions(env) @@ -305,6 +320,16 @@ def test_deletion_queue_recovery( assert get_deletion_queue_unexpected_errors(ps_http) == 0 assert get_deletion_queue_dropped_lsn_updates(ps_http) == 0 + if validate_before: + + def assert_validation_complete(): + assert get_deletion_queue_submitted(ps_http) == get_deletion_queue_validated(ps_http) + + wait_until(20, 1, assert_validation_complete) + # A short wait to let the DeletionHeader get written out, as this happens after + # the validated count gets incremented. + time.sleep(1) + log.info(f"Restarting pageserver with {before_restart_depth} deletions enqueued") env.pageserver.stop(immediate=True) @@ -327,14 +352,17 @@ def test_deletion_queue_recovery( ps_http.deletion_queue_flush(execute=True) wait_until(10, 1, lambda: assert_deletion_queue(ps_http, lambda n: n == 0)) - if keep_attachment: - # If we kept the attachment, then our pre-restart deletions should have executed - # successfully + if keep_attachment or validate_before: + # - If we kept the attachment, then our pre-restart deletions should execute + # because on re-attach they were from the immediately preceding generation + # - If we validated before restart, then the deletions should execute because the + # deletion queue header records a validated deletion list sequence number. assert get_deletion_queue_executed(ps_http) == before_restart_depth else: + env.pageserver.allowed_errors.extend([".*Dropping stale deletions.*"]) + # If we lost the attachment, we should have dropped our pre-restart deletions. assert get_deletion_queue_dropped(ps_http) == before_restart_depth - env.pageserver.allowed_errors.extend([".*Dropping stale deletions.*"]) assert get_deletion_queue_unexpected_errors(ps_http) == 0 assert get_deletion_queue_dropped_lsn_updates(ps_http) == 0