tests: remove test_gc_cutoff (#6587)

This test became flaky when postgres retry handling was fixed to use backoff delays -- each iteration in this test's loop was taking much longer because pgbench doesn't fail until postgres has given up on retrying to the pageserver. We are just removing it, because the condition it tests is no longer risky: we reload all metadata from remote storage on restart, so crashing directly between making local changes and doing remote uploads isn't interesting any more. Closes: https://github.com/neondatabase/neon/issues/2856 Closes: https://github.com/neondatabase/neon/issues/5329
2026-06-01 04:20:39 +00:00 · 2024-02-02 18:20:18 +00:00
parent caf868e274
commit 2e5eab69c6
2 changed files with 0 additions and 51 deletions
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4388,10 +4388,6 @@ impl Timeline {

            guard.finish_gc_timeline(&gc_layers);

-            if result.layers_removed != 0 {
-                fail_point!("after-timeline-gc-removed-layers");
-            }
-
            #[cfg(feature = "testing")]
            {
                result.doomed_layers = gc_layers;
--- a/test_runner/regress/test_gc_cutoff.py
+++ b/test_runner/regress/test_gc_cutoff.py
@@ -1,47 +0,0 @@
-import subprocess
-
-import pytest
-from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
-
-
-# Test gc_cutoff
-#
-# This test sets fail point at the end of GC, and checks that pageserver
-# normally restarts after it. Also, there should be GC ERRORs in the log,
-# but the fixture checks the log for any unexpected ERRORs after every
-# test anyway, so it doesn't need any special attention here.
-@pytest.mark.timeout(600)
-def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
-    env = neon_env_builder.init_start(
-        initial_tenant_conf={
-            "gc_period": "10 s",
-            "gc_horizon": f"{1024 ** 2}",
-            "checkpoint_distance": f"{1024 ** 2}",
-            "compaction_period": "5 s",
-            # set PITR interval to be small, so we can do GC
-            "pitr_interval": "1 s",
-            "compaction_threshold": "3",
-            "image_creation_threshold": "2",
-        }
-    )
-
-    pageserver_http = env.pageserver.http_client()
-
-    # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
-    tenant_id = env.initial_tenant
-    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
-    connstr = endpoint.connstr(options="-csynchronous_commit=off")
-    pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])
-
-    pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
-
-    # Because this test does a rapid series of restarts of the same node, it's possible that
-    # we are restarted again before we can clean up deletion lists form the previous generation,
-    # resulting in a subsequent startup logging a warning.
-    env.pageserver.allowed_errors.append(".*Dropping stale deletions for tenant.*")
-
-    for _ in range(5):
-        with pytest.raises(subprocess.SubprocessError):
-            pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
-        env.pageserver.stop()
-        env.pageserver.start(extra_env_vars={"FAILPOINTS": "after-timeline-gc-removed-layers=exit"})