diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 8f325d31ec..a0ac0adea2 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1984,10 +1984,10 @@ impl Timeline { new_gc_cutoff ); write_guard.store_and_unlock(new_gc_cutoff).wait(); - - // Persist metadata file - self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?; } + // Persist the new GC cutoff value in the metadata file, before + // we actually remove anything. + self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?; info!("GC starting"); @@ -2114,15 +2114,12 @@ impl Timeline { } info!( - "GC completed removing {} layers, cuttof {}", + "GC completed removing {} layers, cutoff {}", result.layers_removed, new_gc_cutoff ); + if result.layers_removed != 0 { - fail_point!("gc-before-save-metadata", |_| { - info!("Abnormaly terinate pageserver at gc-before-save-metadata fail point"); - std::process::abort(); - }); - return Ok(result); + fail_point!("after-timeline-gc-removed-layers"); } if self.upload_layers.load(atomic::Ordering::Relaxed) { diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index 946c689a30..22b77d2cf1 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -1,14 +1,13 @@ -import pytest from fixtures.neon_fixtures import NeonEnvBuilder, PgBin -from performance.test_perf_pgbench import get_scales_matrix -# Test gc_cuttoff +# Test gc_cutoff # -# This test set fail point after at the end of GC and checks -# that pageserver normally restarts after it -@pytest.mark.parametrize("scale", get_scales_matrix(10)) -def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, scale: int): +# This test sets fail point at the end of GC, and checks that pageserver +# normally restarts after it. 
Also, there should be no GC ERRORs in the log, +# but the fixture checks the log for any unexpected ERRORs after every +# test anyway, so it doesn't need any special attention here. +def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() @@ -18,21 +17,23 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, scale: int): "gc_period": "10 s", "gc_horizon": f"{1024 ** 2}", "checkpoint_distance": f"{1024 ** 2}", - "compaction_target_size": f"{1024 ** 2}", + "compaction_period": "5 s", # set PITR interval to be small, so we can do GC "pitr_interval": "1 s", + "compaction_threshold": "3", + "image_creation_threshold": "2", } ) pg = env.postgres.create_start("main", tenant_id=tenant_id) - connstr = pg.connstr() - pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) + connstr = pg.connstr(options="-csynchronous_commit=off") + pg_bin.run_capture(["pgbench", "-i", "-s10", connstr]) - pageserver_http.configure_failpoints(("gc-before-save-metadata", "return")) + pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) for i in range(5): try: - pg_bin.run_capture(["pgbench", "-T100", connstr]) + pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr]) except Exception: env.pageserver.stop() env.pageserver.start() - pageserver_http.configure_failpoints(("gc-before-save-metadata", "return")) + pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))