diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index a746fd9bf8..cc6583dcf6 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2487,9 +2487,6 @@ impl Timeline {
             );
             write_guard.store_and_unlock(new_gc_cutoff).wait();
         }
-        // Persist the new GC cutoff value in the metadata file, before
-        // we actually remove anything.
-        self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?;
 
         info!("GC starting");
 
@@ -2600,19 +2597,33 @@ impl Timeline {
             layers_to_remove.push(Arc::clone(&l));
         }
 
-        // Actually delete the layers from disk and remove them from the map.
-        // (couldn't do this in the loop above, because you cannot modify a collection
-        // while iterating it. BTreeMap::retain() would be another option)
-        let mut layer_names_to_delete = Vec::with_capacity(layers_to_remove.len());
-        for doomed_layer in layers_to_remove {
-            let path = doomed_layer.local_path();
-            self.metrics
-                .current_physical_size_gauge
-                .sub(path.metadata()?.len());
-            layer_names_to_delete.push(doomed_layer.filename());
-            doomed_layer.delete()?;
-            layers.remove_historic(doomed_layer);
-            result.layers_removed += 1;
+        if !layers_to_remove.is_empty() {
+            // Persist the new GC cutoff value in the metadata file, before
+            // we actually remove anything.
+            self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?;
+
+            // Actually delete the layers from disk and remove them from the map.
+            // (couldn't do this in the loop above, because you cannot modify a collection
+            // while iterating it. BTreeMap::retain() would be another option)
+            let mut layer_names_to_delete = Vec::with_capacity(layers_to_remove.len());
+            for doomed_layer in layers_to_remove {
+                let path = doomed_layer.local_path();
+                self.metrics
+                    .current_physical_size_gauge
+                    .sub(path.metadata()?.len());
+                layer_names_to_delete.push(doomed_layer.filename());
+                doomed_layer.delete()?;
+                layers.remove_historic(doomed_layer);
+                result.layers_removed += 1;
+            }
+
+            if result.layers_removed != 0 {
+                fail_point!("after-timeline-gc-removed-layers");
+            }
+
+            if let Some(remote_client) = &self.remote_client {
+                remote_client.schedule_layer_file_deletion(&layer_names_to_delete)?;
+            }
         }
 
         info!(
@@ -2620,14 +2631,6 @@ impl Timeline {
             result.layers_removed, new_gc_cutoff
         );
 
-        if result.layers_removed != 0 {
-            fail_point!("after-timeline-gc-removed-layers");
-        }
-
-        if let Some(remote_client) = &self.remote_client {
-            remote_client.schedule_layer_file_deletion(&layer_names_to_delete)?;
-        }
-
         result.elapsed = now.elapsed()?;
         Ok(result)
     }
diff --git a/test_runner/regress/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py
index 92855899f0..b9d012fa36 100644
--- a/test_runner/regress/test_gc_aggressive.py
+++ b/test_runner/regress/test_gc_aggressive.py
@@ -2,9 +2,17 @@
 import asyncio
 import concurrent.futures
 import random
+import pytest
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
-from fixtures.types import TimelineId
+from fixtures.metrics import parse_metrics
+from fixtures.neon_fixtures import (
+    NeonEnv,
+    NeonEnvBuilder,
+    Postgres,
+    RemoteStorageKind,
+    wait_for_last_flush_lsn,
+)
+from fixtures.types import TenantId, TimelineId
 from fixtures.utils import query_scalar
 
 # Test configuration
@@ -43,6 +51,7 @@ async def gc(env: NeonEnv, timeline: TimelineId):
     while updates_performed < updates_to_perform:
         await loop.run_in_executor(pool, do_gc)
 
+
 # At the same time, run UPDATEs and GC
 async def update_and_gc(env: NeonEnv, pg: Postgres, timeline: TimelineId):
     workers = []
@@ -88,3 +97,76 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
     r = cur.fetchone()
     assert r is not None
     assert r == (num_rows, updates_to_perform)
+
+
+#
+@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
+def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind):
+
+    # Disable time-based pitr; we will use LSN-based thresholds in the manual GC calls
+    neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
+
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=remote_storage_kind,
+        test_name="test_gc_index_upload",
+    )
+
+    env = neon_env_builder.init_start()
+    env.neon_cli.create_branch("test_gc_index_upload", "main")
+    pg = env.postgres.create_start("test_gc_index_upload")
+
+    pageserver_http = env.pageserver.http_client()
+
+    pg_conn = pg.connect()
+    cur = pg_conn.cursor()
+
+    tenant_id = TenantId(query_scalar(cur, "SHOW neon.tenant_id"))
+    timeline_id = TimelineId(query_scalar(cur, "SHOW neon.timeline_id"))
+
+    cur.execute("CREATE TABLE foo (id int, counter int, t text)")
+    cur.execute(
+        """
+        INSERT INTO foo
+            SELECT g, 0, 'long string to consume some space' || g
+            FROM generate_series(1, 100000) g
+        """
+    )
+
+    # Helper function that gets the number of given kind of remote ops from the metrics
+    def get_num_remote_ops(file_kind: str, op_kind: str) -> int:
+        ps_metrics = parse_metrics(env.pageserver.http_client().get_metrics(), "pageserver")
+        total = 0.0
+        for sample in ps_metrics.query_all(
+            name="pageserver_remote_operation_seconds_count",
+            filter={
+                "tenant_id": str(tenant_id),
+                "timeline_id": str(timeline_id),
+                "file_kind": str(file_kind),
+                "op_kind": str(op_kind),
+            },
+        ):
+            total += sample[2]
+        return int(total)
+
+    # Sanity check that the metric works
+    wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+    pageserver_http.timeline_checkpoint(tenant_id, timeline_id)
+    pageserver_http.timeline_gc(tenant_id, timeline_id, 10000)
+    before = get_num_remote_ops("index", "upload")
+    assert before > 0
+
+    # Run many cycles of GC. Then check that the number of index file
+    # uploads didn't grow much. In particular we don't want to re-upload the
+    # index file on every GC iteration, when it has no work to do.
+    #
+    # On each iteration, we use a slightly smaller GC horizon, so that the GC
+    # at least needs to check if it has work to do.
+    for i in range(100):
+        cur.execute("INSERT INTO foo VALUES (0, 0, 'foo')")
+        pageserver_http.timeline_gc(tenant_id, timeline_id, 10000 - i * 32)
+        num_index_uploads = get_num_remote_ops("index", "upload")
+        log.info(f"{num_index_uploads} index uploads after GC iteration {i}")
+
+    after = num_index_uploads
+    log.info(f"{after-before} new index uploads during test")
+    assert after - before < 5
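
Reviewer note: the pageserver change above is purely about gating and ordering; what gets deleted is unchanged. The minimal Rust sketch below illustrates why moving the metadata write inside the `!layers_to_remove.is_empty()` check stops a no-op GC iteration from rewriting and re-uploading the index file. `persist_metadata` and `schedule_remote_deletion` are hypothetical stand-ins for `Timeline::update_metadata_file` and `RemoteTimelineClient::schedule_layer_file_deletion`, not the real pageserver API.

```rust
// Minimal sketch, assuming hypothetical stand-ins for the pageserver calls.
#[derive(Default)]
struct GcResult {
    layers_removed: u64,
}

// Stand-in for Timeline::update_metadata_file: every call implies an
// index file rewrite plus a scheduled remote upload.
fn persist_metadata(uploads: &mut u64) {
    *uploads += 1;
}

// Stand-in for RemoteTimelineClient::schedule_layer_file_deletion.
fn schedule_remote_deletion(_layer_names: &[String]) {}

fn run_gc(layers_to_remove: Vec<String>, uploads: &mut u64) -> GcResult {
    let mut result = GcResult::default();
    if !layers_to_remove.is_empty() {
        // Persist the new GC cutoff *before* removing anything, but only
        // when this iteration will actually remove something. Otherwise a
        // GC run with no work would still rewrite (and re-upload) the index.
        persist_metadata(uploads);

        let mut layer_names_to_delete = Vec::with_capacity(layers_to_remove.len());
        for doomed_layer in layers_to_remove {
            layer_names_to_delete.push(doomed_layer);
            result.layers_removed += 1;
        }
        schedule_remote_deletion(&layer_names_to_delete);
    }
    result
}

fn main() {
    let mut uploads = 0;

    // A no-op GC iteration: nothing removed, nothing uploaded.
    assert_eq!(run_gc(vec![], &mut uploads).layers_removed, 0);
    assert_eq!(uploads, 0);

    // An iteration with work: one metadata upload, one layer removed.
    assert_eq!(run_gc(vec!["layer_a".to_string()], &mut uploads).layers_removed, 1);
    assert_eq!(uploads, 1);
}
```

This is the same invariant the new `test_gc_index_upload` test asserts from the outside: across 100 GC iterations that mostly have nothing to do, the `index`/`upload` remote-operation counter should grow by only a handful, not by one per iteration.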