diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index b8063eb5a2..d63c240365 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1676,6 +1676,10 @@ async fn timeline_checkpoint_handler( if Some(true) == parse_query_param::<_, bool>(&request, "force_image_layer_creation")? { flags |= CompactFlags::ForceImageLayerCreation; } + + // By default, checkpoints come with a compaction, but this may be optionally disabled by tests that just want to flush + upload. + let compact = parse_query_param::<_, bool>(&request, "compact")?.unwrap_or(true); + let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); @@ -1692,15 +1696,17 @@ async fn timeline_checkpoint_handler( } })?; - timeline - .compact(&cancel, flags, &ctx) - .await - .map_err(|e| - match e { - CompactionError::ShuttingDown => ApiError::ShuttingDown, - CompactionError::Other(e) => ApiError::InternalServerError(e) - } - )?; + if compact { + timeline + .compact(&cancel, flags, &ctx) + .await + .map_err(|e| + match e { + CompactionError::ShuttingDown => ApiError::ShuttingDown, + CompactionError::Other(e) => ApiError::InternalServerError(e) + } + )?; + } if wait_until_uploaded { timeline.remote_client.wait_completion().await.map_err(ApiError::InternalServerError)?; diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index c7cea4ec04..c6df6b5baf 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -662,6 +662,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): force_repartition=False, force_image_layer_creation=False, wait_until_uploaded=False, + compact: Optional[bool] = None, ): self.is_testing_enabled_or_skip() query = {} @@ -672,6 +673,9 @@ class PageserverHttpClient(requests.Session, MetricsGetter): if wait_until_uploaded: query["wait_until_uploaded"] = "true" + if compact is not None: + query["compact"] = "true" if compact else "false" + log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}") res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint", diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index a9f12f09b7..7c411a6b84 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -209,9 +209,17 @@ def test_scrubber_physical_gc_ancestors( new_shard_count = 4 assert shard_count is None or new_shard_count > shard_count shards = env.storage_controller.tenant_shard_split(tenant_id, shard_count=new_shard_count) + env.storage_controller.reconcile_until_idle() # Move shards to their final locations immediately - # Make sure child shards have some layers - workload.write_rows(100) + # Make sure child shards have some layers. Do not force upload, because the test helper calls checkpoint, which + # compacts, and we only want to do tha explicitly later in the test. + workload.write_rows(100, upload=False) + for shard in shards: + ps = env.get_tenant_pageserver(shard) + log.info(f"Waiting for shard {shard} on pageserver {ps.id}") + ps.http_client().timeline_checkpoint( + shard, timeline_id, compact=False, wait_until_uploaded=True + ) # Flush deletion queue so that we don't leave any orphan layers in the parent that will confuse subsequent checks: once # a shard is split, any layers in its prefix that aren't referenced by a child will be considered GC'able, even