Perform L0 compaction before creating new image layers (#5950)

If there are too many L0 layers before compaction, the compaction
process becomes slow because of slow `Timeline::get`. As a result of the
slowdown, the pageserver will generate even more L0 layers for the next
iteration, further exacerbating the slow performance.

Change to perform L0 -> L1 compaction before creating new images. The
simple change speeds up compaction time and `Timeline::get` to 5x.
`Timeline::get` is faster on top of L1 layers.

Co-authored-by: Joonas Koivunen <joonas@neon.tech>
This commit is contained in:
John Khvatov
2023-12-04 15:35:09 +03:00
committed by GitHub
parent e6b2f89fec
commit eae49ff598
2 changed files with 11 additions and 7 deletions

View File

@@ -801,7 +801,12 @@ impl Timeline {
.access_stats_behavior(AccessStatsBehavior::Skip)
.build();
// 2. Create new image layers for partitions that have been modified
// 2. Compact
let timer = self.metrics.compact_time_histo.start_timer();
self.compact_level0(target_file_size, ctx).await?;
timer.stop_and_record();
// 3. Create new image layers for partitions that have been modified
// "enough".
let layers = self
.create_image_layers(&partitioning, lsn, false, &image_ctx)
@@ -813,11 +818,6 @@ impl Timeline {
}
}
// 3. Compact
let timer = self.metrics.compact_time_histo.start_timer();
self.compact_level0(target_file_size, ctx).await?;
timer.stop_and_record();
if let Some(remote_client) = &self.remote_client {
// should any new image layer been created, not uploading index_part will
// result in a mismatch between remote_physical_size and layermap calculated

View File

@@ -49,7 +49,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
"compaction_period": "0s", # we want to control when compaction runs
"checkpoint_timeout": "24h", # something we won't reach
"checkpoint_distance": f"{50 * (1024**2)}", # something we won't reach, we checkpoint manually
"image_creation_threshold": f"{image_creation_threshold}",
"image_creation_threshold": "100", # we want to control when image is created
"compaction_threshold": f"{l0_l1_threshold}",
"compaction_target_size": f"{128 * (1024**3)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers
}
@@ -124,6 +124,10 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
), "sanity check for what above loop is supposed to do"
# create the image layer from the future
ps_http.patch_tenant_config_client_side(
tenant_id, {"image_creation_threshold": image_creation_threshold}, None
)
assert ps_http.tenant_config(tenant_id).effective_config["image_creation_threshold"] == 1
ps_http.timeline_compact(tenant_id, timeline_id, force_repartition=True)
assert (
len(