mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-05 20:42:54 +00:00
test(pageserver): fix test_pageserver_gc_compaction_idempotent (#10833)
## Problem

ref https://github.com/neondatabase/neon/issues/10517

## Summary of changes

For some reason, the job split algorithm chooses a different image coverage range for the two compactions before and after restart. We therefore remove the sub-compaction key range from the test and let compaction generate an image covering the full range, which should make the test more stable. Also slightly tuned the logging span.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -3101,6 +3101,9 @@ impl Tenant {
|
||||
if let Some(queue) = queue {
|
||||
outcome = queue
|
||||
.iteration(cancel, ctx, &self.gc_block, &timeline)
|
||||
.instrument(
|
||||
info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -301,18 +301,12 @@ impl GcCompactionQueue {
|
||||
let mut guard = self.inner.lock().unwrap();
|
||||
guard.gc_guards.insert(id, gc_guard);
|
||||
}
|
||||
let _ = timeline
|
||||
.compact_with_options(cancel, options, ctx)
|
||||
.instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
|
||||
.await?;
|
||||
let _ = timeline.compact_with_options(cancel, options, ctx).await?;
|
||||
self.notify_and_unblock(id);
|
||||
}
|
||||
}
|
||||
GcCompactionQueueItem::SubCompactionJob(options) => {
|
||||
let _ = timeline
|
||||
.compact_with_options(cancel, options, ctx)
|
||||
.instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
|
||||
.await?;
|
||||
let _ = timeline.compact_with_options(cancel, options, ctx).await?;
|
||||
}
|
||||
GcCompactionQueueItem::Notify(id) => {
|
||||
self.notify_and_unblock(id);
|
||||
|
||||
@@ -236,9 +236,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
|
||||
wait_until(compaction_finished, timeout=60)
|
||||
|
||||
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
|
||||
env.pageserver.assert_log_contains(
|
||||
"scheduled_compact_timeline.*picked .* layers for compaction"
|
||||
)
|
||||
env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
|
||||
|
||||
log.info("Validating at workload end ...")
|
||||
workload.validate(env.pageserver.id)
|
||||
@@ -300,6 +298,8 @@ def test_pageserver_gc_compaction_idempotent(
|
||||
workload.churn_rows(row_count, env.pageserver.id)
|
||||
env.create_branch("child_branch") # so that we have a retain_lsn
|
||||
workload.churn_rows(row_count, env.pageserver.id)
|
||||
env.create_branch("child_branch_2") # so that we have another retain_lsn
|
||||
workload.churn_rows(row_count, env.pageserver.id)
|
||||
# compact 3 times if mode is before_restart
|
||||
n_compactions = 3 if compaction_mode == "before_restart" else 1
|
||||
ps_http.timeline_compact(
|
||||
@@ -315,10 +315,6 @@ def test_pageserver_gc_compaction_idempotent(
|
||||
body={
|
||||
"scheduled": True,
|
||||
"sub_compaction": True,
|
||||
"compact_key_range": {
|
||||
"start": "000000000000000000000000000000000000",
|
||||
"end": "030000000000000000000000000000000000",
|
||||
},
|
||||
"sub_compaction_max_job_size_mb": 16,
|
||||
},
|
||||
)
|
||||
@@ -336,19 +332,13 @@ def test_pageserver_gc_compaction_idempotent(
|
||||
body={
|
||||
"scheduled": True,
|
||||
"sub_compaction": True,
|
||||
"compact_key_range": {
|
||||
"start": "000000000000000000000000000000000000",
|
||||
"end": "030000000000000000000000000000000000",
|
||||
},
|
||||
"sub_compaction_max_job_size_mb": 16,
|
||||
},
|
||||
)
|
||||
wait_until(compaction_finished, timeout=60)
|
||||
|
||||
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
|
||||
env.pageserver.assert_log_contains(
|
||||
"scheduled_compact_timeline.*picked .* layers for compaction"
|
||||
)
|
||||
env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
|
||||
|
||||
# ensure we hit the duplicated layer key warning at least once: we did two compactions consecutively,
|
||||
# and the second one should have hit the duplicated layer key warning.
|
||||
@@ -466,9 +456,7 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder):
|
||||
wait_until(compaction_finished, timeout=60)
|
||||
|
||||
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
|
||||
env.pageserver.assert_log_contains(
|
||||
"scheduled_compact_timeline.*picked .* layers for compaction"
|
||||
)
|
||||
env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
|
||||
|
||||
log.info("Validating at workload end ...")
|
||||
workload.validate(env.pageserver.id)
|
||||
|
||||
Reference in New Issue
Block a user