mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
fix(test): wait for compaction in timeline offload test (#12673)
## Problem Closes LKB-753. `test_pageserver_metrics_removed_after_offload` is unstable and sometimes leaves the metrics behind after tenant offloading. It turns out that we triggered an image compaction before the offload, and the job was stopped only after the offload request had completed. ## Summary of changes Wait for all background tasks to finish before checking the metrics. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -298,15 +298,26 @@ def test_pageserver_metrics_removed_after_detach(neon_env_builder: NeonEnvBuilde
|
|||||||
assert post_detach_samples == set()
|
assert post_detach_samples == set()
|
||||||
|
|
||||||
|
|
||||||
def test_pageserver_metrics_removed_after_offload(neon_env_builder: NeonEnvBuilder):
|
@pytest.mark.parametrize("compaction", ["compaction_enabled", "compaction_disabled"])
|
||||||
|
def test_pageserver_metrics_removed_after_offload(
|
||||||
|
neon_env_builder: NeonEnvBuilder, compaction: str
|
||||||
|
):
|
||||||
"""Tests that when a timeline is offloaded, the tenant specific metrics are not left behind"""
|
"""Tests that when a timeline is offloaded, the tenant specific metrics are not left behind"""
|
||||||
|
|
||||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
|
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
|
||||||
|
|
||||||
neon_env_builder.num_safekeepers = 3
|
neon_env_builder.num_safekeepers = 3
|
||||||
|
|
||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
tenant_1, _ = env.create_tenant()
|
tenant_1, _ = env.create_tenant(
|
||||||
|
conf={
|
||||||
|
# disable background compaction and GC so that we don't have leftover tasks
|
||||||
|
# after offloading.
|
||||||
|
"gc_period": "0s",
|
||||||
|
"compaction_period": "0s",
|
||||||
|
}
|
||||||
|
if compaction == "compaction_disabled"
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
timeline_1 = env.create_timeline("test_metrics_removed_after_offload_1", tenant_id=tenant_1)
|
timeline_1 = env.create_timeline("test_metrics_removed_after_offload_1", tenant_id=tenant_1)
|
||||||
timeline_2 = env.create_timeline("test_metrics_removed_after_offload_2", tenant_id=tenant_1)
|
timeline_2 = env.create_timeline("test_metrics_removed_after_offload_2", tenant_id=tenant_1)
|
||||||
@@ -351,6 +362,23 @@ def test_pageserver_metrics_removed_after_offload(neon_env_builder: NeonEnvBuild
|
|||||||
state=TimelineArchivalState.ARCHIVED,
|
state=TimelineArchivalState.ARCHIVED,
|
||||||
)
|
)
|
||||||
env.pageserver.http_client().timeline_offload(tenant_1, timeline)
|
env.pageserver.http_client().timeline_offload(tenant_1, timeline)
|
||||||
|
# We need to wait until all background jobs are finished before we can check the metrics.
|
||||||
|
# There're many of them: compaction, GC, etc.
|
||||||
|
wait_until(
|
||||||
|
lambda: all(
|
||||||
|
sample.value == 0
|
||||||
|
for sample in env.pageserver.http_client()
|
||||||
|
.get_metrics()
|
||||||
|
.query_all("pageserver_background_loop_semaphore_waiting_tasks")
|
||||||
|
)
|
||||||
|
and all(
|
||||||
|
sample.value == 0
|
||||||
|
for sample in env.pageserver.http_client()
|
||||||
|
.get_metrics()
|
||||||
|
.query_all("pageserver_background_loop_semaphore_running_tasks")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
post_offload_samples = set(
|
post_offload_samples = set(
|
||||||
[x.name for x in get_ps_metric_samples_for_timeline(tenant_1, timeline)]
|
[x.name for x in get_ps_metric_samples_for_timeline(tenant_1, timeline)]
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user