From 9ee5f17c41caf06468b0834365da0c9fa4151c58 Mon Sep 17 00:00:00 2001 From: Bojan Serafimov Date: Wed, 29 Jun 2022 17:40:48 -0400 Subject: [PATCH] Fix test --- pageserver/src/tenant_mgr.rs | 1 + pageserver/src/tenant_tasks.rs | 1 + test_runner/batch_others/test_tenant_tasks.py | 12 +++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index c73fed140a..8b88a2185e 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -347,6 +347,7 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow: ); // Wait until all gc/compaction tasks finish + // TODO send cancellation signal too, or make the state a watch let repo = get_repository_for_tenant(tenant_id)?; let _guard = repo.file_lock.write().unwrap(); } diff --git a/pageserver/src/tenant_tasks.rs b/pageserver/src/tenant_tasks.rs index ede140bf94..ede4114c94 100644 --- a/pageserver/src/tenant_tasks.rs +++ b/pageserver/src/tenant_tasks.rs @@ -192,6 +192,7 @@ pub fn init_tenant_task_pool() -> anyhow::Result<()> { // Spawn new task, request cancellation of the old one if exists let (cancel_send, cancel_recv) = watch::channel(()); + // TODO this instrument doesn't work let handle = tokio::spawn(compaction_loop(tenantid, cancel_recv) .instrument(trace_span!("compaction loop", tenant = %tenantid))); if let Some(old_cancel_send) = compaction_loops.insert(tenantid, cancel_send) { diff --git a/test_runner/batch_others/test_tenant_tasks.py b/test_runner/batch_others/test_tenant_tasks.py index d1deb83621..fa299324f4 100644 --- a/test_runner/batch_others/test_tenant_tasks.py +++ b/test_runner/batch_others/test_tenant_tasks.py @@ -20,6 +20,8 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): def get_metric_value(name): metrics = client.get_metrics() relevant = [line for line in metrics.splitlines() if line.startswith(name)] + if len(relevant) == 0: + return 0 line = get_only_element(relevant) 
value = line.lstrip(name).strip() return int(value) @@ -48,9 +50,17 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): tenant_id = UUID(tenant_info["id"]) detach_all_timelines(tenant_id) - # XXX this fails. Why? + # TODO poll until the tenant is idle instead of sleeping + import time; time.sleep(1) assert get_state(tenant_id) == "Idle" + # Read metrics + import time; time.sleep(1) tasks_started = get_metric_value('pageserver_tenant_task_events{event="start"}') tasks_ended = get_metric_value('pageserver_tenant_task_events{event="stop"}') + tasks_panicked = get_metric_value('pageserver_tenant_task_events{event="panic"}') + + # TODO this fails because the "active -> idle" transition only waits for gc to + # finish without cancelling it, and gc waits for 100 seconds. assert tasks_started == tasks_ended + assert tasks_panicked == 0