fix: spanless log message (#7155)

With `immediate_gc`, the span only covered the `gc_iteration` call. Make it cover the whole (needlessly) spawned task, which also waits for layer drops and emits stray log messages in tests. Also clarify some comments while we are here. Fixes: #6910
2026-01-10 15:02:56 +00:00 · 2024-03-18 16:27:53 +02:00
parent db749914d8
commit 877fd14401
2 changed files with 8 additions and 7 deletions
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1653,8 +1653,7 @@ async fn timeline_gc_handler(
    let gc_req: TimelineGcRequest = json_request(&mut request).await?;

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let wait_task_done =
-        mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
+    let wait_task_done = mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx)?;
    let gc_result = wait_task_done
        .await
        .context("wait for gc task")
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -2730,7 +2730,7 @@ use {
    utils::http::error::ApiError,
 };

-pub(crate) async fn immediate_gc(
+pub(crate) fn immediate_gc(
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
    gc_req: TimelineGcRequest,
@@ -2752,6 +2752,8 @@ pub(crate) async fn immediate_gc(
    // Run in task_mgr to avoid race with tenant_detach operation
    let ctx = ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
    let (task_done, wait_task_done) = tokio::sync::oneshot::channel();
+    let span = info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
+
    // TODO: spawning is redundant now, need to hold the gate
    task_mgr::spawn(
        &tokio::runtime::Handle::current(),
@@ -2766,16 +2768,15 @@ pub(crate) async fn immediate_gc(
            #[allow(unused_mut)]
            let mut result = tenant
                .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
-                .instrument(info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))
                .await;
                // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
                // better once the types support it.

            #[cfg(feature = "testing")]
            {
+                // we need to synchronize with drop completion for python tests without polling for
+                // log messages
                if let Ok(result) = result.as_mut() {
-                    // why not futures unordered? it seems it needs very much the same task structure
-                    // but would only run on single task.
                    let mut js = tokio::task::JoinSet::new();
                    for layer in std::mem::take(&mut result.doomed_layers) {
                        js.spawn(layer.wait_drop());
@@ -2791,7 +2792,7 @@ pub(crate) async fn immediate_gc(

                if let Some(rtc) = rtc {
                    // layer drops schedule actions on remote timeline client to actually do the
-                    // deletions; don't care just exit fast about the shutdown error
+                    // deletions; don't care about the shutdown error, just exit fast
                    drop(rtc.wait_completion().await);
                }
            }
@@ -2802,6 +2803,7 @@ pub(crate) async fn immediate_gc(
            }
            Ok(())
        }
+        .instrument(span)
    );

    // drop the guard until after we've spawned the task so that timeline shutdown will wait for the task