fix: spanless log message (#7155)

with `immediate_gc` the span only covered the `gc_iteration`, make it
cover the whole needless spawned task, which also does waiting for layer
drops and stray logging in tests.

also clarify some comments while we are here.

Fixes: #6910
This commit is contained in:
Joonas Koivunen
2024-03-18 16:27:53 +02:00
committed by GitHub
parent db749914d8
commit 877fd14401
2 changed files with 8 additions and 7 deletions

View File

@@ -1653,8 +1653,7 @@ async fn timeline_gc_handler(
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let wait_task_done =
mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
let wait_task_done = mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx)?;
let gc_result = wait_task_done
.await
.context("wait for gc task")

View File

@@ -2730,7 +2730,7 @@ use {
utils::http::error::ApiError,
};
pub(crate) async fn immediate_gc(
pub(crate) fn immediate_gc(
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
gc_req: TimelineGcRequest,
@@ -2752,6 +2752,8 @@ pub(crate) async fn immediate_gc(
// Run in task_mgr to avoid race with tenant_detach operation
let ctx = ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
let (task_done, wait_task_done) = tokio::sync::oneshot::channel();
let span = info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
// TODO: spawning is redundant now, need to hold the gate
task_mgr::spawn(
&tokio::runtime::Handle::current(),
@@ -2766,16 +2768,15 @@ pub(crate) async fn immediate_gc(
#[allow(unused_mut)]
let mut result = tenant
.gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
.instrument(info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))
.await;
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
// better once the types support it.
#[cfg(feature = "testing")]
{
// we need to synchronize with drop completion for python tests without polling for
// log messages
if let Ok(result) = result.as_mut() {
// why not futures unordered? it seems it needs very much the same task structure
// but would only run on single task.
let mut js = tokio::task::JoinSet::new();
for layer in std::mem::take(&mut result.doomed_layers) {
js.spawn(layer.wait_drop());
@@ -2791,7 +2792,7 @@ pub(crate) async fn immediate_gc(
if let Some(rtc) = rtc {
// layer drops schedule actions on remote timeline client to actually do the
// deletions; don't care just exit fast about the shutdown error
// deletions; don't care about the shutdown error, just exit fast
drop(rtc.wait_completion().await);
}
}
@@ -2802,6 +2803,7 @@ pub(crate) async fn immediate_gc(
}
Ok(())
}
.instrument(span)
);
// drop the guard until after we've spawned the task so that timeline shutdown will wait for the task