callers of task_mgr::spawn: some top-level async blocks were missing tenant/timeline id (#4283)

Looking at logs from staging and prod, I found a bunch of log
lines without tenant / timeline context.

Manually walk through all task_mgr::spawn call sites and fix them
with the least amount of work required.

While doing it, remove some redundant `shutting down` messages. 

refs https://github.com/neondatabase/neon/issues/4222
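
For reference, the general pattern this change applies: wrap the spawned future in a tracing span that carries the tenant/timeline ids, so every log line inside the task is attributable. A minimal sketch using plain `tokio::spawn` and `tracing`, not the actual `task_mgr::spawn` signature; the id types are hypothetical stand-ins:

```rust
use tracing::{info, info_span, Instrument};

// Hypothetical id types; the real code uses dedicated TenantId/TimelineId.
fn spawn_with_ids(tenant_id: String, timeline_id: String) {
    tokio::spawn(
        async move {
            // Inherits the span's fields, so this line carries both ids.
            info!("starting background work");
        }
        // Attach tenant/timeline context to everything logged in the task.
        .instrument(info_span!(
            "my_task",
            tenant_id = %tenant_id,
            timeline_id = %timeline_id
        )),
    );
}
```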
Author: Christian Schwarz
Committed by: GitHub
Date: 2023-07-05 14:04:05 +02:00
Parent: c92b7543b5
Commit: 7d7cd8375c

3 changed files with 12 additions and 6 deletions


@@ -110,7 +110,6 @@ pub fn launch_disk_usage_global_eviction_task(
             disk_usage_eviction_task(&state, task_config, storage, &conf.tenants_path(), cancel)
                 .await;
-            info!("disk usage based eviction task finishing");
             Ok(())
         },
     );
@@ -126,13 +125,16 @@ async fn disk_usage_eviction_task(
     tenants_dir: &Path,
     cancel: CancellationToken,
 ) {
+    scopeguard::defer! {
+        info!("disk usage based eviction task finishing");
+    };
     use crate::tenant::tasks::random_init_delay;
     {
         if random_init_delay(task_config.period, &cancel)
             .await
             .is_err()
         {
             info!("shutting down");
             return;
         }
     }
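
The `random_init_delay` call above jitters the task's start-up; an `Err` return means the task was cancelled during the delay and should shut down. A sketch of what such a helper could look like (assumed signature and error type; the real helper lives in `crate::tenant::tasks`):

```rust
use std::time::Duration;

use rand::Rng;
use tokio_util::sync::CancellationToken;

/// Sleep for a random fraction of `period`. Err(()) means we were cancelled
/// before the delay elapsed, so the caller should return immediately.
async fn random_init_delay(period: Duration, cancel: &CancellationToken) -> Result<(), ()> {
    let delay = period.mul_f64(rand::thread_rng().gen_range(0.0..=1.0));
    tokio::select! {
        _ = tokio::time::sleep(delay) => Ok(()),
        _ = cancel.cancelled() => Err(()),
    }
}
```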
@@ -167,7 +169,6 @@ async fn disk_usage_eviction_task(
         tokio::select! {
             _ = tokio::time::sleep_until(sleep_until) => {},
             _ = cancel.cancelled() => {
-                info!("shutting down");
                 break
             }
         }
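
The hunks above trade per-branch `shutting down` logs for a single deferred `finishing` log: `scopeguard::defer!` runs its body whenever the enclosing scope is left, by any path. A minimal sketch of the pattern under those assumptions:

```rust
use tokio_util::sync::CancellationToken;
use tracing::info;

async fn some_task(cancel: CancellationToken) {
    // Runs on every exit path (early return, break out of the loop, or the
    // future being dropped), replacing per-branch "shutting down" logs.
    scopeguard::defer! {
        info!("task finishing");
    };
    loop {
        tokio::select! {
            _ = cancel.cancelled() => break,
            _ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
                // periodic work goes here
            }
        }
    }
}
```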


@@ -70,7 +70,6 @@ impl Timeline {
             };
             self_clone.eviction_task(cancel).await;
-            info!("eviction task finishing");
             Ok(())
         },
     );
@@ -78,6 +77,9 @@ impl Timeline {
     #[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
     async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
+        scopeguard::defer! {
+            info!("eviction task finishing");
+        }
         use crate::tenant::tasks::random_init_delay;
         {
             let policy = self.get_eviction_policy();
@@ -86,7 +88,6 @@ impl Timeline {
                 EvictionPolicy::NoEviction => Duration::from_secs(10),
             };
             if random_init_delay(period, &cancel).await.is_err() {
-                info!("shutting down");
                 return;
             }
         }
@@ -101,7 +102,6 @@ impl Timeline {
             ControlFlow::Continue(sleep_until) => {
                 tokio::select! {
                     _ = cancel.cancelled() => {
-                        info!("shutting down");
                         break;
                     }
                     _ = tokio::time::sleep_until(sleep_until) => { }
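
The `#[instrument]` attribute in the second hunk is what puts the ids into scope for every log line in the task body; `skip_all` avoids recording the arguments themselves. A minimal sketch of the same shape, with placeholder `u32` ids standing in for the real id types:

```rust
use std::sync::Arc;

use tracing::{info, instrument};

struct Timeline {
    tenant_id: u32,
    timeline_id: u32,
}

impl Timeline {
    // skip_all: record only the named fields, not the arguments.
    #[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
    async fn eviction_task(self: Arc<Self>) {
        // Both ids appear on this line without being spelled out here.
        info!("starting eviction iteration");
    }
}
```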


@@ -71,6 +71,8 @@ pub(super) async fn handle_walreceiver_connection(
     ctx: RequestContext,
     node: NodeId,
 ) -> anyhow::Result<()> {
+    debug_assert_current_span_has_tenant_and_timeline_id();
+
     WALRECEIVER_STARTED_CONNECTIONS.inc();
     // Connect to the database in replication mode.
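
The `debug_assert_current_span_has_tenant_and_timeline_id` call guards against regressions of exactly this bug. Verifying specific span fields needs subscriber support, but a weaker debug-only check is easy to sketch; this is a hypothetical body, and the real helper may differ:

```rust
use tracing::Span;

/// Debug-only guard: panic if the caller is not inside any span at all.
/// (Checking for the specific tenant_id/timeline_id fields requires help
/// from the subscriber; this sketch only checks that some span is active.)
fn debug_assert_current_span_has_tenant_and_timeline_id() {
    debug_assert!(
        !Span::current().is_none(),
        "expected to be called within a span carrying tenant_id and timeline_id"
    );
}
```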
@@ -140,6 +142,9 @@ pub(super) async fn handle_walreceiver_connection(
             }
             Ok(())
         }
+        // Enrich the log lines emitted by this closure with meaningful context.
+        // TODO: technically, this task outlives the surrounding function, so, the
+        // spans won't be properly nested.
         .instrument(tracing::info_span!("poller")),
     );
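
The `.instrument(...)` in the last hunk attaches the `poller` span to a future that is spawned and can outlive its caller, which is why the TODO notes the spans won't nest properly. A minimal sketch of that pattern:

```rust
use tracing::{info, info_span, Instrument};

fn start_poller() {
    tokio::spawn(
        async {
            // Logged inside the "poller" span even after start_poller returns.
            info!("polling");
        }
        // The span must be attached explicitly: a spawned task does not run in
        // the caller's span. Its parent span may close before the task does,
        // which is the nesting caveat in the TODO above.
        .instrument(info_span!("poller")),
    );
}
```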