Compare commits

...

3 Commits

Author SHA1 Message Date
John Spray
550eadc1d5 pageserver: cargo fmt
This is in a separate commit to make the previous ones more readable.
2023-08-03 14:12:46 +01:00
John Spray
db0deb8457 pageserver: respect task_mgr cancellation in metrics task
This previously relied on seeing a channel close, which only
happens when the Tenant is destroyed. As a result, this task ran
beyond Tenant::shutdown, whereas the idea of that shutdown function
is that all the per-tenant background tasks are joined when it completes.

Instead, stop this task as soon as background tasks for the
Tenant are cancelled, making the behavior of shutdown() much
more obvious and bringing the task into line with how we
do shutdown in other background tasks.
2023-08-03 10:01:12 +01:00
John Spray
a7ad080961 pageserver: remove bare tokio::spawn
Commit ddb9c2fe9 added this task, which is launched
with tokio::spawn rather than task_mgr::spawn, and is
not wrapped in a tracing Instrumented<>.

While the task doesn't overtly do any logging, for
consistency we should spawn all our tasks via the wrapper: in
future this task might be extended to e.g. emit log lines that
we would expect to have the proper tracing spans.
2023-08-03 09:53:38 +01:00

View File

@@ -2076,57 +2076,74 @@ impl Tenant {
) -> Tenant {
let (state, mut rx) = watch::channel(state);
tokio::spawn(async move {
let tid = tenant_id.to_string();
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::MetricsCollection,
Some(tenant_id),
None,
&format!("state metrics collector for tenant {tenant_id}"),
false,
async move {
let cancel = task_mgr::shutdown_token();
fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) {
([state.into()], matches!(state, TenantState::Broken { .. }))
}
let tid = tenant_id.to_string();
let mut tuple = inspect_state(&rx.borrow_and_update());
let is_broken = tuple.1;
let mut counted_broken = if !is_broken {
// the tenant might be ignored and reloaded, so first remove any previous set
// element. it most likely has already been scraped, as these are manual operations
// right now. most likely we will add it back very soon.
drop(crate::metrics::BROKEN_TENANTS_SET.remove_label_values(&[&tid]));
false
} else {
// add the id to the set right away, there should not be any updates on the channel
// after
crate::metrics::BROKEN_TENANTS_SET
.with_label_values(&[&tid])
.set(1);
true
};
loop {
let labels = &tuple.0;
let current = TENANT_STATE_METRIC.with_label_values(labels);
current.inc();
if rx.changed().await.is_err() {
// tenant has been dropped; decrement the counter because a tenant with that
// state is no longer in tenant map, but allow any broken set item to exist
// still.
current.dec();
break;
fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) {
([state.into()], matches!(state, TenantState::Broken { .. }))
}
current.dec();
tuple = inspect_state(&rx.borrow_and_update());
let mut tuple = inspect_state(&rx.borrow_and_update());
let is_broken = tuple.1;
if is_broken && !counted_broken {
counted_broken = true;
// insert the tenant_id (back) into the set
let mut counted_broken = if !is_broken {
// the tenant might be ignored and reloaded, so first remove any previous set
// element. it most likely has already been scraped, as these are manual operations
// right now. most likely we will add it back very soon.
drop(crate::metrics::BROKEN_TENANTS_SET.remove_label_values(&[&tid]));
false
} else {
// add the id to the set right away, there should not be any updates on the channel
// after
crate::metrics::BROKEN_TENANTS_SET
.with_label_values(&[&tid])
.inc();
.set(1);
true
};
loop {
let labels = &tuple.0;
let current = TENANT_STATE_METRIC.with_label_values(labels);
current.inc();
let changed = tokio::select! {
changed = rx.changed() => {changed},
_ = cancel.cancelled() => {return Ok(())}
};
if changed.is_err() {
// tenant has been dropped; decrement the counter because a tenant with that
// state is no longer in tenant map, but allow any broken set item to exist
// still.
current.dec();
break;
}
current.dec();
tuple = inspect_state(&rx.borrow_and_update());
let is_broken = tuple.1;
if is_broken && !counted_broken {
counted_broken = true;
// insert the tenant_id (back) into the set
crate::metrics::BROKEN_TENANTS_SET
.with_label_values(&[&tid])
.inc();
}
}
Ok(())
}
});
.instrument(info_span!("state_metrics", tenant_id = %tenant_id)),
);
Tenant {
tenant_id,