fix: avoid starving background task permits in eviction task (#7471)

As seen with a recent incident, eviction tasks can cause pageserver-wide
permit starvation on the background task semaphore when synthetic size
calculation takes a long time for a tenant that has more than our permit
number of timelines or multiple tenants that have slow synthetic size
and total number of timelines exceeds the permits. Metric links can be
found in the internal [slack thread].

As a solution, release the permit while waiting for the state guarding
the synthetic size calculation. This will most likely hurt the eviction
task eviction performance, but that does not matter because we are
hoping to get away from it using OnlyImitiate policy anyway and rely
solely on disk usage-based eviction.

[slack thread]:
https://neondb.slack.com/archives/C06UEMLK7FE/p1713810505587809?thread_ts=1713468604.508969&cid=C06UEMLK7FE
This commit is contained in:
Joonas Koivunen
2024-04-24 11:38:59 +03:00
committed by GitHub
parent 18fd73d84a
commit a60035b23a
2 changed files with 43 additions and 27 deletions

View File

@@ -62,7 +62,7 @@ impl BackgroundLoopKind {
pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
loop_kind: BackgroundLoopKind,
_ctx: &RequestContext,
) -> impl Drop {
) -> tokio::sync::SemaphorePermit<'static> {
let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE
.with_label_values(&[loop_kind.as_static_str()])
.guard();

View File

@@ -188,24 +188,10 @@ impl Timeline {
) -> ControlFlow<()> {
let now = SystemTime::now();
let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit(
BackgroundLoopKind::Eviction,
ctx,
);
let permit = self.acquire_imitation_permit(cancel, ctx).await?;
let _permit = tokio::select! {
permit = acquire_permit => permit,
_ = cancel.cancelled() => return ControlFlow::Break(()),
_ = self.cancel.cancelled() => return ControlFlow::Break(()),
};
match self
.imitate_layer_accesses(tenant, p, cancel, gate, ctx)
.await
{
ControlFlow::Break(()) => return ControlFlow::Break(()),
ControlFlow::Continue(()) => (),
}
self.imitate_layer_accesses(tenant, p, cancel, gate, permit, ctx)
.await?;
#[derive(Debug, Default)]
struct EvictionStats {
@@ -330,19 +316,27 @@ impl Timeline {
gate: &GateGuard,
ctx: &RequestContext,
) -> ControlFlow<()> {
let permit = self.acquire_imitation_permit(cancel, ctx).await?;
self.imitate_layer_accesses(tenant, p, cancel, gate, permit, ctx)
.await
}
async fn acquire_imitation_permit(
&self,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> ControlFlow<(), tokio::sync::SemaphorePermit<'static>> {
let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit(
BackgroundLoopKind::Eviction,
ctx,
);
let _permit = tokio::select! {
permit = acquire_permit => permit,
_ = cancel.cancelled() => return ControlFlow::Break(()),
_ = self.cancel.cancelled() => return ControlFlow::Break(()),
};
self.imitate_layer_accesses(tenant, p, cancel, gate, ctx)
.await
tokio::select! {
permit = acquire_permit => ControlFlow::Continue(permit),
_ = cancel.cancelled() => ControlFlow::Break(()),
_ = self.cancel.cancelled() => ControlFlow::Break(()),
}
}
/// If we evict layers but keep cached values derived from those layers, then
@@ -376,6 +370,7 @@ impl Timeline {
p: &EvictionPolicyLayerAccessThreshold,
cancel: &CancellationToken,
gate: &GateGuard,
permit: tokio::sync::SemaphorePermit<'static>,
ctx: &RequestContext,
) -> ControlFlow<()> {
if !self.tenant_shard_id.is_shard_zero() {
@@ -408,7 +403,28 @@ impl Timeline {
// Make one of the tenant's timelines draw the short straw and run the calculation.
// The others wait until the calculation is done so that they take into account the
// imitated accesses that the winner made.
let mut state = tenant.eviction_task_tenant_state.lock().await;
let (mut state, _permit) = {
if let Ok(locked) = tenant.eviction_task_tenant_state.try_lock() {
(locked, permit)
} else {
// we might need to wait for a long time here in case of pathological synthetic
// size calculation performance
drop(permit);
let locked = tokio::select! {
locked = tenant.eviction_task_tenant_state.lock() => locked,
_ = self.cancel.cancelled() => {
return ControlFlow::Break(())
},
_ = cancel.cancelled() => {
return ControlFlow::Break(())
}
};
// then reacquire -- this will be bad if there is a lot of traffic, but because we
// released the permit, the overall latency will be much better.
let permit = self.acquire_imitation_permit(cancel, ctx).await?;
(locked, permit)
}
};
match state.last_layer_access_imitation {
Some(ts) if ts.elapsed() < inter_imitate_period => { /* no need to run */ }
_ => {