diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index df3ffd08d3..6f432cefbc 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -14,6 +14,52 @@ use tokio_util::sync::CancellationToken; use tracing::*; use utils::completion; +static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy = + once_cell::sync::Lazy::new(|| { + let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS; + let permits = usize::max( + 1, + // while a lot of the work is done on spawn_blocking, we still do + // repartitioning in the async context. this should give leave us some workers + // unblocked to be blocked on other work, hopefully easing any outside visible + // effects of restarts. + // + // 6/8 is a guess; previously we ran with unlimited 8 and more from + // spawn_blocking. + (total_threads * 3).checked_div(4).unwrap_or(0), + ); + assert_ne!(permits, 0, "we will not be adding in permits later"); + assert!( + permits < total_threads, + "need threads avail for shorter work" + ); + tokio::sync::Semaphore::new(permits) + }); + +pub(crate) async fn concurrent_background_tasks_rate_limit( + _ctx: &RequestContext, + cancel: &CancellationToken, +) -> ControlFlow<(), impl Drop> { + // TODO: use request context TaskKind to get statistics on how many tasks of what kind are waiting for background task semaphore + tokio::select! { + permit = CONCURRENT_BACKGROUND_TASKS.acquire() => { + ControlFlow::Continue(permit) + }, + _ = cancel.cancelled() => { + ControlFlow::Break(()) + } + } +} + +macro_rules! background_task_wait_permit { + ($ctx:expr, $cancel_token:expr) => { + match concurrent_background_tasks_concurrency_limit(ctx, cancel).await { + ControlFlow::Break(()) => return Ok(()), + ControlFlow::Continue(permit) => permit, + } + }; +} + /// Start per tenant background loops: compaction and gc. pub fn start_background_loops( tenant: &Arc, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index def8e336c1..a34dcccafb 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -158,7 +158,7 @@ pub struct Timeline { /// The generation of the tenant that instantiated us: this is used for safety when writing remote objects. /// Never changes for the lifetime of this [`Timeline`] object. - /// + /// /// This duplicates the generation stored in LocationConf, but that structure is mutable: /// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime. generation: Generation, @@ -684,38 +684,9 @@ impl Timeline { ) -> anyhow::Result<()> { const ROUNDS: usize = 2; - // static CONCURRENT_COMPACTIONS: once_cell::sync::Lazy = - // once_cell::sync::Lazy::new(|| { - // let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS; - // let permits = usize::max( - // 1, - // // while a lot of the work is done on spawn_blocking, we still do - // // repartitioning in the async context. this should give leave us some workers - // // unblocked to be blocked on other work, hopefully easing any outside visible - // // effects of restarts. - // // - // // 6/8 is a guess; previously we ran with unlimited 8 and more from - // // spawn_blocking. - // (total_threads * 3).checked_div(4).unwrap_or(0), - // ); - // assert_ne!(permits, 0, "we will not be adding in permits later"); - // assert!( - // permits < total_threads, - // "need threads avail for shorter work" - // ); - // tokio::sync::Semaphore::new(permits) - // }); - - // // this wait probably never needs any "long time spent" logging, because we already nag if - // // compaction task goes over it's period (20s) which is quite often in production. - // let _permit = tokio::select! { - // permit = CONCURRENT_COMPACTIONS.acquire() => { - // permit - // }, - // _ = cancel.cancelled() => { - // return Ok(()); - // } - // }; + // this wait probably never needs any "long time spent" logging, because we already nag if + // compaction task goes over it's period (20s) which is quite often in production. + let permit = crate::tenant::tasks::background_task_wait_permit!(ctx, cancel); let last_record_lsn = self.get_last_record_lsn(); diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 39f0d03a01..99b29ca7dc 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -150,6 +150,8 @@ impl Timeline { ) -> ControlFlow<()> { let now = SystemTime::now(); + let permit = crate::tenant::tasks::background_task_wait_permit!(ctx, cancel); + // If we evict layers but keep cached values derived from those layers, then // we face a storm of on-demand downloads after pageserver restart. // The reason is that the restart empties the caches, and so, the values