From 55106aa98164e0ff0f3107805f60f367fa84360f Mon Sep 17 00:00:00 2001
From: Christian Schwarz
Date: Mon, 9 Oct 2023 19:47:17 +0000
Subject: [PATCH] Revert "WIP limit eviction task concurrency"

This reverts commit 64680b13739d75d7a78416ff6b8ebec2c4c73fef.
---
 pageserver/src/tenant/tasks.rs                 | 46 -------------------
 pageserver/src/tenant/timeline.rs              | 37 +++++++++++++--
 .../src/tenant/timeline/eviction_task.rs       |  2 -
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index 6f432cefbc..df3ffd08d3 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -14,52 +14,6 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::completion;
 
-static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore> =
-    once_cell::sync::Lazy::new(|| {
-        let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS;
-        let permits = usize::max(
-            1,
-            // while a lot of the work is done on spawn_blocking, we still do
-            // repartitioning in the async context. this should give leave us some workers
-            // unblocked to be blocked on other work, hopefully easing any outside visible
-            // effects of restarts.
-            //
-            // 6/8 is a guess; previously we ran with unlimited 8 and more from
-            // spawn_blocking.
-            (total_threads * 3).checked_div(4).unwrap_or(0),
-        );
-        assert_ne!(permits, 0, "we will not be adding in permits later");
-        assert!(
-            permits < total_threads,
-            "need threads avail for shorter work"
-        );
-        tokio::sync::Semaphore::new(permits)
-    });
-
-pub(crate) async fn concurrent_background_tasks_rate_limit(
-    _ctx: &RequestContext,
-    cancel: &CancellationToken,
-) -> ControlFlow<(), impl Drop> {
-    // TODO: use request context TaskKind to get statistics on how many tasks of what kind are waiting for background task semaphore
-    tokio::select! {
-        permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
-            ControlFlow::Continue(permit)
-        },
-        _ = cancel.cancelled() => {
-            ControlFlow::Break(())
-        }
-    }
-}
-
-macro_rules! background_task_wait_permit {
-    ($ctx:expr, $cancel_token:expr) => {
-        match concurrent_background_tasks_concurrency_limit(ctx, cancel).await {
-            ControlFlow::Break(()) => return Ok(()),
-            ControlFlow::Continue(permit) => permit,
-        }
-    };
-}
-
 /// Start per tenant background loops: compaction and gc.
 pub fn start_background_loops(
     tenant: &Arc<Tenant>,
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index a34dcccafb..def8e336c1 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -158,7 +158,7 @@ pub struct Timeline {
 
     /// The generation of the tenant that instantiated us: this is used for safety when writing remote objects.
     /// Never changes for the lifetime of this [`Timeline`] object.
-    /// 
+    ///
     /// This duplicates the generation stored in LocationConf, but that structure is mutable:
    /// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.
     generation: Generation,
@@ -684,9 +684,38 @@ impl Timeline {
     ) -> anyhow::Result<()> {
         const ROUNDS: usize = 2;
 
-        // this wait probably never needs any "long time spent" logging, because we already nag if
-        // compaction task goes over it's period (20s) which is quite often in production.
-        let permit = crate::tenant::tasks::background_task_wait_permit!(ctx, cancel);
+        // static CONCURRENT_COMPACTIONS: once_cell::sync::Lazy<tokio::sync::Semaphore> =
+        //     once_cell::sync::Lazy::new(|| {
+        //         let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS;
+        //         let permits = usize::max(
+        //             1,
+        //             // while a lot of the work is done on spawn_blocking, we still do
+        //             // repartitioning in the async context. this should give leave us some workers
+        //             // unblocked to be blocked on other work, hopefully easing any outside visible
+        //             // effects of restarts.
+        //             //
+        //             // 6/8 is a guess; previously we ran with unlimited 8 and more from
+        //             // spawn_blocking.
+        //             (total_threads * 3).checked_div(4).unwrap_or(0),
+        //         );
+        //         assert_ne!(permits, 0, "we will not be adding in permits later");
+        //         assert!(
+        //             permits < total_threads,
+        //             "need threads avail for shorter work"
+        //         );
+        //         tokio::sync::Semaphore::new(permits)
+        //     });
+
+        // // this wait probably never needs any "long time spent" logging, because we already nag if
+        // // compaction task goes over it's period (20s) which is quite often in production.
+        // let _permit = tokio::select! {
+        //     permit = CONCURRENT_COMPACTIONS.acquire() => {
+        //         permit
+        //     },
+        //     _ = cancel.cancelled() => {
+        //         return Ok(());
+        //     }
+        // };
 
         let last_record_lsn = self.get_last_record_lsn();
 
diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs
index 99b29ca7dc..39f0d03a01 100644
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -150,8 +150,6 @@ impl Timeline {
     ) -> ControlFlow<()> {
         let now = SystemTime::now();
 
-        let permit = crate::tenant::tasks::background_task_wait_permit!(ctx, cancel);
-
         // If we evict layers but keep cached values derived from those layers, then
         // we face a storm of on-demand downloads after pageserver restart.
         // The reason is that the restart empties the caches, and so, the values
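
Note on the pattern being reverted: the WIP commit made the compaction and eviction loops take a permit from one global tokio semaphore (sized to roughly 3/4 of the background runtime's worker threads) before doing work, bailing out if the cancellation token fired first. The following is a standalone sketch of that pattern, not the pageserver code: the worker count is hard-coded, the helper name background_task_permit is illustrative, and it assumes the tokio, tokio-util, and once_cell crates.

use std::ops::ControlFlow;

use once_cell::sync::Lazy;
use tokio::sync::{Semaphore, SemaphorePermit};
use tokio_util::sync::CancellationToken;

// Assumed worker-thread count; the reverted code read it from
// task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS instead.
const WORKER_THREADS: usize = 8;

// One global semaphore shared by all background loops: 3/4 of the workers may run
// long background work, the rest stay free for shorter tasks.
static CONCURRENT_BACKGROUND_TASKS: Lazy<Semaphore> = Lazy::new(|| {
    let permits = usize::max(1, WORKER_THREADS * 3 / 4);
    assert!(permits < WORKER_THREADS, "keep some workers free for short work");
    Semaphore::new(permits)
});

/// Wait for a permit, or give up if shutdown was requested first.
async fn background_task_permit(
    cancel: &CancellationToken,
) -> ControlFlow<(), SemaphorePermit<'static>> {
    tokio::select! {
        permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
            // The semaphore is never closed, so acquire() cannot fail here.
            ControlFlow::Continue(permit.expect("semaphore is never closed"))
        }
        _ = cancel.cancelled() => ControlFlow::Break(()),
    }
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    match background_task_permit(&cancel).await {
        // Holding `_permit` for the scope of the work is what bounds concurrency;
        // the permit is released when it is dropped.
        ControlFlow::Continue(_permit) => println!("running background task"),
        ControlFlow::Break(()) => println!("shutting down, skipping task"),
    }
}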