From 55106aa98164e0ff0f3107805f60f367fa84360f Mon Sep 17 00:00:00 2001
From: Christian Schwarz
Date: Mon, 9 Oct 2023 19:47:17 +0000
Subject: [PATCH] Revert "WIP limit eviction task concurrency"

This reverts commit 64680b13739d75d7a78416ff6b8ebec2c4c73fef.
---
 pageserver/src/tenant/tasks.rs                 | 46 -------------------
 pageserver/src/tenant/timeline.rs              | 37 +++++++++++++--
 .../src/tenant/timeline/eviction_task.rs       |  2 -
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index 6f432cefbc..df3ffd08d3 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -14,52 +14,6 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::completion;
 
-static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore> =
-    once_cell::sync::Lazy::new(|| {
-        let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS;
-        let permits = usize::max(
-            1,
-            // while a lot of the work is done on spawn_blocking, we still do
-            // repartitioning in the async context. this should give leave us some workers
-            // unblocked to be blocked on other work, hopefully easing any outside visible
-            // effects of restarts.
-            //
-            // 6/8 is a guess; previously we ran with unlimited 8 and more from
-            // spawn_blocking.
-            (total_threads * 3).checked_div(4).unwrap_or(0),
-        );
-        assert_ne!(permits, 0, "we will not be adding in permits later");
-        assert!(
-            permits < total_threads,
-            "need threads avail for shorter work"
-        );
-        tokio::sync::Semaphore::new(permits)
-    });
-
-pub(crate) async fn concurrent_background_tasks_rate_limit(
-    _ctx: &RequestContext,
-    cancel: &CancellationToken,
-) -> ControlFlow<(), impl Drop> {
-    // TODO: use request context TaskKind to get statistics on how many tasks of what kind are waiting for background task semaphore
-    tokio::select! {
-        permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
-            ControlFlow::Continue(permit)
-        },
-        _ = cancel.cancelled() => {
-            ControlFlow::Break(())
-        }
-    }
-}
-
-macro_rules! background_task_wait_permit {
-    ($ctx:expr, $cancel_token:expr) => {
-        match concurrent_background_tasks_concurrency_limit(ctx, cancel).await {
-            ControlFlow::Break(()) => return Ok(()),
-            ControlFlow::Continue(permit) => permit,
-        }
-    };
-}
-
 /// Start per tenant background loops: compaction and gc.
 pub fn start_background_loops(
     tenant: &Arc<Tenant>,
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index a34dcccafb..def8e336c1 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -158,7 +158,7 @@ pub struct Timeline {
 
     /// The generation of the tenant that instantiated us: this is used for safety when writing remote objects.
     /// Never changes for the lifetime of this [`Timeline`] object.
-    /// 
+    ///
     /// This duplicates the generation stored in LocationConf, but that structure is mutable:
    /// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.
     generation: Generation,
@@ -684,9 +684,38 @@ impl Timeline {
     ) -> anyhow::Result<()> {
         const ROUNDS: usize = 2;
 
-        // this wait probably never needs any "long time spent" logging, because we already nag if
-        // compaction task goes over it's period (20s) which is quite often in production.
-        let permit = crate::tenant::tasks::background_task_wait_permit!(ctx, cancel);
+        // static CONCURRENT_COMPACTIONS: once_cell::sync::Lazy<tokio::sync::Semaphore> =
+        //     once_cell::sync::Lazy::new(|| {
+        //         let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS;
+        //         let permits = usize::max(
+        //             1,
+        //             // while a lot of the work is done on spawn_blocking, we still do
+        //             // repartitioning in the async context. this should give leave us some workers
+        //             // unblocked to be blocked on other work, hopefully easing any outside visible
+        //             // effects of restarts.
+        //             //
+        //             // 6/8 is a guess; previously we ran with unlimited 8 and more from
+        //             // spawn_blocking.
+        //             (total_threads * 3).checked_div(4).unwrap_or(0),
+        //         );
+        //         assert_ne!(permits, 0, "we will not be adding in permits later");
+        //         assert!(
+        //             permits < total_threads,
+        //             "need threads avail for shorter work"
+        //         );
+        //         tokio::sync::Semaphore::new(permits)
+        //     });
+
+        // // this wait probably never needs any "long time spent" logging, because we already nag if
+        // // compaction task goes over it's period (20s) which is quite often in production.
+        // let _permit = tokio::select! {
+        //     permit = CONCURRENT_COMPACTIONS.acquire() => {
+        //         permit
+        //     },
+        //     _ = cancel.cancelled() => {
+        //         return Ok(());
+        //     }
+        // };
 
         let last_record_lsn = self.get_last_record_lsn();
 
diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs
index 99b29ca7dc..39f0d03a01 100644
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -150,8 +150,6 @@ impl Timeline {
     ) -> ControlFlow<()> {
         let now = SystemTime::now();
 
-        let permit = crate::tenant::tasks::background_task_wait_permit!(ctx, cancel);
-
         // If we evict layers but keep cached values derived from those layers, then
         // we face a storm of on-demand downloads after pageserver restart.
         // The reason is that the restart empties the caches, and so, the values
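
Note on the pattern being reverted: the WIP commit made the compaction and eviction loops take a permit from one global tokio semaphore (sized to roughly 3/4 of the background runtime's worker threads) before doing work, bailing out if the cancellation token fired first. The following is a standalone sketch of that pattern, not the pageserver code: the worker count is hard-coded, the helper name background_task_permit is illustrative, and it assumes the tokio, tokio-util, and once_cell crates.

use std::ops::ControlFlow;

use once_cell::sync::Lazy;
use tokio::sync::{Semaphore, SemaphorePermit};
use tokio_util::sync::CancellationToken;

// Assumed worker-thread count; the reverted code read it from
// task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS instead.
const WORKER_THREADS: usize = 8;

// One global semaphore shared by all background loops: 3/4 of the workers may run
// long background work, the rest stay free for shorter tasks.
static CONCURRENT_BACKGROUND_TASKS: Lazy<Semaphore> = Lazy::new(|| {
    let permits = usize::max(1, WORKER_THREADS * 3 / 4);
    assert!(permits < WORKER_THREADS, "keep some workers free for short work");
    Semaphore::new(permits)
});

/// Wait for a permit, or give up if shutdown was requested first.
async fn background_task_permit(
    cancel: &CancellationToken,
) -> ControlFlow<(), SemaphorePermit<'static>> {
    tokio::select! {
        permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
            // The semaphore is never closed, so acquire() cannot fail here.
            ControlFlow::Continue(permit.expect("semaphore is never closed"))
        }
        _ = cancel.cancelled() => ControlFlow::Break(()),
    }
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    match background_task_permit(&cancel).await {
        // Holding `_permit` for the scope of the work is what bounds concurrency;
        // the permit is released when it is dropped.
        ControlFlow::Continue(_permit) => println!("running background task"),
        ControlFlow::Break(()) => println!("shutting down, skipping task"),
    }
}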