From 6bc756129065cb3d2e1be92289fce749e2920891 Mon Sep 17 00:00:00 2001
From: Christian Schwarz <me@cschwarz.com>
Date: Fri, 23 Jun 2023 20:43:20 +0200
Subject: [PATCH] don't use MGMT_REQUEST_RUNTIME for consumption metrics
 synthetic size worker

The consumption metrics synthetic size worker does logical size calculation.
Logical size calculation currently does synchronous disk IO.
This blocks the MGMT_REQUEST_RUNTIME's executor threads, starving other futures.

While work is under way to move the synchronous disk IO into spawn_blocking,
the quick fix here is to use the BACKGROUND_RUNTIME instead of MGMT_REQUEST_RUNTIME.

Actually, it's not just a quick fix. We simply shouldn't be blocking MGMT_REQUEST_RUNTIME
executor threads on CPU or sync disk IO.
Moving all such blocking work off that runtime isn't done yet, as many of the mgmt
tasks still _do_ disk IO, but their IO is not as intensive as the logical size
calculations that we're fixing here.

While we're at it, fix disk-usage-based eviction in a similar way: its task is now
spawned on the BACKGROUND_RUNTIME as well.
It wasn't the culprit here, according to prod logs, but it can theoretically be
a little CPU-intensive.

More context, including graphs from Prod:
https://neondb.slack.com/archives/C03F5SM1N02/p1687541681336949

(cherry picked from commit d6e35222ea592428b78401ff0053b51424674e03)
---
 pageserver/src/bin/pageserver.rs | 82 ++++++++++++++++----------------
 pageserver/src/http/routes.rs    |  4 +-
 2 files changed, 42 insertions(+), 44 deletions(-)

diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 1fa5e4ab3b..b01ace63e4 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -495,50 +495,50 @@ fn start_pageserver(
                 Ok(())
             },
         );
+    }
 
-        if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
-            let background_jobs_barrier = background_jobs_barrier;
-            let metrics_ctx = RequestContext::todo_child(
-                TaskKind::MetricsCollection,
-                // This task itself shouldn't download anything.
-                // The actual size calculation does need downloads, and
-                // creates a child context with the right DownloadBehavior.
-                DownloadBehavior::Error,
-            );
-            task_mgr::spawn(
-                MGMT_REQUEST_RUNTIME.handle(),
-                TaskKind::MetricsCollection,
-                None,
-                None,
-                "consumption metrics collection",
-                true,
-                async move {
-                    // first wait until background jobs are cleared to launch.
-                    //
-                    // this is because we only process active tenants and timelines, and the
-                    // Timeline::get_current_logical_size will spawn the logical size calculation,
-                    // which will not be rate-limited.
-                    let cancel = task_mgr::shutdown_token();
+    if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
+        let background_jobs_barrier = background_jobs_barrier;
+        let metrics_ctx = RequestContext::todo_child(
+            TaskKind::MetricsCollection,
+            // This task itself shouldn't download anything.
+            // The actual size calculation does need downloads, and
+            // creates a child context with the right DownloadBehavior.
+            DownloadBehavior::Error,
+        );
+        task_mgr::spawn(
+            crate::BACKGROUND_RUNTIME.handle(),
+            TaskKind::MetricsCollection,
+            None,
+            None,
+            "consumption metrics collection",
+            true,
+            async move {
+                // first wait until background jobs are cleared to launch.
+                //
+                // this is because we only process active tenants and timelines, and the
+                // Timeline::get_current_logical_size will spawn the logical size calculation,
+                // which will not be rate-limited.
+                let cancel = task_mgr::shutdown_token();
 
-                    tokio::select! {
-                        _ = cancel.cancelled() => { return Ok(()); },
-                        _ = background_jobs_barrier.wait() => {}
-                    };
+                tokio::select! {
+                    _ = cancel.cancelled() => { return Ok(()); },
+                    _ = background_jobs_barrier.wait() => {}
+                };
 
-                    pageserver::consumption_metrics::collect_metrics(
-                        metric_collection_endpoint,
-                        conf.metric_collection_interval,
-                        conf.cached_metric_collection_interval,
-                        conf.synthetic_size_calculation_interval,
-                        conf.id,
-                        metrics_ctx,
-                    )
-                    .instrument(info_span!("metrics_collection"))
-                    .await?;
-                    Ok(())
-                },
-            );
-        }
+                pageserver::consumption_metrics::collect_metrics(
+                    metric_collection_endpoint,
+                    conf.metric_collection_interval,
+                    conf.cached_metric_collection_interval,
+                    conf.synthetic_size_calculation_interval,
+                    conf.id,
+                    metrics_ctx,
+                )
+                .instrument(info_span!("metrics_collection"))
+                .await?;
+                Ok(())
+            },
+        );
     }
 
     // Spawn a task to listen for libpq connections. It will spawn further tasks
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index fc8da70cc0..0a55741f84 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1128,8 +1128,6 @@ async fn disk_usage_eviction_run(
         freed_bytes: 0,
     };
 
-    use crate::task_mgr::MGMT_REQUEST_RUNTIME;
-
     let (tx, rx) = tokio::sync::oneshot::channel();
 
     let state = get_state(&r);
@@ -1147,7 +1145,7 @@ async fn disk_usage_eviction_run(
     let _g = cancel.drop_guard();
 
     crate::task_mgr::spawn(
-        MGMT_REQUEST_RUNTIME.handle(),
+        crate::task_mgr::BACKGROUND_RUNTIME.handle(),
         TaskKind::DiskUsageEviction,
         None,
         None,