From 6bc756129065cb3d2e1be92289fce749e2920891 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 23 Jun 2023 20:43:20 +0200 Subject: [PATCH] don't use MGMT_REQUEST_RUNTIME for consumption metrics synthetic size worker The consumption metrics synthetic size worker does logical size calculation. Logical size calculation currently does synchronous disk IO. This blocks the MGMT_REQUEST_RUNTIME's executor threads, starving other futures. While there's work under way to move the synchronous disk IO into spawn_blocking, the quick fix here is to use the BACKGROUND_RUNTIME instead of MGMT_REQUEST_RUNTIME. Actually it's not just a quick fix. We simply shouldn't be blocking MGMT_REQUEST_RUNTIME executor threads on CPU or sync disk IO. That work isn't done yet, as many of the mgmt tasks still _do_ disk IO. But it's not as intensive as the logical size calculations that we're fixing here. While we're at it, fix disk-usage-based eviction in a similar way. It wasn't the culprit here, according to prod logs, but it can theoretically be a little CPU-intensive. More context, including graphs from Prod: https://neondb.slack.com/archives/C03F5SM1N02/p1687541681336949 (cherry picked from commit d6e35222ea592428b78401ff0053b51424674e03) --- pageserver/src/bin/pageserver.rs | 82 ++++++++++++++++---------------- pageserver/src/http/routes.rs | 4 +- 2 files changed, 42 insertions(+), 44 deletions(-) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 1fa5e4ab3b..b01ace63e4 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -495,50 +495,50 @@ fn start_pageserver( Ok(()) }, ); + } - if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint { - let background_jobs_barrier = background_jobs_barrier; - let metrics_ctx = RequestContext::todo_child( - TaskKind::MetricsCollection, - // This task itself shouldn't download anything. 
- // The actual size calculation does need downloads, and - // creates a child context with the right DownloadBehavior. - DownloadBehavior::Error, - ); - task_mgr::spawn( - MGMT_REQUEST_RUNTIME.handle(), - TaskKind::MetricsCollection, - None, - None, - "consumption metrics collection", - true, - async move { - // first wait until background jobs are cleared to launch. - // - // this is because we only process active tenants and timelines, and the - // Timeline::get_current_logical_size will spawn the logical size calculation, - // which will not be rate-limited. - let cancel = task_mgr::shutdown_token(); + if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint { + let background_jobs_barrier = background_jobs_barrier; + let metrics_ctx = RequestContext::todo_child( + TaskKind::MetricsCollection, + // This task itself shouldn't download anything. + // The actual size calculation does need downloads, and + // creates a child context with the right DownloadBehavior. + DownloadBehavior::Error, + ); + task_mgr::spawn( + crate::BACKGROUND_RUNTIME.handle(), + TaskKind::MetricsCollection, + None, + None, + "consumption metrics collection", + true, + async move { + // first wait until background jobs are cleared to launch. + // + // this is because we only process active tenants and timelines, and the + // Timeline::get_current_logical_size will spawn the logical size calculation, + // which will not be rate-limited. + let cancel = task_mgr::shutdown_token(); - tokio::select! { - _ = cancel.cancelled() => { return Ok(()); }, - _ = background_jobs_barrier.wait() => {} - }; + tokio::select! 
{ + _ = cancel.cancelled() => { return Ok(()); }, + _ = background_jobs_barrier.wait() => {} + }; - pageserver::consumption_metrics::collect_metrics( - metric_collection_endpoint, - conf.metric_collection_interval, - conf.cached_metric_collection_interval, - conf.synthetic_size_calculation_interval, - conf.id, - metrics_ctx, - ) - .instrument(info_span!("metrics_collection")) - .await?; - Ok(()) - }, - ); - } + pageserver::consumption_metrics::collect_metrics( + metric_collection_endpoint, + conf.metric_collection_interval, + conf.cached_metric_collection_interval, + conf.synthetic_size_calculation_interval, + conf.id, + metrics_ctx, + ) + .instrument(info_span!("metrics_collection")) + .await?; + Ok(()) + }, + ); } // Spawn a task to listen for libpq connections. It will spawn further tasks diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index fc8da70cc0..0a55741f84 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1128,8 +1128,6 @@ async fn disk_usage_eviction_run( freed_bytes: 0, }; - use crate::task_mgr::MGMT_REQUEST_RUNTIME; - let (tx, rx) = tokio::sync::oneshot::channel(); let state = get_state(&r); @@ -1147,7 +1145,7 @@ async fn disk_usage_eviction_run( let _g = cancel.drop_guard(); crate::task_mgr::spawn( - MGMT_REQUEST_RUNTIME.handle(), + crate::task_mgr::BACKGROUND_RUNTIME.handle(), TaskKind::DiskUsageEviction, None, None,