From fdf231c237c1929f1b11c893dc9666e4c89e1722 Mon Sep 17 00:00:00 2001 From: John Spray Date: Sun, 1 Dec 2024 18:09:58 +0000 Subject: [PATCH] storcon: don't take any Service locks in /status and /ready (#9944) ## Problem We saw unexpected container terminations when running in k8s with with small CPU resource requests. The /status and /ready handlers called `maybe_forward`, which always takes the lock on Service::inner. If there is a lot of writer lock contention, and the container is starved of CPU, this increases the likelihood that we will get killed by the kubelet. It isn't certain that this was a cause of issues, but it is a potential source that we can eliminate. ## Summary of changes - Revise logic to return immediately if the URL is in the non-forwarded list, rather than calling maybe_forward --- storage_controller/src/http.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 9b5d4caf31..39e078ba7c 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -1452,10 +1452,15 @@ async fn maybe_forward(req: Request) -> ForwardOutcome { let uri = req.uri().to_string(); let uri_for_forward = !NOT_FOR_FORWARD.contains(&uri.as_str()); + // Fast return before trying to take any Service locks, if we will never forward anyway + if !uri_for_forward { + return ForwardOutcome::NotForwarded(req); + } + let state = get_state(&req); let leadership_status = state.service.get_leadership_status(); - if leadership_status != LeadershipStatus::SteppedDown || !uri_for_forward { + if leadership_status != LeadershipStatus::SteppedDown { return ForwardOutcome::NotForwarded(req); }