From 37962e729e596bf6cb48bf8743f0a3b1372d600f Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Thu, 21 Nov 2024 14:19:02 -0600 Subject: [PATCH] Fix panic in compute_ctl metrics collection (#9831) Calling unwrap on the encoder is a little overzealous. One of the errors that can be returned by the encode function in particular is the non-existence of metrics for a metric family, so we should preemptively filter instances like that out. I believe that this panic was caused by a race condition between the Prometheus collector and the compute collecting the installed extensions metric for the first time. The HTTP server is spawned on a separate thread before we even start bringing up Postgres. Signed-off-by: Tristan Partin --- compute_tools/src/http/api.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 3677582c11..8a047634df 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -20,6 +20,7 @@ use anyhow::Result; use hyper::header::CONTENT_TYPE; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Method, Request, Response, Server, StatusCode}; +use metrics::proto::MetricFamily; use metrics::Encoder; use metrics::TextEncoder; use tokio::task; @@ -72,10 +73,22 @@ async fn routes(req: Request, compute: &Arc) -> Response { debug!("serving /metrics GET request"); - let mut buffer = vec![]; - let metrics = installed_extensions::collect(); + // When we call TextEncoder::encode() below, it will immediately + // return an error if a metric family has no metrics, so we need to + // preemptively filter out metric families with no metrics. 
+ let metrics = installed_extensions::collect() + .into_iter() + .filter(|m| !m.get_metric().is_empty()) + .collect::<Vec<MetricFamily>>(); + let encoder = TextEncoder::new(); - encoder.encode(&metrics, &mut buffer).unwrap(); + let mut buffer = vec![]; + + if let Err(err) = encoder.encode(&metrics, &mut buffer) { + let msg = format!("error handling /metrics request: {err}"); + error!(msg); + return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR); + } match Response::builder() .status(StatusCode::OK)