From a2902e774aaebbb3e424ad23be30a86e413ab431 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 4 Mar 2025 13:13:41 +0100 Subject: [PATCH] http-utils: generate heap profiles with jemalloc_pprof (#11075) ## Problem The code to generate symbolized pprof heap profiles and flamegraph SVGs has been upstreamed to the `jemalloc_pprof` crate: * https://github.com/polarsignals/rust-jemalloc-pprof/pull/22 * https://github.com/polarsignals/rust-jemalloc-pprof/pull/23 ## Summary of changes Use `jemalloc_pprof` to generate symbolized pprof heap profiles and flamegraph SVGs. This reintroduces a bunch of internal jemalloc stack frames that we'd previously strip, e.g. each stack now always ends with `prof_backtrace_impl` (where jemalloc takes a stack trace for heap profiling), but that seems ok. --- Cargo.lock | 18 ++- Cargo.toml | 4 +- libs/http-utils/Cargo.toml | 3 - libs/http-utils/src/endpoint.rs | 58 ++------ libs/http-utils/src/lib.rs | 1 - libs/http-utils/src/pprof.rs | 238 -------------------------------- libs/utils/Cargo.toml | 1 - 7 files changed, 21 insertions(+), 302 deletions(-) delete mode 100644 libs/http-utils/src/pprof.rs diff --git a/Cargo.lock b/Cargo.lock index a978e4d744..030753bca5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2798,12 +2798,9 @@ name = "http-utils" version = "0.1.0" dependencies = [ "anyhow", - "backtrace", "bytes", "fail", - "flate2", "hyper 0.14.30", - "inferno 0.12.0", "itertools 0.10.5", "jemalloc_pprof", "metrics", @@ -3302,9 +3299,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jemalloc_pprof" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb" +checksum = "5622af6d21ff86ed7797ef98e11b8f302da25ec69a7db9f6cde8e2e1c8df9992" dependencies = [ "anyhow", "libc", @@ -3503,9 +3500,9 @@ dependencies = [ [[package]] name = "mappings" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e" +checksum = "e434981a332777c2b3062652d16a55f8e74fa78e6b1882633f0d77399c84fc2a" dependencies = [ "anyhow", "libc", @@ -4794,12 +4791,14 @@ dependencies = [ [[package]] name = "pprof_util" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781" +checksum = "9fa015c78eed2130951e22c58d2095849391e73817ab2e74f71b0b9f63dd8416" dependencies = [ "anyhow", + "backtrace", "flate2", + "inferno 0.12.0", "num", "paste", "prost", @@ -7715,7 +7714,6 @@ dependencies = [ "anyhow", "arc-swap", "async-compression", - "backtrace", "bincode", "byteorder", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 870b3412db..2303723e43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,6 @@ anyhow = { version = "1.0", features = ["backtrace"] } arc-swap = "1.6" async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] } atomic-take = "1.1.0" -backtrace = "0.3.74" flate2 = "1.0.26" assert-json-diff = "2" async-stream = "0.3" @@ -114,11 +113,10 @@ hyper-util = "0.1" tokio-tungstenite = "0.21.0" indexmap = "2" indoc = "2" -inferno = "0.12.0" ipnet = "2.10.0" itertools = "0.10" itoa = "1.0.11" -jemalloc_pprof = "0.6" +jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] } jsonwebtoken = "9" lasso = "0.7" libc = "0.2" diff --git a/libs/http-utils/Cargo.toml b/libs/http-utils/Cargo.toml index d72e4bd012..d16dac7876 100644 --- a/libs/http-utils/Cargo.toml +++ b/libs/http-utils/Cargo.toml @@ -6,11 +6,8 @@ license.workspace = true [dependencies] anyhow.workspace = true -backtrace.workspace = true bytes.workspace = true -inferno.workspace = true fail.workspace = true -flate2.workspace = true hyper0.workspace = true itertools.workspace = true jemalloc_pprof.workspace = true diff --git a/libs/http-utils/src/endpoint.rs b/libs/http-utils/src/endpoint.rs index 6128113580..f4f93df62f 100644 --- a/libs/http-utils/src/endpoint.rs +++ b/libs/http-utils/src/endpoint.rs @@ -3,8 +3,6 @@ use std::io::Write as _; use std::str::FromStr; use std::time::Duration; -use ::pprof::ProfilerGuardBuilder; -use ::pprof::protos::Message as _; use anyhow::{Context, anyhow}; use bytes::{Bytes, BytesMut}; use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName}; @@ -12,7 +10,8 @@ use hyper::http::HeaderValue; use hyper::{Body, Method, Request, Response}; use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter}; use once_cell::sync::Lazy; -use regex::Regex; +use pprof::ProfilerGuardBuilder; +use pprof::protos::Message as _; use routerify::ext::RequestExt; use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; use tokio::sync::{Mutex, Notify, mpsc}; @@ -22,7 +21,6 @@ use tracing::{Instrument, debug, info, info_span, warn}; use utils::auth::{AuthError, Claims, SwappableJwtAuth}; use crate::error::{ApiError, api_error_handler, route_error_handler}; -use crate::pprof; use crate::request::{get_query_param, parse_query_param}; static SERVE_METRICS_COUNT: Lazy = Lazy::new(|| { @@ -449,20 +447,6 @@ pub async fn profile_heap_handler(req: Request) -> Result, Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))), }; - // Functions and mappings to strip when symbolizing pprof profiles. If true, - // also remove child frames. - static STRIP_FUNCTIONS: Lazy> = Lazy::new(|| { - vec![ - (Regex::new("^__rust").unwrap(), false), - (Regex::new("^_start$").unwrap(), false), - (Regex::new("^irallocx_prof").unwrap(), true), - (Regex::new("^prof_alloc_prep").unwrap(), true), - (Regex::new("^std::rt::lang_start").unwrap(), false), - (Regex::new("^std::sys::backtrace::__rust").unwrap(), false), - ] - }); - const STRIP_MAPPINGS: &[&str] = &["libc", "libgcc", "pthread", "vdso"]; - // Obtain profiler handle. let mut prof_ctl = jemalloc_pprof::PROF_CTL .as_ref() @@ -495,45 +479,27 @@ pub async fn profile_heap_handler(req: Request) -> Result, } Format::Pprof => { - let data = tokio::task::spawn_blocking(move || { - let bytes = prof_ctl.dump_pprof()?; - // Symbolize the profile. - // TODO: consider moving this upstream to jemalloc_pprof and avoiding the - // serialization roundtrip. - let profile = pprof::decode(&bytes)?; - let profile = pprof::symbolize(profile)?; - let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS); - pprof::encode(&profile) - }) - .await - .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? - .map_err(ApiError::InternalServerError)?; + let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; Response::builder() .status(200) .header(CONTENT_TYPE, "application/octet-stream") - .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"") + .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb.gz\"") .body(Body::from(data)) .map_err(|err| ApiError::InternalServerError(err.into())) } Format::Svg => { - let body = tokio::task::spawn_blocking(move || { - let bytes = prof_ctl.dump_pprof()?; - let profile = pprof::decode(&bytes)?; - let profile = pprof::symbolize(profile)?; - let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS); - let mut opts = inferno::flamegraph::Options::default(); - opts.title = "Heap inuse".to_string(); - opts.count_name = "bytes".to_string(); - pprof::flamegraph(profile, &mut opts) - }) - .await - .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? - .map_err(ApiError::InternalServerError)?; + let svg = tokio::task::spawn_blocking(move || prof_ctl.dump_flamegraph()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; Response::builder() .status(200) .header(CONTENT_TYPE, "image/svg+xml") - .body(Body::from(body)) + .body(Body::from(svg)) .map_err(|err| ApiError::InternalServerError(err.into())) } } diff --git a/libs/http-utils/src/lib.rs b/libs/http-utils/src/lib.rs index c692a54257..1e9b3c761a 100644 --- a/libs/http-utils/src/lib.rs +++ b/libs/http-utils/src/lib.rs @@ -2,7 +2,6 @@ pub mod endpoint; pub mod error; pub mod failpoints; pub mod json; -pub mod pprof; pub mod request; extern crate hyper0 as hyper; diff --git a/libs/http-utils/src/pprof.rs b/libs/http-utils/src/pprof.rs deleted file mode 100644 index 529017f350..0000000000 --- a/libs/http-utils/src/pprof.rs +++ /dev/null @@ -1,238 +0,0 @@ -use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; -use std::ffi::c_void; -use std::io::Write as _; - -use anyhow::bail; -use flate2::Compression; -use flate2::write::{GzDecoder, GzEncoder}; -use itertools::Itertools as _; -use pprof::protos::{Function, Line, Location, Message as _, Profile}; -use regex::Regex; - -/// Decodes a gzip-compressed Protobuf-encoded pprof profile. -pub fn decode(bytes: &[u8]) -> anyhow::Result { - let mut gz = GzDecoder::new(Vec::new()); - gz.write_all(bytes)?; - Ok(Profile::parse_from_bytes(&gz.finish()?)?) -} - -/// Encodes a pprof profile as gzip-compressed Protobuf. -pub fn encode(profile: &Profile) -> anyhow::Result> { - let mut gz = GzEncoder::new(Vec::new(), Compression::default()); - profile.write_to_writer(&mut gz)?; - Ok(gz.finish()?) -} - -/// Symbolizes a pprof profile using the current binary. -pub fn symbolize(mut profile: Profile) -> anyhow::Result { - if !profile.function.is_empty() { - return Ok(profile); // already symbolized - } - - // Collect function names. - let mut functions: HashMap = HashMap::new(); - let mut strings: HashMap = profile - .string_table - .into_iter() - .enumerate() - .map(|(i, s)| (s, i as i64)) - .collect(); - - // Helper to look up or register a string. - let mut string_id = |s: &str| -> i64 { - // Don't use .entry() to avoid unnecessary allocations. - if let Some(id) = strings.get(s) { - return *id; - } - let id = strings.len() as i64; - strings.insert(s.to_string(), id); - id - }; - - for loc in &mut profile.location { - if !loc.line.is_empty() { - continue; - } - - // Resolve the line and function for each location. - backtrace::resolve(loc.address as *mut c_void, |symbol| { - let Some(symbol_name) = symbol.name() else { - return; - }; - - let function_name = format!("{symbol_name:#}"); - let functions_len = functions.len(); - let function_id = functions - .entry(function_name) - .or_insert_with_key(|function_name| { - let function_id = functions_len as u64 + 1; - let system_name = String::from_utf8_lossy(symbol_name.as_bytes()); - let filename = symbol - .filename() - .map(|path| path.to_string_lossy()) - .unwrap_or(Cow::Borrowed("")); - Function { - id: function_id, - name: string_id(function_name), - system_name: string_id(&system_name), - filename: string_id(&filename), - ..Default::default() - } - }) - .id; - loc.line.push(Line { - function_id, - line: symbol.lineno().unwrap_or(0) as i64, - ..Default::default() - }); - }); - } - - // Store the resolved functions, and mark the mapping as resolved. - profile.function = functions.into_values().sorted_by_key(|f| f.id).collect(); - profile.string_table = strings - .into_iter() - .sorted_by_key(|(_, i)| *i) - .map(|(s, _)| s) - .collect(); - - for mapping in &mut profile.mapping { - mapping.has_functions = true; - mapping.has_filenames = true; - } - - Ok(profile) -} - -/// Strips locations (stack frames) matching the given mappings (substring) or function names -/// (regex). The function bool specifies whether child frames should be stripped as well. -/// -/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all -/// string references. -pub fn strip_locations( - mut profile: Profile, - mappings: &[&str], - functions: &[(Regex, bool)], -) -> Profile { - // Strip mappings. - let mut strip_mappings: HashSet = HashSet::new(); - - profile.mapping.retain(|mapping| { - let Some(name) = profile.string_table.get(mapping.filename as usize) else { - return true; - }; - if mappings.iter().any(|substr| name.contains(substr)) { - strip_mappings.insert(mapping.id); - return false; - } - true - }); - - // Strip functions. - let mut strip_functions: HashMap = HashMap::new(); - - profile.function.retain(|function| { - let Some(name) = profile.string_table.get(function.name as usize) else { - return true; - }; - for (regex, strip_children) in functions { - if regex.is_match(name) { - strip_functions.insert(function.id, *strip_children); - return false; - } - } - true - }); - - // Strip locations. The bool specifies whether child frames should be stripped too. - let mut strip_locations: HashMap = HashMap::new(); - - profile.location.retain(|location| { - for line in &location.line { - if let Some(strip_children) = strip_functions.get(&line.function_id) { - strip_locations.insert(location.id, *strip_children); - return false; - } - } - if strip_mappings.contains(&location.mapping_id) { - strip_locations.insert(location.id, false); - return false; - } - true - }); - - // Strip sample locations. - for sample in &mut profile.sample { - // First, find the uppermost function with child removal and truncate the stack. - if let Some(truncate) = sample - .location_id - .iter() - .rposition(|id| strip_locations.get(id) == Some(&true)) - { - sample.location_id.drain(..=truncate); - } - // Next, strip any individual frames without child removal. - sample - .location_id - .retain(|id| !strip_locations.contains_key(id)); - } - - profile -} - -/// Generates an SVG flamegraph from a symbolized pprof profile. -pub fn flamegraph( - profile: Profile, - opts: &mut inferno::flamegraph::Options, -) -> anyhow::Result> { - if profile.mapping.iter().any(|m| !m.has_functions) { - bail!("profile not symbolized"); - } - - // Index locations, functions, and strings. - let locations: HashMap = - profile.location.into_iter().map(|l| (l.id, l)).collect(); - let functions: HashMap = - profile.function.into_iter().map(|f| (f.id, f)).collect(); - let strings = profile.string_table; - - // Resolve stacks as function names, and sum sample values per stack. Also reverse the stack, - // since inferno expects it bottom-up. - let mut stacks: HashMap, i64> = HashMap::new(); - for sample in profile.sample { - let mut stack = Vec::with_capacity(sample.location_id.len()); - for location in sample.location_id.into_iter().rev() { - let Some(location) = locations.get(&location) else { - bail!("missing location {location}"); - }; - for line in location.line.iter().rev() { - let Some(function) = functions.get(&line.function_id) else { - bail!("missing function {}", line.function_id); - }; - let Some(name) = strings.get(function.name as usize) else { - bail!("missing string {}", function.name); - }; - stack.push(name.as_str()); - } - } - let Some(&value) = sample.value.first() else { - bail!("missing value"); - }; - *stacks.entry(stack).or_default() += value; - } - - // Construct stack lines for inferno. - let lines = stacks - .into_iter() - .map(|(stack, value)| (stack.into_iter().join(";"), value)) - .map(|(stack, value)| format!("{stack} {value}")) - .sorted() - .collect_vec(); - - // Construct the flamegraph. - let mut bytes = Vec::new(); - let lines = lines.iter().map(|line| line.as_str()); - inferno::flamegraph::from_lines(opts, lines, &mut bytes)?; - Ok(bytes) -} diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 5020d82adf..ac44300a51 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -15,7 +15,6 @@ arc-swap.workspace = true sentry.workspace = true async-compression.workspace = true anyhow.workspace = true -backtrace.workspace = true bincode.workspace = true bytes.workspace = true camino.workspace = true