http-utils: generate heap profiles with jemalloc_pprof (#11075)

## Problem

The code to generate symbolized pprof heap profiles and flamegraph SVGs
has been upstreamed to the `jemalloc_pprof` crate:

* https://github.com/polarsignals/rust-jemalloc-pprof/pull/22
* https://github.com/polarsignals/rust-jemalloc-pprof/pull/23

## Summary of changes

Use `jemalloc_pprof` to generate symbolized pprof heap profiles and
flamegraph SVGs.

This reintroduces a bunch of internal jemalloc stack frames that we'd
previously strip, e.g. each stack now always ends with
`prof_backtrace_impl` (where jemalloc takes a stack trace for heap
profiling), but that seems ok.
This commit is contained in:
Erik Grinaker
2025-03-04 13:13:41 +01:00
committed by GitHub
parent 435bf452e6
commit a2902e774a
7 changed files with 21 additions and 302 deletions

18
Cargo.lock generated
View File

@@ -2798,12 +2798,9 @@ name = "http-utils"
version = "0.1.0"
dependencies = [
"anyhow",
"backtrace",
"bytes",
"fail",
"flate2",
"hyper 0.14.30",
"inferno 0.12.0",
"itertools 0.10.5",
"jemalloc_pprof",
"metrics",
@@ -3302,9 +3299,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "jemalloc_pprof"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb"
checksum = "5622af6d21ff86ed7797ef98e11b8f302da25ec69a7db9f6cde8e2e1c8df9992"
dependencies = [
"anyhow",
"libc",
@@ -3503,9 +3500,9 @@ dependencies = [
[[package]]
name = "mappings"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e"
checksum = "e434981a332777c2b3062652d16a55f8e74fa78e6b1882633f0d77399c84fc2a"
dependencies = [
"anyhow",
"libc",
@@ -4794,12 +4791,14 @@ dependencies = [
[[package]]
name = "pprof_util"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781"
checksum = "9fa015c78eed2130951e22c58d2095849391e73817ab2e74f71b0b9f63dd8416"
dependencies = [
"anyhow",
"backtrace",
"flate2",
"inferno 0.12.0",
"num",
"paste",
"prost",
@@ -7715,7 +7714,6 @@ dependencies = [
"anyhow",
"arc-swap",
"async-compression",
"backtrace",
"bincode",
"byteorder",
"bytes",

View File

@@ -53,7 +53,6 @@ anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0"
backtrace = "0.3.74"
flate2 = "1.0.26"
assert-json-diff = "2"
async-stream = "0.3"
@@ -114,11 +113,10 @@ hyper-util = "0.1"
tokio-tungstenite = "0.21.0"
indexmap = "2"
indoc = "2"
inferno = "0.12.0"
ipnet = "2.10.0"
itertools = "0.10"
itoa = "1.0.11"
jemalloc_pprof = "0.6"
jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] }
jsonwebtoken = "9"
lasso = "0.7"
libc = "0.2"

View File

@@ -6,11 +6,8 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
backtrace.workspace = true
bytes.workspace = true
inferno.workspace = true
fail.workspace = true
flate2.workspace = true
hyper0.workspace = true
itertools.workspace = true
jemalloc_pprof.workspace = true

View File

@@ -3,8 +3,6 @@ use std::io::Write as _;
use std::str::FromStr;
use std::time::Duration;
use ::pprof::ProfilerGuardBuilder;
use ::pprof::protos::Message as _;
use anyhow::{Context, anyhow};
use bytes::{Bytes, BytesMut};
use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName};
@@ -12,7 +10,8 @@ use hyper::http::HeaderValue;
use hyper::{Body, Method, Request, Response};
use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter};
use once_cell::sync::Lazy;
use regex::Regex;
use pprof::ProfilerGuardBuilder;
use pprof::protos::Message as _;
use routerify::ext::RequestExt;
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
use tokio::sync::{Mutex, Notify, mpsc};
@@ -22,7 +21,6 @@ use tracing::{Instrument, debug, info, info_span, warn};
use utils::auth::{AuthError, Claims, SwappableJwtAuth};
use crate::error::{ApiError, api_error_handler, route_error_handler};
use crate::pprof;
use crate::request::{get_query_param, parse_query_param};
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
@@ -449,20 +447,6 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
};
// Functions and mappings to strip when symbolizing pprof profiles. If true,
// also remove child frames.
static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
vec![
(Regex::new("^__rust").unwrap(), false),
(Regex::new("^_start$").unwrap(), false),
(Regex::new("^irallocx_prof").unwrap(), true),
(Regex::new("^prof_alloc_prep").unwrap(), true),
(Regex::new("^std::rt::lang_start").unwrap(), false),
(Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
]
});
const STRIP_MAPPINGS: &[&str] = &["libc", "libgcc", "pthread", "vdso"];
// Obtain profiler handle.
let mut prof_ctl = jemalloc_pprof::PROF_CTL
.as_ref()
@@ -495,45 +479,27 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
}
Format::Pprof => {
let data = tokio::task::spawn_blocking(move || {
let bytes = prof_ctl.dump_pprof()?;
// Symbolize the profile.
// TODO: consider moving this upstream to jemalloc_pprof and avoiding the
// serialization roundtrip.
let profile = pprof::decode(&bytes)?;
let profile = pprof::symbolize(profile)?;
let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS);
pprof::encode(&profile)
})
let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())
.await
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
.map_err(ApiError::InternalServerError)?;
Response::builder()
.status(200)
.header(CONTENT_TYPE, "application/octet-stream")
.header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"")
.header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb.gz\"")
.body(Body::from(data))
.map_err(|err| ApiError::InternalServerError(err.into()))
}
Format::Svg => {
let body = tokio::task::spawn_blocking(move || {
let bytes = prof_ctl.dump_pprof()?;
let profile = pprof::decode(&bytes)?;
let profile = pprof::symbolize(profile)?;
let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS);
let mut opts = inferno::flamegraph::Options::default();
opts.title = "Heap inuse".to_string();
opts.count_name = "bytes".to_string();
pprof::flamegraph(profile, &mut opts)
})
let svg = tokio::task::spawn_blocking(move || prof_ctl.dump_flamegraph())
.await
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
.map_err(ApiError::InternalServerError)?;
Response::builder()
.status(200)
.header(CONTENT_TYPE, "image/svg+xml")
.body(Body::from(body))
.body(Body::from(svg))
.map_err(|err| ApiError::InternalServerError(err.into()))
}
}

View File

@@ -2,7 +2,6 @@ pub mod endpoint;
pub mod error;
pub mod failpoints;
pub mod json;
pub mod pprof;
pub mod request;
extern crate hyper0 as hyper;

View File

@@ -1,238 +0,0 @@
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::ffi::c_void;
use std::io::Write as _;
use anyhow::bail;
use flate2::Compression;
use flate2::write::{GzDecoder, GzEncoder};
use itertools::Itertools as _;
use pprof::protos::{Function, Line, Location, Message as _, Profile};
use regex::Regex;
/// Decodes a gzip-compressed Protobuf-encoded pprof profile.
pub fn decode(bytes: &[u8]) -> anyhow::Result<Profile> {
let mut gz = GzDecoder::new(Vec::new());
gz.write_all(bytes)?;
Ok(Profile::parse_from_bytes(&gz.finish()?)?)
}
/// Encodes a pprof profile as gzip-compressed Protobuf.
pub fn encode(profile: &Profile) -> anyhow::Result<Vec<u8>> {
let mut gz = GzEncoder::new(Vec::new(), Compression::default());
profile.write_to_writer(&mut gz)?;
Ok(gz.finish()?)
}
/// Symbolizes a pprof profile using the current binary.
pub fn symbolize(mut profile: Profile) -> anyhow::Result<Profile> {
if !profile.function.is_empty() {
return Ok(profile); // already symbolized
}
// Collect function names.
let mut functions: HashMap<String, Function> = HashMap::new();
let mut strings: HashMap<String, i64> = profile
.string_table
.into_iter()
.enumerate()
.map(|(i, s)| (s, i as i64))
.collect();
// Helper to look up or register a string.
let mut string_id = |s: &str| -> i64 {
// Don't use .entry() to avoid unnecessary allocations.
if let Some(id) = strings.get(s) {
return *id;
}
let id = strings.len() as i64;
strings.insert(s.to_string(), id);
id
};
for loc in &mut profile.location {
if !loc.line.is_empty() {
continue;
}
// Resolve the line and function for each location.
backtrace::resolve(loc.address as *mut c_void, |symbol| {
let Some(symbol_name) = symbol.name() else {
return;
};
let function_name = format!("{symbol_name:#}");
let functions_len = functions.len();
let function_id = functions
.entry(function_name)
.or_insert_with_key(|function_name| {
let function_id = functions_len as u64 + 1;
let system_name = String::from_utf8_lossy(symbol_name.as_bytes());
let filename = symbol
.filename()
.map(|path| path.to_string_lossy())
.unwrap_or(Cow::Borrowed(""));
Function {
id: function_id,
name: string_id(function_name),
system_name: string_id(&system_name),
filename: string_id(&filename),
..Default::default()
}
})
.id;
loc.line.push(Line {
function_id,
line: symbol.lineno().unwrap_or(0) as i64,
..Default::default()
});
});
}
// Store the resolved functions, and mark the mapping as resolved.
profile.function = functions.into_values().sorted_by_key(|f| f.id).collect();
profile.string_table = strings
.into_iter()
.sorted_by_key(|(_, i)| *i)
.map(|(s, _)| s)
.collect();
for mapping in &mut profile.mapping {
mapping.has_functions = true;
mapping.has_filenames = true;
}
Ok(profile)
}
/// Strips locations (stack frames) matching the given mappings (substring) or function names
/// (regex). The function bool specifies whether child frames should be stripped as well.
///
/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all
/// string references.
pub fn strip_locations(
mut profile: Profile,
mappings: &[&str],
functions: &[(Regex, bool)],
) -> Profile {
// Strip mappings.
let mut strip_mappings: HashSet<u64> = HashSet::new();
profile.mapping.retain(|mapping| {
let Some(name) = profile.string_table.get(mapping.filename as usize) else {
return true;
};
if mappings.iter().any(|substr| name.contains(substr)) {
strip_mappings.insert(mapping.id);
return false;
}
true
});
// Strip functions.
let mut strip_functions: HashMap<u64, bool> = HashMap::new();
profile.function.retain(|function| {
let Some(name) = profile.string_table.get(function.name as usize) else {
return true;
};
for (regex, strip_children) in functions {
if regex.is_match(name) {
strip_functions.insert(function.id, *strip_children);
return false;
}
}
true
});
// Strip locations. The bool specifies whether child frames should be stripped too.
let mut strip_locations: HashMap<u64, bool> = HashMap::new();
profile.location.retain(|location| {
for line in &location.line {
if let Some(strip_children) = strip_functions.get(&line.function_id) {
strip_locations.insert(location.id, *strip_children);
return false;
}
}
if strip_mappings.contains(&location.mapping_id) {
strip_locations.insert(location.id, false);
return false;
}
true
});
// Strip sample locations.
for sample in &mut profile.sample {
// First, find the uppermost function with child removal and truncate the stack.
if let Some(truncate) = sample
.location_id
.iter()
.rposition(|id| strip_locations.get(id) == Some(&true))
{
sample.location_id.drain(..=truncate);
}
// Next, strip any individual frames without child removal.
sample
.location_id
.retain(|id| !strip_locations.contains_key(id));
}
profile
}
/// Generates an SVG flamegraph from a symbolized pprof profile.
pub fn flamegraph(
profile: Profile,
opts: &mut inferno::flamegraph::Options,
) -> anyhow::Result<Vec<u8>> {
if profile.mapping.iter().any(|m| !m.has_functions) {
bail!("profile not symbolized");
}
// Index locations, functions, and strings.
let locations: HashMap<u64, Location> =
profile.location.into_iter().map(|l| (l.id, l)).collect();
let functions: HashMap<u64, Function> =
profile.function.into_iter().map(|f| (f.id, f)).collect();
let strings = profile.string_table;
// Resolve stacks as function names, and sum sample values per stack. Also reverse the stack,
// since inferno expects it bottom-up.
let mut stacks: HashMap<Vec<&str>, i64> = HashMap::new();
for sample in profile.sample {
let mut stack = Vec::with_capacity(sample.location_id.len());
for location in sample.location_id.into_iter().rev() {
let Some(location) = locations.get(&location) else {
bail!("missing location {location}");
};
for line in location.line.iter().rev() {
let Some(function) = functions.get(&line.function_id) else {
bail!("missing function {}", line.function_id);
};
let Some(name) = strings.get(function.name as usize) else {
bail!("missing string {}", function.name);
};
stack.push(name.as_str());
}
}
let Some(&value) = sample.value.first() else {
bail!("missing value");
};
*stacks.entry(stack).or_default() += value;
}
// Construct stack lines for inferno.
let lines = stacks
.into_iter()
.map(|(stack, value)| (stack.into_iter().join(";"), value))
.map(|(stack, value)| format!("{stack} {value}"))
.sorted()
.collect_vec();
// Construct the flamegraph.
let mut bytes = Vec::new();
let lines = lines.iter().map(|line| line.as_str());
inferno::flamegraph::from_lines(opts, lines, &mut bytes)?;
Ok(bytes)
}

View File

@@ -15,7 +15,6 @@ arc-swap.workspace = true
sentry.workspace = true
async-compression.workspace = true
anyhow.workspace = true
backtrace.workspace = true
bincode.workspace = true
bytes.workspace = true
camino.workspace = true