safekeeper,pageserver: add heap profiling (#9778)

## Problem

We don't have good observability for memory usage. This would be useful
e.g. to debug OOM incidents or optimize performance or resource usage.

We would also like to use continuous profiling with e.g. [Grafana Cloud
Profiles](https://grafana.com/products/cloud/profiles-for-continuous-profiling/)
(see https://github.com/neondatabase/cloud/issues/14888).

This PR is intended as a proof of concept, to try it out in staging and
drive further discussions about profiling more broadly.

Touches https://github.com/neondatabase/neon/issues/9534.
Touches https://github.com/neondatabase/cloud/issues/14888.
Depends on #9779.
Depends on #9780.

## Summary of changes

Adds a HTTP route `/profile/heap` that takes a heap profile and returns
it. Query parameters:

* `format`: output format (`jemalloc` or `pprof`; default `pprof`).

Unlike CPU profiles (see #9764), heap profiles are not symbolized and
require the original binary to translate addresses to function names. To
make this work with Grafana, we'll probably have to symbolize the
process server-side -- this is left as future work, as is other output
formats like SVG.

Heap profiles don't work on macOS due to limitations in jemalloc.
This commit is contained in:
Erik Grinaker
2024-12-03 12:35:59 +01:00
committed by GitHub
parent a2a942f93c
commit dcb24ce170
10 changed files with 175 additions and 28 deletions

89
Cargo.lock generated
View File

@@ -301,7 +301,7 @@ dependencies = [
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand 2.0.0",
"fastrand 2.2.0",
"hex",
"http 0.2.9",
"hyper 0.14.30",
@@ -341,7 +341,7 @@ dependencies = [
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand 2.0.0",
"fastrand 2.2.0",
"http 0.2.9",
"http-body 0.4.5",
"once_cell",
@@ -417,7 +417,7 @@ dependencies = [
"aws-smithy-xml",
"aws-types",
"bytes",
"fastrand 2.0.0",
"fastrand 2.2.0",
"hex",
"hmac",
"http 0.2.9",
@@ -621,7 +621,7 @@ dependencies = [
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"fastrand 2.0.0",
"fastrand 2.2.0",
"h2 0.3.26",
"http 0.2.9",
"http-body 0.4.5",
@@ -2054,9 +2054,9 @@ dependencies = [
[[package]]
name = "fastrand"
version = "2.0.0"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764"
checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
[[package]]
name = "ff"
@@ -2912,6 +2912,23 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "jemalloc_pprof"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb"
dependencies = [
"anyhow",
"libc",
"mappings",
"once_cell",
"pprof_util",
"tempfile",
"tikv-jemalloc-ctl",
"tokio",
"tracing",
]
[[package]]
name = "jobserver"
version = "0.1.32"
@@ -3022,9 +3039,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.150"
version = "0.2.167"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
[[package]]
name = "libloading"
@@ -3044,9 +3061,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "linux-raw-sys"
version = "0.4.13"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "linux-raw-sys"
@@ -3079,6 +3096,19 @@ dependencies = [
"hashbrown 0.14.5",
]
[[package]]
name = "mappings"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e"
dependencies = [
"anyhow",
"libc",
"once_cell",
"pprof_util",
"tracing",
]
[[package]]
name = "matchers"
version = "0.1.0"
@@ -3346,6 +3376,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
@@ -3434,6 +3465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
dependencies = [
"autocfg",
"num-bigint",
"num-integer",
"num-traits",
]
@@ -3497,9 +3529,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.18.0"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "oorandom"
@@ -4298,6 +4330,19 @@ dependencies = [
"thiserror",
]
[[package]]
name = "pprof_util"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781"
dependencies = [
"anyhow",
"flate2",
"num",
"paste",
"prost",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
@@ -5220,14 +5265,14 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.28"
version = "0.38.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
dependencies = [
"bitflags 2.4.1",
"errno",
"libc",
"linux-raw-sys 0.4.13",
"linux-raw-sys 0.4.14",
"windows-sys 0.52.0",
]
@@ -6251,13 +6296,13 @@ dependencies = [
[[package]]
name = "tempfile"
version = "3.9.0"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c"
dependencies = [
"cfg-if",
"fastrand 2.0.0",
"redox_syscall 0.4.1",
"fastrand 2.2.0",
"once_cell",
"rustix",
"windows-sys 0.52.0",
]
@@ -7058,6 +7103,7 @@ dependencies = [
"hex-literal",
"humantime",
"hyper 0.14.30",
"jemalloc_pprof",
"jsonwebtoken",
"metrics",
"nix 0.27.1",
@@ -7644,8 +7690,12 @@ dependencies = [
"memchr",
"nix 0.26.4",
"nom",
"num",
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits",
"once_cell",
"parquet",
@@ -7669,6 +7719,7 @@ dependencies = [
"subtle",
"syn 2.0.90",
"sync_wrapper 0.1.2",
"tikv-jemalloc-ctl",
"tikv-jemalloc-sys",
"time",
"time-macros",

View File

@@ -115,6 +115,7 @@ indoc = "2"
ipnet = "2.10.0"
itertools = "0.10"
itoa = "1.0.11"
jemalloc_pprof = "0.6"
jsonwebtoken = "9"
lasso = "0.7"
libc = "0.2"
@@ -175,7 +176,7 @@ sync_wrapper = "0.1.2"
tar = "0.4"
test-context = "0.3"
thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["stats"] }
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }

View File

@@ -26,6 +26,7 @@ humantime.workspace = true
hyper0 = { workspace = true, features = ["full"] }
fail.workspace = true
futures = { workspace = true}
jemalloc_pprof.workspace = true
jsonwebtoken.workspace = true
nix.workspace = true
once_cell.workspace = true

View File

@@ -10,6 +10,7 @@ use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
use once_cell::sync::Lazy;
use routerify::ext::RequestExt;
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
use tokio_util::io::ReaderStream;
use tracing::{debug, info, info_span, warn, Instrument};
use std::future::Future;
@@ -407,6 +408,69 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, A
}
}
/// Generates heap profiles.
///
/// This only works with jemalloc on Linux.
pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
enum Format {
Jemalloc,
Pprof,
}
// Parameters.
let format = match get_query_param(&req, "format")?.as_deref() {
None => Format::Pprof,
Some("jemalloc") => Format::Jemalloc,
Some("pprof") => Format::Pprof,
Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
};
// Obtain profiler handle.
let mut prof_ctl = jemalloc_pprof::PROF_CTL
.as_ref()
.ok_or(ApiError::InternalServerError(anyhow!(
"heap profiling not enabled"
)))?
.lock()
.await;
if !prof_ctl.activated() {
return Err(ApiError::InternalServerError(anyhow!(
"heap profiling not enabled"
)));
}
// Take and return the profile.
match format {
Format::Jemalloc => {
// NB: file is an open handle to a tempfile that's already deleted.
let file = tokio::task::spawn_blocking(move || prof_ctl.dump())
.await
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
.map_err(ApiError::InternalServerError)?;
let stream = ReaderStream::new(tokio::fs::File::from_std(file));
Response::builder()
.status(200)
.header(CONTENT_TYPE, "application/octet-stream")
.header(CONTENT_DISPOSITION, "attachment; filename=\"heap.dump\"")
.body(Body::wrap_stream(stream))
.map_err(|err| ApiError::InternalServerError(err.into()))
}
Format::Pprof => {
let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())
.await
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
.map_err(ApiError::InternalServerError)?;
Response::builder()
.status(200)
.header(CONTENT_TYPE, "application/octet-stream")
.header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"")
.body(Body::from(data))
.map_err(|err| ApiError::InternalServerError(err.into()))
}
}
}
pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
) -> Middleware<B, ApiError> {
Middleware::pre(move |req| async move {

View File

@@ -53,6 +53,11 @@ project_build_tag!(BUILD_TAG);
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
const PID_FILE_NAME: &str = "pageserver.pid";
const FEATURES: &[&str] = &[

View File

@@ -56,9 +56,9 @@ use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::JwtAuth;
use utils::failpoint_support::failpoints_handler;
use utils::http::endpoint::profile_cpu_handler;
use utils::http::endpoint::prometheus_metrics_handler;
use utils::http::endpoint::request_span;
use utils::http::endpoint::{
profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
};
use utils::http::request::must_parse_query_param;
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
@@ -155,6 +155,7 @@ impl State {
"/swagger.yml",
"/metrics",
"/profile/cpu",
"/profile/heap",
];
Ok(Self {
conf,
@@ -3203,6 +3204,7 @@ pub fn make_router(
.data(state)
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
.get("/profile/heap", |r| request_span(r, profile_heap_handler))
.get("/v1/status", |r| api_handler(r, status_handler))
.put("/v1/failpoints", |r| {
testing_api_handler("manage failpoints", r, failpoints_handler)

View File

@@ -24,9 +24,15 @@ const KB: usize = 1024;
const MB: usize = 1024 * KB;
const GB: usize = 1024 * MB;
/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB.
/// This mirrors the configuration in bin/safekeeper.rs.
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
// Register benchmarks with Criterion.
criterion_group!(
name = benches;

View File

@@ -51,6 +51,11 @@ use utils::{
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
const PID_FILE_NAME: &str = "safekeeper.pid";
const ID_FILE_NAME: &str = "safekeeper.id";

View File

@@ -14,7 +14,8 @@ use tokio_util::sync::CancellationToken;
use tracing::{info_span, Instrument};
use utils::failpoint_support::failpoints_handler;
use utils::http::endpoint::{
profile_cpu_handler, prometheus_metrics_handler, request_span, ChannelWriter,
profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
ChannelWriter,
};
use utils::http::request::parse_query_param;
@@ -573,7 +574,8 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
let mut router = endpoint::make_router();
if conf.http_auth.is_some() {
router = router.middleware(auth_middleware(|request| {
const ALLOWLIST_ROUTES: &[&str] = &["/v1/status", "/metrics", "/profile/cpu"];
const ALLOWLIST_ROUTES: &[&str] =
&["/v1/status", "/metrics", "/profile/cpu", "profile/heap"];
if ALLOWLIST_ROUTES.contains(&request.uri().path()) {
None
} else {
@@ -594,6 +596,7 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
.data(auth)
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
.get("/profile/heap", |r| request_span(r, profile_heap_handler))
.get("/v1/status", |r| request_span(r, status_handler))
.put("/v1/failpoints", |r| {
request_span(r, move |r| async {

View File

@@ -55,12 +55,16 @@ log = { version = "0.4", default-features = false, features = ["std"] }
memchr = { version = "2" }
nix = { version = "0.26" }
nom = { version = "7" }
num = { version = "0.4" }
num-bigint = { version = "0.4" }
num-complex = { version = "0.4", default-features = false, features = ["std"] }
num-integer = { version = "0.1", features = ["i128"] }
num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] }
num-traits = { version = "0.2", features = ["i128", "libm"] }
once_cell = { version = "1" }
parquet = { version = "53", default-features = false, features = ["zstd"] }
prost = { version = "0.13", features = ["prost-derive"] }
prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] }
rand = { version = "0.8", features = ["small_rng"] }
regex = { version = "1" }
regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
@@ -76,7 +80,8 @@ smallvec = { version = "1", default-features = false, features = ["const_new", "
spki = { version = "0.7", default-features = false, features = ["pem", "std"] }
subtle = { version = "2" }
sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] }
tikv-jemalloc-sys = { version = "0.6", features = ["stats"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats", "use_std"] }
tikv-jemalloc-sys = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
time = { version = "0.3", features = ["macros", "serde-well-known"] }
tokio = { version = "1", features = ["full", "test-util"] }
tokio-rustls = { version = "0.26", default-features = false, features = ["logging", "ring", "tls12"] }
@@ -111,14 +116,18 @@ libc = { version = "0.2", features = ["extra_traits", "use_std"] }
log = { version = "0.4", default-features = false, features = ["std"] }
memchr = { version = "2" }
nom = { version = "7" }
num = { version = "0.4" }
num-bigint = { version = "0.4" }
num-complex = { version = "0.4", default-features = false, features = ["std"] }
num-integer = { version = "0.1", features = ["i128"] }
num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] }
num-traits = { version = "0.2", features = ["i128", "libm"] }
once_cell = { version = "1" }
parquet = { version = "53", default-features = false, features = ["zstd"] }
prettyplease = { version = "0.2", default-features = false, features = ["verbatim"] }
proc-macro2 = { version = "1" }
prost = { version = "0.13", features = ["prost-derive"] }
prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] }
quote = { version = "1" }
regex = { version = "1" }
regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }