diff --git a/Cargo.lock b/Cargo.lock index ba02e3b11d..b2769e59f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -301,7 +301,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "hex", "http 0.2.9", "hyper 0.14.30", @@ -341,7 +341,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "http 0.2.9", "http-body 0.4.5", "once_cell", @@ -417,7 +417,7 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "hex", "hmac", "http 0.2.9", @@ -621,7 +621,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand 2.0.0", + "fastrand 2.2.0", "h2 0.3.26", "http 0.2.9", "http-body 0.4.5", @@ -2054,9 +2054,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "ff" @@ -2912,6 +2912,23 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jemalloc_pprof" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb" +dependencies = [ + "anyhow", + "libc", + "mappings", + "once_cell", + "pprof_util", + "tempfile", + "tikv-jemalloc-ctl", + "tokio", + "tracing", +] + [[package]] name = "jobserver" version = "0.1.32" @@ -3022,9 +3039,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libloading" @@ -3044,9 +3061,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "linux-raw-sys" @@ -3079,6 +3096,19 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "mappings" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e" +dependencies = [ + "anyhow", + "libc", + "once_cell", + "pprof_util", + "tracing", +] + [[package]] name = "matchers" version = "0.1.0" @@ -3346,6 +3376,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ + "num-bigint", "num-complex", "num-integer", "num-iter", @@ -3434,6 +3465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", + "num-bigint", "num-integer", "num-traits", ] @@ -3497,9 +3529,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oorandom" @@ -4298,6 +4330,19 @@ dependencies = [ "thiserror", ] +[[package]] +name = "pprof_util" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781" +dependencies = [ + "anyhow", + "flate2", + "num", + "paste", + "prost", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -5220,14 +5265,14 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags 2.4.1", "errno", "libc", - "linux-raw-sys 0.4.13", + "linux-raw-sys 0.4.14", "windows-sys 0.52.0", ] @@ -6251,13 +6296,13 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.9.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand 2.0.0", - "redox_syscall 0.4.1", + "fastrand 2.2.0", + "once_cell", "rustix", "windows-sys 0.52.0", ] @@ -7058,6 +7103,7 @@ dependencies = [ "hex-literal", "humantime", "hyper 0.14.30", + "jemalloc_pprof", "jsonwebtoken", "metrics", "nix 0.27.1", @@ -7644,8 +7690,12 @@ dependencies = [ "memchr", "nix 0.26.4", "nom", + "num", "num-bigint", + "num-complex", "num-integer", + "num-iter", + "num-rational", "num-traits", "once_cell", "parquet", @@ -7669,6 +7719,7 @@ dependencies = [ "subtle", "syn 2.0.90", "sync_wrapper 0.1.2", + "tikv-jemalloc-ctl", "tikv-jemalloc-sys", "time", "time-macros", diff --git a/Cargo.toml b/Cargo.toml index 036dc01057..91fa6a2607 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,6 +115,7 @@ indoc = "2" ipnet = "2.10.0" itertools = "0.10" itoa = "1.0.11" +jemalloc_pprof = "0.6" jsonwebtoken = "9" lasso = "0.7" libc = "0.2" @@ -175,7 +176,7 @@ sync_wrapper = "0.1.2" tar = "0.4" test-context = "0.3" thiserror = "1.0" -tikv-jemallocator = { version = "0.6", features = ["stats"] } +tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } tokio = { version = "1.17", features = ["macros"] } tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 5648072a83..66500fb141 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -26,6 +26,7 @@ humantime.workspace = true hyper0 = { workspace = true, features = ["full"] } fail.workspace = true futures = { workspace = true} +jemalloc_pprof.workspace = true jsonwebtoken.workspace = true nix.workspace = true once_cell.workspace = true diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index 6a85f0ddeb..d975b63677 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -10,6 +10,7 @@ use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder}; use once_cell::sync::Lazy; use routerify::ext::RequestExt; use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; +use tokio_util::io::ReaderStream; use tracing::{debug, info, info_span, warn, Instrument}; use std::future::Future; @@ -407,6 +408,69 @@ pub async fn profile_cpu_handler(req: Request) -> Result, A } } +/// Generates heap profiles. +/// +/// This only works with jemalloc on Linux. +pub async fn profile_heap_handler(req: Request) -> Result, ApiError> { + enum Format { + Jemalloc, + Pprof, + } + + // Parameters. + let format = match get_query_param(&req, "format")?.as_deref() { + None => Format::Pprof, + Some("jemalloc") => Format::Jemalloc, + Some("pprof") => Format::Pprof, + Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))), + }; + + // Obtain profiler handle. + let mut prof_ctl = jemalloc_pprof::PROF_CTL + .as_ref() + .ok_or(ApiError::InternalServerError(anyhow!( + "heap profiling not enabled" + )))? + .lock() + .await; + if !prof_ctl.activated() { + return Err(ApiError::InternalServerError(anyhow!( + "heap profiling not enabled" + ))); + } + + // Take and return the profile. + match format { + Format::Jemalloc => { + // NB: file is an open handle to a tempfile that's already deleted. + let file = tokio::task::spawn_blocking(move || prof_ctl.dump()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; + let stream = ReaderStream::new(tokio::fs::File::from_std(file)); + Response::builder() + .status(200) + .header(CONTENT_TYPE, "application/octet-stream") + .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.dump\"") + .body(Body::wrap_stream(stream)) + .map_err(|err| ApiError::InternalServerError(err.into())) + } + + Format::Pprof => { + let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; + Response::builder() + .status(200) + .header(CONTENT_TYPE, "application/octet-stream") + .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"") + .body(Body::from(data)) + .map_err(|err| ApiError::InternalServerError(err.into())) + } + } +} + pub fn add_request_id_middleware( ) -> Middleware { Middleware::pre(move |req| async move { diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 31f4370855..8fe225c6aa 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -53,6 +53,11 @@ project_build_tag!(BUILD_TAG); #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; + const PID_FILE_NAME: &str = "pageserver.pid"; const FEATURES: &[&str] = &[ diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ceb1c3b012..e127871549 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -56,9 +56,9 @@ use tokio_util::sync::CancellationToken; use tracing::*; use utils::auth::JwtAuth; use utils::failpoint_support::failpoints_handler; -use utils::http::endpoint::profile_cpu_handler; -use utils::http::endpoint::prometheus_metrics_handler; -use utils::http::endpoint::request_span; +use utils::http::endpoint::{ + profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, +}; use utils::http::request::must_parse_query_param; use utils::http::request::{get_request_param, must_get_query_param, parse_query_param}; @@ -155,6 +155,7 @@ impl State { "/swagger.yml", "/metrics", "/profile/cpu", + "/profile/heap", ]; Ok(Self { conf, @@ -3203,6 +3204,7 @@ pub fn make_router( .data(state) .get("/metrics", |r| request_span(r, prometheus_metrics_handler)) .get("/profile/cpu", |r| request_span(r, profile_cpu_handler)) + .get("/profile/heap", |r| request_span(r, profile_heap_handler)) .get("/v1/status", |r| api_handler(r, status_handler)) .put("/v1/failpoints", |r| { testing_api_handler("manage failpoints", r, failpoints_handler) diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs index 8c4281cf52..313d945b94 100644 --- a/safekeeper/benches/receive_wal.rs +++ b/safekeeper/benches/receive_wal.rs @@ -24,9 +24,15 @@ const KB: usize = 1024; const MB: usize = 1024 * KB; const GB: usize = 1024 * MB; +/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB. +/// This mirrors the configuration in bin/safekeeper.rs. #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; + // Register benchmarks with Criterion. criterion_group!( name = benches; diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 3659bcd7e0..4dc7edef37 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -51,6 +51,11 @@ use utils::{ #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; + const PID_FILE_NAME: &str = "safekeeper.pid"; const ID_FILE_NAME: &str = "safekeeper.id"; diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 28294abdb9..69b775fd76 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -14,7 +14,8 @@ use tokio_util::sync::CancellationToken; use tracing::{info_span, Instrument}; use utils::failpoint_support::failpoints_handler; use utils::http::endpoint::{ - profile_cpu_handler, prometheus_metrics_handler, request_span, ChannelWriter, + profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, + ChannelWriter, }; use utils::http::request::parse_query_param; @@ -573,7 +574,8 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder let mut router = endpoint::make_router(); if conf.http_auth.is_some() { router = router.middleware(auth_middleware(|request| { - const ALLOWLIST_ROUTES: &[&str] = &["/v1/status", "/metrics", "/profile/cpu"]; + const ALLOWLIST_ROUTES: &[&str] = + &["/v1/status", "/metrics", "/profile/cpu", "profile/heap"]; if ALLOWLIST_ROUTES.contains(&request.uri().path()) { None } else { @@ -594,6 +596,7 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder .data(auth) .get("/metrics", |r| request_span(r, prometheus_metrics_handler)) .get("/profile/cpu", |r| request_span(r, profile_cpu_handler)) + .get("/profile/heap", |r| request_span(r, profile_heap_handler)) .get("/v1/status", |r| request_span(r, status_handler)) .put("/v1/failpoints", |r| { request_span(r, move |r| async { diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index c0a3abc377..d19379aefd 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -55,12 +55,16 @@ log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } nix = { version = "0.26" } nom = { version = "7" } +num = { version = "0.4" } num-bigint = { version = "0.4" } +num-complex = { version = "0.4", default-features = false, features = ["std"] } num-integer = { version = "0.1", features = ["i128"] } +num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] } +num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] } num-traits = { version = "0.2", features = ["i128", "libm"] } once_cell = { version = "1" } parquet = { version = "53", default-features = false, features = ["zstd"] } -prost = { version = "0.13", features = ["prost-derive"] } +prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] } rand = { version = "0.8", features = ["small_rng"] } regex = { version = "1" } regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } @@ -76,7 +80,8 @@ smallvec = { version = "1", default-features = false, features = ["const_new", " spki = { version = "0.7", default-features = false, features = ["pem", "std"] } subtle = { version = "2" } sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] } -tikv-jemalloc-sys = { version = "0.6", features = ["stats"] } +tikv-jemalloc-ctl = { version = "0.6", features = ["stats", "use_std"] } +tikv-jemalloc-sys = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] } time = { version = "0.3", features = ["macros", "serde-well-known"] } tokio = { version = "1", features = ["full", "test-util"] } tokio-rustls = { version = "0.26", default-features = false, features = ["logging", "ring", "tls12"] } @@ -111,14 +116,18 @@ libc = { version = "0.2", features = ["extra_traits", "use_std"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } nom = { version = "7" } +num = { version = "0.4" } num-bigint = { version = "0.4" } +num-complex = { version = "0.4", default-features = false, features = ["std"] } num-integer = { version = "0.1", features = ["i128"] } +num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] } +num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] } num-traits = { version = "0.2", features = ["i128", "libm"] } once_cell = { version = "1" } parquet = { version = "53", default-features = false, features = ["zstd"] } prettyplease = { version = "0.2", default-features = false, features = ["verbatim"] } proc-macro2 = { version = "1" } -prost = { version = "0.13", features = ["prost-derive"] } +prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] } quote = { version = "1" } regex = { version = "1" } regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }