diff --git a/Cargo.lock b/Cargo.lock index 76183bdaab..bfb228fef6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6005,8 +6005,7 @@ dependencies = [ [[package]] name = "tikv-jemalloc-ctl" version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c" +source = "git+https://github.com/yuhao-su/jemallocator?rev=a0911601bb7bb263ca55c7ea161ef308fdc623f8#a0911601bb7bb263ca55c7ea161ef308fdc623f8" dependencies = [ "libc", "paste", @@ -6016,8 +6015,7 @@ dependencies = [ [[package]] name = "tikv-jemalloc-sys" version = "0.5.4+5.3.0-patched" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" +source = "git+https://github.com/yuhao-su/jemallocator?rev=a0911601bb7bb263ca55c7ea161ef308fdc623f8#a0911601bb7bb263ca55c7ea161ef308fdc623f8" dependencies = [ "cc", "libc", @@ -6026,8 +6024,7 @@ dependencies = [ [[package]] name = "tikv-jemallocator" version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" +source = "git+https://github.com/yuhao-su/jemallocator?rev=a0911601bb7bb263ca55c7ea161ef308fdc623f8#a0911601bb7bb263ca55c7ea161ef308fdc623f8" dependencies = [ "libc", "tikv-jemalloc-sys", diff --git a/Cargo.toml b/Cargo.toml index 8310d2d522..dc4c9246cf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,8 +163,8 @@ tar = "0.4" task-local-extensions = "0.1.4" test-context = "0.3" thiserror = "1.0" -tikv-jemallocator = "0.5" -tikv-jemalloc-ctl = "0.5" +tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator", rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" } +tikv-jemalloc-ctl = { git = "https://github.com/yuhao-su/jemallocator", rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" } tokio = { version = "1.17", features = ["macros"] } tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } tokio-io-timeout = "1.2.0" @@ -252,7 +252,7 @@ debug = true # disable debug symbols for all packages except this one to decrease binaries size [profile.release.package."*"] -debug = false +debug = true [profile.release-line-debug] inherits = "release" diff --git a/Dockerfile b/Dockerfile index 5f82df3e18..ccabe9ca3e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,6 +44,7 @@ COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_i COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server COPY --chown=nonroot . . +ENV _RJEM_MALLOC_CONF="prof:true" # Show build caching stats to check if it was used in the end. # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats. RUN set -e \ diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index 6b8f2ecbf4..7b9a9d9704 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -21,6 +21,7 @@ base64.workspace = true bstr.workspace = true bytes = { workspace = true, features = ["serde"] } camino.workspace = true +camino-tempfile.workspace = true chrono.workspace = true clap.workspace = true consumption_metrics.workspace = true @@ -78,7 +79,7 @@ subtle.workspace = true sync_wrapper.workspace = true task-local-extensions.workspace = true thiserror.workspace = true -tikv-jemallocator.workspace = true +tikv-jemallocator = { workspace = true, features = ["profiling"] } tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] } tokio-postgres.workspace = true tokio-rustls.workspace = true @@ -102,7 +103,6 @@ redis.workspace = true workspace_hack.workspace = true [dev-dependencies] -camino-tempfile.workspace = true fallible-iterator.workspace = true rcgen.workspace = true rstest.workspace = true diff --git a/proxy/src/http/health_server.rs b/proxy/src/http/health_server.rs index cae9eb5b97..6643ddc2f6 100644 --- a/proxy/src/http/health_server.rs +++ b/proxy/src/http/health_server.rs @@ -1,12 +1,17 @@ use anyhow::{anyhow, bail}; +use camino::Utf8PathBuf; +use camino_tempfile::Utf8TempDir; use hyper::{header::CONTENT_TYPE, Body, Request, Response, StatusCode}; use measured::{text::BufferedTextEncoder, MetricGroup}; use metrics::NeonMetrics; +use once_cell::sync::Lazy; use std::{ convert::Infallible, + ffi::CString, net::TcpListener, sync::{Arc, Mutex}, }; +use tikv_jemalloc_ctl::{opt, prof}; use tracing::{info, info_span}; use utils::http::{ endpoint::{self, request_span}, @@ -21,18 +26,44 @@ async fn status_handler(_: Request) -> Result, ApiError> { json_response(StatusCode::OK, "") } +async fn prof_dump(_: Request) -> Result, ApiError> { + static PROF_MIB: Lazy = + Lazy::new(|| prof::dump::mib().expect("could not create prof.dump MIB")); + static PROF_DIR: Lazy = + Lazy::new(|| camino_tempfile::tempdir().expect("could not create tempdir")); + static PROF_FILE: Lazy = Lazy::new(|| PROF_DIR.path().join("prof.dump")); + static PROF_FILE0: Lazy = Lazy::new(|| CString::new(PROF_FILE.as_str()).unwrap()); + static DUMP_LOCK: Mutex<()> = Mutex::new(()); + + tokio::task::spawn_blocking(|| { + let _guard = DUMP_LOCK.lock(); + PROF_MIB + .write(&PROF_FILE0) + .expect("could not trigger prof.dump"); + let prof_dump = std::fs::read_to_string(&*PROF_FILE).expect("could not open prof.dump"); + + Response::new(Body::from(prof_dump)) + }) + .await + .map_err(|e| ApiError::InternalServerError(e.into())) +} + fn make_router(metrics: AppMetrics) -> RouterBuilder { let state = Arc::new(Mutex::new(PrometheusHandler { encoder: BufferedTextEncoder::new(), metrics, })); + info!(enabled = opt::prof::read().unwrap(), "jemalloc profiling"); + prof::active::write(true).unwrap(); + endpoint::make_router() .get("/metrics", move |r| { let state = state.clone(); request_span(r, move |b| prometheus_metrics_handler(b, state)) }) .get("/v1/status", status_handler) + .get("/v1/jemalloc/prof.dump", prof_dump) } pub async fn task_main(