From e386a366d027dd2a8a69827f5ee6038662eefe03 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Sun, 26 Oct 2025 18:41:19 -0700 Subject: [PATCH] feat: add HTTP endpoint to control prof.gdump feature (#6999) * feat/gdump: ### Add Support for Jemalloc Gdump Flag - **`jemalloc.rs`**: Introduced `PROF_GDUMP` constant and added functions `set_gdump_active` and `is_gdump_active` to manage the gdump flag. - **`error.rs`**: Added error handling for reading and updating the jemalloc gdump flag with `ReadGdump` and `UpdateGdump` errors. - **`lib.rs`**: Exposed `is_gdump_active` and `set_gdump_active` functions for non-Windows platforms. - **`http.rs`**: Added HTTP routes for checking and toggling the jemalloc gdump flag status. - **`mem_prof.rs`**: Implemented handlers `gdump_toggle_handler` and `gdump_status_handler` for managing gdump flag via HTTP requests. Signed-off-by: Lei, HUANG * Update docs/how-to/how-to-profile-memory.md Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> * fix: typo in docs Signed-off-by: Lei, HUANG --------- Signed-off-by: Lei, HUANG Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> --- docs/how-to/how-to-profile-memory.md | 9 ++++ src/common/mem-prof/src/jemalloc.rs | 14 ++++++ src/common/mem-prof/src/jemalloc/error.rs | 14 ++++++ src/common/mem-prof/src/lib.rs | 12 ++++- src/servers/src/http.rs | 5 ++ src/servers/src/http/mem_prof.rs | 56 +++++++++++++++++++++++ 6 files changed, 109 insertions(+), 1 deletion(-) diff --git a/docs/how-to/how-to-profile-memory.md b/docs/how-to/how-to-profile-memory.md index a860c95246..b4bc00093a 100644 --- a/docs/how-to/how-to-profile-memory.md +++ b/docs/how-to/how-to-profile-memory.md @@ -71,6 +71,15 @@ curl -X POST localhost:4000/debug/prof/mem/activate # Deactivate heap profiling curl -X POST localhost:4000/debug/prof/mem/deactivate + +# Activate gdump feature that dumps memory profiling data every time virtual memory 
usage exceeds previous maximum value. +curl -X POST localhost:4000/debug/prof/mem/gdump -d 'activate=true' + +# Deactivate gdump. +curl -X POST localhost:4000/debug/prof/mem/gdump -d 'activate=false' + +# Retrieve current gdump status. +curl -X GET localhost:4000/debug/prof/mem/gdump ``` ### Dump memory profiling data diff --git a/src/common/mem-prof/src/jemalloc.rs b/src/common/mem-prof/src/jemalloc.rs index 05966b4754..a9359dad41 100644 --- a/src/common/mem-prof/src/jemalloc.rs +++ b/src/common/mem-prof/src/jemalloc.rs @@ -32,6 +32,7 @@ use crate::error::{FlamegraphSnafu, ParseJeHeapSnafu, Result}; const PROF_DUMP: &[u8] = b"prof.dump\0"; const OPT_PROF: &[u8] = b"opt.prof\0"; const PROF_ACTIVE: &[u8] = b"prof.active\0"; +const PROF_GDUMP: &[u8] = b"prof.gdump\0"; pub async fn dump_profile() -> Result<Vec<u8>> { ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu); @@ -119,3 +120,16 @@ fn is_prof_enabled() -> Result<bool> { // safety: OPT_PROF variable, if present, is always a boolean value. Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? }) } + +pub fn set_gdump_active(active: bool) -> Result<()> { + ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu); + unsafe { + tikv_jemalloc_ctl::raw::update(PROF_GDUMP, active).context(error::UpdateGdumpSnafu)?; + } + Ok(()) +} + +pub fn is_gdump_active() -> Result<bool> { + // safety: PROF_GDUMP, if present, is a boolean value. + unsafe { Ok(tikv_jemalloc_ctl::raw::read::<bool>(PROF_GDUMP).context(error::ReadGdumpSnafu)?) 
} +} diff --git a/src/common/mem-prof/src/jemalloc/error.rs b/src/common/mem-prof/src/jemalloc/error.rs index 1787e97a7d..79e4b8f9a6 100644 --- a/src/common/mem-prof/src/jemalloc/error.rs +++ b/src/common/mem-prof/src/jemalloc/error.rs @@ -71,6 +71,18 @@ pub enum Error { #[snafu(source)] error: tikv_jemalloc_ctl::Error, }, + + #[snafu(display("Failed to read jemalloc gdump flag"))] + ReadGdump { + #[snafu(source)] + error: tikv_jemalloc_ctl::Error, + }, + + #[snafu(display("Failed to update jemalloc gdump flag"))] + UpdateGdump { + #[snafu(source)] + error: tikv_jemalloc_ctl::Error, + }, } impl ErrorExt for Error { @@ -84,6 +96,8 @@ impl ErrorExt for Error { Error::ActivateProf { .. } => StatusCode::Internal, Error::DeactivateProf { .. } => StatusCode::Internal, Error::ReadProfActive { .. } => StatusCode::Internal, + Error::ReadGdump { .. } => StatusCode::Internal, + Error::UpdateGdump { .. } => StatusCode::Internal, } } diff --git a/src/common/mem-prof/src/lib.rs b/src/common/mem-prof/src/lib.rs index 3fa6273f6e..9ff67e7277 100644 --- a/src/common/mem-prof/src/lib.rs +++ b/src/common/mem-prof/src/lib.rs @@ -19,7 +19,7 @@ mod jemalloc; #[cfg(not(windows))] pub use jemalloc::{ activate_heap_profile, deactivate_heap_profile, dump_flamegraph, dump_pprof, dump_profile, - is_heap_profile_active, + is_gdump_active, is_heap_profile_active, set_gdump_active, }; #[cfg(windows)] @@ -51,3 +51,13 @@ pub fn deactivate_heap_profile() -> error::Result<()> { pub fn is_heap_profile_active() -> error::Result<bool> { error::ProfilingNotSupportedSnafu.fail() } + +#[cfg(windows)] +pub fn is_gdump_active() -> error::Result<bool> { + error::ProfilingNotSupportedSnafu.fail() +} + +#[cfg(windows)] +pub fn set_gdump_active(_: bool) -> error::Result<()> { + error::ProfilingNotSupportedSnafu.fail() +} diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 404b087535..8fa658b6bb 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -924,6 +924,11 @@ impl HttpServer { .route( 
"/mem/status", routing::get(mem_prof::heap_prof_status_handler), + ) // jemalloc gdump flag status and toggle + .route( + "/mem/gdump", + routing::get(mem_prof::gdump_status_handler) + .post(mem_prof::gdump_toggle_handler), ), ), )) diff --git a/src/servers/src/http/mem_prof.rs b/src/servers/src/http/mem_prof.rs index 92995fd2de..e6362aef3f 100644 --- a/src/servers/src/http/mem_prof.rs +++ b/src/servers/src/http/mem_prof.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#[cfg(feature = "mem-prof")] +use axum::Form; #[cfg(feature = "mem-prof")] use axum::extract::Query; use axum::http::StatusCode; @@ -127,3 +129,57 @@ pub async fn heap_prof_status_handler() -> crate::error::Result, +) -> crate::error::Result { + use snafu::ResultExt; + + use crate::error::DumpProfileDataSnafu; + + common_mem_prof::set_gdump_active(form.activate).context(DumpProfileDataSnafu)?; + + let msg = if form.activate { + "gdump activated" + } else { + "gdump deactivated" + }; + Ok((StatusCode::OK, msg)) +} + +#[cfg(not(feature = "mem-prof"))] +#[axum_macros::debug_handler] +pub async fn gdump_toggle_handler() -> crate::error::Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'mem-prof' feature is disabled", + )) +} + +#[cfg(feature = "mem-prof")] +#[axum_macros::debug_handler] +pub async fn gdump_status_handler() -> crate::error::Result { + use snafu::ResultExt; + + use crate::error::DumpProfileDataSnafu; + + let is_active = common_mem_prof::is_gdump_active().context(DumpProfileDataSnafu)?; + Ok((StatusCode::OK, format!("{{\"active\": {}}}", is_active))) +} + +#[cfg(not(feature = "mem-prof"))] +#[axum_macros::debug_handler] +pub async fn gdump_status_handler() -> crate::error::Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'mem-prof' feature is disabled", + )) +}