From b33299dc37d9269fe55bd3256b7a4a72c129b81c Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Fri, 3 Jan 2025 16:21:31 +0100
Subject: [PATCH] pageserver,safekeeper: disable heap profiling (#10268)

## Problem

Since enabling continuous profiling in staging, we've seen frequent seg
faults. This is suspected to be because jemalloc and pprof-rs take a
stack trace at the same time, and the handlers aren't signal safe.
jemalloc does this probabilistically on every allocation, regardless of
whether someone is taking a heap profile, which means that any CPU
profile has a chance to cause a seg fault.

Touches #10225.

## Summary of changes

For now, just disable heap profiles -- CPU profiles are more important,
and we need to be able to take them without risking a crash.
---
 pageserver/src/bin/pageserver.rs | 10 ++++++----
 safekeeper/src/bin/safekeeper.rs | 10 ++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 567a69da3b..b92ff4ebf9 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -53,10 +53,12 @@ project_build_tag!(BUILD_TAG);
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
-/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-#[allow(non_upper_case_globals)]
-#[export_name = "malloc_conf"]
-pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+// TODO: disabled because concurrent CPU profiles cause seg faults. See:
+// https://github.com/neondatabase/neon/issues/10225.
+//#[allow(non_upper_case_globals)]
+//#[export_name = "malloc_conf"]
+//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
 
 const PID_FILE_NAME: &str = "pageserver.pid";
 
diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs
index 13f6e34575..e0ba38d638 100644
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -51,10 +51,12 @@ use utils::{
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 
-/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-#[allow(non_upper_case_globals)]
-#[export_name = "malloc_conf"]
-pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+// TODO: disabled because concurrent CPU profiles cause seg faults. See:
+// https://github.com/neondatabase/neon/issues/10225.
+//#[allow(non_upper_case_globals)]
+//#[export_name = "malloc_conf"]
+//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
 
 const PID_FILE_NAME: &str = "safekeeper.pid";
 const ID_FILE_NAME: &str = "safekeeper.id";