diff --git a/Cargo.lock b/Cargo.lock index 1499c9721f..2871750516 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2100,6 +2100,7 @@ dependencies = [ "common-error", "common-grpc", "common-macro", + "common-mem-prof", "common-meta", "common-options", "common-procedure", @@ -3799,6 +3800,7 @@ dependencies = [ "common-grpc", "common-macro", "common-meta", + "common-options", "common-procedure", "common-query", "common-recordbatch", @@ -4680,6 +4682,7 @@ dependencies = [ "common-grpc", "common-macro", "common-meta", + "common-options", "common-query", "common-recordbatch", "common-runtime", diff --git a/config/config.md b/config/config.md index 5290e0d179..23c166aecd 100644 --- a/config/config.md +++ b/config/config.md @@ -207,6 +207,8 @@ | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. | | `tracing.tokio_console_addr` | String | Unset | The tokio console address. | +| `memory` | -- | -- | The memory options. | +| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.
When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
is set to "prof:true,prof_active:false". The official image adds this env variable.
Default is true. | ## Distributed Mode @@ -311,6 +313,8 @@ | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. | | `tracing.tokio_console_addr` | String | Unset | The tokio console address. | +| `memory` | -- | -- | The memory options. | +| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.
When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
is set to "prof:true,prof_active:false". The official image adds this env variable.
Default is true. | ### Metasrv @@ -389,6 +393,8 @@ | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. | | `tracing.tokio_console_addr` | String | Unset | The tokio console address. | +| `memory` | -- | -- | The memory options. | +| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.
When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
is set to "prof:true,prof_active:false". The official image adds this env variable.
Default is true. | ### Datanode @@ -554,6 +560,8 @@ | `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. | | `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. | | `tracing.tokio_console_addr` | String | Unset | The tokio console address. | +| `memory` | -- | -- | The memory options. | +| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.
When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
is set to "prof:true,prof_active:false". The official image adds this env variable.
Default is true. | ### Flownode @@ -611,3 +619,5 @@ | `tracing.tokio_console_addr` | String | Unset | The tokio console address. | | `query` | -- | -- | -- | | `query.parallelism` | Integer | `1` | Parallelism of the query engine for query sent by flownode.
Default to 1, so it won't use too much cpu or memory | +| `memory` | -- | -- | The memory options. | +| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.
When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
is set to "prof:true,prof_active:false". The official image adds this env variable.
Default is true. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index d32c1c0766..0d865d4034 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -669,3 +669,11 @@ headers = { } ## The tokio console address. ## @toml2docs:none-default #+ tokio_console_addr = "127.0.0.1" + +## The memory options. +[memory] +## Whether to enable heap profiling activation during startup. +## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable +## is set to "prof:true,prof_active:false". The official image adds this env variable. +## Default is true. +enable_heap_profiling = true diff --git a/config/flownode.example.toml b/config/flownode.example.toml index cc9dd90705..a1d40f514b 100644 --- a/config/flownode.example.toml +++ b/config/flownode.example.toml @@ -136,3 +136,11 @@ default_ratio = 1.0 ## Parallelism of the query engine for query sent by flownode. ## Default to 1, so it won't use too much cpu or memory parallelism = 1 + +## The memory options. +[memory] +## Whether to enable heap profiling activation during startup. +## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable +## is set to "prof:true,prof_active:false". The official image adds this env variable. +## Default is true. +enable_heap_profiling = true diff --git a/config/frontend.example.toml b/config/frontend.example.toml index 55ab09b9cc..e71f5248fa 100644 --- a/config/frontend.example.toml +++ b/config/frontend.example.toml @@ -280,3 +280,11 @@ headers = { } ## The tokio console address. ## @toml2docs:none-default #+ tokio_console_addr = "127.0.0.1" + +## The memory options. +[memory] +## Whether to enable heap profiling activation during startup. +## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable +## is set to "prof:true,prof_active:false". The official image adds this env variable. +## Default is true. +enable_heap_profiling = true diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index 9da28b2092..03b74f3953 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -265,3 +265,11 @@ headers = { } ## The tokio console address. ## @toml2docs:none-default #+ tokio_console_addr = "127.0.0.1" + +## The memory options. +[memory] +## Whether to enable heap profiling activation during startup. +## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable +## is set to "prof:true,prof_active:false". The official image adds this env variable. +## Default is true. +enable_heap_profiling = true diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 42bc7c913e..22913dbea2 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -783,3 +783,11 @@ headers = { } ## The tokio console address. ## @toml2docs:none-default #+ tokio_console_addr = "127.0.0.1" + +## The memory options. +[memory] +## Whether to enable heap profiling activation during startup. +## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable +## is set to "prof:true,prof_active:false". The official image adds this env variable. +## Default is true. +enable_heap_profiling = true diff --git a/docker/buildx/centos/Dockerfile b/docker/buildx/centos/Dockerfile index 9e7b35b392..b7e822fac6 100644 --- a/docker/buildx/centos/Dockerfile +++ b/docker/buildx/centos/Dockerfile @@ -47,4 +47,6 @@ WORKDIR /greptime COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/ ENV PATH /greptime/bin/:$PATH +ENV MALLOC_CONF="prof:true,prof_active:false" + ENTRYPOINT ["greptime"] diff --git a/docker/buildx/ubuntu/Dockerfile b/docker/buildx/ubuntu/Dockerfile index 86bf9e2669..6306e04688 100644 --- a/docker/buildx/ubuntu/Dockerfile +++ b/docker/buildx/ubuntu/Dockerfile @@ -47,4 +47,6 @@ WORKDIR /greptime COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/ ENV PATH /greptime/bin/:$PATH +ENV MALLOC_CONF="prof:true,prof_active:false" + ENTRYPOINT ["greptime"] diff --git a/docker/ci/centos/Dockerfile b/docker/ci/centos/Dockerfile index 14f3e793a8..480f2196b2 100644 --- a/docker/ci/centos/Dockerfile +++ b/docker/ci/centos/Dockerfile @@ -15,4 +15,6 @@ ADD $TARGETARCH/greptime /greptime/bin/ ENV PATH /greptime/bin/:$PATH +ENV MALLOC_CONF="prof:true,prof_active:false" + ENTRYPOINT ["greptime"] diff --git a/docker/ci/ubuntu/Dockerfile b/docker/ci/ubuntu/Dockerfile index b543e35778..046fd62972 100644 --- a/docker/ci/ubuntu/Dockerfile +++ b/docker/ci/ubuntu/Dockerfile @@ -18,4 +18,6 @@ ENV PATH /greptime/bin/:$PATH ENV TARGET_BIN=$TARGET_BIN +ENV MALLOC_CONF="prof:true,prof_active:false" + ENTRYPOINT ["sh", "-c", "exec $TARGET_BIN \"$@\"", "--"] diff --git a/docs/how-to/how-to-profile-memory.md b/docs/how-to/how-to-profile-memory.md index 85ba803af4..6c0bea6cbc 100644 --- a/docs/how-to/how-to-profile-memory.md +++ b/docs/how-to/how-to-profile-memory.md @@ -30,6 +30,23 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph ## Profiling +### Configuration + +You can control heap profiling activation through configuration. Add the following to your configuration file: + +```toml +[memory] +# Whether to enable heap profiling activation during startup. +# When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable +# is set to "prof:true,prof_active:false". The official image adds this env variable. +# Default is true. +enable_heap_profiling = true +``` + +By default, if you set `MALLOC_CONF=prof:true,prof_active:false`, the database will enable profiling during startup. You can disable this behavior by setting `enable_heap_profiling = false` in the configuration. + +### Starting with environment variables + Start GreptimeDB instance with environment variables: ```bash @@ -40,6 +57,23 @@ MALLOC_CONF=prof:true ./target/debug/greptime standalone start _RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start ``` +### Memory profiling control + +You can control heap profiling activation using the new HTTP APIs: + +```bash +# Check current profiling status +curl -X GET localhost:4000/debug/prof/mem/status + +# Activate heap profiling (if not already active) +curl -X POST localhost:4000/debug/prof/mem/activate + +# Deactivate heap profiling +curl -X POST localhost:4000/debug/prof/mem/deactivate +``` + +### Dump memory profiling data + Dump memory profiling data through HTTP API: ```bash diff --git a/src/cmd/Cargo.toml b/src/cmd/Cargo.toml index bcfd2a8bcd..bcdf5faf3f 100644 --- a/src/cmd/Cargo.toml +++ b/src/cmd/Cargo.toml @@ -38,6 +38,7 @@ common-config.workspace = true common-error.workspace = true common-grpc.workspace = true common-macro.workspace = true +common-mem-prof.workspace = true common-meta.workspace = true common-options.workspace = true common-procedure.workspace = true diff --git a/src/cmd/src/datanode/builder.rs b/src/cmd/src/datanode/builder.rs index 672f61eac0..e4a5b69864 100644 --- a/src/cmd/src/datanode/builder.rs +++ b/src/cmd/src/datanode/builder.rs @@ -28,7 +28,7 @@ use tracing_appender::non_blocking::WorkerGuard; use crate::datanode::{DatanodeOptions, Instance, APP_NAME}; use crate::error::{MetaClientInitSnafu, MissingConfigSnafu, Result, StartDatanodeSnafu}; -use crate::{create_resource_limit_metrics, log_versions}; +use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile}; /// Builder for Datanode instance. pub struct InstanceBuilder { @@ -68,6 +68,7 @@ impl InstanceBuilder { ); log_versions(verbose_version(), short_version(), APP_NAME); + maybe_activate_heap_profile(&dn_opts.memory); create_resource_limit_metrics(APP_NAME); plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts) diff --git a/src/cmd/src/flownode.rs b/src/cmd/src/flownode.rs index 1a071e10a3..55ce5aae0c 100644 --- a/src/cmd/src/flownode.rs +++ b/src/cmd/src/flownode.rs @@ -46,7 +46,7 @@ use crate::error::{ MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu, }; use crate::options::{GlobalOptions, GreptimeOptions}; -use crate::{create_resource_limit_metrics, log_versions, App}; +use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App}; pub const APP_NAME: &str = "greptime-flownode"; @@ -280,6 +280,7 @@ impl StartCommand { ); log_versions(verbose_version(), short_version(), APP_NAME); + maybe_activate_heap_profile(&opts.component.memory); create_resource_limit_metrics(APP_NAME); info!("Flownode start command: {:#?}", self); diff --git a/src/cmd/src/frontend.rs b/src/cmd/src/frontend.rs index c7dd1a2eb2..2c85f6a0bf 100644 --- a/src/cmd/src/frontend.rs +++ b/src/cmd/src/frontend.rs @@ -47,7 +47,7 @@ use tracing_appender::non_blocking::WorkerGuard; use crate::error::{self, Result}; use crate::options::{GlobalOptions, GreptimeOptions}; -use crate::{create_resource_limit_metrics, log_versions, App}; +use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App}; type FrontendOptions = GreptimeOptions; @@ -283,6 +283,7 @@ impl StartCommand { ); log_versions(verbose_version(), short_version(), APP_NAME); + maybe_activate_heap_profile(&opts.component.memory); create_resource_limit_metrics(APP_NAME); info!("Frontend start command: {:#?}", self); diff --git a/src/cmd/src/lib.rs b/src/cmd/src/lib.rs index 1729135adc..6d63e21276 100644 --- a/src/cmd/src/lib.rs +++ b/src/cmd/src/lib.rs @@ -15,7 +15,10 @@ #![feature(assert_matches, let_chains)] use async_trait::async_trait; -use common_telemetry::{error, info}; +use common_error::ext::ErrorExt; +use common_error::status_code::StatusCode; +use common_mem_prof::activate_heap_profile; +use common_telemetry::{error, info, warn}; use stat::{get_cpu_limit, get_memory_limit}; use crate::error::Result; @@ -145,3 +148,20 @@ fn log_env_flags() { info!("argument: {}", argument); } } + +pub fn maybe_activate_heap_profile(memory_options: &common_options::memory::MemoryOptions) { + if memory_options.enable_heap_profiling { + match activate_heap_profile() { + Ok(()) => { + info!("Heap profile is active"); + } + Err(err) => { + if err.status_code() == StatusCode::Unsupported { + info!("Heap profile is not supported"); + } else { + warn!(err; "Failed to activate heap profile"); + } + } + } + } +} diff --git a/src/cmd/src/metasrv.rs b/src/cmd/src/metasrv.rs index c0714b82ea..8e59559bf1 100644 --- a/src/cmd/src/metasrv.rs +++ b/src/cmd/src/metasrv.rs @@ -30,7 +30,7 @@ use tracing_appender::non_blocking::WorkerGuard; use crate::error::{self, LoadLayeredConfigSnafu, Result, StartMetaServerSnafu}; use crate::options::{GlobalOptions, GreptimeOptions}; -use crate::{create_resource_limit_metrics, log_versions, App}; +use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App}; type MetasrvOptions = GreptimeOptions; @@ -325,6 +325,7 @@ impl StartCommand { ); log_versions(verbose_version(), short_version(), APP_NAME); + maybe_activate_heap_profile(&opts.component.memory); create_resource_limit_metrics(APP_NAME); info!("Metasrv start command: {:#?}", self); diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index 655bbd6661..2063ba83df 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -45,6 +45,7 @@ use common_meta::region_keeper::MemoryRegionKeeper; use common_meta::region_registry::LeaderRegionRegistry; use common_meta::sequence::SequenceBuilder; use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef}; +use common_options::memory::MemoryOptions; use common_procedure::{ProcedureInfo, ProcedureManagerRef}; use common_telemetry::info; use common_telemetry::logging::{ @@ -83,7 +84,7 @@ use tracing_appender::non_blocking::WorkerGuard; use crate::error::{Result, StartFlownodeSnafu}; use crate::options::{GlobalOptions, GreptimeOptions}; -use crate::{create_resource_limit_metrics, error, log_versions, App}; +use crate::{create_resource_limit_metrics, error, log_versions, maybe_activate_heap_profile, App}; pub const APP_NAME: &str = "greptime-standalone"; @@ -157,6 +158,7 @@ pub struct StandaloneOptions { pub max_in_flight_write_bytes: Option, pub slow_query: Option, pub query: QueryOptions, + pub memory: MemoryOptions, } impl Default for StandaloneOptions { @@ -190,6 +192,7 @@ impl Default for StandaloneOptions { max_in_flight_write_bytes: None, slow_query: Some(SlowQueryOptions::default()), query: QueryOptions::default(), + memory: MemoryOptions::default(), } } } @@ -486,6 +489,7 @@ impl StartCommand { ); log_versions(verbose_version(), short_version(), APP_NAME); + maybe_activate_heap_profile(&opts.component.memory); create_resource_limit_metrics(APP_NAME); info!("Standalone start command: {:#?}", self); diff --git a/src/cmd/tests/load_config_test.rs b/src/cmd/tests/load_config_test.rs index 2da84a71a3..e79ba915d4 100644 --- a/src/cmd/tests/load_config_test.rs +++ b/src/cmd/tests/load_config_test.rs @@ -245,6 +245,7 @@ fn test_load_flownode_example_config() { ..Default::default() }, user_provider: None, + memory: Default::default(), }, ..Default::default() }; diff --git a/src/common/mem-prof/src/jemalloc.rs b/src/common/mem-prof/src/jemalloc.rs index 65356297ef..cabcc81127 100644 --- a/src/common/mem-prof/src/jemalloc.rs +++ b/src/common/mem-prof/src/jemalloc.rs @@ -19,8 +19,8 @@ use std::io::BufReader; use std::path::PathBuf; use error::{ - BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu, - ReadOptProfSnafu, + ActivateProfSnafu, BuildTempPathSnafu, DeactivateProfSnafu, DumpProfileDataSnafu, + OpenTempFileSnafu, ProfilingNotEnabledSnafu, ReadOptProfSnafu, ReadProfActiveSnafu, }; use jemalloc_pprof_mappings::MAPPINGS; use jemalloc_pprof_utils::{parse_jeheap, FlamegraphOptions, StackProfile}; @@ -31,6 +31,7 @@ use crate::error::{FlamegraphSnafu, ParseJeHeapSnafu, Result}; const PROF_DUMP: &[u8] = b"prof.dump\0"; const OPT_PROF: &[u8] = b"opt.prof\0"; +const PROF_ACTIVE: &[u8] = b"prof.active\0"; pub async fn dump_profile() -> Result> { ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu); @@ -93,6 +94,27 @@ pub async fn dump_flamegraph() -> Result> { let flamegraph = profile.to_flamegraph(&mut opts).context(FlamegraphSnafu)?; Ok(flamegraph) } + +pub fn activate_heap_profile() -> Result<()> { + ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu); + unsafe { + tikv_jemalloc_ctl::raw::update(PROF_ACTIVE, true).context(ActivateProfSnafu)?; + } + Ok(()) +} + +pub fn deactivate_heap_profile() -> Result<()> { + ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu); + unsafe { + tikv_jemalloc_ctl::raw::update(PROF_ACTIVE, false).context(DeactivateProfSnafu)?; + } + Ok(()) +} + +pub fn is_heap_profile_active() -> Result { + unsafe { Ok(tikv_jemalloc_ctl::raw::read::(PROF_ACTIVE).context(ReadProfActiveSnafu)?) } +} + fn is_prof_enabled() -> Result { // safety: OPT_PROF variable, if present, is always a boolean value. Ok(unsafe { tikv_jemalloc_ctl::raw::read::(OPT_PROF).context(ReadOptProfSnafu)? }) diff --git a/src/common/mem-prof/src/jemalloc/error.rs b/src/common/mem-prof/src/jemalloc/error.rs index 76a93db09a..1787e97a7d 100644 --- a/src/common/mem-prof/src/jemalloc/error.rs +++ b/src/common/mem-prof/src/jemalloc/error.rs @@ -53,6 +53,24 @@ pub enum Error { #[snafu(source)] error: tikv_jemalloc_ctl::Error, }, + + #[snafu(display("Failed to activate heap profiling"))] + ActivateProf { + #[snafu(source)] + error: tikv_jemalloc_ctl::Error, + }, + + #[snafu(display("Failed to deactivate heap profiling"))] + DeactivateProf { + #[snafu(source)] + error: tikv_jemalloc_ctl::Error, + }, + + #[snafu(display("Failed to read heap profiling status"))] + ReadProfActive { + #[snafu(source)] + error: tikv_jemalloc_ctl::Error, + }, } impl ErrorExt for Error { @@ -63,6 +81,9 @@ impl ErrorExt for Error { Error::BuildTempPath { .. } => StatusCode::Internal, Error::OpenTempFile { .. } => StatusCode::StorageUnavailable, Error::DumpProfileData { .. } => StatusCode::StorageUnavailable, + Error::ActivateProf { .. } => StatusCode::Internal, + Error::DeactivateProf { .. } => StatusCode::Internal, + Error::ReadProfActive { .. } => StatusCode::Internal, } } diff --git a/src/common/mem-prof/src/lib.rs b/src/common/mem-prof/src/lib.rs index 69c5f79138..3fa6273f6e 100644 --- a/src/common/mem-prof/src/lib.rs +++ b/src/common/mem-prof/src/lib.rs @@ -17,7 +17,10 @@ pub mod error; #[cfg(not(windows))] mod jemalloc; #[cfg(not(windows))] -pub use jemalloc::{dump_flamegraph, dump_pprof, dump_profile}; +pub use jemalloc::{ + activate_heap_profile, deactivate_heap_profile, dump_flamegraph, dump_pprof, dump_profile, + is_heap_profile_active, +}; #[cfg(windows)] pub async fn dump_profile() -> error::Result> { @@ -33,3 +36,18 @@ pub async fn dump_pprof() -> error::Result> { pub async fn dump_flamegraph() -> error::Result> { error::ProfilingNotSupportedSnafu.fail() } + +#[cfg(windows)] +pub fn activate_heap_profile() -> error::Result<()> { + error::ProfilingNotSupportedSnafu.fail() +} + +#[cfg(windows)] +pub fn deactivate_heap_profile() -> error::Result<()> { + error::ProfilingNotSupportedSnafu.fail() +} + +#[cfg(windows)] +pub fn is_heap_profile_active() -> error::Result { + error::ProfilingNotSupportedSnafu.fail() +} diff --git a/src/common/options/src/lib.rs b/src/common/options/src/lib.rs index 6c77623b76..6072e816ac 100644 --- a/src/common/options/src/lib.rs +++ b/src/common/options/src/lib.rs @@ -13,3 +13,4 @@ // limitations under the License. pub mod datanode; +pub mod memory; diff --git a/src/common/options/src/memory.rs b/src/common/options/src/memory.rs new file mode 100644 index 0000000000..885f9fde6b --- /dev/null +++ b/src/common/options/src/memory.rs @@ -0,0 +1,33 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +#[serde(default)] +pub struct MemoryOptions { + /// Whether to enable heap profiling activation. + /// When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable + /// is set to "prof:true,prof_active:false". The official image adds this env variable. + /// Default is true. + pub enable_heap_profiling: bool, +} + +impl Default for MemoryOptions { + fn default() -> Self { + Self { + enable_heap_profiling: true, + } + } +} diff --git a/src/datanode/Cargo.toml b/src/datanode/Cargo.toml index 3b14f93b5f..6d21a428cf 100644 --- a/src/datanode/Cargo.toml +++ b/src/datanode/Cargo.toml @@ -26,6 +26,7 @@ common-greptimedb-telemetry.workspace = true common-grpc.workspace = true common-macro.workspace = true common-meta.workspace = true +common-options.workspace = true common-procedure.workspace = true common-query.workspace = true common-recordbatch.workspace = true diff --git a/src/datanode/src/config.rs b/src/datanode/src/config.rs index ba8eb0c7ba..5f7a0b8f85 100644 --- a/src/datanode/src/config.rs +++ b/src/datanode/src/config.rs @@ -16,6 +16,7 @@ use common_base::readable_size::ReadableSize; use common_config::{Configurable, DEFAULT_DATA_HOME}; +use common_options::memory::MemoryOptions; pub use common_procedure::options::ProcedureConfig; use common_telemetry::logging::{LoggingOptions, TracingOptions}; use common_wal::config::DatanodeWalConfig; @@ -85,6 +86,7 @@ pub struct DatanodeOptions { pub export_metrics: ExportMetricsOption, pub tracing: TracingOptions, pub query: QueryOptions, + pub memory: MemoryOptions, /// Deprecated options, please use the new options instead. #[deprecated(note = "Please use `grpc.addr` instead.")] @@ -131,6 +133,7 @@ impl Default for DatanodeOptions { export_metrics: ExportMetricsOption::default(), tracing: TracingOptions::default(), query: QueryOptions::default(), + memory: MemoryOptions::default(), // Deprecated options rpc_addr: None, diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index 7d22f45b6c..1aae903453 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -28,6 +28,7 @@ common-function.workspace = true common-grpc.workspace = true common-macro.workspace = true common-meta.workspace = true +common-options.workspace = true common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index f7ff942a0e..ff26387986 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -24,6 +24,7 @@ use api::v1::{RowDeleteRequest, RowDeleteRequests, RowInsertRequest, RowInsertRe use common_config::Configurable; use common_error::ext::BoxedError; use common_meta::key::TableMetadataManagerRef; +use common_options::memory::MemoryOptions; use common_runtime::JoinHandle; use common_telemetry::logging::{LoggingOptions, TracingOptions}; use common_telemetry::{debug, info, trace}; @@ -111,6 +112,7 @@ pub struct FlownodeOptions { pub heartbeat: HeartbeatOptions, pub query: QueryOptions, pub user_provider: Option, + pub memory: MemoryOptions, } impl Default for FlownodeOptions { @@ -131,6 +133,7 @@ impl Default for FlownodeOptions { allow_query_fallback: false, }, user_provider: None, + memory: MemoryOptions::default(), } } } diff --git a/src/frontend/src/frontend.rs b/src/frontend/src/frontend.rs index d92bd5737a..e4051965be 100644 --- a/src/frontend/src/frontend.rs +++ b/src/frontend/src/frontend.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use common_base::readable_size::ReadableSize; use common_config::config::Configurable; use common_options::datanode::DatanodeClientOptions; +use common_options::memory::MemoryOptions; use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions}; use meta_client::MetaClientOptions; use query::options::QueryOptions; @@ -62,6 +63,7 @@ pub struct FrontendOptions { pub query: QueryOptions, pub max_in_flight_write_bytes: Option, pub slow_query: Option, + pub memory: MemoryOptions, } impl Default for FrontendOptions { @@ -88,6 +90,7 @@ impl Default for FrontendOptions { query: QueryOptions::default(), max_in_flight_write_bytes: None, slow_query: Some(SlowQueryOptions::default()), + memory: MemoryOptions::default(), } } } diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 6b6c6b0a8c..ff025a0cc7 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -40,6 +40,7 @@ use common_meta::region_registry::LeaderRegionRegistryRef; use common_meta::sequence::SequenceRef; use common_meta::wal_options_allocator::WalOptionsAllocatorRef; use common_options::datanode::DatanodeClientOptions; +use common_options::memory::MemoryOptions; use common_procedure::options::ProcedureConfig; use common_procedure::ProcedureManagerRef; use common_telemetry::logging::{LoggingOptions, TracingOptions}; @@ -160,6 +161,8 @@ pub struct MetasrvOptions { pub flush_stats_factor: usize, /// The tracing options. pub tracing: TracingOptions, + /// The memory options. + pub memory: MemoryOptions, /// The datastore for kv metadata. pub backend: BackendImpl, #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))] @@ -251,6 +254,7 @@ impl Default for MetasrvOptions { max_txn_ops: 128, flush_stats_factor: 3, tracing: TracingOptions::default(), + memory: MemoryOptions::default(), backend: BackendImpl::EtcdStore, #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))] meta_table_name: common_meta::kv_backend::DEFAULT_META_TABLE_NAME.to_string(), diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 1465c900cb..74a375139a 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -887,7 +887,19 @@ impl HttpServer { "/prof", Router::new() .route("/cpu", routing::post(pprof::pprof_handler)) - .route("/mem", routing::post(mem_prof::mem_prof_handler)), + .route("/mem", routing::post(mem_prof::mem_prof_handler)) + .route( + "/mem/activate", + routing::post(mem_prof::activate_heap_prof_handler), + ) + .route( + "/mem/deactivate", + routing::post(mem_prof::deactivate_heap_prof_handler), + ) + .route( + "/mem/status", + routing::get(mem_prof::heap_prof_status_handler), + ), ), )) } diff --git a/src/servers/src/http/mem_prof.rs b/src/servers/src/http/mem_prof.rs index 342136eecd..92995fd2de 100644 --- a/src/servers/src/http/mem_prof.rs +++ b/src/servers/src/http/mem_prof.rs @@ -56,6 +56,30 @@ pub async fn mem_prof_handler( Ok((StatusCode::OK, dump)) } +#[cfg(feature = "mem-prof")] +#[axum_macros::debug_handler] +pub async fn activate_heap_prof_handler() -> crate::error::Result { + use snafu::ResultExt; + + use crate::error::DumpProfileDataSnafu; + + common_mem_prof::activate_heap_profile().context(DumpProfileDataSnafu)?; + + Ok((StatusCode::OK, "Heap profiling activated")) +} + +#[cfg(feature = "mem-prof")] +#[axum_macros::debug_handler] +pub async fn deactivate_heap_prof_handler() -> crate::error::Result { + use snafu::ResultExt; + + use crate::error::DumpProfileDataSnafu; + + common_mem_prof::deactivate_heap_profile().context(DumpProfileDataSnafu)?; + + Ok((StatusCode::OK, "Heap profiling deactivated")) +} + #[cfg(not(feature = "mem-prof"))] #[axum_macros::debug_handler] pub async fn mem_prof_handler() -> crate::error::Result { @@ -64,3 +88,42 @@ pub async fn mem_prof_handler() -> crate::error::Result { "The 'mem-prof' feature is disabled", )) } + +#[cfg(not(feature = "mem-prof"))] +#[axum_macros::debug_handler] +pub async fn activate_heap_prof_handler() -> crate::error::Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'mem-prof' feature is disabled", + )) +} + +#[cfg(feature = "mem-prof")] +#[axum_macros::debug_handler] +pub async fn heap_prof_status_handler() -> crate::error::Result { + use snafu::ResultExt; + + use crate::error::DumpProfileDataSnafu; + + let is_active = common_mem_prof::is_heap_profile_active().context(DumpProfileDataSnafu)?; + + Ok((StatusCode::OK, format!("{{\"active\": {}}}", is_active))) +} + +#[cfg(not(feature = "mem-prof"))] +#[axum_macros::debug_handler] +pub async fn deactivate_heap_prof_handler() -> crate::error::Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'mem-prof' feature is disabled", + )) +} + +#[cfg(not(feature = "mem-prof"))] +#[axum_macros::debug_handler] +pub async fn heap_prof_status_handler() -> crate::error::Result { + Ok(( + StatusCode::NOT_IMPLEMENTED, + "The 'mem-prof' feature is disabled", + )) +} diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index cdeb10cf5f..fc9737c388 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1221,6 +1221,9 @@ ttl = "30d" [query] parallelism = 0 allow_query_fallback = false + +[memory] +enable_heap_profiling = true "#, ) .trim()