mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-21 07:20:41 +00:00
feat: memory profiling (#1124)
* feat: use jemalloc as default allocator * feat: add feature for mem-prof * feat: add errors * make common-mem-prof optional dep * fix: toml format * doc: add profile doc * fix: typo
This commit is contained in:
56
Cargo.lock
generated
56
Cargo.lock
generated
@@ -1419,6 +1419,8 @@ dependencies = [
|
||||
"substrait 0.1.0",
|
||||
"tempdir",
|
||||
"tempfile",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"toml",
|
||||
]
|
||||
@@ -1570,6 +1572,19 @@ dependencies = [
|
||||
"table",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"snafu",
|
||||
"tempfile",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemalloc-sys",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "0.1.0"
|
||||
@@ -6906,6 +6921,7 @@ dependencies = [
|
||||
"common-error",
|
||||
"common-grpc",
|
||||
"common-grpc-expr",
|
||||
"common-mem-prof",
|
||||
"common-query",
|
||||
"common-recordbatch",
|
||||
"common-runtime",
|
||||
@@ -7665,16 +7681,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.3.0"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
|
||||
checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"fastrand",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"remove_dir_all",
|
||||
"winapi",
|
||||
"rustix",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7835,6 +7850,37 @@ dependencies = [
|
||||
"ordered-float 2.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-ctl"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"paste",
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-sys"
|
||||
version = "0.5.3+5.3.0-patched"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a678df20055b43e57ef8cddde41cdfda9a3c1a060b67f4c5836dfb1d78543ba8"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemallocator"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.45"
|
||||
|
||||
@@ -12,6 +12,7 @@ members = [
|
||||
"src/common/function-macro",
|
||||
"src/common/grpc",
|
||||
"src/common/grpc-expr",
|
||||
"src/common/mem-prof",
|
||||
"src/common/procedure",
|
||||
"src/common/query",
|
||||
"src/common/recordbatch",
|
||||
|
||||
@@ -9,6 +9,9 @@ default-run = "greptime"
|
||||
name = "greptime"
|
||||
path = "src/bin/greptime.rs"
|
||||
|
||||
[features]
|
||||
mem-prof = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
catalog = { path = "../catalog" }
|
||||
@@ -18,7 +21,6 @@ common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-query = { path = "../common/query" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
substrait = { path = "../common/substrait" }
|
||||
common-telemetry = { path = "../common/telemetry", features = [
|
||||
"deadlock_detection",
|
||||
] }
|
||||
@@ -36,9 +38,13 @@ serde.workspace = true
|
||||
servers = { path = "../servers" }
|
||||
session = { path = "../session" }
|
||||
snafu.workspace = true
|
||||
substrait = { path = "../common/substrait" }
|
||||
tikv-jemalloc-ctl = { version = "0.5", optional = true }
|
||||
tikv-jemallocator = { version = "0.5", optional = true }
|
||||
tokio.workspace = true
|
||||
toml = "0.5"
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
rexpect = "0.5"
|
||||
serde.workspace = true
|
||||
|
||||
@@ -87,6 +87,10 @@ fn print_version() -> &'static str {
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
#[global_allocator]
|
||||
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let cmd = Command::parse();
|
||||
|
||||
20
src/common/mem-prof/Cargo.toml
Normal file
20
src/common/mem-prof/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "common-mem-prof"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
common-error = { path = "../error" }
|
||||
snafu.workspace = true
|
||||
tempfile = "3.4"
|
||||
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
|
||||
tikv-jemallocator = "0.5"
|
||||
tokio.workspace = true
|
||||
|
||||
[dependencies.tikv-jemalloc-sys]
|
||||
version = "0.5"
|
||||
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
50
src/common/mem-prof/README.md
Normal file
50
src/common/mem-prof/README.md
Normal file
@@ -0,0 +1,50 @@
|
||||
# Profile memory usage of GreptimeDB
|
||||
|
||||
This crate provides an easy approach to dump memory profiling info.
|
||||
|
||||
## Prerequisites
|
||||
### jemalloc
|
||||
```bash
|
||||
# for macOS
|
||||
brew install jemalloc
|
||||
|
||||
# for Ubuntu
|
||||
sudo apt install libjemalloc-dev
|
||||
```
|
||||
|
||||
### [flamegraph](https://github.com/brendangregg/FlameGraph)
|
||||
|
||||
```bash
|
||||
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
|
||||
```
|
||||
|
||||
### Build GreptimeDB with `mem-prof` feature.
|
||||
|
||||
```bash
|
||||
cargo build --features=mem-prof
|
||||
```
|
||||
|
||||
## Profiling
|
||||
|
||||
Start a GreptimeDB instance with profiling enabled through the `MALLOC_CONF` environment variable:
|
||||
|
||||
```bash
|
||||
MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone start
|
||||
```
|
||||
|
||||
Dump memory profiling data through HTTP API:
|
||||
|
||||
```bash
|
||||
curl localhost:4000/v1/prof/mem > greptime.hprof
|
||||
```
|
||||
|
||||
You can dump profiling data periodically and compare the dumps to see how memory usage changes over time.
|
||||
|
||||
## Analyze profiling data with flamegraph
|
||||
|
||||
To render the dumped profiling data as an SVG with `jeprof`, using an earlier dump as the baseline:
|
||||
|
||||
```bash
|
||||
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
|
||||
```
|
||||
|
||||
66
src/common/mem-prof/src/error.rs
Normal file
66
src/common/mem-prof/src/error.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use common_error::prelude::{ErrorExt, StatusCode};
|
||||
use snafu::{Backtrace, Snafu};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to read OPT_PROF"))]
|
||||
ReadOptProf { source: tikv_jemalloc_ctl::Error },
|
||||
|
||||
#[snafu(display("Memory profiling is not enabled"))]
|
||||
ProfilingNotEnabled,
|
||||
|
||||
#[snafu(display("Failed to build temp file from given path: {:?}", path))]
|
||||
BuildTempPath { path: PathBuf, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Failed to open temp file: {}", path))]
|
||||
OpenTempFile {
|
||||
path: String,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to dump profiling data to temp file: {:?}", path))]
|
||||
DumpProfileData {
|
||||
path: PathBuf,
|
||||
source: tikv_jemalloc_ctl::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::ReadOptProf { .. } => StatusCode::Internal,
|
||||
Error::ProfilingNotEnabled => StatusCode::InvalidArguments,
|
||||
Error::BuildTempPath { .. } => StatusCode::Internal,
|
||||
Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
|
||||
Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
|
||||
}
|
||||
}
|
||||
|
||||
fn backtrace_opt(&self) -> Option<&Backtrace> {
|
||||
snafu::ErrorCompat::backtrace(self)
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
74
src/common/mem-prof/src/lib.rs
Normal file
74
src/common/mem-prof/src/lib.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod error;
|
||||
|
||||
use std::ffi::{c_char, CString};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
use crate::error::{
|
||||
BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
|
||||
ReadOptProfSnafu,
|
||||
};
|
||||
|
||||
const PROF_DUMP: &[u8] = b"prof.dump\0";
|
||||
const OPT_PROF: &[u8] = b"opt.prof\0";
|
||||
|
||||
pub async fn dump_profile() -> error::Result<Vec<u8>> {
|
||||
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
|
||||
let tmp_path = tempfile::tempdir().map_err(|_| {
|
||||
BuildTempPathSnafu {
|
||||
path: std::env::temp_dir(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let mut path_buf = PathBuf::from(tmp_path.path());
|
||||
path_buf.push("greptimedb.hprof");
|
||||
|
||||
let path = path_buf
|
||||
.to_str()
|
||||
.ok_or_else(|| BuildTempPathSnafu { path: &path_buf }.build())?
|
||||
.to_string();
|
||||
|
||||
let mut bytes = CString::new(path.as_str())
|
||||
.map_err(|_| BuildTempPathSnafu { path: &path_buf }.build())?
|
||||
.into_bytes_with_nul();
|
||||
|
||||
{
|
||||
// #safety: we always expect a valid temp file path to write profiling data to.
|
||||
let ptr = bytes.as_mut_ptr() as *mut c_char;
|
||||
unsafe {
|
||||
tikv_jemalloc_ctl::raw::write(PROF_DUMP, ptr)
|
||||
.context(DumpProfileDataSnafu { path: path_buf })?
|
||||
}
|
||||
}
|
||||
|
||||
let mut f = tokio::fs::File::open(path.as_str())
|
||||
.await
|
||||
.context(OpenTempFileSnafu { path: &path })?;
|
||||
let mut buf = vec![];
|
||||
f.read_to_end(&mut buf)
|
||||
.await
|
||||
.context(OpenTempFileSnafu { path })?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
fn is_prof_enabled() -> error::Result<bool> {
|
||||
// safety: OPT_PROF variable, if present, is always a boolean value.
|
||||
Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
|
||||
}
|
||||
@@ -4,10 +4,13 @@ version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
mem-prof = ["dep:common-mem-prof"]
|
||||
|
||||
[dependencies]
|
||||
aide = { version = "0.9", features = ["axum"] }
|
||||
arrow-flight.workspace = true
|
||||
api = { path = "../api" }
|
||||
arrow-flight.workspace = true
|
||||
async-trait = "0.1"
|
||||
axum = "0.6"
|
||||
axum-macros = "0.3"
|
||||
@@ -20,6 +23,7 @@ common-catalog = { path = "../common/catalog" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-grpc = { path = "../common/grpc" }
|
||||
common-grpc-expr = { path = "../common/grpc-expr" }
|
||||
common-mem-prof = { path = "../common/mem-prof", optional = true }
|
||||
common-query = { path = "../common/query" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
@@ -59,13 +63,14 @@ snap = "1"
|
||||
sql = { path = "../sql" }
|
||||
strum = { version = "0.24", features = ["derive"] }
|
||||
table = { path = "../table" }
|
||||
tokio.workspace = true
|
||||
tokio-rustls = "0.23"
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tokio.workspace = true
|
||||
tonic.workspace = true
|
||||
tower = { version = "0.4", features = ["full"] }
|
||||
tower-http = { version = "0.3", features = ["full"] }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
|
||||
client = { path = "../client" }
|
||||
|
||||
@@ -256,6 +256,13 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Cannot find requested database: {}-{}", catalog, schema))]
|
||||
DatabaseNotFound { catalog: String, schema: String },
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
#[snafu(display("Failed to dump profile data, source: {}", source))]
|
||||
DumpProfileData {
|
||||
#[snafu(backtrace)]
|
||||
source: common_mem_prof::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -315,6 +322,8 @@ impl ErrorExt for Error {
|
||||
| InvalidUtf8Value { .. } => StatusCode::InvalidAuthHeader,
|
||||
|
||||
DatabaseNotFound { .. } => StatusCode::DatabaseNotFound,
|
||||
#[cfg(feature = "mem-prof")]
|
||||
DumpProfileData { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,9 @@ pub mod opentsdb;
|
||||
pub mod prometheus;
|
||||
pub mod script;
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
pub mod mem_prof;
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@@ -445,6 +448,15 @@ impl HttpServer {
|
||||
);
|
||||
}
|
||||
|
||||
// mem profiler
|
||||
#[cfg(feature = "mem-prof")]
|
||||
{
|
||||
router = router.nest(
|
||||
&format!("/{HTTP_API_VERSION}/prof"),
|
||||
Router::new().route("/mem", routing::get(crate::http::mem_prof::mem_prof)),
|
||||
);
|
||||
}
|
||||
|
||||
router = router.route("/metrics", routing::get(handler::metrics));
|
||||
|
||||
router = router.route(
|
||||
|
||||
30
src/servers/src/http/mem_prof.rs
Normal file
30
src/servers/src/http/mem_prof.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::IntoResponse;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::DumpProfileDataSnafu;
|
||||
|
||||
/// HTTP handler that responds with a freshly dumped memory profile.
///
/// On success the response is `200 OK` with the raw profile bytes as the
/// body; a failed dump is wrapped in the server's `DumpProfileData` error.
#[cfg(feature = "mem-prof")]
#[axum_macros::debug_handler]
pub async fn mem_prof() -> crate::error::Result<impl IntoResponse> {
    let profile = common_mem_prof::dump_profile()
        .await
        .context(DumpProfileDataSnafu)?;
    Ok((StatusCode::OK, profile))
}
|
||||
Reference in New Issue
Block a user