feat: memory profiling (#1124)

* feat: use jemalloc as default allocator

* feat: add feature for mem-prof

* feat: add errors

* make common-mem-prof optional dep

* fix: toml format

* doc: add profile doc

* fix: typo
This commit is contained in:
Lei, HUANG
2023-03-07 17:12:51 +08:00
committed by GitHub
parent bd98a26cca
commit a4c01f4a3a
12 changed files with 331 additions and 8 deletions

56
Cargo.lock generated
View File

@@ -1419,6 +1419,8 @@ dependencies = [
"substrait 0.1.0",
"tempdir",
"tempfile",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"tokio",
"toml",
]
@@ -1570,6 +1572,19 @@ dependencies = [
"table",
]
[[package]]
name = "common-mem-prof"
version = "0.1.0"
dependencies = [
"common-error",
"snafu",
"tempfile",
"tikv-jemalloc-ctl",
"tikv-jemalloc-sys",
"tikv-jemallocator",
"tokio",
]
[[package]]
name = "common-procedure"
version = "0.1.0"
@@ -6906,6 +6921,7 @@ dependencies = [
"common-error",
"common-grpc",
"common-grpc-expr",
"common-mem-prof",
"common-query",
"common-recordbatch",
"common-runtime",
@@ -7665,16 +7681,15 @@ dependencies = [
[[package]]
name = "tempfile"
version = "3.3.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95"
dependencies = [
"cfg-if 1.0.0",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
"rustix",
"windows-sys",
]
[[package]]
@@ -7835,6 +7850,37 @@ dependencies = [
"ordered-float 2.10.0",
]
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1"
dependencies = [
"libc",
"paste",
"tikv-jemalloc-sys",
]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.3+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a678df20055b43e57ef8cddde41cdfda9a3c1a060b67f4c5836dfb1d78543ba8"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "time"
version = "0.1.45"

View File

@@ -12,6 +12,7 @@ members = [
"src/common/function-macro",
"src/common/grpc",
"src/common/grpc-expr",
"src/common/mem-prof",
"src/common/procedure",
"src/common/query",
"src/common/recordbatch",

View File

@@ -9,6 +9,9 @@ default-run = "greptime"
name = "greptime"
path = "src/bin/greptime.rs"
[features]
mem-prof = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
[dependencies]
anymap = "1.0.0-beta.2"
catalog = { path = "../catalog" }
@@ -18,7 +21,6 @@ common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
substrait = { path = "../common/substrait" }
common-telemetry = { path = "../common/telemetry", features = [
"deadlock_detection",
] }
@@ -36,9 +38,13 @@ serde.workspace = true
servers = { path = "../servers" }
session = { path = "../session" }
snafu.workspace = true
substrait = { path = "../common/substrait" }
tikv-jemalloc-ctl = { version = "0.5", optional = true }
tikv-jemallocator = { version = "0.5", optional = true }
tokio.workspace = true
toml = "0.5"
[dev-dependencies]
rexpect = "0.5"
serde.workspace = true

View File

@@ -87,6 +87,10 @@ fn print_version() -> &'static str {
)
}
#[cfg(feature = "mem-prof")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[tokio::main]
async fn main() -> Result<()> {
let cmd = Command::parse();

View File

@@ -0,0 +1,20 @@
[package]
name = "common-mem-prof"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
common-error = { path = "../error" }
snafu.workspace = true
tempfile = "3.4"
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
tikv-jemallocator = "0.5"
tokio.workspace = true
[dependencies.tikv-jemalloc-sys]
version = "0.5"
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
[profile.release]
debug = true

View File

@@ -0,0 +1,50 @@
# Profile memory usage of GreptimeDB
This crate provides an easy approach to dump memory profiling info.
## Prerequisites
### jemalloc
```bash
# for macOS
brew install jemalloc
# for Ubuntu
sudo apt install libjemalloc-dev
```
### [flamegraph](https://github.com/brendangregg/FlameGraph)
```bash
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
### Build GreptimeDB with `mem-prof` feature.
```bash
cargo build --features=mem-prof
```
## Profiling
Start GreptimeDB instance with environment variables:
```bash
MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone start
```
Dump memory profiling data through HTTP API:
```bash
curl localhost:4000/v1/prof/mem > greptime.hprof
```
You can periodically dump profiling data and compare them to find the delta memory usage.
## Analyze profiling data with flamegraph
To create flamegraph according to dumped profiling data:
```bash
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
```

View File

@@ -0,0 +1,66 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::path::PathBuf;
use common_error::prelude::{ErrorExt, StatusCode};
use snafu::{Backtrace, Snafu};
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Failed to read OPT_PROF"))]
ReadOptProf { source: tikv_jemalloc_ctl::Error },
#[snafu(display("Memory profiling is not enabled"))]
ProfilingNotEnabled,
#[snafu(display("Failed to build temp file from given path: {:?}", path))]
BuildTempPath { path: PathBuf, backtrace: Backtrace },
#[snafu(display("Failed to open temp file: {}", path))]
OpenTempFile {
path: String,
source: std::io::Error,
},
#[snafu(display("Failed to dump profiling data to temp file: {:?}", path))]
DumpProfileData {
path: PathBuf,
source: tikv_jemalloc_ctl::Error,
},
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ReadOptProf { .. } => StatusCode::Internal,
Error::ProfilingNotEnabled => StatusCode::InvalidArguments,
Error::BuildTempPath { .. } => StatusCode::Internal,
Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
snafu::ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -0,0 +1,74 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod error;
use std::ffi::{c_char, CString};
use std::path::PathBuf;
use snafu::{ensure, ResultExt};
use tokio::io::AsyncReadExt;
use crate::error::{
BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
ReadOptProfSnafu,
};
const PROF_DUMP: &[u8] = b"prof.dump\0";
const OPT_PROF: &[u8] = b"opt.prof\0";
pub async fn dump_profile() -> error::Result<Vec<u8>> {
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
let tmp_path = tempfile::tempdir().map_err(|_| {
BuildTempPathSnafu {
path: std::env::temp_dir(),
}
.build()
})?;
let mut path_buf = PathBuf::from(tmp_path.path());
path_buf.push("greptimedb.hprof");
let path = path_buf
.to_str()
.ok_or_else(|| BuildTempPathSnafu { path: &path_buf }.build())?
.to_string();
let mut bytes = CString::new(path.as_str())
.map_err(|_| BuildTempPathSnafu { path: &path_buf }.build())?
.into_bytes_with_nul();
{
// #safety: we always expect a valid temp file path to write profiling data to.
let ptr = bytes.as_mut_ptr() as *mut c_char;
unsafe {
tikv_jemalloc_ctl::raw::write(PROF_DUMP, ptr)
.context(DumpProfileDataSnafu { path: path_buf })?
}
}
let mut f = tokio::fs::File::open(path.as_str())
.await
.context(OpenTempFileSnafu { path: &path })?;
let mut buf = vec![];
f.read_to_end(&mut buf)
.await
.context(OpenTempFileSnafu { path })?;
Ok(buf)
}
fn is_prof_enabled() -> error::Result<bool> {
// safety: OPT_PROF variable, if present, is always a boolean value.
Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
}

View File

@@ -4,10 +4,13 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[features]
mem-prof = ["dep:common-mem-prof"]
[dependencies]
aide = { version = "0.9", features = ["axum"] }
arrow-flight.workspace = true
api = { path = "../api" }
arrow-flight.workspace = true
async-trait = "0.1"
axum = "0.6"
axum-macros = "0.3"
@@ -20,6 +23,7 @@ common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }
common-mem-prof = { path = "../common/mem-prof", optional = true }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
@@ -59,13 +63,14 @@ snap = "1"
sql = { path = "../sql" }
strum = { version = "0.24", features = ["derive"] }
table = { path = "../table" }
tokio.workspace = true
tokio-rustls = "0.23"
tokio-stream = { version = "0.1", features = ["net"] }
tokio.workspace = true
tonic.workspace = true
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
[dev-dependencies]
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
client = { path = "../client" }

View File

@@ -256,6 +256,13 @@ pub enum Error {
#[snafu(display("Cannot find requested database: {}-{}", catalog, schema))]
DatabaseNotFound { catalog: String, schema: String },
#[cfg(feature = "mem-prof")]
#[snafu(display("Failed to dump profile data, source: {}", source))]
DumpProfileData {
#[snafu(backtrace)]
source: common_mem_prof::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -315,6 +322,8 @@ impl ErrorExt for Error {
| InvalidUtf8Value { .. } => StatusCode::InvalidAuthHeader,
DatabaseNotFound { .. } => StatusCode::DatabaseNotFound,
#[cfg(feature = "mem-prof")]
DumpProfileData { source, .. } => source.status_code(),
}
}

View File

@@ -19,6 +19,9 @@ pub mod opentsdb;
pub mod prometheus;
pub mod script;
#[cfg(feature = "mem-prof")]
pub mod mem_prof;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
@@ -445,6 +448,15 @@ impl HttpServer {
);
}
// mem profiler
#[cfg(feature = "mem-prof")]
{
router = router.nest(
&format!("/{HTTP_API_VERSION}/prof"),
Router::new().route("/mem", routing::get(crate::http::mem_prof::mem_prof)),
);
}
router = router.route("/metrics", routing::get(handler::metrics));
router = router.route(

View File

@@ -0,0 +1,30 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use axum::http::StatusCode;
use axum::response::IntoResponse;
use snafu::ResultExt;
use crate::error::DumpProfileDataSnafu;
#[cfg(feature = "mem-prof")]
#[axum_macros::debug_handler]
pub async fn mem_prof() -> crate::error::Result<impl IntoResponse> {
Ok((
StatusCode::OK,
common_mem_prof::dump_profile()
.await
.context(DumpProfileDataSnafu)?,
))
}