feat: enable prof features by default (#4815)

* feat: enable prof by default

* docs: don't need to build with features

* feat: add common-pprof as optional dep for pprof feature

* build: remove optional

* feat: use dump_text
This commit is contained in:
Yingwen
2024-10-14 11:32:47 +08:00
committed by GitHub
parent a889ea88ca
commit 3b2ce31a19
11 changed files with 222 additions and 29 deletions

13
Cargo.lock generated
View File

@@ -2214,6 +2214,18 @@ dependencies = [
name = "common-plugins"
version = "0.9.3"
[[package]]
name = "common-pprof"
version = "0.9.3"
dependencies = [
"common-error",
"common-macro",
"pprof",
"prost 0.12.6",
"snafu 0.8.5",
"tokio",
]
[[package]]
name = "common-procedure"
version = "0.9.3"
@@ -10705,6 +10717,7 @@ dependencies = [
"common-mem-prof",
"common-meta",
"common-plugins",
"common-pprof",
"common-query",
"common-recordbatch",
"common-runtime",

View File

@@ -20,6 +20,7 @@ members = [
"src/common/mem-prof",
"src/common/meta",
"src/common/plugins",
"src/common/pprof",
"src/common/procedure",
"src/common/procedure-test",
"src/common/query",
@@ -208,6 +209,7 @@ common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" }
common-plugins = { path = "src/common/plugins" }
common-pprof = { path = "src/common/pprof" }
common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" }
common-query = { path = "src/common/query" }

View File

@@ -1,11 +1,5 @@
# Profiling CPU
## Build GreptimeDB with `pprof` feature
```bash
cargo build --features=pprof
```
## HTTP API
Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto).
```bash

View File

@@ -18,12 +18,6 @@ sudo apt install libjemalloc-dev
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
### Build GreptimeDB with `mem-prof` feature.
```bash
cargo build --features=mem-prof
```
## Profiling
Start GreptimeDB instance with environment variables:

View File

@@ -10,7 +10,7 @@ name = "greptime"
path = "src/bin/greptime.rs"
[features]
default = ["python"]
default = ["python", "servers/pprof", "servers/mem-prof"]
tokio-console = ["common-telemetry/tokio-console"]
python = ["frontend/python"]

View File

@@ -0,0 +1,22 @@
[package]
name = "common-pprof"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
common-error.workspace = true
common-macro.workspace = true
prost.workspace = true
snafu.workspace = true
tokio.workspace = true
[target.'cfg(unix)'.dependencies]
pprof = { version = "0.13", features = [
"flamegraph",
"prost-codec",
"protobuf",
] }
[lints]
workspace = true

View File

@@ -0,0 +1,99 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(unix)]
pub mod nix;
/// Error types returned by the CPU profiling helpers of this crate.
pub mod error {
    use std::any::Any;

    use common_error::ext::ErrorExt;
    use common_error::status_code::StatusCode;
    use common_macro::stack_trace_debug;
    use snafu::{Location, Snafu};

    // Errors raised while profiling. The `Pprof` variant only exists on unix
    // targets, where the `pprof` dependency is compiled in.
    #[derive(Snafu)]
    #[stack_trace_debug]
    #[snafu(visibility(pub(crate)))]
    pub enum Error {
        // Wraps an error reported by the `pprof` crate.
        #[cfg(unix)]
        #[snafu(display("Pprof error"))]
        Pprof {
            #[snafu(source)]
            error: pprof::Error,
            #[snafu(implicit)]
            location: Location,
        },

        // Returned by the dummy profiler that is built on non-unix targets.
        #[snafu(display("Pprof is unsupported on this platform"))]
        Unsupported {
            #[snafu(implicit)]
            location: Location,
        },
    }

    /// Result alias that uses this module's [`Error`].
    pub type Result<T> = std::result::Result<T, Error>;

    impl ErrorExt for Error {
        /// Maps every variant to the status code reported to callers.
        fn status_code(&self) -> StatusCode {
            match self {
                #[cfg(unix)]
                Self::Pprof { .. } => StatusCode::Unexpected,
                Self::Unsupported { .. } => StatusCode::Unsupported,
            }
        }

        /// Exposes the error as [`Any`] so callers can downcast it.
        fn as_any(&self) -> &dyn Any {
            self
        }
    }
}
#[cfg(not(unix))]
pub mod dummy {
    use std::time::Duration;

    use crate::error::{Result, UnsupportedSnafu};

    /// Dummy CPU profiler utility.
    ///
    /// It mirrors the constructor and `dump_*` methods of the unix profiler,
    /// but every dump fails with the `Unsupported` error.
    #[derive(Debug)]
    pub struct Profiling {}

    impl Profiling {
        /// Creates a new profiler; both arguments are ignored.
        pub fn new(_duration: Duration, _frequency: i32) -> Profiling {
            Self {}
        }

        /// Always fails: text reports are unavailable on this platform.
        pub async fn dump_text(&self) -> Result<String> {
            UnsupportedSnafu.fail()
        }

        /// Always fails: flamegraphs are unavailable on this platform.
        pub async fn dump_flamegraph(&self) -> Result<Vec<u8>> {
            UnsupportedSnafu.fail()
        }

        /// Always fails: proto reports are unavailable on this platform.
        pub async fn dump_proto(&self) -> Result<Vec<u8>> {
            UnsupportedSnafu.fail()
        }
    }
}
#[cfg(not(unix))]
pub use dummy::Profiling;
#[cfg(unix)]
pub use nix::Profiling;

View File

@@ -0,0 +1,78 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Duration;
use pprof::protos::Message;
use snafu::ResultExt;
use crate::error::{PprofSnafu, Result};
/// CPU profiler utility built on top of the `pprof` crate.
///
/// Each `dump_*` call runs a fresh profiling session that lasts for the
/// configured duration before the report is rendered.
// Inspired by https://github.com/datafuselabs/databend/blob/67f445e83cd4eceda98f6c1c114858929d564029/src/common/base/src/base/profiling.rs
#[derive(Debug)]
pub struct Profiling {
    /// How long a profiling session samples before the report is built.
    duration: Duration,
    /// Sampling frequency handed to the pprof guard builder.
    frequency: i32,
}

impl Profiling {
    /// Creates a profiler that samples for `duration` at `frequency`.
    pub fn new(duration: Duration, frequency: i32) -> Profiling {
        Self {
            duration,
            frequency,
        }
    }

    /// Runs one profiling session and returns the resulting pprof report.
    ///
    /// Fails with the `Pprof` error if the guard cannot be built or the
    /// report cannot be generated.
    pub async fn report(&self) -> Result<pprof::Report> {
        let builder = pprof::ProfilerGuardBuilder::default()
            .frequency(self.frequency)
            .blocklist(&["libc", "libgcc", "pthread", "vdso"]);
        let profiler_guard = builder.build().context(PprofSnafu)?;

        // Keep the guard alive while samples are collected.
        tokio::time::sleep(self.duration).await;

        let report = profiler_guard.report().build().context(PprofSnafu)?;
        Ok(report)
    }

    /// Profiles and returns the `Debug` rendering of the report as text.
    pub async fn dump_text(&self) -> Result<String> {
        Ok(format!("{:?}", self.report().await?))
    }

    /// Profiles and returns the flamegraph bytes written by pprof.
    pub async fn dump_flamegraph(&self) -> Result<Vec<u8>> {
        let report = self.report().await?;
        let mut buffer = Vec::<u8>::new();
        report.flamegraph(&mut buffer).context(PprofSnafu)?;
        Ok(buffer)
    }

    /// Profiles and returns the report encoded in Google's pprof proto format.
    pub async fn dump_proto(&self) -> Result<Vec<u8>> {
        let report = self.report().await?;
        let encoded = report.pprof().context(PprofSnafu)?.encode_to_vec();
        Ok(encoded)
    }
}

View File

@@ -7,7 +7,7 @@ license.workspace = true
[features]
dashboard = []
mem-prof = ["dep:common-mem-prof"]
pprof = ["dep:pprof"]
pprof = ["dep:common-pprof"]
testing = []
[lints]
@@ -37,6 +37,7 @@ common-macro.workspace = true
common-mem-prof = { workspace = true, optional = true }
common-meta.workspace = true
common-plugins.workspace = true
common-pprof = { workspace = true, optional = true }
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
@@ -75,11 +76,6 @@ pgwire = { version = "0.25.0", default-features = false, features = ["server-api
pin-project = "1.0"
pipeline.workspace = true
postgres-types = { version = "0.2", features = ["with-chrono-0_4", "with-serde_json-1"] }
pprof = { version = "0.13", features = [
"flamegraph",
"prost-codec",
"protobuf",
], optional = true }
prometheus.workspace = true
promql-parser.workspace = true
prost.workspace = true
@@ -136,7 +132,7 @@ tokio-postgres = "0.7"
tokio-postgres-rustls = "0.12"
tokio-test = "0.4"
[target.'cfg(not(windows))'.dev-dependencies]
[target.'cfg(unix)'.dev-dependencies]
pprof = { version = "0.13", features = ["criterion", "flamegraph"] }
[target.'cfg(windows)'.dependencies]

View File

@@ -424,9 +424,7 @@ pub enum Error {
#[cfg(feature = "pprof")]
#[snafu(display("Failed to dump pprof data"))]
DumpPprof {
source: crate::http::pprof::nix::Error,
},
DumpPprof { source: common_pprof::error::Error },
#[cfg(not(windows))]
#[snafu(display("Failed to update jemalloc metrics"))]

View File

@@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(feature = "pprof")]
pub(crate) mod nix;
#[cfg(feature = "pprof")]
pub mod handler {
use std::num::NonZeroI32;
@@ -23,13 +20,13 @@ pub mod handler {
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::IntoResponse;
use common_pprof::Profiling;
use common_telemetry::info;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use crate::error::{DumpPprofSnafu, Result};
use crate::http::pprof::nix::Profiling;
/// Output format.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -70,8 +67,8 @@ pub mod handler {
let body = match req.output {
Output::Proto => profiling.dump_proto().await.context(DumpPprofSnafu)?,
Output::Text => {
let report = profiling.report().await.context(DumpPprofSnafu)?;
format!("{:?}", report).into_bytes()
let report = profiling.dump_text().await.context(DumpPprofSnafu)?;
report.into_bytes()
}
Output::Flamegraph => profiling.dump_flamegraph().await.context(DumpPprofSnafu)?,
};