Collect and expose I/O disk write metrics

This commit is contained in:
Kirill Bulatov
2021-09-01 18:24:42 +03:00
parent 291c2c9a1b
commit 212920e47e
4 changed files with 74 additions and 10 deletions

9
Cargo.lock generated
View File

@@ -903,9 +903,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.98"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21"
[[package]]
name = "libloading"
@@ -1536,9 +1536,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.5.4"
version = "1.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759"
dependencies = [
"aho-corasick",
"memchr",
@@ -2619,6 +2619,7 @@ name = "zenith_metrics"
version = "0.1.0"
dependencies = [
"lazy_static",
"libc",
"prometheus",
]

View File

@@ -3,10 +3,7 @@ name = "zenith_metrics"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prometheus = "0.12"
[dev-dependencies]
lazy_static = "1.4.0"
libc = "0.2"
lazy_static = "1.4"

View File

@@ -2,7 +2,7 @@
//! make sure that we use the same dep version everywhere.
//! Otherwise, we might not see all metrics registered via
//! a default registry.
pub use prometheus::gather;
use lazy_static::lazy_static;
pub use prometheus::{exponential_buckets, linear_buckets};
pub use prometheus::{register_histogram, Histogram};
pub use prometheus::{register_histogram_vec, HistogramVec};
@@ -13,4 +13,69 @@ pub use prometheus::{register_int_gauge_vec, IntGaugeVec};
pub use prometheus::{Encoder, TextEncoder};
mod wrappers;
use libc::{c_long, getrusage, rusage, suseconds_t, time_t, timeval, RUSAGE_SELF};
pub use wrappers::{CountedReader, CountedWriter};
/// Returns a snapshot of all Prometheus metric families, refreshing the
/// process disk I/O gauges immediately beforehand.
///
/// The refresh happens inline on every call instead of in a separate
/// background job: gathering is a relatively simple, standalone operation,
/// so piggybacking on it keeps the design simple.
pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
    // Bring the I/O gauges up to date first so the snapshot below includes them.
    update_io_metrics();

    prometheus::gather()
}
// Integer gauge family holding the process's disk I/O volume in bytes, split into
// one series per value of the `io_operation` label.
//
// `lazy_static!` defers the registration until the static is first dereferenced;
// if the registration fails at that point, the `expect` below panics.
lazy_static! {
static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
"pageserver_disk_io_bytes",
"Bytes written and read from disk, grouped by the operation (read|write)",
&["io_operation"]
)
.expect("Failed to register disk i/o bytes int gauge vec");
}
// Records I/O stats in a "cross-platform" way.
// Compiles on both macOS and Linux, but the current macOS implementation always reports 0 for the I/O stats.
// An alternative is to read procfs (`/proc/[pid]/io`), which does not work under macOS at all, hence abandoned.
//
// Uses https://www.freebsd.org/cgi/man.cgi?query=getrusage to retrieve the number of block operations
// performed by the process.
// We know the size of a block, so we can derive the I/O bytes from it.
// The value might not be 100% exact, but should be fine for Prometheus metrics in this case.
//
// If `getrusage` reports a failure, the gauges are left untouched instead of being overwritten
// with the zeroes the struct was initialised with.
fn update_io_metrics() {
    const BYTES_IN_BLOCK: i64 = 512;

    // Every field is zero-initialised explicitly so that the struct is fully defined
    // before it is handed to the C call.
    let mut usage = rusage {
        ru_utime: timeval {
            tv_sec: 0 as time_t,
            tv_usec: 0 as suseconds_t,
        },
        ru_stime: timeval {
            tv_sec: 0 as time_t,
            tv_usec: 0 as suseconds_t,
        },
        ru_maxrss: 0 as c_long,
        ru_ixrss: 0 as c_long,
        ru_idrss: 0 as c_long,
        ru_isrss: 0 as c_long,
        ru_minflt: 0 as c_long,
        ru_majflt: 0 as c_long,
        ru_nswap: 0 as c_long,
        ru_inblock: 0 as c_long,
        ru_oublock: 0 as c_long,
        ru_msgsnd: 0 as c_long,
        ru_msgrcv: 0 as c_long,
        ru_nsignals: 0 as c_long,
        ru_nvcsw: 0 as c_long,
        ru_nivcsw: 0 as c_long,
    };

    // SAFETY: `usage` is a live, fully initialised `rusage` that is exclusively borrowed for
    // the duration of the call, which is all `getrusage` needs in order to fill it in.
    let status = unsafe { getrusage(RUSAGE_SELF, &mut usage) };
    if status != 0 {
        // The struct still holds the placeholder zeroes; publishing them would make the
        // gauges drop to 0, so keep the previously recorded values instead.
        return;
    }

    // `c_long` is narrower than `i64` on some targets, so widen before multiplying;
    // on targets where it already is `i64` the cast is a no-op.
    #[allow(clippy::unnecessary_cast)]
    let blocks_to_bytes = |blocks: c_long| (blocks as i64).saturating_mul(BYTES_IN_BLOCK);

    DISK_IO_BYTES
        .with_label_values(&["read"])
        .set(blocks_to_bytes(usage.ru_inblock));
    DISK_IO_BYTES
        .with_label_values(&["write"])
        .set(blocks_to_bytes(usage.ru_oublock));
}

View File

@@ -31,6 +31,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
let mut buffer = vec![];
let encoder = TextEncoder::new();
let metrics = zenith_metrics::gather();
encoder.encode(&metrics, &mut buffer).unwrap();