From 7e552b645faedcbb5b3f52ebe5ab501e50324435 Mon Sep 17 00:00:00 2001
From: Egor Suvorov
Date: Thu, 28 Oct 2021 18:38:36 +0300
Subject: [PATCH] Add disk write/sync metrics to Safekeeper (#745)

---
 walkeeper/src/safekeeper.rs | 35 ++++++++++++++++++++++++++++++++---
 walkeeper/src/timeline.rs   | 31 +++++++++++++++++++++++++++++++
 zenith_metrics/src/lib.rs   |  4 ++++
 3 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/walkeeper/src/safekeeper.rs b/walkeeper/src/safekeeper.rs
index 49e5945c95..d2590c12b6 100644
--- a/walkeeper/src/safekeeper.rs
+++ b/walkeeper/src/safekeeper.rs
@@ -19,7 +19,10 @@
 use lazy_static::lazy_static;
 use crate::replication::HotStandbyFeedback;
 use postgres_ffi::xlog_utils::MAX_SEND_SIZE;
-use zenith_metrics::{register_gauge_vec, Gauge, GaugeVec};
+use zenith_metrics::{
+    register_gauge_vec, register_histogram_vec, Gauge, GaugeVec, Histogram, HistogramVec,
+    DISK_WRITE_SECONDS_BUCKETS,
+};
 use zenith_utils::bin_ser::LeSer;
 use zenith_utils::lsn::Lsn;
 use zenith_utils::pq_proto::SystemId;
@@ -299,11 +302,27 @@ lazy_static! {
         &["ztli"]
     )
     .expect("Failed to register safekeeper_commit_lsn gauge vec");
+    static ref WRITE_WAL_BYTES: HistogramVec = register_histogram_vec!(
+        "safekeeper_write_wal_bytes",
+        "Bytes written to WAL in a single request, grouped by timeline",
+        &["timeline_id"],
+        vec![1.0, 10.0, 100.0, 1024.0, 8192.0, 128.0 * 1024.0, 1024.0 * 1024.0, 10.0 * 1024.0 * 1024.0]
+    )
+    .expect("Failed to register safekeeper_write_wal_bytes histogram vec");
+    static ref WRITE_WAL_SECONDS: HistogramVec = register_histogram_vec!(
+        "safekeeper_write_wal_seconds",
+        "Seconds spent writing and syncing WAL to a disk in a single request, grouped by timeline",
+        &["timeline_id"],
+        DISK_WRITE_SECONDS_BUCKETS.to_vec()
+    )
+    .expect("Failed to register safekeeper_write_wal_seconds histogram vec");
 }
 
 struct SafeKeeperMetrics {
     flush_lsn: Gauge,
     commit_lsn: Gauge,
+    write_wal_bytes: Histogram,
+    write_wal_seconds: Histogram,
 }
 
 impl SafeKeeperMetrics {
@@ -312,6 +331,8 @@
         SafeKeeperMetrics {
             flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&[&ztli_str]),
             commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&[&ztli_str]),
+            write_wal_bytes: WRITE_WAL_BYTES.with_label_values(&[&ztli_str]),
+            write_wal_seconds: WRITE_WAL_SECONDS.with_label_values(&[&ztli_str]),
         }
     }
 
@@ -319,6 +340,8 @@
         SafeKeeperMetrics {
             flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&["n/a"]),
             commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&["n/a"]),
+            write_wal_bytes: WRITE_WAL_BYTES.with_label_values(&["n/a"]),
+            write_wal_seconds: WRITE_WAL_SECONDS.with_label_values(&["n/a"]),
         }
     }
 }
@@ -472,8 +495,14 @@ where
         // do the job
         let mut last_rec_lsn = Lsn(0);
         if !msg.wal_data.is_empty() {
-            self.storage
-                .write_wal(&self.s.server, msg.h.begin_lsn, &msg.wal_data)?;
+            self.metrics
+                .write_wal_bytes
+                .observe(msg.wal_data.len() as f64);
+            {
+                let _timer = self.metrics.write_wal_seconds.start_timer();
+                self.storage
+                    .write_wal(&self.s.server, msg.h.begin_lsn, &msg.wal_data)?;
+            }
 
             // figure out last record's end lsn for reporting (if we got the
             // whole record)
diff --git a/walkeeper/src/timeline.rs b/walkeeper/src/timeline.rs
index d59ee5701c..c0edc34d00 100644
--- a/walkeeper/src/timeline.rs
+++ b/walkeeper/src/timeline.rs
@@ -12,6 +12,7 @@ use std::fs::{self, File, OpenOptions};
 use std::io::{Seek, SeekFrom, Write};
 use std::sync::{Arc, Condvar, Mutex};
 use std::time::Duration;
+use zenith_metrics::{register_histogram_vec, Histogram, HistogramVec, DISK_WRITE_SECONDS_BUCKETS};
 use zenith_utils::bin_ser::LeSer;
 use zenith_utils::lsn::Lsn;
 use zenith_utils::zid::{ZTenantId, ZTimelineId};
@@ -73,6 +74,23 @@ pub enum CreateControlFile {
     False,
 }
 
+lazy_static! {
+    static ref PERSIST_SYNC_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
+        "safekeeper_persist_sync_control_file_seconds",
+        "Seconds to persist and sync control file, grouped by timeline",
+        &["timeline_id"],
+        DISK_WRITE_SECONDS_BUCKETS.to_vec()
+    )
+    .expect("Failed to register safekeeper_persist_sync_control_file_seconds histogram vec");
+    static ref PERSIST_NOSYNC_CONTROL_FILE_SECONDS: HistogramVec = register_histogram_vec!(
+        "safekeeper_persist_nosync_control_file_seconds",
+        "Seconds to persist control file without sync, grouped by timeline",
+        &["timeline_id"],
+        DISK_WRITE_SECONDS_BUCKETS.to_vec()
+    )
+    .expect("Failed to register safekeeper_persist_nosync_control_file_seconds histogram vec");
+}
+
 impl SharedState {
     /// Get combined state of all alive replicas
     pub fn get_replicas_state(&self) -> ReplicaState {
@@ -109,9 +127,14 @@
         create: CreateControlFile,
     ) -> Result<Self> {
         let (cf, state) = SharedState::load_control_file(conf, timelineid, create)?;
+        let timelineid_str = format!("{}", timelineid);
         let storage = FileStorage {
             control_file: cf,
             conf: conf.clone(),
+            persist_sync_control_file_seconds: PERSIST_SYNC_CONTROL_FILE_SECONDS
+                .with_label_values(&[&timelineid_str]),
+            persist_nosync_control_file_seconds: PERSIST_NOSYNC_CONTROL_FILE_SECONDS
+                .with_label_values(&[&timelineid_str]),
         };
         let (flush_lsn, tli) = if state.server.wal_seg_size != 0 {
             let wal_dir = conf.workdir.join(format!("{}", timelineid));
@@ -376,10 +399,18 @@ impl GlobalTimelines {
 struct FileStorage {
     control_file: File,
     conf: SafeKeeperConf,
+    persist_sync_control_file_seconds: Histogram,
+    persist_nosync_control_file_seconds: Histogram,
 }
 
 impl Storage for FileStorage {
     fn persist(&mut self, s: &SafeKeeperState, sync: bool) -> Result<()> {
+        let _timer = if sync {
+            &self.persist_sync_control_file_seconds
+        } else {
+            &self.persist_nosync_control_file_seconds
+        }
+        .start_timer();
         self.control_file.seek(SeekFrom::Start(0))?;
         s.ser_into(&mut self.control_file)?;
         if sync {
diff --git a/zenith_metrics/src/lib.rs b/zenith_metrics/src/lib.rs
index 59a8a31c9e..8756a078c3 100644
--- a/zenith_metrics/src/lib.rs
+++ b/zenith_metrics/src/lib.rs
@@ -74,6 +74,10 @@ lazy_static! {
     .expect("Failed to register maxrss_kb int gauge");
 }
 
+pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
+    0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
+];
+
 // Records I/O stats in a "cross-platform" way.
 // Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
 // An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.
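
Note on the pattern this patch uses: a HistogramVec is registered once, a per-timeline Histogram is pulled out with with_label_values(), and the code either calls observe() directly (bytes written) or holds the guard returned by start_timer() so the elapsed seconds are recorded when the guard is dropped. The standalone sketch below illustrates that pattern outside the Safekeeper codebase. It is written against the prometheus crate directly, on the assumption that zenith_metrics re-exports the same macros and types (as the imports above suggest); the metric name example_write_seconds, the helper timed_write, the file path, and the bucket values are invented for illustration and are not part of the commit.

// Illustrative sketch only: names and bucket values here are invented; this
// uses the `prometheus` crate directly, assuming zenith_metrics mirrors its API.
use lazy_static::lazy_static;
use prometheus::{register_histogram_vec, Encoder, Histogram, HistogramVec, TextEncoder};

// Like DISK_WRITE_SECONDS_BUCKETS: fine-grained sub-second buckets, since a
// single write + fsync is expected to finish well under a second.
const WRITE_SECONDS_BUCKETS: &[f64] = &[0.000_050, 0.001, 0.01, 0.1, 0.5];

lazy_static! {
    static ref WRITE_SECONDS: HistogramVec = register_histogram_vec!(
        "example_write_seconds",
        "Seconds spent writing and syncing a file, grouped by timeline",
        &["timeline_id"],
        WRITE_SECONDS_BUCKETS.to_vec()
    )
    .expect("Failed to register example_write_seconds histogram vec");
}

fn timed_write(timeline_id: &str, data: &[u8]) -> std::io::Result<()> {
    let hist: Histogram = WRITE_SECONDS.with_label_values(&[timeline_id]);
    // The elapsed time is observed into the histogram when `_timer` is
    // dropped, i.e. right after the write below succeeds or fails.
    let _timer = hist.start_timer();
    std::fs::write("/tmp/example.bin", data) // stand-in for write_wal / persist
}

fn main() -> std::io::Result<()> {
    timed_write("n/a", b"hello")?;
    // Dump the registry in the text format a Prometheus scrape would see.
    let mut buf = Vec::new();
    TextEncoder::new()
        .encode(&prometheus::gather(), &mut buf)
        .expect("encoding metrics should not fail");
    println!("{}", String::from_utf8(buf).expect("metrics are valid UTF-8"));
    Ok(())
}

Because the timer observes on drop, a duration is recorded even when the guarded call returns early via `?`. Wrapping the timed write in its own block, as the safekeeper.rs hunk does, ends the measurement right after the WAL write instead of at the end of the enclosing function.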