diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b654be031c..7ae2d0f14c 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -277,11 +277,15 @@ pub static TENANT_TASK_EVENTS: Lazy = Lazy::new(|| { /// smallest redo processing times. These buckets allow us to measure down /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec. /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec. +/// +/// Values up to 1s are recorded because metrics show that we have redo +/// durations and lock times larger than 0.250s. macro_rules! redo_histogram_time_buckets { () => { vec![ 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000, - 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, + 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000, + 1.000_000, ] }; } @@ -296,6 +300,17 @@ macro_rules! redo_histogram_count_buckets { }; } +macro_rules! redo_bytes_histogram_count_buckets { + () => { + // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets) + // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too. + vec![ + 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0, + 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0, + ] + }; +} + pub static WAL_REDO_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_seconds", @@ -323,6 +338,15 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +pub static WAL_REDO_BYTES_HISTOGRAM: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_wal_redo_bytes_histogram", + "Histogram of number of records replayed per redo", + redo_bytes_histogram_count_buckets!(), + ) + .expect("failed to define a metric") +}); + pub static WAL_REDO_RECORD_COUNTER: Lazy = Lazy::new(|| { register_int_counter!( "pageserver_replayed_wal_records_total", diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 15a9408dc9..1dd27caba6 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -39,7 +39,8 @@ use utils::crashsafe_dir::path_with_suffix_extension; use utils::{bin_ser::BeSer, id::TenantId, lsn::Lsn, nonblock::set_nonblock}; use crate::metrics::{ - WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, WAL_REDO_WAIT_TIME, + WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, + WAL_REDO_WAIT_TIME, }; use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block}; use crate::reltag::{RelTag, SlruKind}; @@ -244,12 +245,23 @@ impl PostgresRedoManager { let end_time = Instant::now(); let duration = end_time.duration_since(lock_time); + let len = records.len(); + let nbytes = records.iter().fold(0, |acumulator, record| { + acumulator + + match &record.1 { + NeonWalRecord::Postgres { rec, .. } => rec.len(), + _ => unreachable!("Only PostgreSQL records are accepted in this batch"), + } + }); + WAL_REDO_TIME.observe(duration.as_secs_f64()); - WAL_REDO_RECORDS_HISTOGRAM.observe(records.len() as f64); + WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64); + WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64); debug!( - "postgres applied {} WAL records in {} us to reconstruct page image at LSN {}", - records.len(), + "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}", + len, + nbytes, duration.as_micros(), lsn ); @@ -258,8 +270,9 @@ impl PostgresRedoManager { // next request will launch a new one. if result.is_err() { error!( - "error applying {} WAL records to reconstruct page image at LSN {}", + "error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}", records.len(), + nbytes, lsn ); let process = process_guard.take().unwrap();