Add more redo metrics: (#2645)

- Measure size of redo WAL (new histogram), with bounds between 24B-32kB - Add 2 more buckets at the upper end of the redo time histogram We often (>0.1% of several hours each day) take more than 250ms to do the redo round-trip to the postgres process. We need to measure these redo times more precisely.
2026-01-10 06:52:55 +00:00 · 2022-10-19 22:47:11 +02:00
parent 4d1e48f3b9
commit b237feedab
2 changed files with 43 additions and 6 deletions
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -277,11 +277,15 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
 /// smallest redo processing times. These buckets allow us to measure down
 /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
 /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
+///
+/// Values up to 1s are recorded because metrics show that we have redo
+/// durations and lock times larger than 0.250s.
 macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
-            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000,
+            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
+            1.000_000,
        ]
    };
 }
@@ -296,6 +300,17 @@ macro_rules! redo_histogram_count_buckets {
    };
 }

+macro_rules! redo_bytes_histogram_count_buckets {
+    () => {
+        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
+        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
+        vec![
+            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
+            2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
+        ]
+    };
+}
+
 pub static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_wal_redo_seconds",
@@ -323,6 +338,15 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

+pub static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "pageserver_wal_redo_bytes_histogram",
+        "Histogram of number of records replayed per redo",
+        redo_bytes_histogram_count_buckets!(),
+    )
+    .expect("failed to define a metric")
+});
+
 pub static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_replayed_wal_records_total",
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -39,7 +39,8 @@ use utils::crashsafe_dir::path_with_suffix_extension;
 use utils::{bin_ser::BeSer, id::TenantId, lsn::Lsn, nonblock::set_nonblock};

 use crate::metrics::{
-    WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME, WAL_REDO_WAIT_TIME,
+    WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
+    WAL_REDO_WAIT_TIME,
 };
 use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block};
 use crate::reltag::{RelTag, SlruKind};
@@ -244,12 +245,23 @@ impl PostgresRedoManager {
        let end_time = Instant::now();
        let duration = end_time.duration_since(lock_time);

+        let len = records.len();
+        let nbytes = records.iter().fold(0, |acumulator, record| {
+            acumulator
+                + match &record.1 {
+                    NeonWalRecord::Postgres { rec, .. } => rec.len(),
+                    _ => unreachable!("Only PostgreSQL records are accepted in this batch"),
+                }
+        });
+
        WAL_REDO_TIME.observe(duration.as_secs_f64());
-        WAL_REDO_RECORDS_HISTOGRAM.observe(records.len() as f64);
+        WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);
+        WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);

        debug!(
-            "postgres applied {} WAL records in {} us to reconstruct page image at LSN {}",
-            records.len(),
+            "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}",
+            len,
+            nbytes,
            duration.as_micros(),
            lsn
        );
@@ -258,8 +270,9 @@ impl PostgresRedoManager {
        // next request will launch a new one.
        if result.is_err() {
            error!(
-                "error applying {} WAL records to reconstruct page image at LSN {}",
+                "error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}",
                records.len(),
+                nbytes,
                lsn
            );
            let process = process_guard.take().unwrap();