From 5e0409de95ed1d19ffdb36c31b12792c49938635 Mon Sep 17 00:00:00 2001
From: Tristan Partin <tristan@neon.tech>
Date: Thu, 25 Jul 2024 15:45:15 -0500
Subject: [PATCH] Fix negative replication delay metric

In some cases, we can get a negative value for the
replication_delay_bytes metric. My best guess from all the research
I've done is that we evaluate pg_last_wal_receive_lsn() before
pg_last_wal_replay_lsn(), and that by the time the second call is
evaluated, the replay LSN has advanced past the receive LSN we captured
with the first call. In this case, our lag can effectively be modeled
as 0 due to the speed of the WAL reception and replay, so clamp the
result to a minimum of 0 with GREATEST().
---
 vm-image-spec.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vm-image-spec.yaml b/vm-image-spec.yaml
index 2767710bad..7d005c7139 100644
--- a/vm-image-spec.yaml
+++ b/vm-image-spec.yaml
@@ -277,8 +277,12 @@ files:
         help: 'Bytes between received and replayed LSN'
         key_labels:
         values: [replication_delay_bytes]
+        # We use a GREATEST call here because this calculation can be negative.
+        # The calculation is not atomic, meaning after we've gotten the receive
+        # LSN, the replay LSN may have advanced past the receive LSN we
+        # are using for the calculation.
         query: |
-          SELECT pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn()) AS replication_delay_bytes;
+          SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
 
       - metric_name: replication_delay_seconds
         type: gauge