From 6ca49b4d0c90009bc0c9b9934fe3d3835ade65ea Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 3 Mar 2025 19:16:03 +0000 Subject: [PATCH] safekeeper: fix a gap tracking edge case (#11054) The interpreted reader tracks a record-aligned current position in the WAL stream. Partial reads move the stream internally, but not from the point of view of the interpreted WAL reader. Hence, a new shard can subscribe with a start position that matches the reader's current position even though we've already done some partial reads. This confuses the gap tracking. To make it more robust, update the current batch start to the min of the new start position and its current value. Since no record has been decoded yet (position matches), we can't have lost any records. --- safekeeper/src/send_interpreted_wal.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index 2c1c73c25c..bf03f27d48 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -184,6 +184,16 @@ impl InterpretedWalReaderState { to: *current_position, } } else { + // Edge case: The new shard is at the same current position as + // the reader. Note that the current position is WAL record aligned, + // so the reader might have done some partial reads and updated the + // batch start. If that's the case, adjust the batch start to match + // starting position of the new shard. It can lead to some shards + // seeing overlaps, but in that case the actual record LSNs are checked + // which should be fine based on the filtering logic. + if let Some(start) = current_batch_wal_start { + *start = std::cmp::min(*start, new_shard_start_pos); + } CurrentPositionUpdate::NotReset(*current_position) } }