From c848b995b296124b686b4eeec54b08aee3e539a1 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 8 Jul 2025 22:24:59 +0100 Subject: [PATCH] safekeeper: trim dead senders before adding more (#12490) ## Problem We only trim the senders if we tried to send a message to them and discovered that the channel is closed. This is problematic if the pageserver keeps connecting while there's nothing to send back for the shard. In this scenario we never trim down the senders list and can panic due to the u8 limit. ## Summary of Changes Trim down the dead senders before adding a new one. Closes LKB-178 --- safekeeper/src/send_interpreted_wal.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index 3797ac39d1..72a436e25f 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -561,6 +561,20 @@ impl InterpretedWalReader { // Update internal and external state, then reset the WAL stream // if required. let senders = self.shard_senders.entry(shard_id).or_default(); + + // Clean up any shard senders that have dropped out before adding the new + // one. This avoids a build up of dead senders. + senders.retain(|sender| { + let closed = sender.tx.is_closed(); + + if closed { + let sender_id = ShardSenderId::new(shard_id, sender.sender_id); + tracing::info!("Removed shard sender {}", sender_id); + } + + !closed + }); + let new_sender_id = match senders.last() { Some(sender) => sender.sender_id.next(), None => SenderId::first()