[proxy] replace the batch cancellation queue, shorten the TTL for cancel keys (#11943)

See #11942 Idea: * if connections are short lived, they can get enqueued and then also remove themselves later if they never made it to redis. This reduces the load on the queue. * short lived connections (<10m, most?) will only issue 1 command, we remove the delete command and rely on ttl. * we can enqueue as many commands as we want, as we can always cancel the enqueue, thanks to the ~~intrusive linked lists~~ `BTreeMap`.
2026-05-24 16:40:38 +00:00 · 2025-06-20 12:48:01 +01:00
parent 8b197de7ff
commit a298d2c29b
16 changed files with 410 additions and 427 deletions
--- a/proxy/src/batch.rs
+++ b/proxy/src/batch.rs
@@ -0,0 +1,146 @@
+//! Batch processing system based on intrusive linked lists.
+//!
+//! Enqueuing a batch job requires no allocations, with
+//! direct support for cancelling jobs early.
+use std::collections::BTreeMap;
+use std::pin::pin;
+use std::sync::Mutex;
+
+use futures::future::Either;
+use scopeguard::ScopeGuard;
+use tokio::sync::oneshot::error::TryRecvError;
+
+use crate::ext::LockExt;
+
+pub trait QueueProcessing: Send + 'static {
+    type Req: Send + 'static;
+    type Res: Send;
+
+    /// Get the desired batch size.
+    fn batch_size(&self, queue_size: usize) -> usize;
+
+    /// This applies a full batch of events.
+    /// Must respond with a full batch of replies.
+    ///
+    /// If this apply can error, it's expected that errors be forwarded to each Self::Res.
+    ///
+    /// Batching does not need to happen atomically.
+    fn apply(&mut self, req: Vec<Self::Req>) -> impl Future<Output = Vec<Self::Res>> + Send;
+}
+
+pub struct BatchQueue<P: QueueProcessing> {
+    processor: tokio::sync::Mutex<P>,
+    inner: Mutex<BatchQueueInner<P>>,
+}
+
+struct BatchJob<P: QueueProcessing> {
+    req: P::Req,
+    res: tokio::sync::oneshot::Sender<P::Res>,
+}
+
+impl<P: QueueProcessing> BatchQueue<P> {
+    pub fn new(p: P) -> Self {
+        Self {
+            processor: tokio::sync::Mutex::new(p),
+            inner: Mutex::new(BatchQueueInner {
+                version: 0,
+                queue: BTreeMap::new(),
+            }),
+        }
+    }
+
+    pub async fn call(&self, req: P::Req) -> P::Res {
+        let (id, mut rx) = self.inner.lock_propagate_poison().register_job(req);
+        let guard = scopeguard::guard(id, move |id| {
+            let mut inner = self.inner.lock_propagate_poison();
+            if inner.queue.remove(&id).is_some() {
+                tracing::debug!("batched task cancelled before completion");
+            }
+        });
+
+        let resp = loop {
+            // try become the leader, or try wait for success.
+            let mut processor = match futures::future::select(rx, pin!(self.processor.lock())).await
+            {
+                // we got the resp.
+                Either::Left((resp, _)) => break resp.ok(),
+                // we are the leader.
+                Either::Right((p, rx_)) => {
+                    rx = rx_;
+                    p
+                }
+            };
+
+            let (reqs, resps) = self.inner.lock_propagate_poison().get_batch(&processor);
+
+            // apply a batch.
+            let values = processor.apply(reqs).await;
+
+            // send response values.
+            for (tx, value) in std::iter::zip(resps, values) {
+                // sender hung up but that's fine.
+                drop(tx.send(value));
+            }
+
+            match rx.try_recv() {
+                Ok(resp) => break Some(resp),
+                Err(TryRecvError::Closed) => break None,
+                // edge case - there was a race condition where
+                // we became the leader but were not in the batch.
+                //
+                // Example:
+                // thread 1: register job id=1
+                // thread 2: register job id=2
+                // thread 2: processor.lock().await
+                // thread 1: processor.lock().await
+                // thread 2: becomes leader, batch_size=1, jobs=[1].
+                Err(TryRecvError::Empty) => {}
+            }
+        };
+
+        // already removed.
+        ScopeGuard::into_inner(guard);
+
+        resp.expect("no response found. batch processer should not panic")
+    }
+}
+
+struct BatchQueueInner<P: QueueProcessing> {
+    version: u64,
+    queue: BTreeMap<u64, BatchJob<P>>,
+}
+
+impl<P: QueueProcessing> BatchQueueInner<P> {
+    fn register_job(&mut self, req: P::Req) -> (u64, tokio::sync::oneshot::Receiver<P::Res>) {
+        let (tx, rx) = tokio::sync::oneshot::channel();
+
+        let id = self.version;
+
+        // Overflow concern:
+        // This is a u64, and we might enqueue 2^16 tasks per second.
+        // This gives us 2^48 seconds (9 million years).
+        // Even if this does overflow, it will not break, but some
+        // jobs with the higher version might never get prioritised.
+        self.version += 1;
+
+        self.queue.insert(id, BatchJob { req, res: tx });
+
+        (id, rx)
+    }
+
+    fn get_batch(&mut self, p: &P) -> (Vec<P::Req>, Vec<tokio::sync::oneshot::Sender<P::Res>>) {
+        let batch_size = p.batch_size(self.queue.len());
+        let mut reqs = Vec::with_capacity(batch_size);
+        let mut resps = Vec::with_capacity(batch_size);
+
+        while reqs.len() < batch_size {
+            let Some((_, job)) = self.queue.pop_first() else {
+                break;
+            };
+            reqs.push(job.req);
+            resps.push(job.res);
+        }
+
+        (reqs, resps)
+    }
+}