From 579470849b58f1252a710a9d7d98572a44ca4e68 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Thu, 22 Feb 2024 14:24:45 +0000 Subject: [PATCH] fix(walredo): yield while applying neon records This should allow more graceful degradation even in situations like having to apply thousands of requests. It comes with an elevated OOM risk as more such requests could be ongoing at the same time. --- pageserver/src/walredo.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 35cbefb92c..4c6f814d84 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -91,6 +91,7 @@ impl PostgresRedoManager { if rec_neon != batch_neon { let result = if batch_neon { self.apply_batch_neon(key, lsn, img, &records[batch_start..i]) + .await } else { self.apply_batch_postgres( key, @@ -111,6 +112,7 @@ impl PostgresRedoManager { // last batch if batch_neon { self.apply_batch_neon(key, lsn, img, &records[batch_start..]) + .await } else { self.apply_batch_postgres( key, @@ -314,7 +316,7 @@ impl PostgresRedoManager { /// /// Process a batch of WAL records using bespoken Neon code. /// - fn apply_batch_neon( + async fn apply_batch_neon( &self, key: Key, lsn: Lsn, @@ -332,9 +334,17 @@ impl PostgresRedoManager { anyhow::bail!("invalid neon WAL redo request with no base image"); } - // Apply all the WAL records in the batch - for (record_lsn, record) in records.iter() { - self.apply_record_neon(key, &mut page, *record_lsn, record)?; + // process the records in batches and yield; this should guard against pathological + // situations where we accidentally have a huge number of in-neon applied records. + let yield_every = 200; + + for records in records.chunks(yield_every) { + // Apply all the WAL records in the batch + for (record_lsn, record) in records { + self.apply_record_neon(key, &mut page, *record_lsn, record)?; + } + + tokio::task::yield_now().await; } // Success! 
let duration = start_time.elapsed();