From 78ca8679e8a810b5e33b6f327da023325fd8bab3 Mon Sep 17 00:00:00 2001
From: Joonas Koivunen
Date: Mon, 25 Mar 2024 15:32:47 +0000
Subject: [PATCH] doc: comment why this is not so easy

---
 pageserver/src/tenant/timeline/compaction.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 6168d7869a..8d52fa68b7 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -263,6 +263,22 @@ impl Timeline {
             }
         }
 
+        // Drop the read lock for now; in theory, gc could also remove the same layers we are
+        // now compacting. FIXME: how to prepare such a test case?
+        // 0. tenant with minimal pitr
+        // 1. create 10 layers
+        // 2. await on pausable_failpoint after dropping the read guard
+        // 3. delete all data, vacuum, checkpoint
+        // 4. gc
+        //
+        // Now gc deletes the layers, and when we finally get to writing our results back in
+        // finish_compact_batch, LayerManager::finish_compact_l0 will panic while deleting a
+        // layer which no longer exists, in LayerMap::remove_historic_noflush or LayerFileManager::remove.
+        //
+        // Is the easy solution just to make the deletions from compaction more lenient?
+        // Currently gc holds a write lock, so it cannot have this problem right now. If gc were
+        // loosened to take the read lock only momentarily and the write lock only for applying,
+        // it would have a similar issue in trying to gc layers which have just been compacted.
         stats.read_lock_held_compute_holes_micros = stats.read_lock_held_key_sort_micros.till_now();
         drop_rlock(guard);
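
To make the trade-off concrete, here is a minimal, self-contained sketch with toy types (`LayerKey` and `ToyLayerMap` are illustrative stand-ins, not the actual `LayerMap` / `LayerFileManager` API) contrasting the current strict removal, which panics when a layer is already gone, with the more lenient removal the comment asks about:

```rust
// Sketch only: toy types standing in for the real layer bookkeeping, showing the
// difference between strict removal (panics if the layer is already gone, e.g.
// because gc removed it concurrently) and lenient removal (tolerates it).
use std::collections::HashSet;

/// Stand-in for a layer file name / key (hypothetical, for illustration).
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
struct LayerKey(&'static str);

/// Toy layer map holding the set of "historic" layers.
#[derive(Default)]
struct ToyLayerMap {
    historic: HashSet<LayerKey>,
}

impl ToyLayerMap {
    /// Strict removal: mirrors the behaviour of panicking when asked to delete a
    /// layer that is not present.
    #[allow(dead_code)]
    fn remove_strict(&mut self, key: &LayerKey) {
        assert!(self.historic.remove(key), "layer {key:?} not found");
    }

    /// Lenient removal: the "easy solution" floated in the comment. Reports whether
    /// the layer was still present, letting the caller log instead of panicking.
    fn remove_lenient(&mut self, key: &LayerKey) -> bool {
        self.historic.remove(key)
    }
}

fn main() {
    let mut map = ToyLayerMap::default();
    map.historic.insert(LayerKey("delta-0"));

    // Simulate gc racing ahead while compaction has dropped the read lock:
    // gc removes the layer first.
    assert!(map.remove_lenient(&LayerKey("delta-0")));

    // Compaction later tries to delete the same layer when writing back its results.
    // The lenient variant merely returns false; the strict variant would panic here.
    assert!(!map.remove_lenient(&LayerKey("delta-0")));
}
```

Whether returning a flag (rather than keeping the assertion) is acceptable depends on whether a missing layer at this point can ever signal a real bug rather than the gc/compaction race described above.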