Improve comments in page_cache.rs.

Explain the mix of async and other functions in the page cache.
2026-01-05 20:42:54 +00:00 · 2021-04-24 17:54:28 +03:00
parent 021462da3e
commit 0fc05569e0
1 changed files with 46 additions and 1 deletions
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -1,7 +1,32 @@
 //
 // Page Cache holds all the different page versions and WAL records
 //
-// The Page Cache is currenusing RocksDB for storing wal records and full page images, keyed by the RelFileNode, blocknumber, and the LSN.
+// Currently, the page cache uses RocksDB to store WAL wal records and
+// full page images, keyed by the RelFileNode, blocknumber, and the
+// LSN.
+//
+// On async vs blocking:
+//
+// All the functions that can block for any significant length use
+// Tokio async, and are marked as 'async'. That currently includes all
+// the "Get" functions that get a page image or a relation size.  The
+// "Put" functions that add base images or WAL records to the cache
+// cannot block.
+//
+// However, we have a funny definition of blocking: waiting on a Mutex
+// that protects the in-memory data structures is not considered as
+// blocking, as those are short waits, and there is no deadlock
+// risk. It is *not* OK to do I/O or wait on other threads while
+// holding a Mutex, however.
+//
+// Another wart is that we currently consider the RocksDB operations
+// to be non-blocking, and we do those while holding a lock, and
+// without async. That's fantasy, as RocksDB will do I/O, possibly a
+// lot of it. But that's not a correctness issue, since the RocksDB
+// calls will not call back to any of the other functions in page
+// server. RocksDB is just a stopgap solution, to be replaced with
+// something else, so it doesn't seem worth it to wrangle those calls
+// into async model.
 //

 use crate::restore_local_repo::restore_timeline;
@@ -445,6 +470,9 @@ impl PageCache {
        }
    }

+    //
+    // Wait until WAL has been received up to the given LSN.
+    //
    async fn wait_lsn(&self, req_lsn: u64) -> anyhow::Result<u64> {
        let mut lsn = req_lsn;
        //When invalid LSN is requested, it means "don't wait, return latest version of the page"
@@ -457,6 +485,7 @@ impl PageCache {
                lsn
            );
        }
+
        self.seqwait_lsn
            .wait_for_timeout(lsn, TIMEOUT)
            .await
@@ -766,11 +795,19 @@ impl PageCache {
        shared.last_record_lsn
    }

+    //
+    // Get size of relation at given LSN.
+    //
    pub async fn relsize_get(&self, rel: &RelTag, lsn: u64) -> anyhow::Result<u32> {
        self.wait_lsn(lsn).await?;
        self.relsize_get_nowait(rel, lsn)
    }

+    //
+    // Internal function to get relation size at given LSN.
+    //
+    // The caller must ensure that WAL has been received up to 'lsn'.
+    //
    fn relsize_get_nowait(&self, rel: &RelTag, lsn: u64) -> anyhow::Result<u32> {

        assert!(lsn <= self.last_valid_lsn.load(Ordering::Acquire));
@@ -848,6 +885,9 @@ impl PageCache {
        Ok(false)
    }

+    //
+    // Get statistics to be displayed in the user interface.
+    //
    pub fn get_stats(&self) -> PageCacheStats {
        PageCacheStats {
            num_entries: self.num_entries.load(Ordering::Relaxed),
@@ -911,6 +951,11 @@ impl PageCache {
    }
 }

+//
+// Get statistics to be displayed in the user interface.
+//
+// This combines the stats from all PageCache instances
+//
 pub fn get_stats() -> PageCacheStats {
    let pcaches = PAGECACHES.lock().unwrap();