From 4316f0fab27312c1a236cf8b42091cb665847bf0 Mon Sep 17 00:00:00 2001
From: John Spray <john@neon.tech>
Date: Fri, 19 Apr 2024 13:22:20 +0100
Subject: [PATCH] Tidy up old size code

---
 libs/pageserver_api/src/keyspace.rs           | 114 +++++++++---------
 .../tenant/storage_layer/inmemory_layer.rs    |   2 +-
 pageserver/src/tenant/timeline.rs             |  10 +-
 3 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/libs/pageserver_api/src/keyspace.rs b/libs/pageserver_api/src/keyspace.rs
index 2d228a9611..1283c48924 100644
--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -87,41 +87,28 @@ impl<'a> ShardedRange<'a> {
     /// Estimate the physical pages that are within this range, on this shard.  This returns
     /// u32::MAX if the range spans relations: this return value should be interpreted as "large".
     pub fn page_count(&self) -> u32 {
-        let start = self.range.start;
-        let end = self.range.end;
-        // Although only field4 & field6 are included in the hash, if other fields differ then
-        // it would be inaccurate for us to return a block count that assumed they were the same.
-        if end.field1 != start.field1
-            || end.field2 != start.field2
-            || end.field3 != start.field3
-            || end.field4 != start.field4
-        {
+        let raw_size = Self::raw_size(&self.range);
+        if raw_size == u32::MAX {
             return u32::MAX;
         }
 
-        // Fast path: avoid hashing if we can immediately identify the owner of the whole range
+        // Special case for single sharded tenants: our logical and physical sizes are the same
         if self.shard_identity.count < ShardCount::new(2) {
-            let start = (start.field5 as u64) << 32 | start.field6 as u64;
-            let end = (end.field5 as u64) << 32 | end.field6 as u64;
-
-            let diff = end - start;
-            if diff > u32::MAX as u64 {
-                return u32::MAX;
-            } else {
-                return diff as u32;
-            }
+            return raw_size;
         }
 
         // Special cases for single keys like logical sizes
-        if end == start.add(1) && self.shard_identity.is_key_local(&start) {
+        if self.range.end == self.range.start.add(1)
+            && self.shard_identity.is_key_local(&self.range.start)
+        {
             return 1;
         }
 
         // Normal path: step through stripes and part-stripes in the range, evaluate whether each one belongs
         // to Self, and add the stripe's block count to our total if so.
         let mut result: u64 = 0;
-        let mut stripe_start = start;
-        while stripe_start < end {
+        let mut stripe_start = self.range.start;
+        while stripe_start < self.range.end {
             let is_key_disposable = self.shard_identity.is_key_disposable(&stripe_start);
 
             // Count up to the next stripe_size boundary
@@ -130,7 +117,7 @@ impl<'a> ShardedRange<'a> {
                 - (stripe_start.field6 - stripe_index * self.shard_identity.stripe_size.0);
 
             let next_stripe_start = stripe_start.add(stripe_remainder);
-            let stripe_end = std::cmp::min(end, next_stripe_start);
+            let stripe_end = std::cmp::min(self.range.end, next_stripe_start);
 
             // If this blocks in this stripe belong to us, add them to our count
             if !is_key_disposable {
@@ -141,12 +128,44 @@ impl<'a> ShardedRange<'a> {
 
             stripe_start = next_stripe_start;
         }
+
+        // Sharding should always decrease the number of pages we estimate, never increase it
+        debug_assert!(result <= raw_size as u64);
+
         if result > u32::MAX as u64 {
             u32::MAX
         } else {
             result as u32
         }
     }
+
+    /// Whereas `page_count` estimates the number of pages physically in this range on this shard,
+    /// this function simply calculates the number of pages in the space, without accounting for those
+    /// pages that would not actually be stored on this node.
+    ///
+    /// Don't use this function in code that works with physical entities like layer files.
+    fn raw_size(range: &Range<Key>) -> u32 {
+        let start = range.start;
+        let end = range.end;
+
+        if end.field1 != start.field1
+            || end.field2 != start.field2
+            || end.field3 != start.field3
+            || end.field4 != start.field4
+        {
+            return u32::MAX;
+        }
+
+        let start = (start.field5 as u64) << 32 | start.field6 as u64;
+        let end = (end.field5 as u64) << 32 | end.field6 as u64;
+
+        let diff = end - start;
+        if diff > u32::MAX as u64 {
+            u32::MAX
+        } else {
+            diff as u32
+        }
+    }
 }
 
 impl KeySpace {
@@ -285,16 +304,16 @@ impl KeySpace {
         self.ranges.last().map(|range| range.end)
     }
 
-    #[allow(unused)]
-    pub fn total_size(&self) -> usize {
+    /// The size of the keyspace in pages, before accounting for sharding
+    pub fn total_raw_size(&self) -> usize {
         self.ranges
             .iter()
-            .map(|range| key_range_size(range) as usize)
+            .map(|range| ShardedRange::raw_size(range) as usize)
             .sum()
     }
 
     pub fn is_empty(&self) -> bool {
-        self.total_size() == 0
+        self.total_raw_size() == 0
     }
 
     fn overlaps_at(&self, range: &Range<Key>) -> Option<usize> {
@@ -367,7 +386,7 @@ impl KeySpaceAccum {
 
     #[inline(always)]
     pub fn add_range(&mut self, range: Range<Key>) {
-        self.size += key_range_size(&range) as u64;
+        self.size += ShardedRange::raw_size(&range) as u64;
 
         match self.accum.as_mut() {
             Some(accum) => {
@@ -399,7 +418,9 @@ impl KeySpaceAccum {
         std::mem::take(self).to_keyspace()
     }
 
-    pub fn size(&self) -> u64 {
+    // The total number of keys in this object, ignoring any sharding effects that might cause some of
+    // the keys to be omitted in storage on this shard.
+    pub fn raw_size(&self) -> u64 {
         self.size
     }
 }
@@ -455,30 +476,6 @@ impl KeySpaceRandomAccum {
     }
 }
 
-#[inline(always)]
-fn key_range_size(key_range: &Range<Key>) -> u32 {
-    let start = key_range.start;
-    let end = key_range.end;
-
-    if end.field1 != start.field1
-        || end.field2 != start.field2
-        || end.field3 != start.field3
-        || end.field4 != start.field4
-    {
-        return u32::MAX;
-    }
-
-    let start = (start.field5 as u64) << 32 | start.field6 as u64;
-    let end = (end.field5 as u64) << 32 | end.field6 as u64;
-
-    let diff = end - start;
-    if diff > u32::MAX as u64 {
-        u32::MAX
-    } else {
-        diff as u32
-    }
-}
-
 pub fn singleton_range(key: Key) -> Range<Key> {
     key..key.next()
 }
@@ -532,14 +529,17 @@ mod tests {
             accum.add_range(range.clone());
         }
 
-        let expected_size: u64 = ranges.iter().map(|r| key_range_size(r) as u64).sum();
-        assert_eq!(accum.size(), expected_size);
+        let expected_size: u64 = ranges
+            .iter()
+            .map(|r| ShardedRange::raw_size(r) as u64)
+            .sum();
+        assert_eq!(accum.raw_size(), expected_size);
 
         assert_ks_eq(&accum.consume_keyspace(), ranges.clone());
-        assert_eq!(accum.size(), 0);
+        assert_eq!(accum.raw_size(), 0);
 
         assert_ks_eq(&accum.consume_keyspace(), vec![]);
-        assert_eq!(accum.size(), 0);
+        assert_eq!(accum.raw_size(), 0);
 
         for range in &ranges {
             accum.add_range(range.clone());
diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
index 5939b969d6..0b2774524a 100644
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -406,7 +406,7 @@ impl InMemoryLayer {
             }
         }
 
-        let keyspace_size = keyspace.total_size();
+        let keyspace_size = keyspace.total_raw_size();
 
         let mut completed_keys = HashSet::new();
         while completed_keys.len() < keyspace_size && !planned_block_reads.is_empty() {
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index ffade79596..cfbe70c519 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -936,7 +936,7 @@ impl Timeline {
             return Err(GetVectoredError::InvalidLsn(lsn));
         }
 
-        let key_count = keyspace.total_size().try_into().unwrap();
+        let key_count = keyspace.total_raw_size().try_into().unwrap();
         if key_count > Timeline::MAX_GET_VECTORED_KEYS {
             return Err(GetVectoredError::Oversized(key_count));
         }
@@ -1076,7 +1076,7 @@ impl Timeline {
         mut reconstruct_state: ValuesReconstructState,
         ctx: &RequestContext,
     ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
-        let get_kind = if keyspace.total_size() == 1 {
+        let get_kind = if keyspace.total_raw_size() == 1 {
             GetKind::Singular
         } else {
             GetKind::Vectored
@@ -3193,7 +3193,7 @@ impl Timeline {
                 }
             }
 
-            if keyspace.total_size() == 0 || timeline.ancestor_timeline.is_none() {
+            if keyspace.total_raw_size() == 0 || timeline.ancestor_timeline.is_none() {
                 break;
             }
 
@@ -3206,7 +3206,7 @@ impl Timeline {
             timeline = &*timeline_owned;
         }
 
-        if keyspace.total_size() != 0 {
+        if keyspace.total_raw_size() != 0 {
             return Err(GetVectoredError::MissingKey(keyspace.start().unwrap()));
         }
 
@@ -4040,7 +4040,7 @@ impl Timeline {
                     key = key.next();
 
                     // Maybe flush `key_rest_accum`
-                    if key_request_accum.size() >= Timeline::MAX_GET_VECTORED_KEYS
+                    if key_request_accum.raw_size() >= Timeline::MAX_GET_VECTORED_KEYS
                         || last_key_in_range
                     {
                         let results = self