From b6447462dc72b8634cf122c76d3e155c2f6b5d60 Mon Sep 17 00:00:00 2001 From: bojanserafimov Date: Wed, 31 May 2023 12:23:00 -0400 Subject: [PATCH] Fix layer map correctness bug (#4342) --- .../layer_map/historic_layer_coverage.rs | 29 ++++++++++++++++ .../src/tenant/layer_map/layer_coverage.rs | 33 ++++++++++++------- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index b63c361314..49dcbc63c2 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -204,6 +204,35 @@ fn test_off_by_one() { assert_eq!(version.image_coverage.query(5), None); } +/// White-box regression test, checking for incorrect removal of node at key.end +#[test] +fn test_regression() { + let mut map = HistoricLayerCoverage::<String>::new(); + map.insert( + LayerKey { + key: 0..5, + lsn: 0..5, + is_image: false, + }, + "Layer 1".to_string(), + ); + map.insert( + LayerKey { + key: 0..5, + lsn: 1..2, + is_image: false, + }, + "Layer 2".to_string(), + ); + + // If an insertion operation improperly deletes the endpoint of a previous layer + // (which is more likely to happen with layers that collide on key.end), we will + // end up with an infinite layer, covering the entire keyspace. Here we assert + // that there's no layer at key 100 because we didn't insert any layer there.
+ let version = map.get_version(100).unwrap(); + assert_eq!(version.delta_coverage.query(100), None); +} + /// Cover edge cases where layers begin or end on the same key #[test] fn test_key_collision() { diff --git a/pageserver/src/tenant/layer_map/layer_coverage.rs b/pageserver/src/tenant/layer_map/layer_coverage.rs index 4e3b4516dc..9d9d1d6ccf 100644 --- a/pageserver/src/tenant/layer_map/layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/layer_coverage.rs @@ -10,19 +10,22 @@ use rpds::RedBlackTreeMapSync; /// - iterate the latest layers in a key range /// - insert layers in non-decreasing lsn.start order /// -/// The struct is parameterized over Value for easier -/// testing, but in practice it's some sort of layer. +/// For a detailed explanation and justification of this approach, see: +/// https://neon.tech/blog/persistent-structures-in-neons-wal-indexing +/// +/// NOTE The struct is parameterized over Value for easier +/// testing, but in practice it's some sort of layer. pub struct LayerCoverage<Value> { /// For every change in coverage (as we sweep the key space) /// we store (lsn.end, value). /// - /// We use an immutable/persistent tree so that we can keep historic - /// versions of this coverage without cloning the whole thing and - /// incurring quadratic memory cost. See HistoricLayerCoverage. + /// NOTE We use an immutable/persistent tree so that we can keep historic + /// versions of this coverage without cloning the whole thing and + /// incurring quadratic memory cost. See HistoricLayerCoverage. /// - /// We use the Sync version of the map because we want Self to - /// be Sync. Using nonsync might be faster, if we can work with - /// that. + /// NOTE We use the Sync version of the map because we want Self to + /// be Sync. Using nonsync might be faster, if we can work with + /// that.
nodes: RedBlackTreeMapSync<i128, Option<(u64, Value)>>, } @@ -41,6 +44,13 @@ impl<Value: Clone> LayerCoverage<Value> { /// Helper function to subdivide the key range without changing any values /// + /// This operation has no semantic effect by itself. It only helps us pin in + /// place the part of the coverage we don't want to change when inserting. + /// + /// As an analogy, think of a polygon. If you add a vertex along one of the + /// segments, the polygon is still the same, but it behaves differently when + /// we move or delete one of the other points. + /// /// Complexity: O(log N) fn add_node(&mut self, key: i128) { let value = match self.nodes.range(..=key).last() { @@ -74,7 +84,7 @@ impl<Value: Clone> LayerCoverage<Value> { let mut to_update = Vec::new(); let mut to_remove = Vec::new(); let mut prev_covered = false; - for (k, node) in self.nodes.range(key.clone()) { + for (k, node) in self.nodes.range(key) { let needs_cover = match node { None => true, Some((h, _)) => h < &lsn.end, @@ -87,9 +97,8 @@ impl<Value: Clone> LayerCoverage<Value> { } prev_covered = needs_cover; } - if !prev_covered { - to_remove.push(key.end); - } + // TODO check if the nodes inserted at key.start and key.end are safe + // to remove. It's fine to keep them but they could be redundant. for k in to_update { self.nodes.insert_mut(k, Some((lsn.end, value.clone()))); }