From 9333937946eb0063c0db912100cbe542138f9f27 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 25 May 2023 10:01:00 +0300 Subject: [PATCH] Add test for duplicate layers --- pageserver/src/tenant/layer_map.rs | 14 ++++--- .../layer_map/historic_layer_coverage.rs | 10 ++++- .../performance/test_duplicate_layers.py | 42 +++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 test_runner/performance/test_duplicate_layers.py diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 8d06ccd565..3587abbca3 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -56,6 +56,7 @@ use std::collections::VecDeque; use std::ops::Range; use std::sync::Arc; use utils::lsn::Lsn; +use tracing::*; use historic_layer_coverage::BufferedHistoricLayerCoverage; pub use historic_layer_coverage::Replacement; @@ -275,11 +276,14 @@ where /// pub(self) fn insert_historic_noflush(&mut self, layer: Arc) { // TODO: See #3869, resulting #4088, attempted fix and repro #4094 - self.historic.insert( - historic_layer_coverage::LayerKey::from(&*layer), - Arc::clone(&layer), - ); - + let key = historic_layer_coverage::LayerKey::from(&*layer); + if self.historic.contains(&key) { + warn!( + "Attempt to insert duplicate layer {} in layer map", + layer.short_id() + ); + } + self.historic.insert(key, Arc::clone(&layer)); if Self::is_l0(&layer) { self.l0_delta_layers.push(layer); } diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index b63c361314..ffd81b908d 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -417,7 +417,15 @@ impl BufferedHistoricLayerCoverage { } } - pub fn insert(&mut self, layer_key: LayerKey, value: Value) { + pub fn contains(&self, layer_key: &LayerKey) -> bool { + match self.buffer.get(layer_key) { + 
Some(None) => false, // layer remove was buffered + Some(_) => true, // layer insert was buffered + None => self.layers.contains_key(layer_key), // no buffered ops for this layer + } + } + + pub fn insert(&mut self, layer_key: LayerKey, value: Value) { self.buffer.insert(layer_key, Some(value)); } diff --git a/test_runner/performance/test_duplicate_layers.py b/test_runner/performance/test_duplicate_layers.py new file mode 100644 index 0000000000..77d79cfcc0 --- /dev/null +++ b/test_runner/performance/test_duplicate_layers.py @@ -0,0 +1,42 @@ +import time + +import pytest +from fixtures.neon_fixtures import NeonEnvBuilder, PgBin + + +# Test duplicate layer detection +# +# This test sets fail point at the end of first compaction phase: +# after flushing new L1 layers but before deletion of L0 layers +# It should cause generation of duplicate L1 layer by compaction after restart +@pytest.mark.timeout(600) +def test_duplicate_layers(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): + env = neon_env_builder.init_start() + + # These warnings are expected, when the pageserver is restarted abruptly + env.pageserver.allowed_errors.append(".*found future image layer.*") + env.pageserver.allowed_errors.append(".*found future delta layer.*") + env.pageserver.allowed_errors.append(".*duplicate layer.*") + + pageserver_http = env.pageserver.http_client() + + # Use aggressive compaction and checkpoint settings + tenant_id, _ = env.neon_cli.create_tenant( + conf={ + "checkpoint_distance": f"{1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + "compaction_period": "1 s", + "compaction_threshold": "3", + } + ) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + connstr = endpoint.connstr(options="-csynchronous_commit=off") + pg_bin.run_capture(["pgbench", "-i", "-s10", connstr]) + + pageserver_http.configure_failpoints(("compact-level0-phase1-finish", "exit")) + + with pytest.raises(Exception): + pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", 
"-Mprepared", connstr]) + env.pageserver.stop() + env.pageserver.start() + time.sleep(10) # give compaction time to run