// Patch overview (descriptive preamble only; the diff text that follows is unchanged):
//  - pageserver/src/tenant/layer_map.rs: insert_historic_noflush no longer bails on a duplicate layer key; it logs "Attempt to insert duplicate layer ..." through error!, skips the insert, and still returns Ok(()).
//  - pageserver/src/tenant/timeline.rs: adds the "compact-level0-phase1-finish" fail point immediately before the CompactLevel0Phase1Result is returned.
//  - test_runner/regress/test_duplicate_layers.py: new test file that configures that fail point with the "exit" action.
diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 0ee0c6f77d..d19f5c9247 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -51,11 +51,12 @@ use crate::keyspace::KeyPartitioning; use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use crate::tenant::storage_layer::Layer; -use anyhow::{bail, Result}; +use anyhow::Result; use std::collections::VecDeque; use std::ops::Range; use std::sync::Arc; use utils::lsn::Lsn; +use tracing::*; use historic_layer_coverage::BufferedHistoricLayerCoverage; pub use historic_layer_coverage::Replacement; @@ -276,17 +277,17 @@ where pub(self) fn insert_historic_noflush(&mut self, layer: Arc) -> anyhow::Result<()> { let key = historic_layer_coverage::LayerKey::from(&*layer); if self.historic.contains(&key) { - bail!( + error!( "Attempt to insert duplicate layer {} in layer map", layer.short_id() ); - } - self.historic.insert(key, Arc::clone(&layer)); - - if Self::is_l0(&layer) { - self.l0_delta_layers.push(layer); - } + } else { + self.historic.insert(key, Arc::clone(&layer)); + if Self::is_l0(&layer) { + self.l0_delta_layers.push(layer); + } + } Ok(()) } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b8b1f963e5..3db8247531 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3300,6 +3300,10 @@ impl Timeline { drop(all_keys_iter); // So that deltas_to_compact is no longer borrowed + fail_point!("compact-level0-phase1-finish", |_| { + Err(anyhow::anyhow!("failpoint compact-level0-phase1-finish").into()) + }); + Ok(CompactLevel0Phase1Result { new_layers, deltas_to_compact, diff --git a/test_runner/regress/test_duplicate_layers.py b/test_runner/regress/test_duplicate_layers.py new file mode 100644 index 0000000000..0b0a428247 --- /dev/null +++ b/test_runner/regress/test_duplicate_layers.py @@ -0,0 +1,41 @@ +import pytest +import time +from 
fixtures.neon_fixtures import NeonEnvBuilder, PgBin + + +# Test duplicate layer detection +# +# This test sets a fail point at the end of the first compaction phase: +# after flushing new L1 layers but before deletion of the L0 layers. +# It should cause compaction to generate a duplicate L1 layer after restart. +@pytest.mark.timeout(600) +def test_duplicate_layers(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): + env = neon_env_builder.init_start() + + # These log messages are expected when the pageserver is restarted abruptly + env.pageserver.allowed_errors.append(".*found future image layer.*") + env.pageserver.allowed_errors.append(".*found future delta layer.*") + env.pageserver.allowed_errors.append(".*Attempt to insert duplicate layer.*") + + pageserver_http = env.pageserver.http_client() + + # Use aggressive compaction and checkpoint settings + tenant_id, _ = env.neon_cli.create_tenant( + conf={ + "checkpoint_distance": f"{1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + "compaction_period": "1 s", + "compaction_threshold": "3", + } + ) + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + connstr = endpoint.connstr(options="-csynchronous_commit=off") + pg_bin.run_capture(["pgbench", "-i", "-s10", connstr]) + + pageserver_http.configure_failpoints(("compact-level0-phase1-finish", "exit")) + + with pytest.raises(Exception): + pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr]) + env.pageserver.stop() + env.pageserver.start() + time.sleep(10) # give compaction time to run after the restart