From 8f170c51059087b26ac098219ca97072f053f513 Mon Sep 17 00:00:00 2001
From: Joonas Koivunen
Date: Tue, 13 Aug 2024 20:00:54 +0300
Subject: [PATCH] fix: make compaction more sensitive to cancellation (#8706)

A few of the benchmarks have started failing after #8655 because they are
left waiting for the compactor task. Reads done by image layer creation
should already be cancellation-sensitive, since vectored get checks for
cancellation on each call, but sprinkle in additional cancellation points
anyway, using the check sketched below:

- at each partition
- after each vectored read batch
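For reference, here is a minimal, self-contained sketch of the cooperative
cancellation pattern these points follow. It assumes the tokio-util crate for
CancellationToken (which is what the timeline's `cancel` field appears to be);
`compact_partitions` is invented for illustration, and the one-variant error
enum is a trimmed-down stand-in for the real CompactionError:

    // Sketch only, not pageserver code. Assumes a `tokio-util` dependency;
    // `compact_partitions` is invented and the error enum is a trimmed-down
    // stand-in for the real one.
    use tokio_util::sync::CancellationToken;

    #[derive(Debug)]
    enum CompactionError {
        ShuttingDown,
    }

    fn compact_partitions(
        partitions: &[Vec<u64>],
        cancel: &CancellationToken,
    ) -> Result<usize, CompactionError> {
        let mut compacted = 0;
        for partition in partitions {
            // One cheap check per partition: a pending shutdown is noticed
            // before the next chunk of work starts, not only after all of it.
            if cancel.is_cancelled() {
                return Err(CompactionError::ShuttingDown);
            }
            // Stand-in for the real per-partition work (image layer creation,
            // key merging, ...).
            compacted += partition.len();
        }
        Ok(compacted)
    }

    fn main() {
        let cancel = CancellationToken::new();
        let partitions = vec![vec![1, 2, 3], vec![4, 5]];

        // With a fresh token, every partition is processed.
        assert_eq!(compact_partitions(&partitions, &cancel).unwrap(), 5);

        // Once the token is cancelled, the next check bails out promptly.
        cancel.cancel();
        assert!(matches!(
            compact_partitions(&partitions, &cancel),
            Err(CompactionError::ShuttingDown)
        ));
    }

Checking at loop boundaries keeps the time to notice a shutdown bounded by a
single partition's (or read batch's) worth of work, which is the effect the
hunks below aim for.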
""" self.endpoints.stop_all(fail_on_endpoint_errors) diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index 68a45f957c..bbf82fea4c 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -159,6 +159,8 @@ def test_pageserver_chaos( if build_type == "debug": pytest.skip("times out in debug builds") + # same rationale as with the immediate stop; we might leave orphan layers behind. + neon_env_builder.disable_scrub_on_exit() neon_env_builder.enable_pageserver_remote_storage(s3_storage()) if shard_count is not None: neon_env_builder.num_pageservers = shard_count @@ -220,3 +222,11 @@ def test_pageserver_chaos( # Check that all the updates are visible num_updates = endpoint.safe_psql("SELECT sum(updates) FROM foo")[0][0] assert num_updates == i * 100000 + + # currently pageserver cannot tolerate the fact that "s3" goes away, and if + # we succeeded in a compaction before shutdown, there might be a lot of + # uploads pending, certainly more than what we can ingest with MOCK_S3 + # + # so instead, do a fast shutdown for this one test. + # See https://github.com/neondatabase/neon/issues/8709 + env.stop(immediate=True)