mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-11 07:22:55 +00:00
tests: always scrub on test exit when using S3Storage (#8437)
## Problem Currently, tests may have a scrub during teardown if they ask for it, but most tests don't request it. To detect "unknown unknowns", let's run it at the end of every test where possible. This is similar to asserting that there are no errors in the log at the end of tests. ## Summary of changes - Remove explicit `enable_scrub_on_exit` - Always scrub if remote storage is an S3Storage.
This commit is contained in:
@@ -523,7 +523,7 @@ class NeonEnvBuilder:
|
||||
self.preserve_database_files = preserve_database_files
|
||||
self.initial_tenant = initial_tenant or TenantId.generate()
|
||||
self.initial_timeline = initial_timeline or TimelineId.generate()
|
||||
self.scrub_on_exit = False
|
||||
self.enable_scrub_on_exit = True
|
||||
self.test_output_dir = test_output_dir
|
||||
self.test_overlay_dir = test_overlay_dir
|
||||
self.overlay_mounts_created_by_us: List[Tuple[str, Path]] = []
|
||||
@@ -852,6 +852,13 @@ class NeonEnvBuilder:
|
||||
)
|
||||
ident_state_dir.rmdir() # should be empty since we moved `upper` out
|
||||
|
||||
def disable_scrub_on_exit(self):
|
||||
"""
|
||||
Some tests intentionally leave the remote storage contents empty or corrupt,
|
||||
so it doesn't make sense to do the usual scrub at the end of the test.
|
||||
"""
|
||||
self.enable_scrub_on_exit = False
|
||||
|
||||
def overlay_cleanup_teardown(self):
|
||||
"""
|
||||
Unmount the overlayfs mounts created by `self.overlay_mount()`.
|
||||
@@ -877,23 +884,6 @@ class NeonEnvBuilder:
|
||||
# assert all overlayfs mounts in our test directory are gone
|
||||
assert [] == list(overlayfs.iter_mounts_beneath(self.test_overlay_dir))
|
||||
|
||||
def enable_scrub_on_exit(self):
|
||||
"""
|
||||
Call this if you would like the fixture to automatically run
|
||||
storage_scrubber at the end of the test, as a bidirectional test
|
||||
that the scrubber is working properly, and that the code within
|
||||
the test didn't produce any invalid remote state.
|
||||
"""
|
||||
|
||||
if not isinstance(self.pageserver_remote_storage, S3Storage):
|
||||
# The scrubber can't talk to e.g. LocalFS -- it needs
|
||||
# an HTTP endpoint (mock is fine) to connect to.
|
||||
raise RuntimeError(
|
||||
"Cannot scrub with remote_storage={self.pageserver_remote_storage}, require an S3 endpoint"
|
||||
)
|
||||
|
||||
self.scrub_on_exit = True
|
||||
|
||||
def enable_pageserver_remote_storage(
|
||||
self,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
@@ -995,7 +985,12 @@ class NeonEnvBuilder:
|
||||
)
|
||||
cleanup_error = None
|
||||
|
||||
if self.scrub_on_exit:
|
||||
# If we are running with S3Storage (required by the scrubber), check that whatever the test
|
||||
# did does not generate any corruption
|
||||
if (
|
||||
isinstance(self.env.pageserver_remote_storage, S3Storage)
|
||||
and self.enable_scrub_on_exit
|
||||
):
|
||||
try:
|
||||
self.env.storage_scrubber.scan_metadata()
|
||||
except Exception as e:
|
||||
|
||||
@@ -13,7 +13,6 @@ from fixtures.utils import wait_until
|
||||
# running.
|
||||
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
|
||||
# We inject a delay of 15 seconds for tenant activation below.
|
||||
# Hence, bump the max delay here to not skip over the activation.
|
||||
@@ -161,7 +160,6 @@ def test_pageserver_chaos(
|
||||
pytest.skip("times out in debug builds")
|
||||
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
if shard_count is not None:
|
||||
neon_env_builder.num_pageservers = shard_count
|
||||
|
||||
|
||||
@@ -390,6 +390,9 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder):
|
||||
# (reproduce https://github.com/neondatabase/neon/issues/6802)
|
||||
pageserver_b.http_client().tenant_delete(tenant_id)
|
||||
|
||||
# We deleted our only tenant, and the scrubber fails if it detects nothing
|
||||
neon_env_builder.disable_scrub_on_exit()
|
||||
|
||||
|
||||
def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
@@ -589,6 +592,9 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
workload.stop()
|
||||
|
||||
# We deleted our only tenant, and the scrubber fails if it detects nothing
|
||||
neon_env_builder.disable_scrub_on_exit()
|
||||
|
||||
|
||||
def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
|
||||
@@ -138,7 +138,6 @@ def test_pg_regress(
|
||||
neon_env_builder.num_pageservers = shard_count
|
||||
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf=TENANT_CONF,
|
||||
initial_tenant_shard_count=shard_count,
|
||||
@@ -202,7 +201,6 @@ def test_isolation(
|
||||
if shard_count is not None:
|
||||
neon_env_builder.num_pageservers = shard_count
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
|
||||
)
|
||||
@@ -265,7 +263,6 @@ def test_sql_regress(
|
||||
if shard_count is not None:
|
||||
neon_env_builder.num_pageservers = shard_count
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
|
||||
)
|
||||
|
||||
@@ -47,7 +47,6 @@ def test_sharding_smoke(
|
||||
# Use S3-compatible remote storage so that we can scrub: this test validates
|
||||
# that the scrubber doesn't barf when it sees a sharded tenant.
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
|
||||
neon_env_builder.preserve_database_files = True
|
||||
|
||||
@@ -128,7 +127,6 @@ def test_sharding_smoke(
|
||||
# Check the scrubber isn't confused by sharded content, then disable
|
||||
# it during teardown because we'll have deleted by then
|
||||
env.storage_scrubber.scan_metadata()
|
||||
neon_env_builder.scrub_on_exit = False
|
||||
|
||||
env.storage_controller.pageserver_api().tenant_delete(tenant_id)
|
||||
assert_prefix_empty(
|
||||
@@ -373,7 +371,6 @@ def test_sharding_split_smoke(
|
||||
# Use S3-compatible remote storage so that we can scrub: this test validates
|
||||
# that the scrubber doesn't barf when it sees a sharded tenant.
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.enable_scrub_on_exit()
|
||||
|
||||
neon_env_builder.preserve_database_files = True
|
||||
|
||||
|
||||
@@ -315,6 +315,9 @@ def test_tenant_delete_races_timeline_creation(
|
||||
# Zero tenants remain (we deleted the default tenant)
|
||||
assert ps_http.get_metric_value("pageserver_tenant_manager_slots", {"mode": "attached"}) == 0
|
||||
|
||||
# We deleted our only tenant, and the scrubber fails if it detects nothing
|
||||
neon_env_builder.disable_scrub_on_exit()
|
||||
|
||||
|
||||
def test_tenant_delete_scrubber(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
|
||||
@@ -485,6 +485,9 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
|
||||
lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage),
|
||||
)
|
||||
|
||||
# We deleted our only tenant, and the scrubber fails if it detects nothing
|
||||
neon_env_builder.disable_scrub_on_exit()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"stuck_failpoint",
|
||||
@@ -703,6 +706,9 @@ def test_timeline_delete_works_for_remote_smoke(
|
||||
# Assume it is mock server inconsistency and check twice.
|
||||
wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder.pageserver_remote_storage))
|
||||
|
||||
# We deleted our only tenant, and the scrubber fails if it detects nothing
|
||||
neon_env_builder.disable_scrub_on_exit()
|
||||
|
||||
|
||||
def test_delete_orphaned_objects(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
|
||||
Reference in New Issue
Block a user