use explicit checks in test

Signed-off-by: Yuchen Liang <yuchen@neon.tech>
This commit is contained in:
Yuchen Liang
2024-07-30 16:46:14 -04:00
parent f0db90091b
commit f8a727ac47
6 changed files with 27 additions and 17 deletions

View File

@@ -978,7 +978,10 @@ class NeonEnvBuilder:
and self.enable_scrub_on_exit
):
try:
self.env.storage_scrubber.scan_metadata()
healthy, _ = self.env.storage_scrubber.scan_metadata()
if not healthy:
e = Exception("Remote storage metadata corrupted")
cleanup_error = e
except Exception as e:
log.error(f"Error during remote storage scrub: {e}")
cleanup_error = e
@@ -4401,14 +4404,19 @@ class StorageScrubber:
assert stdout is not None
return stdout
def scan_metadata(self, post_to_storage_controller: bool = False) -> Any:
def scan_metadata(self, post_to_storage_controller: bool = False) -> Tuple[bool, Any]:
"""
Returns the health status and the metadata summary.
"""
args = ["scan-metadata", "--node-kind", "pageserver", "--json"]
if post_to_storage_controller:
args.append("--post")
stdout = self.scrubber_cli(args, timeout=30)
try:
return json.loads(stdout)
summary = json.loads(stdout)
healthy = not summary["with_errors"] and not summary["with_warnings"]
return healthy, summary
except:
log.error("Failed to decode JSON output from `scan-metadata`. Dumping stdout:")
log.error(stdout)

View File

@@ -214,12 +214,11 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
# Having written a mixture of generation-aware and legacy index_part.json,
# ensure the scrubber handles the situation as expected.
metadata_summary = env.storage_scrubber.scan_metadata()
healthy, metadata_summary = env.storage_scrubber.scan_metadata()
assert metadata_summary["tenant_count"] == 1 # Scrubber should have seen our timeline
assert metadata_summary["timeline_count"] == 1
assert metadata_summary["timeline_shard_count"] == 1
assert not metadata_summary["with_errors"]
assert not metadata_summary["with_warnings"]
assert healthy
def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):

View File

@@ -563,7 +563,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
# Scrub the remote storage
# ========================
# This confirms that the scrubber isn't upset by the presence of the heatmap
env.storage_scrubber.scan_metadata()
healthy, _ = env.storage_scrubber.scan_metadata()
assert healthy
# Detach secondary and delete tenant
# ===================================

View File

@@ -124,7 +124,8 @@ def test_sharding_smoke(
# Check the scrubber isn't confused by sharded content, then disable
# it during teardown because we'll have deleted the tenant by then
env.storage_scrubber.scan_metadata()
healthy, _ = env.storage_scrubber.scan_metadata()
assert healthy
env.storage_controller.pageserver_api().tenant_delete(tenant_id)
assert_prefix_empty(

View File

@@ -440,9 +440,8 @@ def test_scrubber_scan_pageserver_metadata(
assert len(index.layer_metadata) > 0
it = iter(index.layer_metadata.items())
scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
assert not scan_summary["with_warnings"]
assert not scan_summary["with_errors"]
healthy, scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
assert healthy
assert env.storage_controller.metadata_health_is_healthy()
@@ -456,16 +455,18 @@ def test_scrubber_scan_pageserver_metadata(
log.info(f"delete response: {delete_response}")
# Check the scan summary without posting to the storage controller. Expect it to be an L0 layer, so the scan should only emit warnings.
scan_summary = env.storage_scrubber.scan_metadata()
_, scan_summary = env.storage_scrubber.scan_metadata()
log.info(f"{pprint.pformat(scan_summary)}")
assert len(scan_summary["with_warnings"]) > 0
assert env.storage_controller.metadata_health_is_healthy()
# Now post to the storage controller and expect to see one unhealthy health record
scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
_, scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
log.info(f"{pprint.pformat(scan_summary)}")
assert len(scan_summary["with_warnings"]) > 0
unhealthy = env.storage_controller.metadata_health_list_unhealthy()["unhealthy_tenant_shards"]
assert len(unhealthy) == 1 and unhealthy[0] == str(tenant_shard_id)
neon_env_builder.disable_scrub_on_exit()

View File

@@ -341,13 +341,13 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder)
wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn)
env.stop()
result = env.storage_scrubber.scan_metadata()
assert result["with_warnings"] == []
healthy, _ = env.storage_scrubber.scan_metadata()
assert healthy
env.start()
ps_http = env.pageserver.http_client()
ps_http.tenant_delete(tenant_id)
env.stop()
env.storage_scrubber.scan_metadata()
assert result["with_warnings"] == []
healthy, _ = env.storage_scrubber.scan_metadata()
assert healthy