use explicit checks in test
Signed-off-by: Yuchen Liang <yuchen@neon.tech>
@@ -978,7 +978,10 @@ class NeonEnvBuilder:
             and self.enable_scrub_on_exit
         ):
             try:
-                self.env.storage_scrubber.scan_metadata()
+                healthy, _ = self.env.storage_scrubber.scan_metadata()
+                if not healthy:
+                    e = Exception("Remote storage metadata corrupted")
+                    cleanup_error = e
             except Exception as e:
                 log.error(f"Error during remote storage scrub: {e}")
                 cleanup_error = e

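Note: the hunk above defers the failure instead of raising immediately, so the rest of the environment teardown still runs. Below is a minimal standalone sketch of that pattern, assuming only a scrubber object with the new scan_metadata() -> (healthy, summary) contract introduced in the next hunk and a placeholder cleanup callable; it is illustrative, not the actual fixture code.

from typing import Any, Optional


def scrub_then_cleanup(scrubber: Any, cleanup: Any) -> None:
    """Run the exit-time scrub, but defer any failure until cleanup finishes."""
    cleanup_error: Optional[Exception] = None
    try:
        healthy, _ = scrubber.scan_metadata()
        if not healthy:
            # An unhealthy scan no longer passes silently: record it as an
            # error, but keep going so teardown can complete.
            cleanup_error = Exception("Remote storage metadata corrupted")
    except Exception as e:
        cleanup_error = e

    cleanup()  # remaining teardown steps would run here

    if cleanup_error is not None:
        raise cleanup_error
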
@@ -4401,14 +4404,19 @@ class StorageScrubber:
         assert stdout is not None
         return stdout
 
-    def scan_metadata(self, post_to_storage_controller: bool = False) -> Any:
+    def scan_metadata(self, post_to_storage_controller: bool = False) -> Tuple[bool, Any]:
+        """
+        Returns the health status and the metadata summary.
+        """
         args = ["scan-metadata", "--node-kind", "pageserver", "--json"]
         if post_to_storage_controller:
             args.append("--post")
         stdout = self.scrubber_cli(args, timeout=30)
 
         try:
-            return json.loads(stdout)
+            summary = json.loads(stdout)
+            healthy = not summary["with_errors"] and not summary["with_warnings"]
+            return healthy, summary
         except:
             log.error("Failed to decode JSON output from `scan-metadata`. Dumping stdout:")
             log.error(stdout)

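For reference, a minimal sketch of the health derivation used above, fed with a hand-written payload that only mirrors the keys the diff and tests rely on (with_errors, with_warnings, and the count fields); the real summary is the JSON emitted by the scan-metadata command, so the example payload is an assumption for illustration only.

import json
from typing import Any, Tuple


def parse_scan_output(stdout: str) -> Tuple[bool, Any]:
    # Same rule as scan_metadata() above: healthy only if both the error
    # and warning buckets are empty.
    summary = json.loads(stdout)
    healthy = not summary["with_errors"] and not summary["with_warnings"]
    return healthy, summary


# Hand-written example payload (illustrative shape, not real scrubber output).
example = json.dumps(
    {
        "tenant_count": 1,
        "timeline_count": 1,
        "timeline_shard_count": 1,
        "with_errors": [],
        "with_warnings": [],
    }
)
healthy, summary = parse_scan_output(example)
assert healthy and summary["tenant_count"] == 1
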
@@ -214,12 +214,11 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
 
     # Having written a mixture of generation-aware and legacy index_part.json,
     # ensure the scrubber handles the situation as expected.
-    metadata_summary = env.storage_scrubber.scan_metadata()
+    healthy, metadata_summary = env.storage_scrubber.scan_metadata()
     assert metadata_summary["tenant_count"] == 1  # Scrubber should have seen our timeline
     assert metadata_summary["timeline_count"] == 1
     assert metadata_summary["timeline_shard_count"] == 1
-    assert not metadata_summary["with_errors"]
-    assert not metadata_summary["with_warnings"]
+    assert healthy
 
 
 def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):

@@ -563,7 +563,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
     # Scrub the remote storage
     # ========================
     # This confirms that the scrubber isn't upset by the presence of the heatmap
-    env.storage_scrubber.scan_metadata()
+    healthy, _ = env.storage_scrubber.scan_metadata()
+    assert healthy
 
     # Detach secondary and delete tenant
     # ===================================

@@ -124,7 +124,8 @@ def test_sharding_smoke(
 
     # Check the scrubber isn't confused by sharded content, then disable
     # it during teardown because we'll have deleted by then
-    env.storage_scrubber.scan_metadata()
+    healthy, _ = env.storage_scrubber.scan_metadata()
+    assert healthy
 
     env.storage_controller.pageserver_api().tenant_delete(tenant_id)
     assert_prefix_empty(

@@ -440,9 +440,8 @@ def test_scrubber_scan_pageserver_metadata(
     assert len(index.layer_metadata) > 0
     it = iter(index.layer_metadata.items())
 
-    scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
-    assert not scan_summary["with_warnings"]
-    assert not scan_summary["with_errors"]
+    healthy, scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
+    assert healthy
 
     assert env.storage_controller.metadata_health_is_healthy()
 

@@ -456,16 +455,18 @@ def test_scrubber_scan_pageserver_metadata(
     log.info(f"delete response: {delete_response}")
 
     # Check scan summary without posting to storage controller. Expect it to be a L0 layer so only emit warnings.
-    scan_summary = env.storage_scrubber.scan_metadata()
+    _, scan_summary = env.storage_scrubber.scan_metadata()
     log.info(f"{pprint.pformat(scan_summary)}")
     assert len(scan_summary["with_warnings"]) > 0
 
     assert env.storage_controller.metadata_health_is_healthy()
 
     # Now post to storage controller, expect seeing one unhealthy health record
-    scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
+    _, scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True)
     log.info(f"{pprint.pformat(scan_summary)}")
     assert len(scan_summary["with_warnings"]) > 0
 
     unhealthy = env.storage_controller.metadata_health_list_unhealthy()["unhealthy_tenant_shards"]
     assert len(unhealthy) == 1 and unhealthy[0] == str(tenant_shard_id)
+
+    neon_env_builder.disable_scrub_on_exit()

@@ -341,13 +341,13 @@ def test_tenant_delete_scrubber(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder)
     wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn)
     env.stop()
 
-    result = env.storage_scrubber.scan_metadata()
-    assert result["with_warnings"] == []
+    healthy, _ = env.storage_scrubber.scan_metadata()
+    assert healthy
 
     env.start()
     ps_http = env.pageserver.http_client()
     ps_http.tenant_delete(tenant_id)
     env.stop()
 
-    env.storage_scrubber.scan_metadata()
-    assert result["with_warnings"] == []
+    healthy, _ = env.storage_scrubber.scan_metadata()
+    assert healthy