tests: save pageserver metrics at the end of the test (#11559)

## Problem

Sometimes it's useful to inspect the pageserver metrics after a test in
order to debug a failure.
For example, for https://github.com/neondatabase/neon/issues/11465 I'd
like to know what the remote storage latencies are as seen from the client.

## Summary of changes

When stopping the env, record the pageserver metrics into a file in the
pageserver's workdir.
This commit is contained in:
Vlad Lazar
2025-04-14 16:13:20 +01:00
committed by GitHub
parent 057ce115de
commit 90b706cd96

View File

@@ -947,6 +947,8 @@ class NeonEnvBuilder:
continue
if SMALL_DB_FILE_NAME_REGEX.fullmatch(test_file.name):
continue
if FINAL_METRICS_FILE_NAME == test_file.name:
continue
log.debug(f"Removing large database {test_file} file")
test_file.unlink()
elif test_entry.is_dir():
@@ -1457,6 +1459,12 @@ class NeonEnv:
except Exception as e:
metric_errors.append(e)
log.error(f"metric validation failed on {pageserver.id}: {e}")
try:
pageserver.snapshot_final_metrics()
except Exception as e:
log.error(f"metric snapshot failed on {pageserver.id}: {e}")
try:
pageserver.stop(immediate=immediate)
except RuntimeError:
@@ -2972,6 +2980,20 @@ class NeonPageserver(PgProtocol, LogUtils):
value = self.http_client().get_metric_value(metric)
assert value == 0, f"Nonzero {metric} == {value}"
def snapshot_final_metrics(self) -> None:
    """
    Take a snapshot of this pageserver's metrics and stash it in its work directory.

    The metrics text returned by the HTTP client is written to
    ``FINAL_METRICS_FILE_NAME`` under ``self.workdir``. If the pageserver
    is not running, nothing is written and the skip is logged.
    """
    if not self.running:
        log.info(f"Skipping metrics snapshot on pageserver {self.id}, it is not running")
        return

    metrics = self.http_client().get_metrics_str()
    metrics_snapshot_path = self.workdir / FINAL_METRICS_FILE_NAME
    # Pin the encoding so the snapshot file does not depend on the locale
    # of the machine running the tests.
    with open(metrics_snapshot_path, "w", encoding="utf-8") as f:
        f.write(metrics)
def tenant_attach(
self,
tenant_id: TenantId,
@@ -5134,6 +5156,8 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern[str] = re.compile(
r"config-v1|heatmap-v1|tenant-manifest|metadata|.+\.(?:toml|pid|json|sql|conf)"
)
# Name of the file, inside a pageserver's workdir, that receives the metrics
# snapshot written by `snapshot_final_metrics`. Files with this name are
# skipped by the cleanup that removes large database files.
FINAL_METRICS_FILE_NAME: str = "final_metrics.txt"
SKIP_DIRS = frozenset(
(