mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-07 05:22:56 +00:00
tests: save pageserver metrics at the end of the test (#11559)
## Problem

Sometimes it's useful to see the pageserver metrics after a test in order to debug issues. For example, for https://github.com/neondatabase/neon/issues/11465 I'd like to know what the remote storage latencies are from the client.

## Summary of changes

When stopping the env, record the pageserver metrics into a file in the pageserver's workdir.
This commit is contained in:
@@ -947,6 +947,8 @@ class NeonEnvBuilder:
|
|||||||
continue
|
continue
|
||||||
if SMALL_DB_FILE_NAME_REGEX.fullmatch(test_file.name):
|
if SMALL_DB_FILE_NAME_REGEX.fullmatch(test_file.name):
|
||||||
continue
|
continue
|
||||||
|
if FINAL_METRICS_FILE_NAME == test_file.name:
|
||||||
|
continue
|
||||||
log.debug(f"Removing large database {test_file} file")
|
log.debug(f"Removing large database {test_file} file")
|
||||||
test_file.unlink()
|
test_file.unlink()
|
||||||
elif test_entry.is_dir():
|
elif test_entry.is_dir():
|
||||||
@@ -1457,6 +1459,12 @@ class NeonEnv:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
metric_errors.append(e)
|
metric_errors.append(e)
|
||||||
log.error(f"metric validation failed on {pageserver.id}: {e}")
|
log.error(f"metric validation failed on {pageserver.id}: {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
pageserver.snapshot_final_metrics()
|
||||||
|
except Exception as e:
|
||||||
|
log.error(f"metric snapshot failed on {pageserver.id}: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pageserver.stop(immediate=immediate)
|
pageserver.stop(immediate=immediate)
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
@@ -2972,6 +2980,20 @@ class NeonPageserver(PgProtocol, LogUtils):
|
|||||||
value = self.http_client().get_metric_value(metric)
|
value = self.http_client().get_metric_value(metric)
|
||||||
assert value == 0, f"Nonzero {metric} == {value}"
|
assert value == 0, f"Nonzero {metric} == {value}"
|
||||||
|
|
||||||
|
def snapshot_final_metrics(self) -> None:
    """
    Take a snapshot of this pageserver's metrics and stash in its work directory.

    The metrics text returned by the HTTP client's ``get_metrics_str()`` is
    written to ``self.workdir / FINAL_METRICS_FILE_NAME``, replacing any
    existing file of that name.

    If the pageserver is not running, nothing is fetched or written; the skip
    is only logged.
    """
    if not self.running:
        log.info(f"Skipping metrics snapshot on pageserver {self.id}, it is not running")
        return

    metrics = self.http_client().get_metrics_str()
    metrics_snapshot_path = self.workdir / FINAL_METRICS_FILE_NAME

    # Pin the encoding instead of relying on the locale default, so the
    # snapshot is written the same way on every machine and non-ASCII
    # characters in the metrics text cannot fail to encode.
    with open(metrics_snapshot_path, "w", encoding="utf-8") as f:
        f.write(metrics)
|
||||||
|
|
||||||
def tenant_attach(
|
def tenant_attach(
|
||||||
self,
|
self,
|
||||||
tenant_id: TenantId,
|
tenant_id: TenantId,
|
||||||
@@ -5134,6 +5156,8 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern[str] = re.compile(
|
|||||||
r"config-v1|heatmap-v1|tenant-manifest|metadata|.+\.(?:toml|pid|json|sql|conf)"
|
r"config-v1|heatmap-v1|tenant-manifest|metadata|.+\.(?:toml|pid|json|sql|conf)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Name of the file, inside a pageserver's work directory, that holds the metrics
# snapshot written by snapshot_final_metrics(). The large-file cleanup compares
# file names against this constant and leaves a matching file in place.
FINAL_METRICS_FILE_NAME: str = "final_metrics.txt"
|
||||||
|
|
||||||
|
|
||||||
SKIP_DIRS = frozenset(
|
SKIP_DIRS = frozenset(
|
||||||
(
|
(
|
||||||
|
|||||||
Reference in New Issue
Block a user