From 90b706cd96fe5cc40b43035c5d11f8c596d5e783 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 14 Apr 2025 16:13:20 +0100 Subject: [PATCH] tests: save pageserver metrics at the end of the test (#11559) ## Problem Sometimes it's useful to see the pageserver metrics after a test has finished in order to debug an issue. For example, for https://github.com/neondatabase/neon/issues/11465 I'd like to know what the remote storage latencies look like as seen from the client side. ## Summary of changes When stopping the env, record each running pageserver's metrics into a file (`final_metrics.txt`) in that pageserver's workdir. --- test_runner/fixtures/neon_fixtures.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 3761f29d2f..10bbb7020b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -947,6 +947,8 @@ class NeonEnvBuilder: continue if SMALL_DB_FILE_NAME_REGEX.fullmatch(test_file.name): continue + if FINAL_METRICS_FILE_NAME == test_file.name: + continue log.debug(f"Removing large database {test_file} file") test_file.unlink() elif test_entry.is_dir(): @@ -1457,6 +1459,12 @@ class NeonEnv: except Exception as e: metric_errors.append(e) log.error(f"metric validation failed on {pageserver.id}: {e}") + + try: + pageserver.snapshot_final_metrics() + except Exception as e: + log.error(f"metric snapshot failed on {pageserver.id}: {e}") + try: pageserver.stop(immediate=immediate) except RuntimeError: @@ -2972,6 +2980,20 @@ class NeonPageserver(PgProtocol, LogUtils): value = self.http_client().get_metric_value(metric) assert value == 0, f"Nonzero {metric} == {value}" + def snapshot_final_metrics(self): + """ + Take a snapshot of this pageserver's metrics and stash in its work directory. 
+ """ + if not self.running: + log.info(f"Skipping metrics snapshot on pageserver {self.id}, it is not running") + return + + metrics = self.http_client().get_metrics_str() + metrics_snapshot_path = self.workdir / FINAL_METRICS_FILE_NAME + + with open(metrics_snapshot_path, "w") as f: + f.write(metrics) + def tenant_attach( self, tenant_id: TenantId, @@ -5134,6 +5156,8 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern[str] = re.compile( r"config-v1|heatmap-v1|tenant-manifest|metadata|.+\.(?:toml|pid|json|sql|conf)" ) +FINAL_METRICS_FILE_NAME: str = "final_metrics.txt" + SKIP_DIRS = frozenset( (