feat(compute_ctl): Add a basic HTTP API benchmark (#11123)

## Problem We just had a regression reported at https://neondb.slack.com/archives/C08EXUJF554/p1741102467515599, which clearly came with one of the releases. It's not a huge problem yet, but it's annoying that we cannot quickly attribute it to a specific commit. ## Summary of changes Add a very simple `compute_ctl` HTTP API benchmark that sends 10k requests each to `/status` and `/metrics.json` and reports the p50 and p99 latencies. --------- Co-authored-by: Peter Bendel <peterbendel@neon.tech>
2025-12-22 21:59:59 +00:00 · 2025-03-07 13:35:42 +01:00
parent cea67fc062
commit f5aa8c3eac
2 changed files with 76 additions and 0 deletions
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -53,6 +53,18 @@ class EndpointHttpClient(requests.Session):
        res.raise_for_status()
        return res.text
    # Current compute status.
    def status(self):
        """
        Fetch the current compute status from the `/status` endpoint.

        Returns the JSON-decoded response body. A non-success HTTP
        response is surfaced through `raise_for_status()`.
        """
        url = f"http://localhost:{self.external_port}/status"
        response = self.get(url)
        response.raise_for_status()
        return response.json()
    # Compute startup-related metrics.
    def metrics_json(self):
        """
        Fetch the compute metrics from the `/metrics.json` endpoint.

        Returns the JSON-decoded response body. A non-success HTTP
        response is surfaced through `raise_for_status()`.
        """
        url = f"http://localhost:{self.external_port}/metrics.json"
        response = self.get(url)
        response.raise_for_status()
        return response.json()
    def configure_failpoints(self, *args: tuple[str, str]) -> None:
        body: list[dict[str, str]] = []
--- a/test_runner/performance/test_compute_ctl_api.py
+++ b/test_runner/performance/test_compute_ctl_api.py
@@ -0,0 +1,64 @@
 from __future__ import annotations
 import datetime
 import pytest
 from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
 from fixtures.neon_fixtures import NeonEnv
@pytest.mark.timeout(120)
def test_compute_ctl_api_latencies(
    neon_simple_env: NeonEnv,
    zenbenchmark: NeonBenchmarker,
):
    """
    Test compute_ctl HTTP API performance. Do simple GET requests
    to catch any pathological degradations in the HTTP server.

    Starts an endpoint on the "main" branch, issues NUM_REQUESTS
    sequential requests to each of `status()` and `metrics_json()`,
    and records the p50 and p99 latency of each call in microseconds.
    """
    env = neon_simple_env
    endpoint = env.endpoints.create_start("main")
    client = endpoint.http_client()

    NUM_REQUESTS = 10000
    one_microsecond = datetime.timedelta(microseconds=1)

    def elapsed_us(request) -> int:
        """Call `request()` and return how long it took, in whole microseconds."""
        started_at = datetime.datetime.now()
        request()
        # Convert the *whole* duration. `timedelta.microseconds` is only the
        # sub-second component (0..999999), so a request taking >= 1 second
        # would wrap around and be reported as a much smaller latency.
        return (datetime.datetime.now() - started_at) // one_microsecond

    status_response_latency_us = []
    metrics_response_latency_us = []

    # Alternate between the two calls so both sample sets are collected
    # over the same period of the endpoint's lifetime.
    for _i in range(NUM_REQUESTS):
        status_response_latency_us.append(elapsed_us(client.status))
        metrics_response_latency_us.append(elapsed_us(client.metrics_json))

    status_response_latency_us.sort()
    metrics_response_latency_us.sort()

    # (metric name, sorted samples, percentile). Metric names are spelled out
    # in full so they stay greppable; the order matches the reporting order.
    reports = [
        ("status_response_latency_p50_us", status_response_latency_us, 50),
        ("metrics_response_latency_p50_us", metrics_response_latency_us, 50),
        ("status_response_latency_p99_us", status_response_latency_us, 99),
        ("metrics_response_latency_p99_us", metrics_response_latency_us, 99),
    ]
    for metric_name, samples, percentile in reports:
        zenbenchmark.record(
            metric_name,
            # Index of the requested percentile in the sorted samples.
            samples[len(samples) * percentile // 100],
            "microseconds",
            MetricReport.LOWER_IS_BETTER,
        )