From f5aa8c3eac0cebbfd6d0394d3f361526b06a8ded Mon Sep 17 00:00:00 2001
From: Alexey Kondratov
Date: Fri, 7 Mar 2025 13:35:42 +0100
Subject: [PATCH] feat(compute_ctl): Add a basic HTTP API benchmark (#11123)

## Problem

We just had a regression reported at
https://neondb.slack.com/archives/C08EXUJF554/p1741102467515599, which
clearly came with one of the releases. It's not a huge problem yet, but it's
annoying that we cannot quickly attribute it to a specific commit.

## Summary of changes

Add a very simple `compute_ctl` HTTP API benchmark that makes 10k requests
to `/status` and `/metrics.json` and reports p50 and p99 latencies.

---------

Co-authored-by: Peter Bendel
---
 test_runner/fixtures/endpoint/http.py    | 12 ++++
 .../performance/test_compute_ctl_api.py  | 64 +++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 test_runner/performance/test_compute_ctl_api.py

diff --git a/test_runner/fixtures/endpoint/http.py b/test_runner/fixtures/endpoint/http.py
index cdc162fca2..9b28246f58 100644
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -53,6 +53,18 @@ class EndpointHttpClient(requests.Session):
         res.raise_for_status()
         return res.text
 
+    # Current compute status.
+    def status(self):
+        res = self.get(f"http://localhost:{self.external_port}/status")
+        res.raise_for_status()
+        return res.json()
+
+    # Compute startup-related metrics.
+    def metrics_json(self):
+        res = self.get(f"http://localhost:{self.external_port}/metrics.json")
+        res.raise_for_status()
+        return res.json()
+
     def configure_failpoints(self, *args: tuple[str, str]) -> None:
         body: list[dict[str, str]] = []
 
diff --git a/test_runner/performance/test_compute_ctl_api.py b/test_runner/performance/test_compute_ctl_api.py
new file mode 100644
index 0000000000..87eb1f2c35
--- /dev/null
+++ b/test_runner/performance/test_compute_ctl_api.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import datetime
+
+import pytest
+from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.neon_fixtures import NeonEnv
+
+
+@pytest.mark.timeout(120)
+def test_compute_ctl_api_latencies(
+    neon_simple_env: NeonEnv,
+    zenbenchmark: NeonBenchmarker,
+):
+    """
+    Test compute_ctl HTTP API performance. Do simple GET requests
+    to catch any pathological degradations in the HTTP server.
+    """
+    env = neon_simple_env
+
+    endpoint = env.endpoints.create_start("main")
+    client = endpoint.http_client()
+
+    NUM_REQUESTS = 10000
+
+    status_response_latency_us = []
+    metrics_response_latency_us = []
+
+    for _i in range(NUM_REQUESTS):
+        start_time = datetime.datetime.now()
+        _ = client.status()
+        status_response_latency_us.append((datetime.datetime.now() - start_time).microseconds)
+
+        start_time = datetime.datetime.now()
+        _ = client.metrics_json()
+        metrics_response_latency_us.append((datetime.datetime.now() - start_time).microseconds)
+
+    status_response_latency_us = sorted(status_response_latency_us)
+    metrics_response_latency_us = sorted(metrics_response_latency_us)
+
+    zenbenchmark.record(
+        "status_response_latency_p50_us",
+        status_response_latency_us[len(status_response_latency_us) // 2],
+        "microseconds",
+        MetricReport.LOWER_IS_BETTER,
+    )
+    zenbenchmark.record(
+        "metrics_response_latency_p50_us",
+        metrics_response_latency_us[len(metrics_response_latency_us) // 2],
+        "microseconds",
+        MetricReport.LOWER_IS_BETTER,
+    )
+    zenbenchmark.record(
+        "status_response_latency_p99_us",
+        status_response_latency_us[len(status_response_latency_us) * 99 // 100],
+        "microseconds",
+        MetricReport.LOWER_IS_BETTER,
+    )
+    zenbenchmark.record(
+        "metrics_response_latency_p99_us",
+        metrics_response_latency_us[len(metrics_response_latency_us) * 99 // 100],
+        "microseconds",
+        MetricReport.LOWER_IS_BETTER,
+    )
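
For quick reference, below is a minimal, self-contained sketch of the measurement pattern the new test relies on: time each call with `datetime`, sort the collected samples, and read p50/p99 straight out of the sorted list. The `timed_call` stand-in is purely illustrative and not part of the patch.

```python
import datetime


def timed_call():
    # Stand-in for a cheap HTTP request such as client.status().
    return sum(range(1000))


NUM_REQUESTS = 10_000
latencies_us = []

for _ in range(NUM_REQUESTS):
    start_time = datetime.datetime.now()
    timed_call()
    # .microseconds holds only the sub-second component of the timedelta,
    # which is fine for fast local calls that finish well under a second.
    latencies_us.append((datetime.datetime.now() - start_time).microseconds)

latencies_us.sort()
p50_us = latencies_us[len(latencies_us) // 2]
p99_us = latencies_us[len(latencies_us) * 99 // 100]

print(f"p50: {p50_us} us, p99: {p99_us} us")
```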