feat(compute_ctl): Add a basic HTTP API benchmark (#11123)

## Problem

We just had a regression reported at
https://neondb.slack.com/archives/C08EXUJF554/p1741102467515599, which
clearly came with one of the releases. It's not a huge problem yet, but
it's annoying that we cannot quickly attribute it to a specific commit.

## Summary of changes

Add a very simple `compute_ctl` HTTP API benchmark that sends 10k
requests each to `/status` and `/metrics.json` and reports the p50 and
p99 response latencies.

---------

Co-authored-by: Peter Bendel <peterbendel@neon.tech>
This commit is contained in:
Alexey Kondratov
2025-03-07 13:35:42 +01:00
committed by GitHub
parent cea67fc062
commit f5aa8c3eac
2 changed files with 76 additions and 0 deletions

View File

@@ -53,6 +53,18 @@ class EndpointHttpClient(requests.Session):
res.raise_for_status() res.raise_for_status()
return res.text return res.text
# Current compute status.
def status(self):
    """
    Fetch `/status` from the HTTP server on localhost at `self.external_port`.

    Raises if the response carries an HTTP error status; otherwise returns
    the response body decoded from JSON.
    """
    url = f"http://localhost:{self.external_port}/status"
    response = self.get(url)
    response.raise_for_status()
    return response.json()
# Compute startup-related metrics.
def metrics_json(self):
    """
    Fetch `/metrics.json` from the HTTP server on localhost at
    `self.external_port`.

    Raises if the response carries an HTTP error status; otherwise returns
    the response body decoded from JSON.
    """
    url = f"http://localhost:{self.external_port}/metrics.json"
    response = self.get(url)
    response.raise_for_status()
    return response.json()
def configure_failpoints(self, *args: tuple[str, str]) -> None: def configure_failpoints(self, *args: tuple[str, str]) -> None:
body: list[dict[str, str]] = [] body: list[dict[str, str]] = []

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
import datetime
import pytest
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnv
@pytest.mark.timeout(120)
def test_compute_ctl_api_latencies(
    neon_simple_env: NeonEnv,
    zenbenchmark: NeonBenchmarker,
):
    """
    Test compute_ctl HTTP API performance. Do simple GET requests
    to catch any pathological degradations in the HTTP server.

    Sends NUM_REQUESTS sequential requests to each of `/status` and
    `/metrics.json` (interleaved, one of each per iteration) and records
    the p50 and p99 latency of each endpoint, in whole microseconds.
    """
    env = neon_simple_env
    endpoint = env.endpoints.create_start("main")
    client = endpoint.http_client()

    NUM_REQUESTS = 10000
    one_microsecond = datetime.timedelta(microseconds=1)

    def elapsed_us(request) -> int:
        """Call `request()` and return how long it took, in whole microseconds."""
        start_time = datetime.datetime.now()
        request()
        # Divide the *whole* timedelta. `timedelta.microseconds` is only the
        # sub-second component (0..999999), so it would silently drop the
        # seconds part and under-report any request slower than one second.
        return (datetime.datetime.now() - start_time) // one_microsecond

    status_response_latency_us = []
    metrics_response_latency_us = []

    for _i in range(NUM_REQUESTS):
        status_response_latency_us.append(elapsed_us(client.status))
        metrics_response_latency_us.append(elapsed_us(client.metrics_json))

    status_response_latency_us.sort()
    metrics_response_latency_us.sort()

    # Both lists hold exactly NUM_REQUESTS samples, so the percentile
    # positions are shared between them.
    p50_index = NUM_REQUESTS // 2
    p99_index = NUM_REQUESTS * 99 // 100

    zenbenchmark.record(
        "status_response_latency_p50_us",
        status_response_latency_us[p50_index],
        "microseconds",
        MetricReport.LOWER_IS_BETTER,
    )
    zenbenchmark.record(
        "metrics_response_latency_p50_us",
        metrics_response_latency_us[p50_index],
        "microseconds",
        MetricReport.LOWER_IS_BETTER,
    )
    zenbenchmark.record(
        "status_response_latency_p99_us",
        status_response_latency_us[p99_index],
        "microseconds",
        MetricReport.LOWER_IS_BETTER,
    )
    zenbenchmark.record(
        "metrics_response_latency_p99_us",
        metrics_response_latency_us[p99_index],
        "microseconds",
        MetricReport.LOWER_IS_BETTER,
    )