mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
Currently, the exporter exposes the same LFC metrics that are exposed by the "autoscaling" sql_exporter in the Docker image. With this, we can remove the dedicated sql_exporter instance. (Actually doing the removal is left as a TODO until this is rolled out to production and we have changed autoscaling-agent to fetch the metrics from this new endpoint.) The exporter runs as a Postgres background worker process. This is extracted from the Rust communicator rewrite project, which will use the same worker process for much more, to handle the communications with the pageservers. For now, though, it merely handles the metrics requests. In the future, we will add more metrics, and perhaps even APIs to control the running Postgres instance. The exporter listens on a Unix Domain socket within the Postgres data directory. A Unix Domain socket is a bit unconventional, but it has some advantages: - Permissions are taken care of. Only processes that can access the data directory, and therefore already have full access to the running Postgres instance, can connect to it. - No need to allocate and manage a new port number for the listener. It has some downsides too: it's not immediately accessible from the outside world, and the functions to work with Unix Domain sockets are more low-level than those for TCP sockets (see the symlink hack in `postgres_metrics_client.rs`, for example). To expose the metrics from the local Unix Domain socket to the autoscaling agent, introduce a new '/autoscaling_metrics' endpoint in compute_ctl's HTTP server. Currently it merely forwards the request to the Postgres instance, but we could add rate limiting and access control there in the future. --------- Co-authored-by: Conrad Ludgate <conrad@neon.tech>
55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import TYPE_CHECKING
|
|
|
|
import pytest
|
|
import requests
|
|
import requests_unixsocket # type: ignore [import-untyped]
|
|
from fixtures.metrics import parse_metrics
|
|
|
|
if TYPE_CHECKING:
|
|
from fixtures.neon_fixtures import NeonEnv
|
|
|
|
# File name of the communicator's Unix domain socket. The test below uses it
# as a relative path in "http+unix://" URLs after changing the working
# directory to the endpoint's data directory.
NEON_COMMUNICATOR_SOCKET_NAME = "neon-communicator.socket"
|
|
|
|
|
|
def test_communicator_metrics(neon_simple_env: NeonEnv):
    """
    Test the communicator's built-in HTTP prometheus exporter

    The test starts an endpoint on the "main" branch, fetches /metrics over
    the communicator's Unix domain socket, triggers /debug/panic, and then
    fetches /metrics again to verify that the server is still answering.
    """
    env = neon_simple_env

    endpoint = env.endpoints.create("main")
    endpoint.start()

    metrics_url = f"http+unix://{NEON_COMMUNICATOR_SOCKET_NAME}/metrics"
    panic_url = f"http+unix://{NEON_COMMUNICATOR_SOCKET_NAME}/debug/panic"

    def check_metrics(session) -> None:
        """Fetch /metrics and check that the expected LFC metrics are present."""
        r = session.get(metrics_url)
        assert r.status_code == 200, f"got response {r.status_code}: {r.text}"

        # quick test that the endpoint returned something expected. (We don't
        # validate that the metrics returned are sensible.)
        m = parse_metrics(r.text)
        m.query_one("lfc_hits")
        m.query_one("lfc_misses")

    # Change current directory to the data directory, so that we can use
    # a short relative path to refer to the socket. (There's a 100 char
    # limitation on the path.)
    #
    # The working directory is process-wide state shared with every other test
    # running in this pytest process, so remember the previous one and restore
    # it no matter how the test ends.
    previous_cwd = os.getcwd()
    os.chdir(str(endpoint.pgdata_dir))
    try:
        # Use the session as a context manager so that its connections are
        # closed when the test is done with it.
        with requests_unixsocket.Session() as session:
            check_metrics(session)

            # Test panic handling. The /debug/panic endpoint raises a Rust
            # panic. It's expected to unwind and drop the HTTP connection
            # without response, but not kill the process or the server.
            #
            # There is deliberately no status code assertion here: when the
            # connection is dropped as expected, session.get() raises and no
            # statement after it inside the block would ever run.
            with pytest.raises(
                requests.ConnectionError, match="Remote end closed connection without response"
            ):
                session.get(panic_url)

            # Test that subsequent requests after the panic still work.
            check_metrics(session)
    finally:
        os.chdir(previous_cwd)