neon/test_runner/regress/test_compute_metrics.py
Commit 8d1c44039e by Alexander Bayandin: Python 3.11 (#9515)
## Problem

On Debian 12 (Bookworm), Python 3.11 is the latest available version.

## Summary of changes
- Update Python to 3.11 in build-tools
- Fix ruff check / format
- Fix mypy
- Use `StrEnum` instead of the `str` + `Enum` base-class pair (see the sketch below)
- Update docs
2024-11-21 16:25:31 +00:00
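
To illustrate the `StrEnum` change, a minimal sketch (the class name is generic, not taken from the actual diff):

```python
import enum

# Before: the pre-3.11 idiom of mixing str into Enum.
class Label(str, enum.Enum):
    FOO = "foo"

# After: Python 3.11's StrEnum; members are already str instances.
class Label(enum.StrEnum):
    FOO = "foo"
```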

from __future__ import annotations
import enum
import os
import shutil
from enum import StrEnum
from pathlib import Path
from typing import TYPE_CHECKING, cast
# Docs are available at https://jsonnet.org/ref/bindings.html#python_api
import _jsonnet
import pytest
import requests
import yaml
from fixtures.log_helper import log
from fixtures.paths import BASE_DIR, COMPUTE_CONFIG_DIR
if TYPE_CHECKING:
from types import TracebackType
from typing import TypedDict
from fixtures.neon_fixtures import NeonEnv
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
class Metric(TypedDict):
metric_name: str
type: str
help: str
key_labels: list[str] | None
values: list[str] | None
query: str | None
query_ref: str | None
class Collector(TypedDict):
collector_name: str
metrics: list[Metric]
queries: list[Query] | None
class Query(TypedDict):
query_name: str
query: str
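# For illustration only: a hypothetical collector document matching the
# TypedDicts above, as it would look once the jsonnet output is parsed as YAML.
#
#   collector_name: example_collector
#   metrics:
#     - metric_name: example_metric
#       type: gauge
#       help: An example metric
#       key_labels: [datname]
#       values: [count]
#       query_ref: example_query
#   queries:
#     - query_name: example_query
#       query: SELECT datname, count(*) AS count FROM pg_stat_activity GROUP BY datname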
# Cache of already-imported jsonnet files, so each import target is read from
# disk at most once.
JSONNET_IMPORT_CACHE: dict[str, bytes] = {}
# Search path for resolving relative jsonnet imports.
JSONNET_PATH: list[Path] = [BASE_DIR / "compute" / "jsonnet", COMPUTE_CONFIG_DIR]
def __import_callback(dir: str, rel: str) -> tuple[str, bytes]:
"""
dir: The directory of the Jsonnet file which tried to import a file
rel: The actual import path from Jsonnet
"""
if not rel:
raise RuntimeError("Empty filename")
full_path: str | None = None
if os.path.isabs(rel):
full_path = rel
else:
for p in (dir, *JSONNET_PATH):
assert isinstance(p, str | Path), "for mypy"
full_path = os.path.join(p, rel)
assert isinstance(full_path, str), "for mypy"
if not os.path.exists(full_path):
full_path = None
continue
break
if not full_path:
raise RuntimeError(f"Could not resolve import ({rel}) in {dir}")
if os.path.isdir(full_path):
raise RuntimeError(f"Attempted to import directory: {full_path}")
if full_path not in JSONNET_IMPORT_CACHE:
with open(full_path, encoding="utf-8") as f:
JSONNET_IMPORT_CACHE[full_path] = f.read().encode()
return full_path, JSONNET_IMPORT_CACHE[full_path]
def jsonnet_evaluate_file(
jsonnet_file: str | Path,
ext_vars: str | dict[str, str] | None = None,
tla_vars: str | dict[str, str] | None = None,
) -> str:
return cast(
"str",
_jsonnet.evaluate_file(
str(jsonnet_file),
ext_vars=ext_vars,
tla_vars=tla_vars,
import_callback=__import_callback,
),
)
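# A hedged usage sketch (file name and version are illustrative):
#
#   rendered = jsonnet_evaluate_file(
#       COMPUTE_CONFIG_DIR / "neon_collector.jsonnet",
#       ext_vars={"pg_version": "16"},
#   )
#   collector = yaml.safe_load(rendered)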
def evaluate_collector(jsonnet_file: Path, pg_version: PgVersion) -> str:
return jsonnet_evaluate_file(jsonnet_file, ext_vars={"pg_version": str(pg_version)})
def evaluate_config(
jsonnet_file: Path, collector_name: str, collector_file: str | Path, connstr: str
) -> str:
return jsonnet_evaluate_file(
jsonnet_file,
tla_vars={
"collector_name": collector_name,
"collector_file": str(collector_file),
"connection_string": connstr,
},
)
@enum.unique
class SqlExporterProcess(StrEnum):
COMPUTE = "compute"
AUTOSCALING = "autoscaling"
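# Because SqlExporterProcess derives from StrEnum, each member is itself a str
# (e.g. str(SqlExporterProcess.COMPUTE) == "compute"), so members can be used
# directly wherever plain strings are expected, such as the pytest `ids` below.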
@pytest.mark.parametrize(
"collector_name",
["neon_collector", "neon_collector_autoscaling"],
ids=[SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING],
)
def test_sql_exporter_metrics_smoke(
pg_version: PgVersion,
neon_simple_env: NeonEnv,
compute_config_dir: Path,
collector_name: str,
):
"""
This is a smoke test to ensure the metrics SQL queries for sql_exporter
work without errors.
"""
env = neon_simple_env
endpoint = env.endpoints.create("main")
endpoint.respec(skip_pg_catalog_updates=False)
endpoint.start()
# Extract all the SQL queries from the sql_exporter config files, and run
# them.
collector = cast(
"Collector",
yaml.safe_load(
jsonnet_evaluate_file(
str(compute_config_dir / f"{collector_name}.jsonnet"),
ext_vars={"pg_version": pg_version},
)
),
)
for metric in collector["metrics"]:
query = metric.get("query")
if query is not None:
log.info("Checking query for metric %s in %s", metric["metric_name"], collector_name)
endpoint.safe_psql(query)
queries = collector.get("queries")
if queries is not None:
        # This variable is named q because mypy is too silly to understand it is
        # different from the query above.
        #
        # query: str | None
        # q: Query
for q in queries:
log.info("Checking query %s in %s", q["query_name"], collector_name)
endpoint.safe_psql(q["query"])
class SqlExporterRunner:
def __init__(self, test_output_dir: Path, sql_exporter_port: int) -> None:
self._log_file_name = test_output_dir / "sql_exporter.stderr"
self._sql_exporter_port = sql_exporter_port
log.info(f"Starting sql_exporter at http://localhost:{self._sql_exporter_port}")
def start(self) -> None:
raise NotImplementedError()
def stop(self) -> None:
raise NotImplementedError()
def __enter__(self) -> SqlExporterRunner:
self.start()
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
tb: TracebackType | None,
):
self.stop()
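# Usage sketch: concrete runners are context managers. A hypothetical
# invocation (arguments abbreviated) looks like:
#
#   with SqlExporterNativeRunner(out_dir, config_file, collector_file, port):
#       requests.get(f"http://localhost:{port}/metrics")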
# Prefer a natively installed sql_exporter binary; if none is found on PATH,
# fall back to running sql_exporter in a container via testcontainers.
SQL_EXPORTER = shutil.which("sql_exporter")
if SQL_EXPORTER is None:
from testcontainers.core.container import DockerContainer
from testcontainers.core.waiting_utils import wait_for_logs
from typing_extensions import override
class SqlExporterContainer(DockerContainer): # type: ignore
def __init__(
self, logs_dir: Path, config_file: Path, collector_file: Path, port: int
) -> None:
# NOTE: Keep the version the same as in
# compute/Dockerfile.compute-node and Dockerfile.build-tools.
#
# The "host" network mode allows sql_exporter to talk to the
# endpoint which is running on the host.
super().__init__("docker.io/burningalchemist/sql_exporter:0.13.1", network_mode="host")
self.__logs_dir = logs_dir
self.__port = port
config_file_name = config_file.name
collector_file_name = collector_file.name
self.with_command(f"-config.file=/etc/{config_file_name} -web.listen-address=:{port}")
container_config_file = f"/etc/{config_file_name}"
container_collector_file = f"/etc/{collector_file_name}"
log.info(
"Mapping %s to %s in sql_exporter container", config_file, container_config_file
)
log.info(
"Mapping %s to %s in sql_exporter container",
collector_file,
container_collector_file,
)
# NOTE: z allows Podman to work with SELinux. Please don't change it.
# Ideally this would be a ro (read-only) mount, but I couldn't seem to
# get it to work.
self.with_volume_mapping(str(config_file), container_config_file, "z")
self.with_volume_mapping(str(collector_file), container_collector_file, "z")
@override
def start(self) -> SqlExporterContainer:
super().start()
log.info("Waiting for sql_exporter to be ready")
wait_for_logs(
self,
rf'level=info msg="Listening on" address=\[::\]:{self.__port}',
timeout=5,
)
return self
class SqlExporterContainerRunner(SqlExporterRunner):
def __init__(
self,
test_output_dir: Path,
config_file: Path,
collector_file: Path,
sql_exporter_port: int,
) -> None:
super().__init__(test_output_dir, sql_exporter_port)
self.__container = SqlExporterContainer(
test_output_dir, config_file, collector_file, sql_exporter_port
)
@override
def start(self) -> None:
self.__container.start()
@override
def stop(self) -> None:
try:
# sql_exporter doesn't print anything to stdout
with open(self._log_file_name, "w", encoding="utf-8") as f:
f.write(self.__container.get_logs()[1].decode())
except Exception:
log.exception("Failed to write sql_exporter logs")
# Stop the container *after* getting the logs
self.__container.stop()
else:
import subprocess
import time
from signal import Signals
from typing_extensions import override
if TYPE_CHECKING:
from collections.abc import Mapping
class SqlExporterNativeRunner(SqlExporterRunner):
def __init__(
self,
test_output_dir: Path,
config_file: Path,
collector_file: Path,
sql_exporter_port: int,
) -> None:
super().__init__(test_output_dir, sql_exporter_port)
self.__config_file = config_file
self.__collector_file = collector_file
self.__proc: subprocess.Popen[str]
@override
def start(self) -> None:
assert SQL_EXPORTER is not None
log_file = open(self._log_file_name, "w", encoding="utf-8")
self.__proc = subprocess.Popen(
[
os.path.realpath(SQL_EXPORTER),
f"-config.file={self.__config_file}",
f"-web.listen-address=:{self._sql_exporter_port}",
],
# If PGSERVICEFILE is set, sql_exporter won't launch.
env=cast("Mapping[str, str]", {}),
stderr=log_file,
bufsize=0,
text=True,
)
log.info("Waiting for sql_exporter to be ready")
with open(self._log_file_name, encoding="utf-8") as f:
started = time.time()
while True:
if time.time() - started > 5:
self.__proc.kill()
raise RuntimeError("sql_exporter did not start up properly")
line = f.readline()
if not line:
time.sleep(0.5)
continue
if (
f'level=info msg="Listening on" address=[::]:{self._sql_exporter_port}'
in line
):
break
@override
def stop(self) -> None:
self.__proc.send_signal(Signals.SIGINT)
self.__proc.wait()
@pytest.mark.parametrize(
"exporter",
[SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING],
)
def test_sql_exporter_metrics_e2e(
pg_version: PgVersion,
neon_simple_env: NeonEnv,
test_output_dir: Path,
compute_config_dir: Path,
exporter: SqlExporterProcess,
port_distributor: PortDistributor,
):
"""
This is a full E2E test of the sql_exporter setup to make sure it works
without error.
If you use Podman instead of Docker, you may run into issues. If you run
rootful Podman, you may need to add a ~/.testcontainers.properties file
with the following content:
ryuk.container.privileged=true
If you are not running rootful Podman, set the following environment
variable:
TESTCONTAINERS_RYUK_DISABLED=true
Note that you will need the Podman socket to be running. On a systemd-based
system, that command will look something like:
# Use `enable --now` to start the socket on login and immediately.
systemctl --user start podman.socket
    Whether you use the user service manager or the system service manager is
    up to you, but the choice may have implications for the ryuk-related steps
    above. Note that you may also need the docker(1) Podman frontend; I am
    unsure whether the docker Python package supports Podman natively.
"""
env = neon_simple_env
endpoint = env.endpoints.create("main")
endpoint.respec(skip_pg_catalog_updates=False)
endpoint.start()
if exporter == SqlExporterProcess.COMPUTE:
stem_suffix = ""
elif exporter == SqlExporterProcess.AUTOSCALING:
stem_suffix = "_autoscaling"
# Write the collector file
collector_file = test_output_dir / f"neon_collector{stem_suffix}.yml"
with open(collector_file, "w", encoding="utf-8") as o:
collector = evaluate_collector(
compute_config_dir / f"neon_collector{stem_suffix}.jsonnet", pg_version
)
o.write(collector)
conn_options = endpoint.conn_options()
pg_host = conn_options["host"]
pg_port = conn_options["port"]
pg_user = conn_options["user"]
pg_dbname = conn_options["dbname"]
pg_application_name = f"sql_exporter{stem_suffix}"
connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}"
def escape_go_filepath_match_characters(s: str) -> str:
"""
        Unfortunately, sql_exporter treats collector file paths as Go
        filepath.Match patterns rather than plain file paths, so special
        characters need escaping. pytest encodes test parameters into file
        names using [ and ], so we escape those with backslashes.
        See https://pkg.go.dev/path/filepath#Match.
"""
return s.replace("[", r"\[").replace("]", r"\]")
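    # For example (hypothetical file name, for illustration only):
    #   escape_go_filepath_match_characters("neon_collector[e2e].yml")
    #   -> r"neon_collector\[e2e\].yml"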
# Write the config file
config_file = test_output_dir / f"sql_exporter{stem_suffix}.yml"
with open(config_file, "w", encoding="utf-8") as o:
config = evaluate_config(
compute_config_dir / "sql_exporter.jsonnet",
collector_name=collector_file.stem,
collector_file=escape_go_filepath_match_characters(str(collector_file))
if SQL_EXPORTER
else collector_file.name,
connstr=connstr,
)
o.write(config)
sql_exporter_port = port_distributor.get_port()
with (SqlExporterNativeRunner if SQL_EXPORTER else SqlExporterContainerRunner)(
test_output_dir, config_file, collector_file, sql_exporter_port
) as _runner:
resp = requests.get(f"http://localhost:{sql_exporter_port}/metrics")
resp.raise_for_status()
def test_perf_counters(neon_simple_env: NeonEnv):
"""
Test compute metrics, exposed in the neon_backend_perf_counters and
neon_perf_counters views
"""
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
conn = endpoint.connect()
cur = conn.cursor()
# We don't check that the values make sense, this is just a very
# basic check that the server doesn't crash or something like that.
#
# 1.5 is the minimum version to contain these views.
cur.execute("CREATE EXTENSION neon VERSION '1.5'")
cur.execute("SELECT * FROM neon_perf_counters")
cur.execute("SELECT * FROM neon_backend_perf_counters")