from __future__ import annotations

import enum
import os
import shutil
from enum import StrEnum
from logging import debug
from pathlib import Path
from typing import TYPE_CHECKING, cast

# Docs are available at https://jsonnet.org/ref/bindings.html#python_api
import _jsonnet
import pytest
import requests
import yaml
from fixtures.log_helper import log
from fixtures.metrics import parse_metrics
from fixtures.paths import BASE_DIR, COMPUTE_CONFIG_DIR
from fixtures.utils import wait_until

if TYPE_CHECKING:
    from collections.abc import Callable
    from types import TracebackType
    from typing import Self, TypedDict

    from fixtures.endpoint.http import EndpointHttpClient
    from fixtures.neon_fixtures import NeonEnv
    from fixtures.pg_version import PgVersion
    from fixtures.port_distributor import PortDistributor
    from prometheus_client.samples import Sample

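    # These TypedDicts model the subset of the sql_exporter collector schema
    # that the tests below read back out of the rendered YAML; the full format
    # is documented upstream in the sql_exporter project
    # (https://github.com/burningalchemist/sql_exporter).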
    class Metric(TypedDict):
        metric_name: str
        type: str
        help: str
        key_labels: list[str] | None
        values: list[str] | None
        query: str | None
        query_ref: str | None

    class Collector(TypedDict):
        collector_name: str
        metrics: list[Metric]
        queries: list[Query] | None

    class Query(TypedDict):
        query_name: str
        query: str


JSONNET_IMPORT_CACHE: dict[str, bytes] = {}
JSONNET_PATH: list[Path] = [BASE_DIR / "compute" / "jsonnet", COMPUTE_CONFIG_DIR]


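# Imports are resolved relative to the importing file first and then against
# each entry of JSONNET_PATH; file contents are memoized in
# JSONNET_IMPORT_CACHE so repeated evaluations don't re-read the same file.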
def __import_callback(dir: str, rel: str) -> tuple[str, bytes]:
    """
    dir: The directory of the Jsonnet file which tried to import a file
    rel: The actual import path from Jsonnet
    """
    if not rel:
        raise RuntimeError("Empty filename")

    full_path: str | None = None
    if os.path.isabs(rel):
        full_path = rel
    else:
        for p in (dir, *JSONNET_PATH):
            assert isinstance(p, str | Path), "for mypy"
            full_path = os.path.join(p, rel)

            assert isinstance(full_path, str), "for mypy"
            if not os.path.exists(full_path):
                full_path = None
                continue

            break

    if not full_path:
        raise RuntimeError(f"Could not resolve import ({rel}) in {dir}")

    if os.path.isdir(full_path):
        raise RuntimeError(f"Attempted to import directory: {full_path}")

    if full_path not in JSONNET_IMPORT_CACHE:
        with open(full_path, encoding="utf-8") as f:
            JSONNET_IMPORT_CACHE[full_path] = f.read().encode()

    return full_path, JSONNET_IMPORT_CACHE[full_path]


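# A minimal sketch of how the resolver behaves; the file name here is
# hypothetical and assumed to live under COMPUTE_CONFIG_DIR:
#
#     path, content = __import_callback(str(COMPUTE_CONFIG_DIR), "utils.libsonnet")
#     assert os.path.exists(path) and isinstance(content, bytes)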
def jsonnet_evaluate_file(
    jsonnet_file: str | Path,
    ext_vars: str | dict[str, str] | None = None,
    tla_vars: str | dict[str, str] | None = None,
) -> str:
    return cast(
        "str",
        _jsonnet.evaluate_file(
            str(jsonnet_file),
            ext_vars=ext_vars,
            tla_vars=tla_vars,
            import_callback=__import_callback,
        ),
    )


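# Minimal usage sketch (the file name is hypothetical). Evaluation produces a
# JSON string, which the tests below parse with yaml.safe_load() since YAML is
# a superset of JSON:
#
#     rendered = jsonnet_evaluate_file("example.jsonnet", ext_vars={"pg_version": "17"})
#     config = yaml.safe_load(rendered)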
def evaluate_collector(jsonnet_file: Path, pg_version: PgVersion) -> str:
    return jsonnet_evaluate_file(jsonnet_file, ext_vars={"pg_version": str(pg_version)})


def evaluate_config(
    jsonnet_file: Path, collector_name: str, collector_file: str | Path, connstr: str
) -> str:
    return jsonnet_evaluate_file(
        jsonnet_file,
        tla_vars={
            "collector_name": collector_name,
            "collector_file": str(collector_file),
            "connection_string": connstr,
        },
    )


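# Note the two variable kinds: ext_vars are global and read with std.extVar()
# anywhere in the Jsonnet source, while tla_vars are passed as arguments to the
# top-level function the file evaluates to, which implies sql_exporter.jsonnet
# evaluates to a function taking collector_name, collector_file, and
# connection_string.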
@enum.unique
class SqlExporterProcess(StrEnum):
    COMPUTE = "compute"
    AUTOSCALING = "autoscaling"


@pytest.mark.parametrize(
    "collector_name",
    ["neon_collector", "neon_collector_autoscaling"],
    ids=[SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING],
)
def test_sql_exporter_metrics_smoke(
    pg_version: PgVersion,
    neon_simple_env: NeonEnv,
    compute_config_dir: Path,
    collector_name: str,
):
    """
    This is a smoke test to ensure the metrics SQL queries for sql_exporter
    work without errors.
    """
    env = neon_simple_env

    endpoint = env.endpoints.create("main")
    endpoint.respec(skip_pg_catalog_updates=False)
    endpoint.start()

    # Extract all the SQL queries from the sql_exporter config files, and run
    # them.
    collector = cast(
        "Collector",
        yaml.safe_load(
            jsonnet_evaluate_file(
                str(compute_config_dir / f"{collector_name}.jsonnet"),
                ext_vars={"pg_version": pg_version},
            )
        ),
    )

    for metric in collector["metrics"]:
        query = metric.get("query")
        if query is not None:
            log.info("Checking query for metric %s in %s", metric["metric_name"], collector_name)
            endpoint.safe_psql(query)

    queries = collector.get("queries")
    if queries is not None:
        # This variable is named q because mypy cannot tell that it is
        # distinct from the query above:
        #
        # query: str | None
        # q: Query
        for q in queries:
            log.info("Checking query %s in %s", q["query_name"], collector_name)
            endpoint.safe_psql(q["query"])


class SqlExporterRunner:
    def __init__(self, test_output_dir: Path, sql_exporter_port: int) -> None:
        self._log_file_name = test_output_dir / "sql_exporter.stderr"
        self._sql_exporter_port = sql_exporter_port

        log.info(f"Starting sql_exporter at http://localhost:{self._sql_exporter_port}")

    def start(self) -> None:
        raise NotImplementedError()

    def stop(self) -> None:
        raise NotImplementedError()

    def __enter__(self) -> Self:
        self.start()

        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ):
        self.stop()


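# Both concrete runners below are used through the context-manager protocol,
# e.g. (a sketch mirroring the e2e test further down; `port` here is a
# hypothetical name):
#
#     with SqlExporterNativeRunner(test_output_dir, config_file, collector_file, port):
#         requests.get(f"http://localhost:{port}/metrics")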
SQL_EXPORTER = shutil.which("sql_exporter")
|
|
|
|
if SQL_EXPORTER is None:
|
|
from testcontainers.core.container import DockerContainer
|
|
from testcontainers.core.waiting_utils import wait_for_logs
|
|
from typing_extensions import override
|
|
|
|
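    # No sql_exporter binary was found on PATH, so fall back to running the
    # pinned container image through testcontainers. The container runner
    # mirrors the native runner's constructor so the e2e test below can
    # instantiate either one interchangeably.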
    class SqlExporterContainer(DockerContainer):  # type: ignore
        def __init__(
            self, logs_dir: Path, config_file: Path, collector_file: Path, port: int
        ) -> None:
            # NOTE: Keep the version the same as in
            # compute/compute-node.Dockerfile and build-tools.Dockerfile.
            #
            # The "host" network mode allows sql_exporter to talk to the
            # endpoint which is running on the host.
            super().__init__("docker.io/burningalchemist/sql_exporter:0.17.3", network_mode="host")

            self.__logs_dir = logs_dir
            self.__port = port

            config_file_name = config_file.name
            collector_file_name = collector_file.name

            self.with_command(f"-config.file=/etc/{config_file_name} -web.listen-address=:{port}")

            container_config_file = f"/etc/{config_file_name}"
            container_collector_file = f"/etc/{collector_file_name}"
            log.info(
                "Mapping %s to %s in sql_exporter container", config_file, container_config_file
            )
            log.info(
                "Mapping %s to %s in sql_exporter container",
                collector_file,
                container_collector_file,
            )

            # NOTE: z allows Podman to work with SELinux. Please don't change it.
            # Ideally this would be a ro (read-only) mount, but I couldn't seem
            # to get it to work.
            self.with_volume_mapping(str(config_file), container_config_file, "z")
            self.with_volume_mapping(str(collector_file), container_collector_file, "z")

        def start(self) -> Self:
            super().start()

            log.info("Waiting for sql_exporter to be ready")
            wait_for_logs(
                self,
                rf'msg="Listening on" address=\[::\]:{self.__port}',
                timeout=5,
            )

            return self

    class SqlExporterContainerRunner(SqlExporterRunner):
        def __init__(
            self,
            test_output_dir: Path,
            config_file: Path,
            collector_file: Path,
            sql_exporter_port: int,
        ) -> None:
            super().__init__(test_output_dir, sql_exporter_port)

            self.__container = SqlExporterContainer(
                test_output_dir, config_file, collector_file, sql_exporter_port
            )

        @override
        def start(self) -> None:
            self.__container.start()

        @override
        def stop(self) -> None:
            try:
                # sql_exporter doesn't print anything to stdout
                with open(self._log_file_name, "w", encoding="utf-8") as f:
                    f.write(self.__container.get_logs()[1].decode())
            except Exception:
                log.exception("Failed to write sql_exporter logs")

            # Stop the container *after* getting the logs
            self.__container.stop()

else:
    import subprocess
    import time
    from signal import Signals

    from typing_extensions import override

    if TYPE_CHECKING:
        from collections.abc import Mapping

    class SqlExporterNativeRunner(SqlExporterRunner):
        def __init__(
            self,
            test_output_dir: Path,
            config_file: Path,
            collector_file: Path,
            sql_exporter_port: int,
        ) -> None:
            super().__init__(test_output_dir, sql_exporter_port)

            self.__config_file = config_file
            self.__collector_file = collector_file
            self.__proc: subprocess.Popen[str]

        @override
        def start(self) -> None:
            assert SQL_EXPORTER is not None

            log_file = open(self._log_file_name, "w", encoding="utf-8")
            self.__proc = subprocess.Popen(
                [
                    os.path.realpath(SQL_EXPORTER),
                    f"-config.file={self.__config_file}",
                    f"-web.listen-address=:{self._sql_exporter_port}",
                ],
                # If PGSERVICEFILE is set, sql_exporter won't launch.
                env=cast("Mapping[str, str]", {}),
                stderr=log_file,
                bufsize=0,
                text=True,
            )

            log.info("Waiting for sql_exporter to be ready")

            # Tail the exporter's stderr log until the "Listening on" line
            # shows up, or give up after 5 seconds.
            with open(self._log_file_name, encoding="utf-8") as f:
                started = time.time()
                while True:
                    if time.time() - started > 5:
                        self.__proc.kill()
                        raise RuntimeError("sql_exporter did not start up properly")

                    line = f.readline()
                    if not line:
                        time.sleep(0.5)
                        continue

                    if f'msg="Listening on" address=[::]:{self._sql_exporter_port}' in line:
                        break

        @override
        def stop(self) -> None:
            self.__proc.send_signal(Signals.SIGINT)
            self.__proc.wait()


@pytest.mark.parametrize(
    "exporter",
    [SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING],
)
def test_sql_exporter_metrics_e2e(
    pg_version: PgVersion,
    neon_simple_env: NeonEnv,
    test_output_dir: Path,
    compute_config_dir: Path,
    exporter: SqlExporterProcess,
    port_distributor: PortDistributor,
):
    """
    This is a full E2E test of the sql_exporter setup to make sure it works
    without error.

    If you use Podman instead of Docker, you may run into issues. If you run
    rootful Podman, you may need to add a ~/.testcontainers.properties file
    with the following content:

        ryuk.container.privileged=true

    If you are not running rootful Podman, set the following environment
    variable:

        TESTCONTAINERS_RYUK_DISABLED=true

    Note that you will need the Podman socket to be running. On a systemd-based
    system, that command will look something like:

        # Use `enable --now` to start the socket on login and immediately.
        systemctl --user start podman.socket

    Whether you use the user service manager or the system service manager is
    up to you, but it may have implications for the ryuk-related steps above.
    Note that you may also need the docker(1) Podman frontend. I am unsure if
    the docker Python package supports Podman natively.
    """
    env = neon_simple_env

    endpoint = env.endpoints.create("main")
    endpoint.respec(skip_pg_catalog_updates=False)
    endpoint.start()

    if exporter == SqlExporterProcess.COMPUTE:
        stem_suffix = ""
    elif exporter == SqlExporterProcess.AUTOSCALING:
        stem_suffix = "_autoscaling"

    # Write the collector file
    collector_file = test_output_dir / f"neon_collector{stem_suffix}.yml"
    with open(collector_file, "w", encoding="utf-8") as o:
        collector = evaluate_collector(
            compute_config_dir / f"neon_collector{stem_suffix}.jsonnet", pg_version
        )
        o.write(collector)

    conn_options = endpoint.conn_options()
    pg_host = conn_options["host"]
    pg_port = conn_options["port"]
    pg_user = conn_options["user"]
    pg_dbname = conn_options["dbname"]
    pg_application_name = f"sql_exporter{stem_suffix}"
    # pgaudit.log=none disables pgaudit logging on these monitoring
    # connections: audit logs from them are not needed for compliance, and the
    # constant polling would otherwise spam the Postgres log.
    connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}&pgaudit.log=none"

    def escape_go_filepath_match_characters(s: str) -> str:
        """
        Unfortunately sql_exporter doesn't use plain file paths, so we need to
        escape special characters. pytest encodes the parameters of a test
        using [ and ], so we escape them with backslashes.

        See https://pkg.go.dev/path/filepath#Match.
        """
        return s.replace("[", r"\[").replace("]", r"\]")

    # Write the config file
    config_file = test_output_dir / f"sql_exporter{stem_suffix}.yml"
    with open(config_file, "w", encoding="utf-8") as o:
        config = evaluate_config(
            compute_config_dir / "sql_exporter.jsonnet",
            collector_name=collector_file.stem,
            collector_file=escape_go_filepath_match_characters(str(collector_file))
            if SQL_EXPORTER
            else collector_file.name,
            connstr=connstr,
        )
        o.write(config)

    sql_exporter_port = port_distributor.get_port()
    with (SqlExporterNativeRunner if SQL_EXPORTER else SqlExporterContainerRunner)(
        test_output_dir, config_file, collector_file, sql_exporter_port
    ) as _runner:
        resp = requests.get(f"http://localhost:{sql_exporter_port}/metrics")
        resp.raise_for_status()


def test_perf_counters(neon_simple_env: NeonEnv):
    """
    Test compute metrics, exposed in the neon_backend_perf_counters and
    neon_perf_counters views.
    """
    env = neon_simple_env
    endpoint = env.endpoints.create_start("main")

    conn = endpoint.connect()
    cur = conn.cursor()

    # We don't check that the values make sense; this is just a very basic
    # check that the server doesn't crash or something like that.
    #
    # 1.5 is the minimum extension version to contain these views.
    cur.execute("CREATE EXTENSION neon VERSION '1.5'")
    cur.execute("set neon.monitor_query_exec_time = on")
    cur.execute("SELECT * FROM neon_perf_counters")
    cur.execute("SELECT * FROM neon_backend_perf_counters")
    cur.execute(
        "select value from neon_backend_perf_counters where metric='query_time_seconds_count' and pid=pg_backend_pid()"
    )
    # Presumably the two SELECT * statements above are the only queries timed
    # since monitoring was enabled, hence the expected count of 2.
    assert cur.fetchall()[0][0] == 2


def collect_metric(
    client: EndpointHttpClient,
    name: str,
    filter: dict[str, str],
    predicate: Callable[[list[Sample]], bool],
) -> Callable[[], list[Sample]]:
    """
    Call this function as the first argument to wait_until().
    """

    def __collect_metric() -> list[Sample]:
        resp = client.metrics()
        debug("Metrics: %s", resp)
        m = parse_metrics(resp)
        samples = m.query_all(name, filter)
        debug("Samples: %s", samples)
        assert predicate(samples), "predicate failed"
        return samples

    return __collect_metric


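# Typical usage, polling until the predicate holds (a sketch with hypothetical
# names, mirroring the test below):
#
#     wait_until(
#         collect_metric(client, "my_metric", {"label": "value"}, my_predicate),
#         name="my_metric",
#     )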
def test_compute_installed_extensions_metric(neon_simple_env: NeonEnv):
    """
    Test that the compute_installed_extensions metric reports accurate
    results. Note that this metric is currently only gathered on compute
    start. We install the neon extension into a database other than postgres
    because compute_ctl runs `ALTER EXTENSION neon UPDATE` in the postgres
    database during Postgres startup, creating a race condition.
    """
    DB_NAME = "test"

    env = neon_simple_env

    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql(f"CREATE DATABASE {DB_NAME}")

    # The metric is only gathered on compute start, so restart to check that
    # plpgsql is now in 3 databases instead of its regular 2 (template1 and
    # postgres).
    endpoint.stop()
    endpoint.start()

    client = endpoint.http_client()

    def __has_plpgsql(samples: list[Sample]) -> bool:
        """
        Check that plpgsql is installed in the template1, postgres, and test
        databases.
        """
        return len(samples) == 1 and samples[0].value == 3

    wait_until(
        collect_metric(
            client,
            "compute_installed_extensions",
            {"extension_name": "plpgsql", "version": "1.0", "owned_by_superuser": "1"},
            __has_plpgsql,
        ),
        name="compute_installed_extensions",
    )

    # Install the neon extension, so we can check for it on the restart.
    endpoint.safe_psql("CREATE EXTENSION neon VERSION '1.0'", dbname=DB_NAME)

    # The metric is only gathered on compute start, so restart to check if the
    # neon extension will now be there.
    endpoint.stop()
    endpoint.start()

    client = endpoint.http_client()

    def __has_neon(samples: list[Sample]) -> bool:
        return len(samples) == 1 and samples[0].value == 1

    wait_until(
        collect_metric(
            client,
            "compute_installed_extensions",
            {"extension_name": "neon", "version": "1.0", "owned_by_superuser": "1"},
            __has_neon,
        ),
        name="compute_installed_extensions",
    )

    # Double-check that we also still have plpgsql.
    wait_until(
        collect_metric(
            client,
            "compute_installed_extensions",
            {"extension_name": "plpgsql", "version": "1.0", "owned_by_superuser": "1"},
            __has_plpgsql,
        ),
        name="compute_installed_extensions",
    )