Proxy metrics (#3290)

Implement proxy metrics collection.
Only collect metrics for outbound traffic.

Add proxy CLI parameters:
- metric-collection-endpoint
- metric-collection-interval.

Add test_proxy_metric_collection test.

Move shared consumption metrics code to libs/consumption_metrics.
Refactor the code.
This commit is contained in:
Anastasia Lubennikova
2023-01-16 17:17:28 +02:00
committed by GitHub
parent 5c6a7a17cb
commit 2cbe84b78f
13 changed files with 586 additions and 214 deletions

View File

@@ -22,6 +22,7 @@ from itertools import chain, product
from pathlib import Path
from types import TracebackType
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union, cast
from urllib.parse import urlparse
import asyncpg
import backoff # type: ignore
@@ -2323,6 +2324,8 @@ class NeonProxy(PgProtocol):
http_port: int,
mgmt_port: int,
auth_backend: NeonProxy.AuthBackend,
metric_collection_endpoint: Optional[str] = None,
metric_collection_interval: Optional[str] = None,
):
host = "127.0.0.1"
super().__init__(dsn=auth_backend.default_conn_url, host=host, port=proxy_port)
@@ -2333,6 +2336,8 @@ class NeonProxy(PgProtocol):
self.proxy_port = proxy_port
self.mgmt_port = mgmt_port
self.auth_backend = auth_backend
self.metric_collection_endpoint = metric_collection_endpoint
self.metric_collection_interval = metric_collection_interval
self._popen: Optional[subprocess.Popen[bytes]] = None
def start(self) -> NeonProxy:
@@ -2344,6 +2349,16 @@ class NeonProxy(PgProtocol):
*["--mgmt", f"{self.host}:{self.mgmt_port}"],
*self.auth_backend.extra_args(),
]
if (
self.metric_collection_endpoint is not None
and self.metric_collection_interval is not None
):
args += [
*["--metric-collection-endpoint", self.metric_collection_endpoint],
*["--metric-collection-interval", self.metric_collection_interval],
]
self._popen = subprocess.Popen(args)
self._wait_until_ready()
return self
@@ -2357,6 +2372,25 @@ class NeonProxy(PgProtocol):
request_result.raise_for_status()
return request_result.text
@staticmethod
def get_session_id(uri_prefix, uri_line):
assert uri_prefix in uri_line
url_parts = urlparse(uri_line)
psql_session_id = url_parts.path[1:]
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"
return psql_session_id
@staticmethod
async def find_auth_link(link_auth_uri: str, proc) -> str:
    """Scan the stderr of ``proc`` for the line that contains the link-auth URL.

    Reads at most 100 lines from ``proc.stderr``, logging each one, and
    returns the first (whitespace-stripped) line containing ``link_auth_uri``.

    Raises:
        RuntimeError: if the stream reaches EOF, or 100 lines are consumed,
            without ``link_auth_uri`` being seen. The earlier behaviour was
            to fall off the end of the loop and return ``None``, which only
            surfaced later as an unrelated-looking failure in the caller.
    """
    max_lines = 100
    for _ in range(max_lines):
        raw = await proc.stderr.readline()
        if not raw:
            # readline() yields b"" only at EOF (a blank line is b"\n"),
            # so there is nothing left to wait for.
            break
        line = raw.decode("utf-8").strip()
        log.info(f"psql line: {line}")
        if link_auth_uri in line:
            log.info(f"SUCCESS, found auth url: {line}")
            return line
    raise RuntimeError(f"link auth url {link_auth_uri!r} not found in psql stderr output")
def __enter__(self) -> NeonProxy:
    """Enter the context manager; hands back this same object unchanged."""
    return self
@@ -2371,6 +2405,46 @@ class NeonProxy(PgProtocol):
# it's a child process. This is mostly to clean up in between different tests.
self._popen.kill()
@staticmethod
async def activate_link_auth(
    local_vanilla_pg, proxy_with_metric_collector, psql_session_id, create_user=True
):
    """Complete a pending link-auth session by notifying the proxy's mgmt port.

    When ``create_user`` is true, ``local_vanilla_pg`` is started and a
    superuser named ``proxy`` is created in it first.  A JSON "Success"
    message carrying the connection details of ``local_vanilla_pg`` and the
    given ``psql_session_id`` is then sent through ``PSQL`` to the proxy's
    mgmt port, and the reply is asserted to be ``ok``.
    """
    pg_user = "proxy"

    if create_user:
        log.info("creating a new user for link auth test")
        local_vanilla_pg.start()
        local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")

    pg_options = local_vanilla_pg.default_options
    aux_ids = {
        "project_id": "test_project_id",
        "endpoint_id": "test_endpoint_id",
        "branch_id": "test_branch_id",
    }
    success_payload = {
        "host": pg_options["host"],
        "port": pg_options["port"],
        "dbname": pg_options["dbname"],
        "user": pg_user,
        "aux": aux_ids,
    }
    db_info = json.dumps(
        {
            "session_id": psql_session_id,
            "result": {"Success": success_payload},
        }
    )

    log.info("sending session activation message")
    mgmt_client = PSQL(
        host=proxy_with_metric_collector.host,
        port=proxy_with_metric_collector.mgmt_port,
    )
    psql = await mgmt_client.run(db_info)
    assert psql.stdout is not None
    reply = await psql.stdout.read()
    out = reply.decode("utf-8").strip()
    assert out == "ok"
@pytest.fixture(scope="function")
def link_proxy(port_distributor: PortDistributor, neon_binpath: Path) -> Iterator[NeonProxy]:

View File

@@ -1,12 +1,22 @@
#
# Test for collecting metrics from pageserver and proxy.
# Use mock HTTP server to receive metrics and verify that they look sane.
#
import time
from pathlib import Path
from typing import Iterator
import pytest
from fixtures.log_helper import log
from fixtures.metrics import parse_metrics
from fixtures.neon_fixtures import (
PSQL,
NeonEnvBuilder,
NeonProxy,
PortDistributor,
RemoteStorageKind,
VanillaPostgres,
wait_for_last_flush_lsn,
)
from fixtures.types import TenantId, TimelineId
@@ -22,6 +32,10 @@ def httpserver_listen_address(port_distributor: PortDistributor):
return ("localhost", port)
# ==============================================================================
# Storage metrics tests
# ==============================================================================
initial_tenant = TenantId.generate()
remote_uploaded = 0
checks = {
@@ -161,3 +175,102 @@ def test_metric_collection(
assert len(metric_kinds_checked) == len(
checks
), f"Expected to receive and check all kind of metrics, but {expected_checks - metric_kinds_checked} got uncovered"
# ==============================================================================
# Proxy metrics tests
# ==============================================================================
def proxy_metrics_handler(request: Request) -> Response:
    """Mock handler for the proxy's usage-event uploads.

    Replies 400 when the request carries no JSON body.  Otherwise logs the
    received ``events`` list, sanity-checks every event and replies 200.
    """
    payload = request.json
    if payload is None:
        return Response(status=400)

    events = payload["events"]
    log.info("received events:")
    log.info(events)

    # perform basic sanity checks
    for ev in events:
        assert ev["metric"] == "proxy_io_bytes_per_client"
        assert ev["endpoint_id"] == "test_endpoint_id"
        assert ev["value"] >= 0
        assert ev["stop_time"] >= ev["start_time"]

    return Response(status=200)
@pytest.fixture(scope="session")
def proxy_with_metric_collector(
    port_distributor: PortDistributor, neon_binpath: Path, httpserver_listen_address
) -> Iterator[NeonProxy]:
    """Yield a started link-auth Neon proxy with metric collection enabled.

    Metrics are pushed every ``5s`` to the ``/billing/api/v1/usage_events``
    path on the address given by ``httpserver_listen_address``.
    """
    # Ports are handed out in this order: http, proxy, mgmt.
    http_port, proxy_port, mgmt_port = (port_distributor.get_port() for _ in range(3))

    (host, port) = httpserver_listen_address
    endpoint = f"http://{host}:{port}/billing/api/v1/usage_events"
    interval = "5s"

    proxy = NeonProxy(
        neon_binpath=neon_binpath,
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
        metric_collection_endpoint=endpoint,
        metric_collection_interval=interval,
        auth_backend=NeonProxy.Link(),
    )
    with proxy:
        proxy.start()
        yield proxy
@pytest.mark.asyncio
async def test_proxy_metric_collection(
    httpserver: HTTPServer,
    httpserver_listen_address,
    proxy_with_metric_collector: NeonProxy,
    vanilla_pg: VanillaPostgres,
):
    """Run two link-auth psql sessions through the proxy and verify the
    usage events it posts to the mock HTTP server."""
    # mock http server that returns OK for the metrics
    usage_events = httpserver.expect_request("/billing/api/v1/usage_events", method="POST")
    usage_events.respond_with_handler(proxy_metrics_handler)

    proxy = proxy_with_metric_collector
    base_uri = proxy.link_auth_uri

    # Each workload generates some traffic and then sleeps for 5 seconds
    # inside the session to give the metric collector time to collect metrics.
    # Only the first session needs to create the postgres user.
    workloads = [
        (
            "create table tbl as select * from generate_series(0,1000); select pg_sleep(5); select 42",
            True,
        ),
        (
            "insert into tbl select * from generate_series(0,1000); select pg_sleep(5); select 42",
            False,
        ),
    ]

    for query, needs_user in workloads:
        psql = await PSQL(host=proxy.host, port=proxy.proxy_port).run(query)

        link = await NeonProxy.find_auth_link(base_uri, psql)
        psql_session_id = NeonProxy.get_session_id(base_uri, link)
        await NeonProxy.activate_link_auth(
            vanilla_pg, proxy, psql_session_id, create_user=needs_user
        )

        assert psql.stdout is not None
        out = (await psql.stdout.read()).decode("utf-8").strip()
        assert out == "42"

    httpserver.check()

View File

@@ -1,9 +1,5 @@
import json
from urllib.parse import urlparse
import psycopg2
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
@@ -30,62 +26,14 @@ def test_password_hack(static_proxy: NeonProxy):
@pytest.mark.asyncio
async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
def get_session_id(uri_prefix, uri_line):
assert uri_prefix in uri_line
url_parts = urlparse(uri_line)
psql_session_id = url_parts.path[1:]
assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars"
return psql_session_id
async def find_auth_link(link_auth_uri, proc):
for _ in range(100):
line = (await proc.stderr.readline()).decode("utf-8").strip()
log.info(f"psql line: {line}")
if link_auth_uri in line:
log.info(f"SUCCESS, found auth url: {line}")
return line
async def activate_link_auth(local_vanilla_pg, link_proxy, psql_session_id):
pg_user = "proxy"
log.info("creating a new user for link auth test")
local_vanilla_pg.start()
local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")
db_info = json.dumps(
{
"session_id": psql_session_id,
"result": {
"Success": {
"host": local_vanilla_pg.default_options["host"],
"port": local_vanilla_pg.default_options["port"],
"dbname": local_vanilla_pg.default_options["dbname"],
"user": pg_user,
"aux": {
"project_id": "project",
"endpoint_id": "endpoint",
"branch_id": "branch",
},
}
},
}
)
log.info("sending session activation message")
psql = await PSQL(host=link_proxy.host, port=link_proxy.mgmt_port).run(db_info)
assert psql.stdout is not None
out = (await psql.stdout.read()).decode("utf-8").strip()
assert out == "ok"
psql = await PSQL(host=link_proxy.host, port=link_proxy.proxy_port).run("select 42")
base_uri = link_proxy.link_auth_uri
link = await find_auth_link(base_uri, psql)
link = await NeonProxy.find_auth_link(base_uri, psql)
psql_session_id = get_session_id(base_uri, link)
await activate_link_auth(vanilla_pg, link_proxy, psql_session_id)
psql_session_id = NeonProxy.get_session_id(base_uri, link)
await NeonProxy.activate_link_auth(vanilla_pg, link_proxy, psql_session_id)
assert psql.stdout is not None
out = (await psql.stdout.read()).decode("utf-8").strip()