Mirror of https://github.com/neondatabase/neon.git (synced 2025-12-26)
Add more common storage metrics (#1722)

- Enabled process exporter for storage services
- Changed zenith_proxy prefix to just proxy
- Removed old `monitoring` directory
- Removed common prefix for metrics; now our common metrics have the `libmetrics_` prefix, for example `libmetrics_serve_metrics_count`
- Added `test_metrics_normal_work`
Commit 134eeeb096 (parent 55ea3f262e), committed via GitHub.
CI config (test artifact cleanup step):

@@ -355,7 +355,7 @@ jobs:
           when: always
           command: |
             du -sh /tmp/test_output/*
-            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "etcd.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" -delete
+            find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "etcd.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
             du -sh /tmp/test_output/*
       - store_artifacts:
           path: /tmp/test_output
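The added `! -name "*.metrics"` exclusion keeps the per-service `{name}.metrics` dumps written by the new `test_metrics_normal_work` test (see the test hunk below) from being deleted during cleanup, so they are uploaded with the other test artifacts; this assumes the test's repo dir lives under /tmp/test_output.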
Cargo.lock (generated), 39 lines changed:
@@ -166,7 +166,7 @@ dependencies = [
  "cc",
  "cfg-if",
  "libc",
- "miniz_oxide",
+ "miniz_oxide 0.4.4",
  "object",
  "rustc-demangle",
 ]
@@ -868,6 +868,18 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e"
 
+[[package]]
+name = "flate2"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af"
+dependencies = [
+ "cfg-if",
+ "crc32fast",
+ "libc",
+ "miniz_oxide 0.5.1",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -1527,6 +1539,15 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "miniz_oxide"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082"
+dependencies = [
+ "adler",
+]
+
 [[package]]
 name = "mio"
 version = "0.8.2"
@@ -2088,6 +2109,20 @@ dependencies = [
  "unicode-xid",
 ]
 
+[[package]]
+name = "procfs"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95e344cafeaeefe487300c361654bcfc85db3ac53619eeccced29f5ea18c4c70"
+dependencies = [
+ "bitflags",
+ "byteorder",
+ "flate2",
+ "hex",
+ "lazy_static",
+ "libc",
+]
+
 [[package]]
 name = "prometheus"
 version = "0.13.0"
@@ -2097,8 +2132,10 @@ dependencies = [
  "cfg-if",
  "fnv",
  "lazy_static",
+ "libc",
  "memchr",
  "parking_lot 0.11.2",
+ "procfs",
  "thiserror",
 ]
metrics library Cargo.toml:

@@ -4,7 +4,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-prometheus = {version = "0.13", default_features=false} # removes protobuf dependency
+prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
 libc = "0.2"
 lazy_static = "1.4"
 once_cell = "1.8.0"
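Enabling the prometheus crate's `process` feature is what pulls `procfs` (and transitively `flate2`/`miniz_oxide 0.5.1`) into Cargo.lock above; on Linux it makes the default registry export the standard `process_*` collector metrics that the new test logs. A minimal consumer-side sketch of such a check, assuming a service is already serving Prometheus text format on localhost:9898 (the port from the deleted scrape config below; the URL is illustrative, not part of this commit):

# Sketch: verify that a scraped /metrics payload contains the standard
# process_* metrics exported by the prometheus "process" feature.
import requests
from prometheus_client.parser import text_string_to_metric_families

EXPECTED_PROCESS_METRICS = {
    "process_cpu_seconds_total",
    "process_resident_memory_bytes",
    "process_open_fds",
    "process_start_time_seconds",
}

def check_process_metrics(url: str = "http://localhost:9898/metrics") -> None:
    text = requests.get(url, timeout=5).text
    # Collect every sample name present in the payload.
    seen = {
        sample.name
        for family in text_string_to_metric_families(text)
        for sample in family.samples
    }
    missing = EXPECTED_PROCESS_METRICS - seen
    assert not missing, f"missing process metrics: {missing}"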
metrics library lib.rs:

@@ -3,7 +3,6 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 use lazy_static::lazy_static;
-use once_cell::race::OnceBox;
 pub use prometheus::{exponential_buckets, linear_buckets};
 pub use prometheus::{register_gauge, Gauge};
 pub use prometheus::{register_gauge_vec, GaugeVec};
@@ -27,48 +26,15 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
     prometheus::gather()
 }
 
-static COMMON_METRICS_PREFIX: OnceBox<&str> = OnceBox::new();
-
-/// Sets a prefix which will be used for all common metrics, typically a service
-/// name like 'pageserver'. Should be executed exactly once in the beginning of
-/// any executable which uses common metrics.
-pub fn set_common_metrics_prefix(prefix: &'static str) {
-    // Not unwrap() because metrics may be initialized after multiple threads have been started.
-    COMMON_METRICS_PREFIX
-        .set(prefix.into())
-        .unwrap_or_else(|_| {
-            eprintln!(
-                "set_common_metrics_prefix() was called second time with '{}', exiting",
-                prefix
-            );
-            std::process::exit(1);
-        });
-}
-
-/// Prepends a prefix to a common metric name so they are distinguished between
-/// different services, see <https://github.com/zenithdb/zenith/pull/681>
-/// A call to set_common_metrics_prefix() is necessary prior to calling this.
-pub fn new_common_metric_name(unprefixed_metric_name: &str) -> String {
-    // Not unwrap() because metrics may be initialized after multiple threads have been started.
-    format!(
-        "{}_{}",
-        COMMON_METRICS_PREFIX.get().unwrap_or_else(|| {
-            eprintln!("set_common_metrics_prefix() was not called, but metrics are used, exiting");
-            std::process::exit(1);
-        }),
-        unprefixed_metric_name
-    )
-}
-
 lazy_static! {
     static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
-        new_common_metric_name("disk_io_bytes"),
+        "libmetrics_disk_io_bytes",
         "Bytes written and read from disk, grouped by the operation (read|write)",
         &["io_operation"]
     )
     .expect("Failed to register disk i/o bytes int gauge vec");
     static ref MAXRSS_KB: IntGauge = register_int_gauge!(
-        new_common_metric_name("maxrss_kb"),
+        "libmetrics_maxrss_kb",
         "Memory usage (Maximum Resident Set Size)"
     )
     .expect("Failed to register maxrss_kb int gauge");
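With the OnceBox-based runtime prefix gone, common metric names are literals under a fixed `libmetrics_` prefix: services no longer call `set_common_metrics_prefix()` before touching any metric (hence the removals in the pageserver, proxy, and safekeeper main() hunks below), and distinguishing which service a sample came from is left to scrape-time labels such as Prometheus's job/instance. A hedged sketch of a consumer-side invariant check, reusing the `parse_metrics` helper added later in this commit (`metrics_text` stands in for a raw /metrics payload):

# Sketch: every storage service should expose the same fixed-name
# common metrics, regardless of which service produced the payload.
from fixtures.metrics import parse_metrics

COMMON = ("libmetrics_disk_io_bytes", "libmetrics_maxrss_kb")

def assert_common_metrics(service: str, metrics_text: str) -> None:
    parsed = parse_metrics(metrics_text, service)
    for name in COMMON:
        # parsed.metrics is a defaultdict(list); empty means "not exposed".
        assert parsed.metrics[name], f"{service} is missing {name}"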
HTTP endpoint module (serves /metrics):

@@ -5,7 +5,7 @@ use anyhow::anyhow;
 use hyper::header::AUTHORIZATION;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
 use lazy_static::lazy_static;
-use metrics::{new_common_metric_name, register_int_counter, Encoder, IntCounter, TextEncoder};
+use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
@@ -18,7 +18,7 @@ use super::error::ApiError;
 
 lazy_static! {
     static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
-        new_common_metric_name("serve_metrics_count"),
+        "libmetrics_serve_metrics_count",
         "Number of metric requests made"
     )
     .expect("failed to define a metric");
Deleted monitoring docker-compose file:

@@ -1,25 +0,0 @@
-version: "3"
-services:
-
-  prometheus:
-    container_name: prometheus
-    image: prom/prometheus:latest
-    volumes:
-      - ./prometheus.yaml:/etc/prometheus/prometheus.yml
-    # ports:
-    #   - "9090:9090"
-    # TODO: find a proper portable solution
-    network_mode: "host"
-
-  grafana:
-    image: grafana/grafana:latest
-    volumes:
-      - ./grafana.yaml:/etc/grafana/provisioning/datasources/datasources.yaml
-    environment:
-      - GF_AUTH_ANONYMOUS_ENABLED=true
-      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
-      - GF_AUTH_DISABLE_LOGIN_FORM=true
-    # ports:
-    #   - "3000:3000"
-    # TODO: find a proper portable solution
-    network_mode: "host"
Deleted monitoring Grafana datasource config (grafana.yaml):

@@ -1,12 +0,0 @@
-apiVersion: 1
-
-datasources:
-  - name: Prometheus
-    type: prometheus
-    access: proxy
-    orgId: 1
-    url: http://localhost:9090
-    basicAuth: false
-    isDefault: false
-    version: 1
-    editable: false
Deleted monitoring Prometheus scrape config (prometheus.yaml):

@@ -1,5 +0,0 @@
-scrape_configs:
-  - job_name: 'default'
-    scrape_interval: 10s
-    static_configs:
-      - targets: ['localhost:9898']
pageserver main():

@@ -38,7 +38,6 @@ fn version() -> String {
 }
 
 fn main() -> anyhow::Result<()> {
-    metrics::set_common_metrics_prefix("pageserver");
     let arg_matches = App::new("Zenith page server")
         .about("Materializes WAL stream to pages and serves them to the postgres")
         .version(&*version())
poetry.lock (generated), 30 lines changed:
@@ -822,7 +822,7 @@ python-versions = "*"
 
 [[package]]
 name = "moto"
-version = "3.1.7"
+version = "3.1.9"
 description = "A library that allows your python tests to easily mock out the boto library"
 category = "main"
 optional = false
@@ -868,6 +868,7 @@ ds = ["sshpubkeys (>=3.1.0)"]
 dynamodb = ["docker (>=2.5.1)"]
 dynamodb2 = ["docker (>=2.5.1)"]
 dynamodbstreams = ["docker (>=2.5.1)"]
+ebs = ["sshpubkeys (>=3.1.0)"]
 ec2 = ["sshpubkeys (>=3.1.0)"]
 efs = ["sshpubkeys (>=3.1.0)"]
 glue = ["pyparsing (>=3.0.0)"]
@@ -953,6 +954,17 @@ importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
 dev = ["pre-commit", "tox"]
 testing = ["pytest", "pytest-benchmark"]
 
+[[package]]
+name = "prometheus-client"
+version = "0.14.1"
+description = "Python client for the Prometheus monitoring system."
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.extras]
+twisted = ["twisted"]
+
 [[package]]
 name = "psycopg2-binary"
 version = "2.9.3"
@@ -1003,7 +1015,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 
 [[package]]
 name = "pyjwt"
-version = "2.3.0"
+version = "2.4.0"
 description = "JSON Web Token implementation in Python"
 category = "main"
 optional = false
@@ -1375,7 +1387,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7"
-content-hash = "dc63b6e02d0ceccdc4b5616e9362c149a27fdcc6c54fda63a3b115a5b980c42e"
+content-hash = "d2fcba2af0a32cde3a1d0c8cfdfe5fb26531599b0c8c376bf16e200a74b55553"
 
 [metadata.files]
 aiopg = [
@@ -1693,8 +1705,8 @@ mccabe = [
     {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
 ]
 moto = [
-    {file = "moto-3.1.7-py3-none-any.whl", hash = "sha256:4ab6fb8dd150343e115d75e3dbdb5a8f850fc7236790819d7cef438c11ee6e89"},
-    {file = "moto-3.1.7.tar.gz", hash = "sha256:20607a0fd0cf6530e05ffb623ca84d3f45d50bddbcec2a33705a0cf471e71289"},
+    {file = "moto-3.1.9-py3-none-any.whl", hash = "sha256:8928ec168e5fd88b1127413b2fa570a80d45f25182cdad793edd208d07825269"},
+    {file = "moto-3.1.9.tar.gz", hash = "sha256:ba683e70950b6579189bc12d74c1477aa036c090c6ad8b151a22f5896c005113"},
 ]
 mypy = [
     {file = "mypy-0.910-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457"},
@@ -1741,6 +1753,10 @@ pluggy = [
     {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
     {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
 ]
+prometheus-client = [
+    {file = "prometheus_client-0.14.1-py3-none-any.whl", hash = "sha256:522fded625282822a89e2773452f42df14b5a8e84a86433e3f8a189c1d54dc01"},
+    {file = "prometheus_client-0.14.1.tar.gz", hash = "sha256:5459c427624961076277fdc6dc50540e2bacb98eebde99886e59ec55ed92093a"},
+]
 psycopg2-binary = [
     {file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
     {file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
@@ -1831,8 +1847,8 @@ pyflakes = [
     {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"},
 ]
 pyjwt = [
-    {file = "PyJWT-2.3.0-py3-none-any.whl", hash = "sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"},
-    {file = "PyJWT-2.3.0.tar.gz", hash = "sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41"},
+    {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"},
+    {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"},
 ]
 pyparsing = [
     {file = "pyparsing-3.0.6-py3-none-any.whl", hash = "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4"},
||||
@@ -38,7 +38,6 @@ async fn flatten_err(
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
metrics::set_common_metrics_prefix("zenith_proxy");
|
||||
let arg_matches = App::new("Neon proxy/router")
|
||||
.version(GIT_VERSION)
|
||||
.arg(
|
||||
|
||||
proxy connection module:

@@ -5,7 +5,7 @@ use crate::stream::{MetricsStream, PqStream, Stream};
 use anyhow::{bail, Context};
 use futures::TryFutureExt;
 use lazy_static::lazy_static;
-use metrics::{new_common_metric_name, register_int_counter, IntCounter};
+use metrics::{register_int_counter, IntCounter};
 use std::sync::Arc;
 use tokio::io::{AsyncRead, AsyncWrite};
 use utils::pq_proto::{BeMessage as Be, *};
@@ -15,17 +15,17 @@ const ERR_PROTO_VIOLATION: &str = "protocol violation";
 
 lazy_static! {
     static ref NUM_CONNECTIONS_ACCEPTED_COUNTER: IntCounter = register_int_counter!(
-        new_common_metric_name("num_connections_accepted"),
+        "proxy_accepted_connections",
         "Number of TCP client connections accepted."
     )
     .unwrap();
     static ref NUM_CONNECTIONS_CLOSED_COUNTER: IntCounter = register_int_counter!(
-        new_common_metric_name("num_connections_closed"),
+        "proxy_closed_connections",
        "Number of TCP client connections closed."
    )
    .unwrap();
    static ref NUM_BYTES_PROXIED_COUNTER: IntCounter = register_int_counter!(
-        new_common_metric_name("num_bytes_proxied"),
+        "proxy_io_bytes",
        "Number of bytes sent/received between any client and backend."
    )
    .unwrap();
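Because the proxy's counters switch from the runtime-prefixed scheme (which, per the removed `new_common_metric_name()` format "{prefix}_{name}", produced names like `zenith_proxy_num_connections_accepted`) to literal `proxy_*` names, any dashboards or alerts keyed on the old names need a one-time rename. A throwaway Python sketch of that mapping, for illustration only; `migrate_query` is a hypothetical helper, not part of this commit:

# Hypothetical helper for migrating dashboard/alert expressions; the old
# names are derived from the previous runtime prefix "zenith_proxy".
RENAMES = {
    "zenith_proxy_num_connections_accepted": "proxy_accepted_connections",
    "zenith_proxy_num_connections_closed": "proxy_closed_connections",
    "zenith_proxy_num_bytes_proxied": "proxy_io_bytes",
}

def migrate_query(expr: str) -> str:
    # Plain textual substitution is enough for simple PromQL expressions.
    for old, new in RENAMES.items():
        expr = expr.replace(old, new)
    return expr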
pyproject.toml:

@@ -23,6 +23,7 @@ boto3-stubs = "^1.20.40"
 moto = {version = "^3.0.0", extras = ["server"]}
 backoff = "^1.11.1"
 pytest-lazy-fixture = "^0.6.3"
+prometheus-client = "^0.14.1"
 
 [tool.poetry.dev-dependencies]
 yapf = "==0.31.0"
safekeeper main():

@@ -32,7 +32,6 @@ const ID_FILE_NAME: &str = "safekeeper.id";
 project_git_version!(GIT_VERSION);
 
 fn main() -> anyhow::Result<()> {
-    metrics::set_common_metrics_prefix("safekeeper");
     let arg_matches = App::new("Zenith safekeeper")
         .about("Store WAL stream to local file system and push it to WAL receivers")
         .version(GIT_VERSION)
Tenants test module:

@@ -1,8 +1,12 @@
 from contextlib import closing
 
+from datetime import datetime
+import os
 import pytest
 
 from fixtures.zenith_fixtures import ZenithEnvBuilder
 from fixtures.log_helper import log
+from fixtures.metrics import parse_metrics
+from fixtures.utils import lsn_to_hex
 
 
 @pytest.mark.parametrize('with_safekeepers', [False, True])
@@ -38,3 +42,79 @@ def test_tenants_normal_work(zenith_env_builder: ZenithEnvBuilder, with_safekeep
             cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
             cur.execute("SELECT sum(key) FROM t")
             assert cur.fetchone() == (5000050000, )
+
+
+def test_metrics_normal_work(zenith_env_builder: ZenithEnvBuilder):
+    zenith_env_builder.num_safekeepers = 3
+
+    env = zenith_env_builder.init_start()
+    tenant_1, _ = env.zenith_cli.create_tenant()
+    tenant_2, _ = env.zenith_cli.create_tenant()
+
+    timeline_1 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
+    timeline_2 = env.zenith_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
+
+    pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1)
+    pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2)
+
+    for pg in [pg_tenant1, pg_tenant2]:
+        with closing(pg.connect()) as conn:
+            with conn.cursor() as cur:
+                cur.execute("CREATE TABLE t(key int primary key, value text)")
+                cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
+                cur.execute("SELECT sum(key) FROM t")
+                assert cur.fetchone() == (5000050000, )
+
+    collected_metrics = {
+        "pageserver": env.pageserver.http_client().get_metrics(),
+    }
+    for sk in env.safekeepers:
+        collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str()
+
+    for name in collected_metrics:
+        basepath = os.path.join(zenith_env_builder.repo_dir, f'{name}.metrics')
+
+        with open(basepath, 'w') as stdout_f:
+            print(collected_metrics[name], file=stdout_f, flush=True)
+
+    all_metrics = [parse_metrics(m, name) for name, m in collected_metrics.items()]
+    ps_metrics = all_metrics[0]
+    sk_metrics = all_metrics[1:]
+
+    ttids = [{
+        'tenant_id': tenant_1.hex, 'timeline_id': timeline_1.hex
+    }, {
+        'tenant_id': tenant_2.hex, 'timeline_id': timeline_2.hex
+    }]
+
+    # Test metrics per timeline
+    for tt in ttids:
+        log.info(f"Checking metrics for {tt}")
+
+        ps_lsn = int(ps_metrics.query_one("pageserver_last_record_lsn", filter=tt).value)
+        sk_lsns = [int(sk.query_one("safekeeper_commit_lsn", filter=tt).value) for sk in sk_metrics]
+
+        log.info(f"ps_lsn: {lsn_to_hex(ps_lsn)}")
+        log.info(f"sk_lsns: {list(map(lsn_to_hex, sk_lsns))}")
+
+        assert ps_lsn <= max(sk_lsns)
+        assert ps_lsn > 0
+
+    # Test common metrics
+    for metrics in all_metrics:
+        log.info(f"Checking common metrics for {metrics.name}")
+
+        log.info(
+            f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}")
+        log.info(f"process_threads: {int(metrics.query_one('process_threads').value)}")
+        log.info(
+            f"process_resident_memory_bytes (MB): {metrics.query_one('process_resident_memory_bytes').value / 1024 / 1024}"
+        )
+        log.info(
+            f"process_virtual_memory_bytes (MB): {metrics.query_one('process_virtual_memory_bytes').value / 1024 / 1024}"
+        )
+        log.info(f"process_open_fds: {int(metrics.query_one('process_open_fds').value)}")
+        log.info(f"process_max_fds: {int(metrics.query_one('process_max_fds').value)}")
+        log.info(
+            f"process_start_time_seconds (UTC): {datetime.fromtimestamp(metrics.query_one('process_start_time_seconds').value)}"
+        )
Benchmark fixture (ZenithBenchmarker):

@@ -236,14 +236,14 @@ class ZenithBenchmarker:
         """
         Fetch the "cumulative # of bytes written" metric from the pageserver
         """
-        metric_name = r'pageserver_disk_io_bytes{io_operation="write"}'
+        metric_name = r'libmetrics_disk_io_bytes{io_operation="write"}'
         return self.get_int_counter_value(pageserver, metric_name)
 
     def get_peak_mem(self, pageserver) -> int:
         """
         Fetch the "maxrss" metric from the pageserver
         """
-        metric_name = r'pageserver_maxrss_kb'
+        metric_name = r'libmetrics_maxrss_kb'
        return self.get_int_counter_value(pageserver, metric_name)
 
    def get_int_counter_value(self, pageserver, metric_name) -> int:
test_runner/fixtures/metrics.py (new file), 38 lines:
@@ -0,0 +1,38 @@
+from dataclasses import dataclass
+from prometheus_client.parser import text_string_to_metric_families
+from prometheus_client.samples import Sample
+from typing import Dict, List
+from collections import defaultdict
+
+from fixtures.log_helper import log
+
+
+class Metrics:
+    metrics: Dict[str, List[Sample]]
+    name: str
+
+    def __init__(self, name: str = ""):
+        self.metrics = defaultdict(list)
+        self.name = name
+
+    def query_all(self, name: str, filter: Dict[str, str]) -> List[Sample]:
+        res = []
+        for sample in self.metrics[name]:
+            if all(sample.labels[k] == v for k, v in filter.items()):
+                res.append(sample)
+        return res
+
+    def query_one(self, name: str, filter: Dict[str, str] = {}) -> Sample:
+        res = self.query_all(name, filter)
+        assert len(res) == 1, f"expected single sample for {name} {filter}, found {res}"
+        return res[0]
+
+
+def parse_metrics(text: str, name: str = ""):
+    metrics = Metrics(name)
+    gen = text_string_to_metric_families(text)
+    for family in gen:
+        for sample in family.samples:
+            metrics.metrics[sample.name].append(sample)
+
+    return metrics
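A quick usage sketch of the new fixture; the payload is hand-written for illustration. Because `metrics` is a `defaultdict(list)`, querying an unregistered name yields an empty list rather than a KeyError:

# Sketch: parse a Prometheus text payload and query samples by labels.
from fixtures.metrics import parse_metrics

payload = (
    'pageserver_last_record_lsn{tenant_id="t1",timeline_id="tl1"} 42\n'
    'pageserver_last_record_lsn{tenant_id="t2",timeline_id="tl2"} 7\n'
)

metrics = parse_metrics(payload, name="pageserver")
sample = metrics.query_one("pageserver_last_record_lsn",
                           filter={"tenant_id": "t1", "timeline_id": "tl1"})
assert int(sample.value) == 42
# An empty filter matches every sample of the metric.
assert len(metrics.query_all("pageserver_last_record_lsn", filter={})) == 2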
zenith_fixtures (SafekeeperHttpClient):

@@ -1833,10 +1833,13 @@ class SafekeeperHttpClient(requests.Session):
         assert isinstance(res_json, dict)
         return res_json
 
-    def get_metrics(self) -> SafekeeperMetrics:
+    def get_metrics_str(self) -> str:
         request_result = self.get(f"http://localhost:{self.port}/metrics")
         request_result.raise_for_status()
-        all_metrics_text = request_result.text
+        return request_result.text
+
+    def get_metrics(self) -> SafekeeperMetrics:
+        all_metrics_text = self.get_metrics_str()
 
         metrics = SafekeeperMetrics()
         for match in re.finditer(
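Splitting `get_metrics_str()` out of `get_metrics()` lets callers feed the raw exposition text to the generic `parse_metrics` fixture (as `test_metrics_normal_work` does for safekeepers) without going through the regex-based `SafekeeperMetrics` parsing. A short sketch, assuming an `env` from `zenith_env_builder.init_start()` is in scope:

# Sketch: the raw-text path vs. the typed path.
from fixtures.metrics import parse_metrics

for sk in env.safekeepers:
    raw = sk.http_client().get_metrics_str()            # plain Prometheus text
    generic = parse_metrics(raw, f"safekeeper{sk.id}")  # label-based queries
    typed = sk.http_client().get_metrics()              # SafekeeperMetrics object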