Add build info metric to pageserver, safekeeper and proxy (#2596)

* Test that we emit build info metric for pageserver, safekeeper and proxy with some non-zero length revision label

* Emit the libmetrics_build_info metric on startup of pageserver, safekeeper and
proxy, with a "revision" label that carries the git revision.
This commit is contained in:
Lassi Pölönen
2022-10-11 09:54:32 +03:00
committed by GitHub
parent 241e549757
commit e520293090
6 changed files with 42 additions and 0 deletions

View File

@@ -77,6 +77,16 @@ pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
];
/// Publishes the build-information metric for the running binary.
///
/// Registers an integer gauge vector named `libmetrics_build_info` with a
/// single `revision` label, then sets the series labelled with `revision`
/// to `1`. The gauge value itself is only a marker; the information lives
/// in the label.
///
/// # Panics
///
/// Panics if registering the gauge fails.
/// NOTE(review): presumably a second call in the same process would hit this
/// (duplicate registration) — confirm against the metrics library and call
/// this once at startup.
pub fn set_build_info_metric(revision: &str) {
    register_int_gauge_vec!(
        "libmetrics_build_info",
        "Build/version information",
        &["revision"]
    )
    .expect("Failed to register build info metric")
    .with_label_values(&[revision])
    .set(1);
}
// Records I/O stats in a "cross-platform" way.
// Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
// An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.

View File

@@ -10,6 +10,8 @@ use clap::{App, Arg};
use daemonize::Daemonize;
use fail::FailScenario;
use metrics::set_build_info_metric;
use pageserver::{
config::{defaults::*, PageServerConf},
http, page_cache, page_service, profiling, task_mgr,
@@ -359,6 +361,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
},
);
set_build_info_metric(GIT_VERSION);
// All started up! Now just sit and wait for shutdown signal.
signals.handle(|signal| match signal {
Signal::Quit => {

View File

@@ -23,6 +23,7 @@ use anyhow::{bail, Context};
use clap::{self, Arg};
use config::ProxyConfig;
use futures::FutureExt;
use metrics::set_build_info_metric;
use std::{borrow::Cow, future::Future, net::SocketAddr};
use tokio::{net::TcpListener, task::JoinError};
use tracing::info;
@@ -166,6 +167,7 @@ async fn main() -> anyhow::Result<()> {
]
.map(flatten_err);
set_build_info_metric(GIT_VERSION);
// This will block until all tasks have completed.
// Furthermore, the first one to fail will cancel the rest.
let _: Vec<()> = futures::future::try_join_all(tasks).await?;

View File

@@ -17,6 +17,7 @@ use toml_edit::Document;
use tracing::*;
use url::{ParseError, Url};
use metrics::set_build_info_metric;
use safekeeper::broker;
use safekeeper::control_file;
use safekeeper::defaults::{
@@ -363,6 +364,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
})?,
);
set_build_info_metric(GIT_VERSION);
// TODO: put more thoughts into handling of failed threads
// We probably should restart them.

View File

@@ -1961,6 +1961,11 @@ class NeonProxy(PgProtocol):
def _wait_until_ready(self):
# Sends one GET request to the /v1/status endpoint on this proxy's HTTP port.
# The response is discarded and its status code is not checked here.
# NOTE(review): any retry/backoff wrapper around this method is not visible
# in this hunk — confirm how "waiting" is actually achieved.
requests.get(f"http://{self.host}:{self.http_port}/v1/status")
def get_metrics(self) -> str:
    """Fetch the proxy's metrics page and return its body as text.

    Issues a GET to the ``/metrics`` endpoint on this proxy's HTTP port.
    A non-success HTTP status is turned into an exception via
    ``raise_for_status()`` before the body is returned.
    """
    metrics_url = f"http://{self.host}:{self.http_port}/metrics"
    response = requests.get(metrics_url)
    response.raise_for_status()
    return response.text
def __enter__(self):
# Context-manager entry hook: hands back this same object, so
# `with <obj> as x` binds `x` to the object itself.
return self

View File

@@ -0,0 +1,19 @@
from fixtures.metrics import parse_metrics
from fixtures.neon_fixtures import NeonEnvBuilder, NeonProxy
def test_build_info_metric(neon_env_builder: NeonEnvBuilder, link_proxy: NeonProxy):
    """Check that pageserver, safekeeper and proxy each expose build info.

    Starts an environment with one safekeeper, scrapes the metrics of each
    component, and asserts that exactly one ``libmetrics_build_info`` sample
    is found (via ``query_one``) carrying a non-empty ``revision`` label.
    """
    neon_env_builder.num_safekeepers = 1
    env = neon_env_builder.init_start()

    # Scrape in the same order as before: pageserver, safekeeper, proxy.
    metrics_by_component = {
        "pageserver": parse_metrics(env.pageserver.http_client().get_metrics()),
        "safekeeper": parse_metrics(env.safekeepers[0].http_client().get_metrics_str()),
        "proxy": parse_metrics(link_proxy.get_metrics()),
    }

    for scraped in metrics_by_component.values():
        build_info = scraped.query_one("libmetrics_build_info")
        revision_labels = build_info.labels
        assert "revision" in revision_labels
        assert len(revision_labels["revision"]) > 0