diff --git a/libs/metrics/src/lib.rs b/libs/metrics/src/lib.rs
index e290828d37..880ab0e83c 100644
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -77,6 +77,26 @@ pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
     0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
 ];
 
+/// Publishes build/version information as the `libmetrics_build_info` gauge.
+///
+/// Registers an integer gauge vector with a single `revision` label and sets the
+/// series labelled with the given `revision` to `1`.
+///
+/// # Panics
+///
+/// Panics if the gauge cannot be registered. NOTE(review): presumably that includes
+/// calling this function twice in one process (duplicate metric name) -- confirm.
+pub fn set_build_info_metric(revision: &str) {
+    register_int_gauge_vec!(
+        "libmetrics_build_info",
+        "Build/version information",
+        &["revision"]
+    )
+    .expect("Failed to register build info metric")
+    .with_label_values(&[revision])
+    .set(1);
+}
+
 // Records I/O stats in a "cross-platform" way.
 // Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
 // An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.
diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 46a36c6118..4cd82e37b1 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -10,6 +10,8 @@ use clap::{App, Arg};
 use daemonize::Daemonize;
 
 use fail::FailScenario;
+use metrics::set_build_info_metric;
+
 use pageserver::{
     config::{defaults::*, PageServerConf},
     http, page_cache, page_service, profiling, task_mgr,
@@ -359,6 +361,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
         },
     );
 
+    set_build_info_metric(GIT_VERSION);
+
     // All started up! Now just sit and wait for shutdown signal.
signals.handle(|signal| match signal { Signal::Quit => { diff --git a/proxy/src/main.rs b/proxy/src/main.rs index 2e6c365d32..91ef26a37f 100644 --- a/proxy/src/main.rs +++ b/proxy/src/main.rs @@ -23,6 +23,7 @@ use anyhow::{bail, Context}; use clap::{self, Arg}; use config::ProxyConfig; use futures::FutureExt; +use metrics::set_build_info_metric; use std::{borrow::Cow, future::Future, net::SocketAddr}; use tokio::{net::TcpListener, task::JoinError}; use tracing::info; @@ -166,6 +167,7 @@ async fn main() -> anyhow::Result<()> { ] .map(flatten_err); + set_build_info_metric(GIT_VERSION); // This will block until all tasks have completed. // Furthermore, the first one to fail will cancel the rest. let _: Vec<()> = futures::future::try_join_all(tasks).await?; diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index e4545dad87..3f55d823cc 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -17,6 +17,7 @@ use toml_edit::Document; use tracing::*; use url::{ParseError, Url}; +use metrics::set_build_info_metric; use safekeeper::broker; use safekeeper::control_file; use safekeeper::defaults::{ @@ -363,6 +364,7 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option, init: bo })?, ); + set_build_info_metric(GIT_VERSION); // TODO: put more thoughts into handling of failed threads // We probably should restart them. 
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 28c65223ba..5df0f5cc50 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1961,6 +1961,13 @@ class NeonProxy(PgProtocol):
     def _wait_until_ready(self):
         requests.get(f"http://{self.host}:{self.http_port}/v1/status")
 
+    def get_metrics(self) -> str:
+        """Fetch this proxy's `/metrics` endpoint and return the response body as text."""
+        response = requests.get(f"http://{self.host}:{self.http_port}/metrics")
+        # Turn an HTTP error status into an exception instead of returning the error body.
+        response.raise_for_status()
+        return response.text
+
     def __enter__(self):
         return self
 
diff --git a/test_runner/regress/test_build_info_metric.py b/test_runner/regress/test_build_info_metric.py
new file mode 100644
index 0000000000..b75b5bd775
--- /dev/null
+++ b/test_runner/regress/test_build_info_metric.py
@@ -0,0 +1,22 @@
+from fixtures.metrics import parse_metrics
+from fixtures.neon_fixtures import NeonEnvBuilder, NeonProxy
+
+
+def test_build_info_metric(neon_env_builder: NeonEnvBuilder, link_proxy: NeonProxy):
+    """Pageserver, safekeeper and proxy must each export a non-empty build revision."""
+    neon_env_builder.num_safekeepers = 1
+    env = neon_env_builder.init_start()
+
+    # Scrape and parse the metrics of every component, keyed by component name.
+    parsed_metrics = {
+        "pageserver": parse_metrics(env.pageserver.http_client().get_metrics()),
+        "safekeeper": parse_metrics(env.safekeepers[0].http_client().get_metrics_str()),
+        "proxy": parse_metrics(link_proxy.get_metrics()),
+    }
+
+    for component, metrics in parsed_metrics.items():
+        sample = metrics.query_one("libmetrics_build_info")
+
+        # Name the component so a failure points at the offending service.
+        assert "revision" in sample.labels, f"{component}: missing revision label"
+        assert len(sample.labels["revision"]) > 0, f"{component}: empty revision label"