mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-26 09:30:37 +00:00
introduce LaunchTimestamp to identify process restarts
This patch adds a LaunchTimestamp type to the `metrics` crate, along with a `libmetric_` Prometheus metric. The initial user is pageserver. In addition to exposing the Prometheus metric, it also reproduces the launch timestamp as a header in the API responses. The motivation for this is that we plan to scrape the pageserver's /v1/tenant/:tenant_id/timeline/:timeline_id/layer HTTP endpoint over time. It will soon expose access metrics (#3496) which reset upon process restart. We will use the pageserver's launch ID to identify a restart between two scrape points. However, there are other potential uses. For example, we could use the Prometheus metric to annotate Grafana plots whenever the launch timestamp changes.
This commit is contained in:
committed by
Christian Schwarz
parent
be81db21b9
commit
87cd2bae77
@@ -7,6 +7,7 @@ use std::{env, ops::ControlFlow, path::Path, str::FromStr};
|
||||
use anyhow::{anyhow, Context};
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use fail::FailScenario;
|
||||
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tracing::*;
|
||||
|
||||
@@ -52,6 +53,8 @@ fn version() -> String {
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate()));
|
||||
|
||||
let arg_matches = cli().get_matches();
|
||||
|
||||
if arg_matches.get_flag("enabled-features") {
|
||||
@@ -108,7 +111,7 @@ fn main() -> anyhow::Result<()> {
|
||||
virtual_file::init(conf.max_file_descriptors);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
start_pageserver(conf).context("Failed to start pageserver")?;
|
||||
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
|
||||
|
||||
scenario.teardown();
|
||||
Ok(())
|
||||
@@ -203,13 +206,24 @@ fn initialize_config(
|
||||
})
|
||||
}
|
||||
|
||||
fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
fn start_pageserver(
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
conf: &'static PageServerConf,
|
||||
) -> anyhow::Result<()> {
|
||||
// Initialize logging
|
||||
logging::init(conf.log_format)?;
|
||||
|
||||
// Print version to the log, and expose it as a prometheus metric too.
|
||||
info!("version: {}", version());
|
||||
// Print version and launch timestamp to the log,
|
||||
// and expose them as prometheus metrics.
|
||||
// A changed version string indicates changed software.
|
||||
// A changed launch timestamp indicates a pageserver restart.
|
||||
info!(
|
||||
"version: {} launch_timestamp: {}",
|
||||
version(),
|
||||
launch_ts.to_string()
|
||||
);
|
||||
set_build_info_metric(GIT_VERSION);
|
||||
set_launch_timestamp_metric(launch_ts);
|
||||
|
||||
// If any failpoints were set from FAILPOINTS environment variable,
|
||||
// print them to the log for debugging purposes
|
||||
@@ -307,7 +321,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
{
|
||||
let _rt_guard = MGMT_REQUEST_RUNTIME.enter();
|
||||
|
||||
let router = http::make_router(conf, auth.clone(), remote_storage)?
|
||||
let router = http::make_router(conf, launch_ts, auth.clone(), remote_storage)?
|
||||
.build()
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let service = utils::http::RouterService::new(router).unwrap();
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::sync::Arc;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -921,6 +922,7 @@ async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
|
||||
pub fn make_router(
|
||||
conf: &'static PageServerConf,
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
|
||||
@@ -937,6 +939,14 @@ pub fn make_router(
|
||||
}))
|
||||
}
|
||||
|
||||
router = router.middleware(
|
||||
endpoint::add_response_header_middleware(
|
||||
"PAGESERVER_LAUNCH_TIMESTAMP",
|
||||
&launch_ts.to_string(),
|
||||
)
|
||||
.expect("construct launch timestamp header middleware"),
|
||||
);
|
||||
|
||||
macro_rules! testing_api {
|
||||
($handler_desc:literal, $handler:path $(,)?) => {{
|
||||
#[cfg(not(feature = "testing"))]
|
||||
|
||||
Reference in New Issue
Block a user