mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 14:02:55 +00:00
pageserver: expose prometheus metrics for startup time (#4893)
## Problem Currently to know how long pageserver startup took requires inspecting logs. ## Summary of changes `pageserver_startup_duration_ms` metric is added, with label `phase` for different phases of startup. These are broken down by phase, where the phases correspond to the existing wait points in the code: - Start of doing I/O - When tenant load is done - When initial size calculation is done - When background jobs start - Then "complete" when everything is done. `pageserver_startup_is_loading` is a 0/1 gauge that indicates whether we are in the initial load of tenants. `pageserver_tenant_activation_seconds` is a histogram of time in seconds taken to activate a tenant. Co-authored-by: Joonas Koivunen <joonas@neon.tech>
This commit is contained in:
@@ -9,8 +9,10 @@ use clap::{Arg, ArgAction, Command};
|
||||
use fail::FailScenario;
|
||||
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
||||
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
||||
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
||||
use pageserver::task_mgr::WALRECEIVER_RUNTIME;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tokio::time::Instant;
|
||||
use tracing::*;
|
||||
|
||||
use metrics::set_build_info_metric;
|
||||
@@ -224,6 +226,19 @@ fn start_pageserver(
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
conf: &'static PageServerConf,
|
||||
) -> anyhow::Result<()> {
|
||||
// Monotonic time for later calculating startup duration
|
||||
let started_startup_at = Instant::now();
|
||||
|
||||
let startup_checkpoint = move |phase: &str, human_phase: &str| {
|
||||
let elapsed = started_startup_at.elapsed();
|
||||
let secs = elapsed.as_secs_f64();
|
||||
STARTUP_DURATION.with_label_values(&[phase]).set(secs);
|
||||
info!(
|
||||
elapsed_ms = elapsed.as_millis(),
|
||||
"{human_phase} ({secs:.3}s since start)"
|
||||
)
|
||||
};
|
||||
|
||||
// Print version and launch timestamp to the log,
|
||||
// and expose them as prometheus metrics.
|
||||
// A changed version string indicates changed software.
|
||||
@@ -333,6 +348,11 @@ fn start_pageserver(
|
||||
// Set up remote storage client
|
||||
let remote_storage = create_remote_storage_client(conf)?;
|
||||
|
||||
// Up to this point no significant I/O has been done: this should have been fast. Record
|
||||
// duration prior to starting I/O intensive phase of startup.
|
||||
startup_checkpoint("initial", "Starting loading tenants");
|
||||
STARTUP_IS_LOADING.set(1);
|
||||
|
||||
// Startup staging or optimizing:
|
||||
//
|
||||
// We want to minimize downtime for `page_service` connections, and trying not to overload
|
||||
@@ -358,7 +378,6 @@ fn start_pageserver(
|
||||
};
|
||||
|
||||
// Scan the local 'tenants/' directory and start loading the tenants
|
||||
let init_started_at = std::time::Instant::now();
|
||||
let shutdown_pageserver = tokio_util::sync::CancellationToken::new();
|
||||
|
||||
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(
|
||||
@@ -376,18 +395,13 @@ fn start_pageserver(
|
||||
let guard = scopeguard::guard_on_success((), |_| tracing::info!("Cancelled before initial load completed"));
|
||||
|
||||
init_done_rx.wait().await;
|
||||
startup_checkpoint("initial_tenant_load", "Initial load completed");
|
||||
STARTUP_IS_LOADING.set(0);
|
||||
|
||||
// initial logical sizes can now start, as they were waiting on init_done_rx.
|
||||
|
||||
scopeguard::ScopeGuard::into_inner(guard);
|
||||
|
||||
let init_done = std::time::Instant::now();
|
||||
let elapsed = init_done - init_started_at;
|
||||
|
||||
tracing::info!(
|
||||
elapsed_millis = elapsed.as_millis(),
|
||||
"Initial load completed"
|
||||
);
|
||||
|
||||
let mut init_sizes_done = std::pin::pin!(init_logical_size_done_rx.wait());
|
||||
|
||||
let timeout = conf.background_task_maximum_delay;
|
||||
@@ -396,12 +410,7 @@ fn start_pageserver(
|
||||
|
||||
let init_sizes_done = match tokio::time::timeout(timeout, &mut init_sizes_done).await {
|
||||
Ok(_) => {
|
||||
let now = std::time::Instant::now();
|
||||
tracing::info!(
|
||||
from_init_done_millis = (now - init_done).as_millis(),
|
||||
from_init_millis = (now - init_started_at).as_millis(),
|
||||
"Initial logical sizes completed"
|
||||
);
|
||||
startup_checkpoint("initial_logical_sizes", "Initial logical sizes completed");
|
||||
None
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -417,6 +426,7 @@ fn start_pageserver(
|
||||
|
||||
// allow background jobs to start
|
||||
drop(background_jobs_can_start);
|
||||
startup_checkpoint("background_jobs_can_start", "Starting background jobs");
|
||||
|
||||
if let Some(init_sizes_done) = init_sizes_done {
|
||||
// ending up here is not a bug; at the latest logical sizes will be queried by
|
||||
@@ -426,14 +436,11 @@ fn start_pageserver(
|
||||
|
||||
scopeguard::ScopeGuard::into_inner(guard);
|
||||
|
||||
let now = std::time::Instant::now();
|
||||
tracing::info!(
|
||||
from_init_done_millis = (now - init_done).as_millis(),
|
||||
from_init_millis = (now - init_started_at).as_millis(),
|
||||
"Initial logical sizes completed after timeout (background jobs already started)"
|
||||
);
|
||||
startup_checkpoint("initial_logical_sizes", "Initial logical sizes completed after timeout (background jobs already started)");
|
||||
|
||||
}
|
||||
|
||||
startup_checkpoint("complete", "Startup complete");
|
||||
};
|
||||
|
||||
async move {
|
||||
|
||||
@@ -7,7 +7,7 @@ pub mod disk_usage_eviction_task;
|
||||
pub mod http;
|
||||
pub mod import_datadir;
|
||||
pub mod keyspace;
|
||||
pub(crate) mod metrics;
|
||||
pub mod metrics;
|
||||
pub mod page_cache;
|
||||
pub mod page_service;
|
||||
pub mod pgdatadir_mapping;
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use metrics::metric_vec_duration::DurationResultObserver;
|
||||
use metrics::{
|
||||
register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
|
||||
register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge,
|
||||
register_uint_gauge_vec, Counter, CounterVec, Histogram, HistogramVec, IntCounter,
|
||||
IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
|
||||
register_int_counter, register_int_counter_vec, register_int_gauge, register_int_gauge_vec,
|
||||
register_uint_gauge, register_uint_gauge_vec, Counter, CounterVec, GaugeVec, Histogram,
|
||||
HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum::VariantNames;
|
||||
@@ -394,6 +394,35 @@ pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(||
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
/// How long did we take to start up? Broken down by labels to describe
|
||||
/// different phases of startup.
|
||||
pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
|
||||
register_gauge_vec!(
|
||||
"pageserver_startup_duration_seconds",
|
||||
"Time taken by phases of pageserver startup, in seconds",
|
||||
&["phase"]
|
||||
)
|
||||
.expect("Failed to register pageserver_startup_duration_seconds metric")
|
||||
});
|
||||
|
||||
pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
register_uint_gauge!(
|
||||
"pageserver_startup_is_loading",
|
||||
"1 while in initial startup load of tenants, 0 at other times"
|
||||
)
|
||||
.expect("Failed to register pageserver_startup_is_loading")
|
||||
});
|
||||
|
||||
/// How long did tenants take to go from construction to active state?
|
||||
pub(crate) static TENANT_ACTIVATION: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_tenant_activation_seconds",
|
||||
"Time taken by tenants to activate, in seconds",
|
||||
CRITICAL_OP_BUCKETS.into()
|
||||
)
|
||||
.expect("Failed to register pageserver_tenant_activation_seconds metric")
|
||||
});
|
||||
|
||||
/// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
|
||||
#[derive(Debug)]
|
||||
pub struct EvictionsWithLowResidenceDuration {
|
||||
|
||||
@@ -56,6 +56,7 @@ use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::import_datadir;
|
||||
use crate::is_uninit_mark;
|
||||
use crate::metrics::TENANT_ACTIVATION;
|
||||
use crate::metrics::{remove_tenant_metrics, TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC};
|
||||
use crate::repository::GcResult;
|
||||
use crate::task_mgr;
|
||||
@@ -1639,6 +1640,8 @@ impl Tenant {
|
||||
post_state = <&'static str>::from(&*current_state),
|
||||
"activation attempt finished"
|
||||
);
|
||||
|
||||
TENANT_ACTIVATION.observe(elapsed.as_secs_f64());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
env.neon_cli.create_branch("test_pageserver_restart")
|
||||
endpoint = env.endpoints.create_start("test_pageserver_restart")
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
@@ -52,8 +53,11 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
# pageserver does if a compute node connects and sends a request for the tenant
|
||||
# while it's still in Loading state. (It waits for the loading to finish, and then
|
||||
# processes the request.)
|
||||
tenant_load_delay_ms = 5000
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start(extra_env_vars={"FAILPOINTS": "before-loading-tenant=return(5000)"})
|
||||
env.pageserver.start(
|
||||
extra_env_vars={"FAILPOINTS": f"before-loading-tenant=return({tenant_load_delay_ms})"}
|
||||
)
|
||||
|
||||
# Check that it's in Loading state
|
||||
client = env.pageserver.http_client()
|
||||
@@ -65,6 +69,41 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
cur.execute("SELECT count(*) FROM foo")
|
||||
assert cur.fetchone() == (100000,)
|
||||
|
||||
# Validate startup time metrics
|
||||
metrics = pageserver_http.get_metrics()
|
||||
|
||||
# Expectation callbacks: arg t is sample value, arg p is the previous phase's sample value
|
||||
expectations = {
|
||||
"initial": lambda t, p: True, # make no assumptions about the initial time point, it could be 0 in theory
|
||||
# Initial tenant load should reflect the delay we injected
|
||||
"initial_tenant_load": lambda t, p: t >= (tenant_load_delay_ms / 1000.0) and t >= p,
|
||||
# Subsequent steps should occur in expected order
|
||||
"initial_logical_sizes": lambda t, p: t > 0 and t >= p,
|
||||
"background_jobs_can_start": lambda t, p: t > 0 and t >= p,
|
||||
"complete": lambda t, p: t > 0 and t >= p,
|
||||
}
|
||||
|
||||
prev_value = None
|
||||
for sample in metrics.query_all("pageserver_startup_duration_seconds"):
|
||||
labels = dict(sample.labels)
|
||||
phase = labels["phase"]
|
||||
log.info(f"metric {phase}={sample.value}")
|
||||
assert phase in expectations, f"Unexpected phase {phase}"
|
||||
assert expectations[phase](
|
||||
sample.value, prev_value
|
||||
), f"Unexpected value for {phase}: {sample.value}"
|
||||
prev_value = sample.value
|
||||
|
||||
# Startup is complete, this metric should exist but be zero
|
||||
assert metrics.query_one("pageserver_startup_is_loading").value == 0
|
||||
|
||||
# This histogram should have been populated, although we aren't specific about exactly
|
||||
# which bucket values: just nonzero
|
||||
assert any(
|
||||
bucket.value > 0
|
||||
for bucket in metrics.query_all("pageserver_tenant_activation_seconds_bucket")
|
||||
)
|
||||
|
||||
|
||||
# Test that repeatedly kills and restarts the page server, while the
|
||||
# safekeeper and compute node keep running.
|
||||
|
||||
Reference in New Issue
Block a user