pageserver: add initdb metrics (#10434)

## Problem

Initdb observability is poor.

## Summary of changes

Add some metrics so we can figure out which part, if any, is slow.

Closes https://github.com/neondatabase/neon/issues/10423
This commit is contained in:
Vlad Lazar
2025-01-17 14:51:33 +00:00
committed by GitHub
parent 053abff71f
commit 6975228a76
2 changed files with 39 additions and 1 deletions

View File

@@ -100,6 +100,32 @@ pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
register_uint_gauge!(
"pageserver_concurrent_initdb",
"Number of initdb processes running"
)
.expect("failed to define a metric")
});
pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_initdb_semaphore_seconds_global",
"Time spent getting a permit from the global initdb semaphore",
STORAGE_OP_BUCKETS.into()
)
.expect("failed to define metric")
});
pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_initdb_seconds_global",
"Time spent performing initdb",
STORAGE_OP_BUCKETS.into()
)
.expect("failed to define metric")
});
// Metrics collected on operations on the storage repository.
#[derive(
Clone, Copy, enum_map::Enum, strum_macros::EnumString, strum_macros::Display, IntoStaticStr,

View File

@@ -95,6 +95,9 @@ use crate::deletion_queue::DeletionQueueError;
use crate::import_datadir;
use crate::is_uninit_mark;
use crate::l0_flush::L0FlushGlobalState;
use crate::metrics::CONCURRENT_INITDBS;
use crate::metrics::INITDB_RUN_TIME;
use crate::metrics::INITDB_SEMAPHORE_ACQUISITION_TIME;
use crate::metrics::TENANT;
use crate::metrics::{
remove_tenant_metrics, BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN,
@@ -5347,8 +5350,17 @@ async fn run_initdb(
initdb_bin_path, initdb_target_dir, initdb_lib_dir,
);
let _permit = INIT_DB_SEMAPHORE.acquire().await;
let _permit = {
let _timer = INITDB_SEMAPHORE_ACQUISITION_TIME.start_timer();
INIT_DB_SEMAPHORE.acquire().await
};
CONCURRENT_INITDBS.inc();
scopeguard::defer! {
CONCURRENT_INITDBS.dec();
}
let _timer = INITDB_RUN_TIME.start_timer();
let res = postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
superuser: &conf.superuser,
locale: &conf.locale,