diff --git a/storage_controller/src/metrics.rs b/storage_controller/src/metrics.rs index ea390df726..5ce2fb65e4 100644 --- a/storage_controller/src/metrics.rs +++ b/storage_controller/src/metrics.rs @@ -44,6 +44,15 @@ pub(crate) struct StorageControllerMetricGroup { /// Size of the in-memory map of pageserver_nodes pub(crate) storage_controller_pageserver_nodes: measured::Gauge, + /// Count of how many pageserver nodes from in-memory map have https configured + pub(crate) storage_controller_https_pageserver_nodes: measured::Gauge, + + /// Size of the in-memory map of safekeeper_nodes + pub(crate) storage_controller_safekeeper_nodes: measured::Gauge, + + /// Count of how many safekeeper nodes from in-memory map have https configured + pub(crate) storage_controller_https_safekeeper_nodes: measured::Gauge, + /// Reconciler tasks completed, broken down by success/failure/cancelled pub(crate) storage_controller_reconcile_complete: measured::CounterVec, diff --git a/storage_controller/src/node.rs b/storage_controller/src/node.rs index f667514517..e180c49b43 100644 --- a/storage_controller/src/node.rs +++ b/storage_controller/src/node.rs @@ -89,6 +89,10 @@ impl Node { self.scheduling = scheduling } + pub(crate) fn has_https_port(&self) -> bool { + self.listen_https_port.is_some() + } + /// Does this registration request match `self`? This is used when deciding whether a registration /// request should be allowed to update an existing record with the same node ID. pub(crate) fn registration_match(&self, register_req: &NodeRegisterRequest) -> bool { diff --git a/storage_controller/src/safekeeper.rs b/storage_controller/src/safekeeper.rs index 3b731acf7e..5a13ef750e 100644 --- a/storage_controller/src/safekeeper.rs +++ b/storage_controller/src/safekeeper.rs @@ -89,6 +89,9 @@ impl Safekeeper { pub(crate) fn availability(&self) -> SafekeeperState { self.availability.clone() } + pub(crate) fn has_https_port(&self) -> bool { + self.listen_https_port.is_some() + } /// Perform an operation (which is given a [`SafekeeperClient`]) with retries #[allow(clippy::too_many_arguments)] pub(crate) async fn with_client_retries( diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index e4db58cc84..c1c2e2c189 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1509,6 +1509,10 @@ impl Service { .metrics_group .storage_controller_pageserver_nodes .set(nodes.len() as i64); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_https_pageserver_nodes + .set(nodes.values().filter(|n| n.has_https_port()).count() as i64); tracing::info!("Loading safekeepers from database..."); let safekeepers = persistence @@ -1526,6 +1530,14 @@ impl Service { let safekeepers: HashMap = safekeepers.into_iter().map(|n| (n.get_id(), n)).collect(); tracing::info!("Loaded {} safekeepers from database.", safekeepers.len()); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_safekeeper_nodes + .set(safekeepers.len() as i64); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_https_safekeeper_nodes + .set(safekeepers.values().filter(|s| s.has_https_port()).count() as i64); tracing::info!("Loading shards from database..."); let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?; @@ -6254,6 +6266,10 @@ impl Service { .metrics_group .storage_controller_pageserver_nodes .set(locked.nodes.len() as i64); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_https_pageserver_nodes + .set(locked.nodes.values().filter(|n| n.has_https_port()).count() as i64); locked.scheduler.node_remove(node_id); @@ -6345,6 +6361,10 @@ impl Service { .metrics_group .storage_controller_pageserver_nodes .set(nodes.len() as i64); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_https_pageserver_nodes + .set(nodes.values().filter(|n| n.has_https_port()).count() as i64); } } @@ -6569,6 +6589,10 @@ impl Service { .metrics_group .storage_controller_pageserver_nodes .set(locked.nodes.len() as i64); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_https_pageserver_nodes + .set(locked.nodes.values().filter(|n| n.has_https_port()).count() as i64); match registration_status { RegistrationStatus::New => { diff --git a/storage_controller/src/service/safekeeper_service.rs b/storage_controller/src/service/safekeeper_service.rs index 7f2c63b9af..099d0305ba 100644 --- a/storage_controller/src/service/safekeeper_service.rs +++ b/storage_controller/src/service/safekeeper_service.rs @@ -5,6 +5,7 @@ use std::time::Duration; use super::safekeeper_reconciler::ScheduleRequest; use crate::heartbeater::SafekeeperState; +use crate::metrics; use crate::persistence::{ DatabaseError, SafekeeperTimelineOpKind, TimelinePendingOpPersistence, TimelinePersistence, }; @@ -590,6 +591,20 @@ impl Service { } } locked.safekeepers = Arc::new(safekeepers); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_safekeeper_nodes + .set(locked.safekeepers.len() as i64); + metrics::METRICS_REGISTRY + .metrics_group + .storage_controller_https_safekeeper_nodes + .set( + locked + .safekeepers + .values() + .filter(|s| s.has_https_port()) + .count() as i64, + ); } Ok(()) }