A few SC changes (#12615)

## Summary of changes
A bunch of no-op changes: a new endpoint-scoped permission check helper, handling for compute self-reconfiguration in the `neon_local` notification path, additional Hadron storage controller metrics, and helpers for mapping pod names to node IDs.

---------

Co-authored-by: Vlad Lazar <vlad@neon.tech>
HaoyuHuang
2025-07-17 06:14:36 -07:00
committed by GitHub
parent 4559ba79b6
commit b7fc5a2fe0
11 changed files with 203 additions and 10 deletions

View File

@@ -1,4 +1,5 @@
use utils::auth::{AuthError, Claims, Scope};
use uuid::Uuid;
pub fn check_permission(claims: &Claims, required_scope: Scope) -> Result<(), AuthError> {
if claims.scope != required_scope {
@@ -7,3 +8,14 @@ pub fn check_permission(claims: &Claims, required_scope: Scope) -> Result<(), Au
Ok(())
}
#[allow(dead_code)]
pub fn check_endpoint_permission(claims: &Claims, endpoint_id: Uuid) -> Result<(), AuthError> {
if claims.scope != Scope::TenantEndpoint {
return Err(AuthError("Scope mismatch. Permission denied".into()));
}
if claims.endpoint_id != Some(endpoint_id) {
return Err(AuthError("Endpoint id mismatch. Permission denied".into()));
}
Ok(())
}
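
For illustration, a minimal self-contained sketch of how the new endpoint-scoped check behaves, using stand-in `Claims`/`Scope`/`AuthError` types in place of the real ones from `utils::auth` (the stand-ins and the `u64` endpoint id are simplifications for the sketch only):

```rust
// Stand-in types mirroring utils::auth, for illustration only.
#[derive(PartialEq)]
enum Scope {
    TenantEndpoint,
}

struct AuthError(String);

struct Claims {
    scope: Scope,
    endpoint_id: Option<u64>, // the real field uses uuid::Uuid
}

fn check_endpoint_permission(claims: &Claims, endpoint_id: u64) -> Result<(), AuthError> {
    if claims.scope != Scope::TenantEndpoint {
        return Err(AuthError("Scope mismatch. Permission denied".into()));
    }
    if claims.endpoint_id != Some(endpoint_id) {
        return Err(AuthError("Endpoint id mismatch. Permission denied".into()));
    }
    Ok(())
}

fn main() {
    let claims = Claims {
        scope: Scope::TenantEndpoint,
        endpoint_id: Some(42),
    };
    // A matching endpoint id passes; any other id is rejected.
    assert!(check_endpoint_permission(&claims, 42).is_ok());
    if let Err(AuthError(msg)) = check_endpoint_permission(&claims, 7) {
        println!("rejected: {msg}");
    }
}
```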

View File

@@ -810,6 +810,7 @@ impl ComputeHook {
let send_locked = tokio::select! {
guard = send_lock.lock_owned() => {guard},
_ = cancel.cancelled() => {
tracing::info!("Notification cancelled while waiting for lock");
return Err(NotifyError::ShuttingDown)
}
};
@@ -851,11 +852,32 @@ impl ComputeHook {
let notify_url = compute_hook_url.as_ref().unwrap();
self.do_notify(notify_url, &request, cancel).await
} else {
self.do_notify_local::<M>(&request).await.map_err(|e| {
match self.do_notify_local::<M>(&request).await.map_err(|e| {
// This path is for testing only, so munge the error into our prod-style error type.
tracing::error!("neon_local notification hook failed: {e}");
NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR)
})
if e.to_string().contains("refresh-configuration-pending") {
// If the error message mentions "refresh-configuration-pending", it means the compute node
// rejected our notification request because it is already trying to reconfigure itself. We
// can proceed with the rest of the reconciliation process as the compute node has already
// discovered the need to reconfigure and will eventually update its configuration once
// we update the pageserver mappings. In fact, it is important that we continue with
// reconciliation to make sure we update the pageserver mappings to unblock the compute node.
tracing::info!("neon_local notification hook failed: {e}");
tracing::info!("Notification failed likely due to compute node self-reconfiguration, will retry.");
Ok(())
} else {
tracing::error!("neon_local notification hook failed: {e}");
Err(NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR))
}
}) {
// Compute node accepted the notification request. Ok to proceed.
Ok(_) => Ok(()),
// Compute node rejected our request but it is already self-reconfiguring. Ok to proceed.
Err(Ok(_)) => Ok(()),
// Fail the reconciliation attempt in all other cases. Recall that this whole code path involving
// neon_local is for testing only. In production we always retry failed reconciliations so we
// don't have any dead ends here.
Err(Err(e)) => Err(e),
}
};
match result {
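
One subtlety above: `map_err` is given a closure that itself returns a `Result`, so the whole expression has type `Result<_, Result<(), NotifyError>>` and the outer `match` treats `Err(Ok(()))` as "rejected, but benign". A self-contained sketch of the same pattern (all names here are illustrative, not from the codebase):

```rust
#[derive(Debug)]
struct FatalError;

// Pretend notification call that can fail with a plain string error.
fn notify(msg: &str) -> Result<(), String> {
    if msg.contains("refresh-configuration-pending") {
        Err("refresh-configuration-pending".to_string())
    } else if msg.contains("boom") {
        Err("boom".to_string())
    } else {
        Ok(())
    }
}

fn notify_and_classify(msg: &str) -> Result<(), FatalError> {
    // map_err's closure returns Result<(), FatalError>, so the overall value
    // has type Result<(), Result<(), FatalError>>: Err(Ok(())) means the call
    // failed but we can safely proceed; Err(Err(_)) is a genuine failure.
    match notify(msg).map_err(|e| {
        if e.contains("refresh-configuration-pending") {
            Ok(())
        } else {
            Err(FatalError)
        }
    }) {
        // Call succeeded.
        Ok(()) => Ok(()),
        // Call failed, but for a benign reason: keep going.
        Err(Ok(())) => Ok(()),
        // Anything else fails the operation.
        Err(Err(e)) => Err(e),
    }
}

fn main() {
    assert!(notify_and_classify("ok").is_ok());
    assert!(notify_and_classify("refresh-configuration-pending").is_ok());
    assert!(notify_and_classify("boom").is_err());
}
```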

View File

@@ -151,6 +151,29 @@ pub(crate) struct StorageControllerMetricGroup {
/// Indicator of completed safekeeper reconciles, broken down by safekeeper.
pub(crate) storage_controller_safekeeper_reconciles_complete:
measured::CounterVec<SafekeeperReconcilerLabelGroupSet>,
/* BEGIN HADRON */
/// Hadron `config_watcher` reconciliation runs completed, broken down by success/failure.
pub(crate) storage_controller_config_watcher_complete:
measured::CounterVec<ConfigWatcherCompleteLabelGroupSet>,
/// Hadron long waits for node state changes during drain and fill.
pub(crate) storage_controller_drain_and_fill_long_waits: measured::Counter,
/// Set to 1 if we detect any page server pods with pending node pool rotation annotations.
/// Requires manual reset after oncall investigation.
pub(crate) storage_controller_ps_node_pool_rotation_pending: measured::Gauge,
/// Hadron storage scrubber status.
pub(crate) storage_controller_storage_scrub_status:
measured::CounterVec<StorageScrubberLabelGroupSet>,
/// Desired number of pageservers managed by the storage controller
pub(crate) storage_controller_num_pageservers_desired: measured::Gauge,
/// Desired number of safekeepers managed by the storage controller
pub(crate) storage_controller_num_safekeeper_desired: measured::Gauge,
/* END HADRON */
}
impl StorageControllerMetrics {
@@ -173,6 +196,10 @@ impl Default for StorageControllerMetrics {
.storage_controller_reconcile_complete
.init_all_dense();
metrics_group
.storage_controller_config_watcher_complete
.init_all_dense();
Self {
metrics_group,
encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
@@ -262,11 +289,48 @@ pub(crate) struct ReconcileLongRunningLabelGroup<'a> {
pub(crate) sequence: &'a str,
}
#[derive(measured::LabelGroup, Clone)]
#[label(set = StorageScrubberLabelGroupSet)]
pub(crate) struct StorageScrubberLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
pub(crate) tenant_id: &'a str,
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
pub(crate) shard_number: &'a str,
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
pub(crate) timeline_id: &'a str,
pub(crate) outcome: StorageScrubberOutcome,
}
#[derive(FixedCardinalityLabel, Clone, Copy)]
pub(crate) enum StorageScrubberOutcome {
PSOk,
PSWarning,
PSError,
PSOrphan,
SKOk,
SKError,
}
#[derive(measured::LabelGroup)]
#[label(set = ConfigWatcherCompleteLabelGroupSet)]
pub(crate) struct ConfigWatcherCompleteLabelGroup {
// Reuse the ReconcileOutcome from the SC's reconciliation metrics.
pub(crate) status: ReconcileOutcome,
}
#[derive(FixedCardinalityLabel, Clone, Copy)]
pub(crate) enum ReconcileOutcome {
// Successfully reconciled everything.
#[label(rename = "ok")]
Success,
// Used by tenant-shard reconciler only. Reconciled pageserver state successfully,
// but failed to deliver the compute notification. This error is typically transient,
// but if its occurrence keeps increasing, it should be investigated.
#[label(rename = "ok_no_notify")]
SuccessNoNotify,
// We failed to reconcile some state and the reconciliation will be retried.
Error,
// Reconciliation was cancelled.
Cancel,
}
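
A rough sketch of how the new `config_watcher` counter could be bumped at a call site. The `inc(label_group)` call follows the pattern used by the existing reconcile counters in this file, but treat the exact helper name and the `measured` API usage as assumptions to verify rather than code from this diff:

```rust
// Hypothetical helper illustrating how a config_watcher run might be recorded.
// The field path mirrors the declarations above; verify the measured::CounterVec
// API before relying on this sketch.
fn record_config_watcher_run(
    metrics: &StorageControllerMetricGroup,
    outcome: ReconcileOutcome,
) {
    metrics
        .storage_controller_config_watcher_complete
        .inc(ConfigWatcherCompleteLabelGroup { status: outcome });
}
```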

View File

@@ -51,6 +51,39 @@ pub(crate) struct Node {
cancel: CancellationToken,
}
#[allow(dead_code)]
const ONE_MILLION: i64 = 1000000;
/// Converts a pool ID to a large number that can be used to assign unique IDs to pods in StatefulSets.
/// For example, if pool_id is 1, then the pods have NodeIds 1000000, 1000001, 1000002, etc.
/// If pool_id is None, then the pods have NodeIds 0, 1, 2, etc.
#[allow(dead_code)]
pub fn transform_pool_id(pool_id: Option<i32>) -> i64 {
match pool_id {
Some(id) => (id as i64) * ONE_MILLION,
None => 0,
}
}
#[allow(dead_code)]
pub fn get_pool_id_from_node_id(node_id: i64) -> i32 {
(node_id / ONE_MILLION) as i32
}
/// Example pod name: page-server-0-1, safe-keeper-1-0
#[allow(dead_code)]
pub fn get_node_id_from_pod_name(pod_name: &str) -> anyhow::Result<NodeId> {
let parts: Vec<&str> = pod_name.split('-').collect();
if parts.len() != 4 {
return Err(anyhow::anyhow!("Invalid pod name: {}", pod_name));
}
let pool_id = parts[2].parse::<i32>()?;
let node_offset = parts[3].parse::<i64>()?;
let node_id = transform_pool_id(Some(pool_id)) + node_offset;
Ok(NodeId(node_id as u64))
}
/// When updating [`Node::availability`] we use this type to indicate to the caller
/// whether/how they changed it.
pub(crate) enum AvailabilityTransition {
@@ -403,3 +436,25 @@ impl std::fmt::Debug for Node {
write!(f, "{} ({})", self.id, self.listen_http_addr)
}
}
#[cfg(test)]
mod tests {
use utils::id::NodeId;
use crate::node::get_node_id_from_pod_name;
#[test]
fn test_get_node_id_from_pod_name() {
let pod_name = "page-server-3-12";
let node_id = get_node_id_from_pod_name(pod_name).unwrap();
assert_eq!(node_id, NodeId(3000012));
let pod_name = "safe-keeper-1-0";
let node_id = get_node_id_from_pod_name(pod_name).unwrap();
assert_eq!(node_id, NodeId(1000000));
let pod_name = "invalid-pod-name";
let result = get_node_id_from_pod_name(pod_name);
assert!(result.is_err());
}
}