From b0f34099f90cfa08223ed653a7c7460943f34f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 17 Jan 2025 22:43:52 +0100 Subject: [PATCH] Add safekeeper utilization endpoint (#10429) Add an endpoint to obtain the utilization of a safekeeper. Future changes to the storage controller can use this endpoint to find the most suitable safekeepers for newly created timelines, analogously to how it's done for pageservers already. Initially we just want to assign by timeline count, then we can iterate from there. Part of https://github.com/neondatabase/neon/issues/9011 --- libs/safekeeper_api/src/models.rs | 5 +++++ safekeeper/client/src/mgmt_api.rs | 5 +++++ safekeeper/src/http/routes.rs | 8 ++++++++ safekeeper/src/timelines_global_map.rs | 15 +++++++++++++++ 4 files changed, 33 insertions(+) diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs index b5fa903820..30418b0efd 100644 --- a/libs/safekeeper_api/src/models.rs +++ b/libs/safekeeper_api/src/models.rs @@ -277,3 +277,8 @@ pub struct TimelineTermBumpResponse { pub previous_term: u64, pub current_term: u64, } + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct SafekeeperUtilization { + pub timeline_count: u64, +} diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs index f78745043a..5727f32509 100644 --- a/safekeeper/client/src/mgmt_api.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -102,6 +102,11 @@ impl Client { self.get(&uri).await } + pub async fn utilization(&self) -> Result { + let uri = format!("{}/v1/utilization/", self.mgmt_api_endpoint); + self.get(&uri).await + } + async fn get(&self, uri: U) -> Result { self.request(Method::GET, uri, ()).await } diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 4b9fb9eb67..7ec08ecf9a 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -127,6 +127,13 @@ async fn timeline_create_handler(mut request: Request) -> Result) -> 
Result, ApiError> { + check_permission(&request, None)?; + let global_timelines = get_global_timelines(&request); + let utilization = global_timelines.get_timeline_counts(); + json_response(StatusCode::OK, utilization) +} + /// List all (not deleted) timelines. /// Note: it is possible to do the same with debug_dump. async fn timeline_list_handler(request: Request) -> Result, ApiError> { @@ -620,6 +627,7 @@ pub fn make_router( failpoints_handler(r, cancel).await }) }) + .get("/v1/utilization", |r| request_span(r, utilization_handler)) .delete("/v1/tenant/:tenant_id", |r| { request_span(r, tenant_delete_handler) }) diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index a701534f65..01c6aff6c3 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -13,6 +13,7 @@ use anyhow::{bail, Context, Result}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; use safekeeper_api::membership::Configuration; +use safekeeper_api::models::SafekeeperUtilization; use safekeeper_api::ServerInfo; use serde::Serialize; use std::collections::HashMap; @@ -416,6 +417,20 @@ impl GlobalTimelines { .collect() } + /// Returns statistics about timeline counts + pub fn get_timeline_counts(&self) -> SafekeeperUtilization { + let global_lock = self.state.lock().unwrap(); + let timeline_count = global_lock + .timelines + .values() + .filter(|t| match t { + GlobalMapTimeline::CreationInProgress => false, + GlobalMapTimeline::Timeline(t) => !t.is_cancelled(), + }) + .count() as u64; + SafekeeperUtilization { timeline_count } + } + /// Returns all timelines belonging to a given tenant. Used for deleting all timelines of a tenant, /// and that's why it can return cancelled timelines, to retry deleting them. fn get_all_for_tenant(&self, tenant_id: TenantId) -> Vec> {