Add safekeeper utilization endpoint (#10429)

Add an endpoint to obtain the utilization of a safekeeper. Future
changes to the storage controller can use this endpoint to find the most
suitable safekeepers for newly created timelines, analogously to how
it's done for pageservers already.

Initially we just want to assign by timeline count, then we can iterate
from there.

Part of https://github.com/neondatabase/neon/issues/9011
This commit is contained in:
Arpad Müller
2025-01-17 22:43:52 +01:00
committed by GitHub
parent 6975228a76
commit b0f34099f9
4 changed files with 33 additions and 0 deletions

View File

@@ -277,3 +277,8 @@ pub struct TimelineTermBumpResponse {
pub previous_term: u64,
pub current_term: u64,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SafekeeperUtilization {
pub timeline_count: u64,
}

View File

@@ -102,6 +102,11 @@ impl Client {
self.get(&uri).await
}
pub async fn utilization(&self) -> Result<reqwest::Response> {
let uri = format!("{}/v1/utilization/", self.mgmt_api_endpoint);
self.get(&uri).await
}
async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
self.request(Method::GET, uri, ()).await
}

View File

@@ -127,6 +127,13 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
json_response(StatusCode::OK, ())
}
async fn utilization_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let global_timelines = get_global_timelines(&request);
let utilization = global_timelines.get_timeline_counts();
json_response(StatusCode::OK, utilization)
}
/// List all (not deleted) timelines.
/// Note: it is possible to do the same with debug_dump.
async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -620,6 +627,7 @@ pub fn make_router(
failpoints_handler(r, cancel).await
})
})
.get("/v1/uzilization", |r| request_span(r, utilization_handler))
.delete("/v1/tenant/:tenant_id", |r| {
request_span(r, tenant_delete_handler)
})

View File

@@ -13,6 +13,7 @@ use anyhow::{bail, Context, Result};
use camino::Utf8PathBuf;
use camino_tempfile::Utf8TempDir;
use safekeeper_api::membership::Configuration;
use safekeeper_api::models::SafekeeperUtilization;
use safekeeper_api::ServerInfo;
use serde::Serialize;
use std::collections::HashMap;
@@ -416,6 +417,20 @@ impl GlobalTimelines {
.collect()
}
/// Returns statistics about timeline counts
pub fn get_timeline_counts(&self) -> SafekeeperUtilization {
let global_lock = self.state.lock().unwrap();
let timeline_count = global_lock
.timelines
.values()
.filter(|t| match t {
GlobalMapTimeline::CreationInProgress => false,
GlobalMapTimeline::Timeline(t) => !t.is_cancelled(),
})
.count() as u64;
SafekeeperUtilization { timeline_count }
}
/// Returns all timelines belonging to a given tenant. Used for deleting all timelines of a tenant,
/// and that's why it can return cancelled timelines, to retry deleting them.
fn get_all_for_tenant(&self, tenant_id: TenantId) -> Vec<Arc<Timeline>> {