Add endpoint and storcon cli cmd to set sk scheduling policy (#10400)

Implementing the last missing endpoint of #9981, this adds support to
set the scheduling policy of an individual safekeeper, as specified in
the RFC. However, unlike in the RFC we call the endpoint
`scheduling_policy` not `status`

Closes #9981.

As for why not use the upsert endpoint for this: we want to have the
safekeeper upsert endpoint be used for testing and for deploying new
safekeepers, but not for changes of the scheduling policy. We don't want
to change any of the other fields when marking a safekeeper as
decommissioned for example, so we'd have to first fetch them only to
then specify them again. Of course one can also design an endpoint where
one can omit any field and it doesn't get modified, but it's still not
great for observability to put everything into one big "change something
about this safekeeper" endpoint.
This commit is contained in:
Arpad Müller
2025-01-15 19:15:30 +01:00
committed by GitHub
parent 3d41069dc4
commit efaec6cdf8
7 changed files with 155 additions and 9 deletions

View File

@@ -15,7 +15,7 @@ use metrics::{BuildInfo, NeonMetrics};
use pageserver_api::controller_api::{
MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse,
MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse,
ShardsPreferredAzsRequest, TenantCreateRequest,
SafekeeperSchedulingPolicyRequest, ShardsPreferredAzsRequest, TenantCreateRequest,
};
use pageserver_api::models::{
TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
@@ -1305,6 +1305,35 @@ async fn handle_upsert_safekeeper(mut req: Request<Body>) -> Result<Response<Bod
.unwrap())
}
/// Sets the scheduling policy of the specified safekeeper
async fn handle_safekeeper_scheduling_policy(
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let body = json_request::<SafekeeperSchedulingPolicyRequest>(&mut req).await?;
let id = parse_request_param::<i64>(&req, "id")?;
let req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
return res;
}
ForwardOutcome::NotForwarded(req) => req,
};
let state = get_state(&req);
state
.service
.set_safekeeper_scheduling_policy(id, body.scheduling_policy)
.await?;
Ok(Response::builder()
.status(StatusCode::NO_CONTENT)
.body(Body::empty())
.unwrap())
}
/// Common wrapper for request handlers that call into Service and will operate on tenants: they must only
/// be allowed to run if Service has finished its initial reconciliation.
async fn tenant_service_handler<R, H>(
@@ -1873,7 +1902,18 @@ pub fn make_router(
})
.post("/control/v1/safekeeper/:id", |r| {
// id is in the body
named_request_span(r, handle_upsert_safekeeper, RequestName("v1_safekeeper"))
named_request_span(
r,
handle_upsert_safekeeper,
RequestName("v1_safekeeper_post"),
)
})
.post("/control/v1/safekeeper/:id/scheduling_policy", |r| {
named_request_span(
r,
handle_safekeeper_scheduling_policy,
RequestName("v1_safekeeper_status"),
)
})
// Tenant Shard operations
.put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {

View File

@@ -1104,6 +1104,37 @@ impl Persistence {
})
.await
}
pub(crate) async fn set_safekeeper_scheduling_policy(
&self,
id_: i64,
scheduling_policy_: SkSchedulingPolicy,
) -> Result<(), DatabaseError> {
use crate::schema::safekeepers::dsl::*;
self.with_conn(move |conn| -> DatabaseResult<()> {
#[derive(Insertable, AsChangeset)]
#[diesel(table_name = crate::schema::safekeepers)]
struct UpdateSkSchedulingPolicy<'a> {
id: i64,
scheduling_policy: &'a str,
}
let scheduling_policy_ = String::from(scheduling_policy_);
let rows_affected = diesel::update(safekeepers.filter(id.eq(id_)))
.set(scheduling_policy.eq(scheduling_policy_))
.execute(conn)?;
if rows_affected != 1 {
return Err(DatabaseError::Logical(format!(
"unexpected number of rows ({rows_affected})",
)));
}
Ok(())
})
.await
}
}
/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably

View File

@@ -47,7 +47,7 @@ use pageserver_api::{
AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability,
NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy,
SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest,
ShardsPreferredAzsResponse, TenantCreateRequest, TenantCreateResponse,
ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse,
TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard,
TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest,
TenantShardMigrateResponse,
@@ -7651,6 +7651,16 @@ impl Service {
self.persistence.safekeeper_upsert(record).await
}
pub(crate) async fn set_safekeeper_scheduling_policy(
&self,
id: i64,
scheduling_policy: SkSchedulingPolicy,
) -> Result<(), DatabaseError> {
self.persistence
.set_safekeeper_scheduling_policy(id, scheduling_policy)
.await
}
pub(crate) async fn update_shards_preferred_azs(
&self,
req: ShardsPreferredAzsRequest,