mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 05:30:37 +00:00
storcon: track preferred AZ for each tenant shard (#8937)
## Problem We want to do AZ-aware scheduling, but don't have enough metadata. ## Summary of changes Introduce a `preferred_az_id` concept for each managed tenant shard. In a future PR, the scheduler will use this as a soft preference. The idea is to try to keep the shard attachments within the same AZ. Under the assumption that the compute was placed in the correct AZ, this reduces the chances of cross-AZ traffic between compute and PS. In terms of code changes we: 1. Add a new nullable `preferred_az_id` column to the `tenant_shards` table. Also include an in-memory counterpart. 2. Populate the preferred AZ on tenant creation and shard splits. 3. Add an endpoint that allows bulk-setting preferred AZs. (3) gives us the migration path. I'll write a script which queries the cplane db in the region and sets the preferred AZ of all shards with an active compute to the AZ of said compute. For shards without an active compute, I'll use the AZ of the currently attached pageserver since this is what cplane uses now to schedule computes.
This commit is contained in:
@@ -14,7 +14,7 @@ use metrics::{BuildInfo, NeonMetrics};
|
||||
use pageserver_api::controller_api::{
|
||||
MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse,
|
||||
MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse,
|
||||
TenantCreateRequest,
|
||||
ShardsPreferredAzsRequest, TenantCreateRequest,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
|
||||
@@ -688,6 +688,18 @@ async fn handle_tenant_update_policy(mut req: Request<Body>) -> Result<Response<
|
||||
)
|
||||
}
|
||||
|
||||
async fn handle_update_preferred_azs(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permissions(&req, Scope::Admin)?;
|
||||
|
||||
let azs_req = json_request::<ShardsPreferredAzsRequest>(&mut req).await?;
|
||||
let state = get_state(&req);
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
state.service.update_shards_preferred_azs(azs_req).await?,
|
||||
)
|
||||
}
|
||||
|
||||
async fn handle_step_down(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permissions(&req, Scope::Admin)?;
|
||||
|
||||
@@ -1174,6 +1186,13 @@ pub fn make_router(
|
||||
RequestName("control_v1_tenant_policy"),
|
||||
)
|
||||
})
|
||||
.put("/control/v1/preferred_azs", |r| {
|
||||
named_request_span(
|
||||
r,
|
||||
handle_update_preferred_azs,
|
||||
RequestName("control_v1_preferred_azs"),
|
||||
)
|
||||
})
|
||||
.put("/control/v1/step_down", |r| {
|
||||
named_request_span(r, handle_step_down, RequestName("control_v1_step_down"))
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user