storage controller: API + CLI for migrating secondary locations (#10284)

## Problem

Currently, if we want to move a secondary location there isn't a neat way to
do that: we only have a migration API for the attached location, and it is
only clean to use that if you've manually created a secondary via the
pageserver API in the place you're going to move it to.

Secondary migration API enables:
- Moving the secondary somewhere because we would like to later move the
attached location there.
- Moving the secondary location because we just want to reclaim some disk
space from its current location.

## Summary of changes

- Add `/migrate_secondary` API
- Add `tenant-shard-migrate-secondary` CLI
- Add tests for above
This commit is contained in:
John Spray
2025-01-13 14:52:43 +00:00
committed by GitHub
parent ceacc29609
commit ef8bfacd6b
7 changed files with 210 additions and 32 deletions

View File

@@ -124,7 +124,10 @@ impl ComputeHookTenant {
if let Some(shard_idx) = shard_idx {
sharded.shards.remove(shard_idx);
} else {
tracing::warn!("Shard not found while handling detach")
// This is a valid but niche case, where the tenant was previously attached
// as a Secondary location and then detached, so has no previously notified
// state.
tracing::info!("Shard not found while handling detach")
}
}
ComputeHookTenant::Unsharded(_) => {
@@ -761,7 +764,10 @@ impl ComputeHook {
let mut state_locked = self.state.lock().unwrap();
match state_locked.entry(tenant_shard_id.tenant_id) {
Entry::Vacant(_) => {
tracing::warn!("Compute hook tenant not found for detach");
// This is a valid but niche case, where the tenant was previously attached
// as a Secondary location and then detached, so has no previously notified
// state.
tracing::info!("Compute hook tenant not found for detach");
}
Entry::Occupied(mut e) => {
let sharded = e.get().is_sharded();

View File

@@ -690,7 +690,8 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError
};
let state = get_state(&req);
let nodes = state.service.node_list().await?;
let mut nodes = state.service.node_list().await?;
nodes.sort_by_key(|n| n.get_id());
let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
json_response(StatusCode::OK, api_nodes)
@@ -1005,6 +1006,29 @@ async fn handle_tenant_shard_migrate(
)
}
/// HTTP handler that asks the service to migrate a tenant shard's secondary location.
///
/// The caller must hold `Scope::Admin`. The request is first offered to
/// `maybe_forward`; if it was forwarded, the forwarded response is returned
/// as-is and nothing else happens here. Otherwise the `tenant_shard_id` path
/// parameter and a JSON `TenantShardMigrateRequest` body are parsed and handed
/// to `Service::tenant_shard_migrate_secondary`, whose result is returned as a
/// `200 OK` JSON response.
///
/// # Errors
///
/// Propagates any `ApiError` from the permission check, parameter/body
/// parsing, or the service call.
async fn handle_tenant_shard_migrate_secondary(
    service: Arc<Service>,
    req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

    // Only keep handling the request locally when it was not forwarded.
    let mut req = match maybe_forward(req).await {
        ForwardOutcome::NotForwarded(req) => req,
        ForwardOutcome::Forwarded(res) => return res,
    };

    let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
    let migrate_req = json_request::<TenantShardMigrateRequest>(&mut req).await?;

    let migrate_response = service
        .tenant_shard_migrate_secondary(tenant_shard_id, migrate_req)
        .await?;
    json_response(StatusCode::OK, migrate_response)
}
async fn handle_tenant_shard_cancel_reconcile(
service: Arc<Service>,
req: Request<Body>,
@@ -1855,6 +1879,16 @@ pub fn make_router(
RequestName("control_v1_tenant_migrate"),
)
})
.put(
"/control/v1/tenant/:tenant_shard_id/migrate_secondary",
|r| {
tenant_service_handler(
r,
handle_tenant_shard_migrate_secondary,
RequestName("control_v1_tenant_migrate_secondary"),
)
},
)
.put(
"/control/v1/tenant/:tenant_shard_id/cancel_reconcile",
|r| {

View File

@@ -5055,6 +5055,69 @@ impl Service {
Ok(TenantShardMigrateResponse {})
}
/// Move the secondary location of `tenant_shard_id` to the node named in `migrate_req`.
///
/// Behaviour, in order:
/// - If the shard's only secondary is already the requested node, the intent is
///   left untouched.
/// - If the shard is already *attached* on the requested node, the intent is
///   left untouched (no secondary is created there).
/// - Otherwise every existing secondary is removed from the intent, the
///   requested node is pushed as the new secondary, and the shard's sequence
///   is advanced.
///
/// In all three cases `maybe_reconcile_shard` is then invoked; if it yields a
/// waiter, this function waits on it for up to `RECONCILE_TIMEOUT`.
///
/// An unavailable target node only produces a warning: the migration proceeds.
///
/// # Errors
///
/// - `ApiError::BadRequest` if the requested node is unknown.
/// - `ApiError::NotFound` if the tenant shard is unknown.
/// - Whatever error `wait_timeout` produces, converted via `?`.
pub(crate) async fn tenant_shard_migrate_secondary(
    &self,
    tenant_shard_id: TenantShardId,
    migrate_req: TenantShardMigrateRequest,
) -> Result<TenantShardMigrateResponse, ApiError> {
    let target_id = migrate_req.node_id;

    // The write guard on `self.inner` lives only inside this block, so it is
    // dropped before we await the reconcile waiter below.
    let waiter = {
        let mut locked = self.inner.write().unwrap();
        let (nodes, tenants, scheduler) = locked.parts_mut();

        let node = nodes.get(&target_id).ok_or_else(|| {
            ApiError::BadRequest(anyhow::anyhow!("Node {} not found", target_id))
        })?;

        if !node.is_available() {
            // Warn but proceed: the caller may intend to manually adjust the placement of
            // a shard even if the node is down, e.g. if intervening during an incident.
            tracing::warn!("Migrating to unavailable node {node}");
        }

        let shard = match tenants.get_mut(&tenant_shard_id) {
            Some(shard) => shard,
            None => {
                return Err(ApiError::NotFound(
                    anyhow::anyhow!("Tenant shard not found").into(),
                ));
            }
        };

        let current_secondaries = shard.intent.get_secondary();
        let already_sole_secondary =
            current_secondaries.len() == 1 && current_secondaries[0] == target_id;

        if already_sole_secondary {
            tracing::info!(
                "Migrating secondary to {node}: intent is unchanged {:?}",
                shard.intent
            );
        } else if *shard.intent.get_attached() == Some(target_id) {
            tracing::info!("Migrating secondary to {node}: already attached where we were asked to create a secondary");
        } else {
            // Iterate over a snapshot so the intent can be mutated while we walk it.
            for previous in shard.intent.get_secondary().clone() {
                shard.intent.remove_secondary(scheduler, previous);
            }
            shard.intent.push_secondary(scheduler, target_id);

            shard.sequence = shard.sequence.next();
            tracing::info!(
                "Migrating secondary to {node}: new intent {:?}",
                shard.intent
            );
        }

        self.maybe_reconcile_shard(shard, nodes)
    };

    match waiter {
        Some(waiter) => {
            waiter.wait_timeout(RECONCILE_TIMEOUT).await?;
        }
        None => {
            tracing::info!("Migration is a no-op");
        }
    }

    Ok(TenantShardMigrateResponse {})
}
/// 'cancel' in this context means cancel any ongoing reconcile
pub(crate) async fn tenant_shard_cancel_reconcile(
&self,