feat(storcon): timeline detach ancestor passthrough (#8353)
Currently the storage controller does not support forwarding timeline detach ancestor requests to pageservers. Add support for forwarding `PUT .../:tenant_id/timelines/:timeline_id/detach_ancestor`. Implement the support mostly as-is, because timeline detach ancestor will be made (mostly) idempotent in a future PR. Cc: #6994
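For illustration only (not part of the commit): once this lands, the new passthrough route can be exercised directly against the storage controller. A minimal sketch, assuming a controller at 127.0.0.1:1234, placeholder tenant/timeline ids and token, and reqwest (with the json feature), tokio, and serde_json as dependencies:

// Hypothetical caller of the new storage controller passthrough route.
// Host, port, ids, and token are placeholders, not values from the commit.
use reqwest::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let base = "http://127.0.0.1:1234"; // assumed controller address
    let tenant_id = "1f359dd625e519a1a4e8d7509690f6fc"; // hypothetical
    let timeline_id = "de200bd42b49cc1814412c7e592dd6e9"; // hypothetical

    let url = format!("{base}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor");

    // The handler checks Scope::PageServerApi, so a token with that scope
    // is needed when auth is enabled.
    let resp = Client::new()
        .put(url)
        .bearer_auth("PLACEHOLDER_JWT")
        .send()
        .await?
        .error_for_status()?;

    // On success the controller returns one shard's AncestorDetached body;
    // per the diff below, mismatching shard responses are only logged.
    let body: serde_json::Value = resp.json().await?;
    println!("{body}");
    Ok(())
}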
@@ -330,6 +330,22 @@ async fn handle_tenant_timeline_delete(
         .await
 }
 
+async fn handle_tenant_timeline_detach_ancestor(
+    service: Arc<Service>,
+    req: Request<Body>,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    check_permissions(&req, Scope::PageServerApi)?;
+
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+
+    let res = service
+        .tenant_timeline_detach_ancestor(tenant_id, timeline_id)
+        .await?;
+
+    json_response(StatusCode::OK, res)
+}
+
 async fn handle_tenant_timeline_passthrough(
     service: Arc<Service>,
     req: Request<Body>,
@@ -1006,6 +1022,16 @@ pub fn make_router(
                 RequestName("v1_tenant_timeline"),
             )
         })
+        .put(
+            "/v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor",
+            |r| {
+                tenant_service_handler(
+                    r,
+                    handle_tenant_timeline_detach_ancestor,
+                    RequestName("v1_tenant_timeline_detach_ancestor"),
+                )
+            },
+        )
         // Tenant detail GET passthrough to shard zero:
         .get("/v1/tenant/:tenant_id", |r| {
             tenant_service_handler(
@@ -1,8 +1,9 @@
 use pageserver_api::{
     models::{
-        LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
-        TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse,
-        TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
+        detach_ancestor::AncestorDetached, LocationConfig, LocationConfigListResponse,
+        PageserverUtilization, SecondaryProgress, TenantScanRemoteStorageResponse,
+        TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
+        TopTenantShardsRequest, TopTenantShardsResponse,
     },
     shard::TenantShardId,
 };
@@ -226,6 +227,21 @@ impl PageserverClient {
         )
     }
 
+    pub(crate) async fn timeline_detach_ancestor(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+    ) -> Result<AncestorDetached> {
+        measured_request!(
+            "timeline_detach_ancestor",
+            crate::metrics::Method::Put,
+            &self.node_id_label,
+            self.inner
+                .timeline_detach_ancestor(tenant_shard_id, timeline_id)
+                .await
+        )
+    }
+
     pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
         measured_request!(
             "utilization",
@@ -117,6 +117,7 @@ enum TenantOperations {
     TimelineCreate,
     TimelineDelete,
     AttachHook,
+    TimelineDetachAncestor,
 }
 
 #[derive(Clone, strum_macros::Display)]
@@ -2376,18 +2377,18 @@ impl Service {
                 tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",);
 
                 client
-                    .tenant_time_travel_remote_storage(
-                        tenant_shard_id,
-                        &timestamp,
-                        &done_if_after,
-                    )
-                    .await
-                    .map_err(|e| {
-                        ApiError::InternalServerError(anyhow::anyhow!(
-                            "Error doing time travel recovery for shard {tenant_shard_id} on node {}: {e}",
-                            node
-                        ))
-                    })?;
+                    .tenant_time_travel_remote_storage(
+                        tenant_shard_id,
+                        &timestamp,
+                        &done_if_after,
+                    )
+                    .await
+                    .map_err(|e| {
+                        ApiError::InternalServerError(anyhow::anyhow!(
+                            "Error doing time travel recovery for shard {tenant_shard_id} on node {}: {e}",
+                            node
+                        ))
+                    })?;
             }
         }
         Ok(())
@@ -2757,7 +2758,7 @@ impl Service {
         // Create timeline on remaining shards with number >0
         if !targets.is_empty() {
             // If we had multiple shards, issue requests for the remainder now.
-            let jwt = self.config.jwt_token.clone();
+            let jwt = &self.config.jwt_token;
             self.tenant_for_shards(targets, |tenant_shard_id: TenantShardId, node: Node| {
                 let create_req = create_req.clone();
                 Box::pin(create_one(tenant_shard_id, node, jwt.clone(), create_req))
@@ -2768,6 +2769,115 @@ impl Service {
         Ok(timeline_info)
     }
 
+    pub(crate) async fn tenant_timeline_detach_ancestor(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Result<models::detach_ancestor::AncestorDetached, ApiError> {
+        tracing::info!("Detaching timeline {tenant_id}/{timeline_id}",);
+
+        let _tenant_lock = trace_shared_lock(
+            &self.tenant_op_locks,
+            tenant_id,
+            TenantOperations::TimelineDetachAncestor,
+        )
+        .await;
+
+        self.ensure_attached_wait(tenant_id).await?;
+
+        let targets = {
+            let locked = self.inner.read().unwrap();
+            let mut targets = Vec::new();
+
+            for (tenant_shard_id, shard) in
+                locked.tenants.range(TenantShardId::tenant_range(tenant_id))
+            {
+                let node_id = shard.intent.get_attached().ok_or_else(|| {
+                    ApiError::InternalServerError(anyhow::anyhow!("Shard not scheduled"))
+                })?;
+                let node = locked
+                    .nodes
+                    .get(&node_id)
+                    .expect("Pageservers may not be deleted while referenced");
+
+                targets.push((*tenant_shard_id, node.clone()));
+            }
+            targets
+        };
+
+        if targets.is_empty() {
+            return Err(ApiError::NotFound(
+                anyhow::anyhow!("Tenant not found").into(),
+            ));
+        }
+
+        async fn detach_one(
+            tenant_shard_id: TenantShardId,
+            timeline_id: TimelineId,
+            node: Node,
+            jwt: Option<String>,
+        ) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> {
+            tracing::info!(
+                "Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
+            );
+
+            let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref());
+            client
+                .timeline_detach_ancestor(tenant_shard_id, timeline_id)
+                .await
+                .map_err(|e| {
+                    use mgmt_api::Error;
+
+                    match e {
+                        // no ancestor (ever)
+                        Error::ApiError(StatusCode::CONFLICT, msg) => {
+                            ApiError::Conflict(format!("{node}: {msg}"))
+                        }
+                        // too many ancestors
+                        Error::ApiError(StatusCode::BAD_REQUEST, msg) => {
+                            ApiError::BadRequest(anyhow::anyhow!("{node}: {msg}"))
+                        }
+                        // rest can be mapped
+                        other => passthrough_api_error(&node, other),
+                    }
+                })
+                .map(|res| (tenant_shard_id.shard_number, res))
+        }
+
+        // no shard needs to go first/last; the operation should be idempotent
+        // TODO: it would be great to ensure that all shards return the same error
+        let mut results = self
+            .tenant_for_shards(targets, |tenant_shard_id, node| {
+                futures::FutureExt::boxed(detach_one(
+                    tenant_shard_id,
+                    timeline_id,
+                    node,
+                    self.config.jwt_token.clone(),
+                ))
+            })
+            .await?;
+
+        let any = results.pop().expect("we must have at least one response");
+
+        // FIXME: the ordering is not stable yet on pageserver, should be (ancestor_lsn,
+        // TimelineId)
+        let mismatching = results
+            .iter()
+            .filter(|(_, res)| res != &any.1)
+            .collect::<Vec<_>>();
+        if !mismatching.is_empty() {
+            let matching = results.len() - mismatching.len();
+            tracing::error!(
+                matching,
+                compared_against=?any,
+                ?mismatching,
+                "shards returned different results"
+            );
+        }
+
+        Ok(any.1)
+    }
+
     /// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
     ///
     /// On success, the returned vector contains exactly the same number of elements as the input `locations`.
@@ -2894,8 +3004,8 @@ impl Service {
             .await
             .map_err(|e| {
                 ApiError::InternalServerError(anyhow::anyhow!(
-                    "Error deleting timeline {timeline_id} on {tenant_shard_id} on node {node}: {e}",
-                ))
+                    "Error deleting timeline {timeline_id} on {tenant_shard_id} on node {node}: {e}",
+                ))
             })
         }
 