feat(storcon): forward gc blocking and unblocking (#8956)

Currently using gc blocking and unblocking with storage controller
managed pageservers is painful. Implement the API on storage controller.

Fixes: #8893
This commit is contained in:
Joonas Koivunen
2024-09-07 00:42:55 +03:00
committed by GitHub
parent fa3fc73c1b
commit 3dbd34aa78
6 changed files with 220 additions and 18 deletions

View File

@@ -21,7 +21,7 @@ use pageserver_api::models::{
TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest,
};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api;
use pageserver_client::{mgmt_api, BlockUnblock};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
@@ -369,6 +369,23 @@ async fn handle_tenant_timeline_detach_ancestor(
json_response(StatusCode::OK, res)
}
async fn handle_tenant_timeline_block_unblock_gc(
service: Arc<Service>,
req: Request<Body>,
dir: BlockUnblock,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
service
.tenant_timeline_block_unblock_gc(tenant_id, timeline_id, dir)
.await?;
json_response(StatusCode::OK, ())
}
async fn handle_tenant_timeline_passthrough(
service: Arc<Service>,
req: Request<Body>,
@@ -1292,6 +1309,26 @@ pub fn make_router(
)
},
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/block_gc",
|r| {
tenant_service_handler(
r,
|s, r| handle_tenant_timeline_block_unblock_gc(s, r, BlockUnblock::Block),
RequestName("v1_tenant_timeline_block_unblock_gc"),
)
},
)
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/unblock_gc",
|r| {
tenant_service_handler(
r,
|s, r| handle_tenant_timeline_block_unblock_gc(s, r, BlockUnblock::Unblock),
RequestName("v1_tenant_timeline_block_unblock_gc"),
)
},
)
// Tenant detail GET passthrough to shard zero:
.get("/v1/tenant/:tenant_id", |r| {
tenant_service_handler(

View File

@@ -7,7 +7,10 @@ use pageserver_api::{
},
shard::TenantShardId,
};
use pageserver_client::mgmt_api::{Client, Result};
use pageserver_client::{
mgmt_api::{Client, Result},
BlockUnblock,
};
use reqwest::StatusCode;
use utils::id::{NodeId, TenantId, TimelineId};
@@ -258,6 +261,24 @@ impl PageserverClient {
)
}
pub(crate) async fn timeline_block_unblock_gc(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
dir: BlockUnblock,
) -> Result<()> {
// measuring these makes no sense because we synchronize with the gc loop and remote
// storage on block_gc so there should be huge outliers
measured_request!(
"timeline_block_unblock_gc",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner
.timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)
.await
)
}
pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
measured_request!(
"utilization",

View File

@@ -69,7 +69,7 @@ use pageserver_api::{
ValidateResponse, ValidateResponseTenant,
},
};
use pageserver_client::mgmt_api;
use pageserver_client::{mgmt_api, BlockUnblock};
use tokio::sync::mpsc::error::TrySendError;
use tokio_util::sync::CancellationToken;
use utils::{
@@ -142,6 +142,7 @@ enum TenantOperations {
AttachHook,
TimelineArchivalConfig,
TimelineDetachAncestor,
TimelineGcBlockUnblock,
}
#[derive(Clone, strum_macros::Display)]
@@ -3197,6 +3198,57 @@ impl Service {
}).await?
}
pub(crate) async fn tenant_timeline_block_unblock_gc(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
dir: BlockUnblock,
) -> Result<(), ApiError> {
let _tenant_lock = trace_shared_lock(
&self.tenant_op_locks,
tenant_id,
TenantOperations::TimelineGcBlockUnblock,
)
.await;
self.tenant_remote_mutation(tenant_id, move |targets| async move {
if targets.is_empty() {
return Err(ApiError::NotFound(
anyhow::anyhow!("Tenant not found").into(),
));
}
async fn do_one(
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
node: Node,
jwt: Option<String>,
dir: BlockUnblock,
) -> Result<(), ApiError> {
let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref());
client
.timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)
.await
.map_err(|e| passthrough_api_error(&node, e))
}
// no shard needs to go first/last; the operation should be idempotent
self.tenant_for_shards(targets, |tenant_shard_id, node| {
futures::FutureExt::boxed(do_one(
tenant_shard_id,
timeline_id,
node,
self.config.jwt_token.clone(),
dir,
))
})
.await
})
.await??;
Ok(())
}
/// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
///
/// On success, the returned vector contains exactly the same number of elements as the input `locations`.