switch to per-tenant attach/detach

Download operations for all timelines of one tenant are now grouped
together: when attach is invoked, the pageserver downloads all of them
and registers them in a single apply_sync_status_update call, so that
branches can be used safely with attach/detach.
This commit is contained in:
Dmitry Rodionov
2022-06-15 17:59:24 +03:00
committed by Dmitry Rodionov
parent ae116ff0a9
commit 4c54e4b37d
19 changed files with 835 additions and 333 deletions

View File

@@ -170,7 +170,6 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/attach:
parameters:
- name: tenant_id
@@ -186,12 +185,27 @@ paths:
type: string
format: hex
post:
description: Attach remote timeline
description: Deprecated
responses:
"200":
description: Timeline attaching scheduled
"410":
description: GONE
/v1/tenant/{tenant_id}/attach:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Attach remote tenant
responses:
"202":
description: Tenant attaching scheduled
"400":
description: Error when no tenant id found in path or no timeline id
description: Error when no tenant id found in path parameters
content:
application/json:
schema:
@@ -215,7 +229,7 @@ paths:
schema:
$ref: "#/components/schemas/NotFoundError"
"409":
description: Timeline download is already in progress
description: Tenant download is already in progress
content:
application/json:
schema:
@@ -227,7 +241,6 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach:
parameters:
- name: tenant_id
@@ -243,12 +256,26 @@ paths:
type: string
format: hex
post:
description: Detach local timeline
description: Deprecated
responses:
"410":
description: GONE
/v1/tenant/{tenant_id}/detach:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Detach local tenant
responses:
"200":
description: Timeline detached
description: Tenant detached
"400":
description: Error when no tenant id found in path or no timeline id
description: Error when no tenant id found in path parameters
content:
application/json:
schema:

View File

@@ -209,9 +209,9 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
.await;
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
return Err(ApiError::NotFound(
"Timeline is not found neither locally nor remotely".to_string(),
));
return Err(ApiError::NotFound(format!(
"Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
)));
}
let timeline_info = TimelineInfo {
@@ -241,119 +241,130 @@ async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Bod
json_response(StatusCode::OK, &wal_receiver_entry)
}
async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_attach_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::GONE, ())
}
// TODO: it makes sense to provide the tenant config right away, the same way it is handled in tenant_create
async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
info!(
"Handling timeline {} attach for tenant: {}",
timeline_id, tenant_id,
);
info!("Handling tenant attach {}", tenant_id,);
tokio::task::spawn_blocking(move || {
if tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id).is_ok() {
// TODO: maybe answer with 304 Not Modified here?
anyhow::bail!("Timeline is already present locally")
if tenant_mgr::get_tenant_state(tenant_id).is_some() {
anyhow::bail!("Tenant is already present locally")
};
Ok(())
})
.await
.map_err(ApiError::from_err)??;
let sync_id = ZTenantTimelineId {
tenant_id,
timeline_id,
};
let state = get_state(&request);
let remote_index = &state.remote_index;
let mut index_accessor = remote_index.write().await;
if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
if remote_timeline.awaits_download {
if let Some(tenant_entry) = index_accessor.tenant_entry_mut(&tenant_id) {
if tenant_entry.has_in_progress_downloads() {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
"Tenant download is already in progress".to_string(),
));
}
remote_timeline.awaits_download = true;
storage_sync::schedule_layer_download(tenant_id, timeline_id);
return json_response(StatusCode::ACCEPTED, ());
} else {
// no timeline in the index, release the lock to make the potentially lengthy download operation
drop(index_accessor);
}
let new_timeline = match try_download_index_part_data(state, sync_id).await {
Ok(Some(mut new_timeline)) => {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
new_timeline.awaits_download = true;
new_timeline
for (timeline_id, remote_timeline) in tenant_entry.iter_mut() {
storage_sync::schedule_layer_download(tenant_id, *timeline_id);
remote_timeline.awaits_download = true;
}
Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
return json_response(StatusCode::ACCEPTED, ());
}
// no tenant in the index, release the lock to make the potentially lengthy download operation
drop(index_accessor);
// download index parts for every tenant timeline
let remote_timelines = match try_download_tenant_index(state, tenant_id).await {
Ok(Some(remote_timelines)) => remote_timelines,
Ok(None) => return Err(ApiError::NotFound("Unknown remote tenant".to_string())),
Err(e) => {
error!("Failed to retrieve remote timeline data: {:?}", e);
error!("Failed to retrieve remote tenant data: {:?}", e);
return Err(ApiError::NotFound(
"Failed to retrieve remote timeline".to_string(),
"Failed to retrieve remote tenant".to_string(),
));
}
};
// recheck that download is not in progress because
// we've released the lock to avoid holding it during the download
let mut index_accessor = remote_index.write().await;
match index_accessor.timeline_entry_mut(&sync_id) {
Some(remote_timeline) => {
if remote_timeline.awaits_download {
let tenant_entry = match index_accessor.tenant_entry_mut(&tenant_id) {
Some(tenant_entry) => {
if tenant_entry.has_in_progress_downloads() {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
"Tenant download is already in progress".to_string(),
));
}
remote_timeline.awaits_download = true;
tenant_entry
}
None => index_accessor.add_timeline_entry(sync_id, new_timeline),
None => index_accessor.add_tenant_entry(tenant_id),
};
// populate remote index with the data from index part and create directories on the local filesystem
for (timeline_id, mut remote_timeline) in remote_timelines {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
remote_timeline.awaits_download = true;
tenant_entry.insert(timeline_id, remote_timeline);
// schedule actual download
storage_sync::schedule_layer_download(tenant_id, timeline_id);
}
storage_sync::schedule_layer_download(tenant_id, timeline_id);
json_response(StatusCode::ACCEPTED, ())
}
async fn try_download_index_part_data(
async fn try_download_tenant_index(
state: &State,
sync_id: ZTenantTimelineId,
) -> anyhow::Result<Option<RemoteTimeline>> {
let index_part = match state.remote_storage.as_ref() {
tenant_id: ZTenantId,
) -> anyhow::Result<Option<Vec<(ZTimelineId, RemoteTimeline)>>> {
let index_parts = match state.remote_storage.as_ref() {
Some(GenericRemoteStorage::Local(local_storage)) => {
storage_sync::download_index_part(state.conf, local_storage, sync_id).await
storage_sync::download_tenant_index_parts(state.conf, local_storage, tenant_id).await
}
// FIXME here s3 storage contains its own limits, that are separate from sync storage thread ones
// because it is a different instance. We can move this limit to some global static
// or use one instance everywhere.
Some(GenericRemoteStorage::S3(s3_storage)) => {
storage_sync::download_index_part(state.conf, s3_storage, sync_id).await
storage_sync::download_tenant_index_parts(state.conf, s3_storage, tenant_id).await
}
None => return Ok(None),
}
.with_context(|| format!("Failed to download index part for timeline {sync_id}"))?;
.with_context(|| format!("Failed to download index parts for tenant {tenant_id}"))?;
let timeline_path = state
.conf
.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
RemoteTimeline::from_index_part(&timeline_path, index_part)
.map(Some)
.with_context(|| {
format!("Failed to convert index part into remote timeline for timeline {sync_id}")
})
let mut remote_timelines = Vec::with_capacity(index_parts.len());
for (timeline_id, index_part) in index_parts {
let timeline_path = state.conf.timeline_path(&timeline_id, &tenant_id);
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
.with_context(|| {
format!("Failed to convert index part into remote timeline for timeline {tenant_id}/{timeline_id}")
})?;
remote_timelines.push((timeline_id, remote_timeline));
}
Ok(Some(remote_timelines))
}
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_detach_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::GONE, ())
}
async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
tokio::task::spawn_blocking(move || {
let _enter =
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
.entered();
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
let state = get_state(&request);
tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
tenant_mgr::detach_tenant(state.conf, tenant_id)
})
.await
.map_err(ApiError::from_err)??;
@@ -523,6 +534,8 @@ pub fn make_router(
.put("/v1/tenant/config", tenant_config_handler)
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_detail_handler,