pageserver: do not bump priority of background task for timeline status requests (#6301)

## Problem

Previously, `GET /v1/tenant/:tenant_id/timeline` and `GET
/v1/tenant/:tenant_id/timeline/:timeline_id`
would bump the priority of the background task which computes the
initial logical size by cancelling
the wait on the synchronisation semaphore. However, the request would
still return an approximate
logical size. It's undesirable to force background work for a status
request.

## Summary of changes
This PR updates the priority used by the timeline status request such
that they don't do priority boosting
by default anymore. An optional query parameter,
`force-await-initial-logical-size`, is added for both
mentioned endpoints. When set to true, it will skip the concurrency
limiting semaphore and wait
for the background task to complete before returning the exact logical
size.

In order to exercise this behaviour in a test I had to add an extra
failpoint. If you think it's too intrusive,
it can be removed.

Also  fixeda small bug where the cancellation of a download is reported as an
opaque download failure upstream. This caused `test_location_conf_churn`
to fail at teardown due to a WARN log line.

Closes https://github.com/neondatabase/neon/issues/6168
This commit is contained in:
Vlad Lazar
2024-01-11 15:55:32 +00:00
committed by GitHub
parent 551f0cc097
commit da7a7c867e
9 changed files with 138 additions and 14 deletions

View File

@@ -323,11 +323,21 @@ impl From<crate::tenant::delete::DeleteTenantError> for ApiError {
async fn build_timeline_info(
timeline: &Arc<Timeline>,
include_non_incremental_logical_size: bool,
force_await_initial_logical_size: bool,
ctx: &RequestContext,
) -> anyhow::Result<TimelineInfo> {
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
let mut info = build_timeline_info_common(timeline, ctx).await?;
if force_await_initial_logical_size {
timeline.clone().await_initial_logical_size().await
}
let mut info = build_timeline_info_common(
timeline,
ctx,
tenant::timeline::GetLogicalSizePriority::Background,
)
.await?;
if include_non_incremental_logical_size {
// XXX we should be using spawn_ondemand_logical_size_calculation here.
// Otherwise, if someone deletes the timeline / detaches the tenant while
@@ -344,6 +354,7 @@ async fn build_timeline_info(
async fn build_timeline_info_common(
timeline: &Arc<Timeline>,
ctx: &RequestContext,
logical_size_task_priority: tenant::timeline::GetLogicalSizePriority,
) -> anyhow::Result<TimelineInfo> {
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
let initdb_lsn = timeline.initdb_lsn;
@@ -366,8 +377,7 @@ async fn build_timeline_info_common(
Lsn(0) => None,
lsn @ Lsn(_) => Some(lsn),
};
let current_logical_size =
timeline.get_current_logical_size(tenant::timeline::GetLogicalSizePriority::User, ctx);
let current_logical_size = timeline.get_current_logical_size(logical_size_task_priority, ctx);
let current_physical_size = Some(timeline.layer_size_sum().await);
let state = timeline.current_state();
let remote_consistent_lsn_projected = timeline
@@ -478,7 +488,7 @@ async fn timeline_create_handler(
.await {
Ok(new_timeline) => {
// Created. Construct a TimelineInfo for it.
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
let timeline_info = build_timeline_info_common(&new_timeline, &ctx, tenant::timeline::GetLogicalSizePriority::User)
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::CREATED, timeline_info)
@@ -514,6 +524,8 @@ async fn timeline_list_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
let force_await_initial_logical_size: Option<bool> =
parse_query_param(&request, "force-await-initial-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
@@ -527,6 +539,7 @@ async fn timeline_list_handler(
let timeline_info = build_timeline_info(
&timeline,
include_non_incremental_logical_size.unwrap_or(false),
force_await_initial_logical_size.unwrap_or(false),
&ctx,
)
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
@@ -554,6 +567,8 @@ async fn timeline_detail_handler(
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
let force_await_initial_logical_size: Option<bool> =
parse_query_param(&request, "force-await-initial-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
// Logical size calculation needs downloading.
@@ -569,6 +584,7 @@ async fn timeline_detail_handler(
let timeline_info = build_timeline_info(
&timeline,
include_non_incremental_logical_size.unwrap_or(false),
force_await_initial_logical_size.unwrap_or(false),
&ctx,
)
.await