From 5775662276cbeb1b7cdcfcc0dca1ad59880825f1 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 29 Jul 2024 15:05:30 +0100 Subject: [PATCH] pageserver: fix return code from secondary_download_handler (#8508) ## Problem The secondary download HTTP API is meant to return 200 if the download is complete, and 202 if it is still in progress. In #8198 the download implementation was changed to drop out with success early if it over-runs a time budget, which resulted in 200 responses for incomplete downloads. This breaks storcon_cli's "tenant-warmup" command, which uses the OK status to indicate download complete. ## Summary of changes - Only return 200 if we get an Ok() _and_ the progress stats indicate the download is complete. --- pageserver/src/http/routes.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 7935aeb5e9..9222123ad3 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2129,14 +2129,24 @@ async fn secondary_download_handler( let timeout = wait.unwrap_or(Duration::MAX); - let status = match tokio::time::timeout( + let result = tokio::time::timeout( timeout, state.secondary_controller.download_tenant(tenant_shard_id), ) - .await - { - // Download job ran to completion. - Ok(Ok(())) => StatusCode::OK, + .await; + + let progress = secondary_tenant.progress.lock().unwrap().clone(); + + let status = match result { + Ok(Ok(())) => { + if progress.layers_downloaded >= progress.layers_total { + // Download job ran to completion + StatusCode::OK + } else { + // Download dropped out without errors because it ran out of time budget + StatusCode::ACCEPTED + } + } // Edge case: downloads aren't usually fallible: things like a missing heatmap are considered // okay. We could get an error here in the unlikely edge case that the tenant // was detached between our check above and executing the download job. @@ -2146,8 +2156,6 @@ async fn secondary_download_handler( Err(_) => StatusCode::ACCEPTED, }; - let progress = secondary_tenant.progress.lock().unwrap().clone(); - json_response(status, progress) }