mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-30 03:20:36 +00:00
pageserver: return proper status code for heatmap_upload errors (#9991)
## Problem During deploys, we see a lot of 500 errors due to heatmap uploads for inactive tenants. These should be 503s instead. Resolves #9574. ## Summary of changes Make the secondary tenant scheduler use `ApiError` rather than `anyhow::Error`, to propagate the tenant error and convert it to an appropriate status code.
This commit is contained in:
@@ -279,7 +279,10 @@ impl From<TenantStateError> for ApiError {
|
||||
impl From<GetTenantError> for ApiError {
|
||||
fn from(tse: GetTenantError) -> ApiError {
|
||||
match tse {
|
||||
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
|
||||
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {tid}").into()),
|
||||
GetTenantError::ShardNotFound(tid) => {
|
||||
ApiError::NotFound(anyhow!("tenant {tid}").into())
|
||||
}
|
||||
GetTenantError::NotActive(_) => {
|
||||
// Why is this not `ApiError::NotFound`?
|
||||
// Because we must be careful to never return 404 for a tenant if it does
|
||||
@@ -387,6 +390,16 @@ impl From<crate::tenant::mgr::DeleteTenantError> for ApiError {
|
||||
}
|
||||
}
|
||||
|
||||
// Conversion from a secondary-tenant error into the HTTP-layer `ApiError`, so that
// handlers can propagate a `SecondaryTenantError` with `?` / `.into()` instead of
// wrapping it in a generic internal-server error.
impl From<crate::tenant::secondary::SecondaryTenantError> for ApiError {
|
||||
// Maps each `SecondaryTenantError` variant onto an `ApiError`.
fn from(ste: crate::tenant::secondary::SecondaryTenantError) -> ApiError {
|
||||
// Function-local import so the match arms below can use the short type name.
use crate::tenant::secondary::SecondaryTenantError;
|
||||
match ste {
|
||||
// Delegate to the wrapped error's own `Into<ApiError>` conversion
// (presumably the `From<GetTenantError> for ApiError` impl -- confirm the payload type).
SecondaryTenantError::GetTenant(gte) => gte.into(),
|
||||
// Shutdown is passed through as the dedicated `ApiError::ShuttingDown` variant.
SecondaryTenantError::ShuttingDown => ApiError::ShuttingDown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to construct a TimelineInfo struct for a timeline
|
||||
async fn build_timeline_info(
|
||||
timeline: &Arc<Timeline>,
|
||||
@@ -1047,9 +1060,11 @@ async fn timeline_delete_handler(
|
||||
match e {
|
||||
// GetTenantError has a built-in conversion to ApiError, but in this context we don't
|
||||
// want to treat missing tenants as 404, to avoid ambiguity with successful deletions.
|
||||
GetTenantError::NotFound(_) => ApiError::PreconditionFailed(
|
||||
"Requested tenant is missing".to_string().into_boxed_str(),
|
||||
),
|
||||
GetTenantError::NotFound(_) | GetTenantError::ShardNotFound(_) => {
|
||||
ApiError::PreconditionFailed(
|
||||
"Requested tenant is missing".to_string().into_boxed_str(),
|
||||
)
|
||||
}
|
||||
e => e.into(),
|
||||
}
|
||||
})?;
|
||||
@@ -2462,8 +2477,7 @@ async fn secondary_upload_handler(
|
||||
state
|
||||
.secondary_controller
|
||||
.upload_tenant(tenant_shard_id)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
@@ -2578,7 +2592,7 @@ async fn secondary_download_handler(
|
||||
// Edge case: downloads aren't usually fallible: things like a missing heatmap are considered
|
||||
// okay. We could get an error here in the unlikely edge case that the tenant
|
||||
// was detached between our check above and executing the download job.
|
||||
Ok(Err(e)) => return Err(ApiError::InternalServerError(e)),
|
||||
Ok(Err(e)) => return Err(e.into()),
|
||||
// A timeout is not an error: we have started the download, we're just not done
|
||||
// yet. The caller will get a response body indicating status.
|
||||
Err(_) => StatusCode::ACCEPTED,
|
||||
|
||||
Reference in New Issue
Block a user