mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
fix(storcon): correctly pass through lease error code (#12519)
## Problem

Closes LKB-199.

## Summary of changes

We always return the error as a 500 to the cplane if an LSN lease request fails. This causes issues for the cplane, as it does not retry on 500s. This patch correctly passes the error through and assigns the error code so that the cplane can tell whether the error is retryable. (TODO: look at the cplane code and learn the retry logic.)

Note that this patch does not resolve LKB-253 — we still need to handle the not-found error separately in the LSN lease path, e.g. wait until the tenant gets attached, or return 503 so that the cplane can retry.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -4428,7 +4428,7 @@ impl Service {
|
||||
.await;
|
||||
|
||||
let mut failed = 0;
|
||||
for (tid, result) in targeted_tenant_shards.iter().zip(results.into_iter()) {
|
||||
for (tid, (_, result)) in targeted_tenant_shards.iter().zip(results.into_iter()) {
|
||||
match result {
|
||||
Ok(ok) => {
|
||||
if tid.is_shard_zero() {
|
||||
@@ -4795,7 +4795,7 @@ impl Service {
|
||||
.await;
|
||||
|
||||
let mut valid_until = None;
|
||||
for r in res {
|
||||
for (node, r) in res {
|
||||
match r {
|
||||
Ok(lease) => {
|
||||
if let Some(ref mut valid_until) = valid_until {
|
||||
@@ -4805,7 +4805,7 @@ impl Service {
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(e)));
|
||||
return Err(passthrough_api_error(&node, e));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4919,7 +4919,7 @@ impl Service {
|
||||
max_retries: u32,
|
||||
timeout: Duration,
|
||||
cancel: &CancellationToken,
|
||||
) -> Vec<mgmt_api::Result<T>>
|
||||
) -> Vec<(Node, mgmt_api::Result<T>)>
|
||||
where
|
||||
O: Fn(TenantShardId, PageserverClient) -> F + Copy,
|
||||
F: std::future::Future<Output = mgmt_api::Result<T>>,
|
||||
@@ -4940,16 +4940,16 @@ impl Service {
|
||||
cancel,
|
||||
)
|
||||
.await;
|
||||
(idx, r)
|
||||
(idx, node, r)
|
||||
});
|
||||
}
|
||||
|
||||
while let Some((idx, r)) = futs.next().await {
|
||||
results.push((idx, r.unwrap_or(Err(mgmt_api::Error::Cancelled))));
|
||||
while let Some((idx, node, r)) = futs.next().await {
|
||||
results.push((idx, node, r.unwrap_or(Err(mgmt_api::Error::Cancelled))));
|
||||
}
|
||||
|
||||
results.sort_by_key(|(idx, _)| *idx);
|
||||
results.into_iter().map(|(_, r)| r).collect()
|
||||
results.sort_by_key(|(idx, _, _)| *idx);
|
||||
results.into_iter().map(|(_, node, r)| (node, r)).collect()
|
||||
}
|
||||
|
||||
/// Helper for safely working with the shards in a tenant remotely on pageservers, for example
|
||||
@@ -5862,7 +5862,7 @@ impl Service {
|
||||
return;
|
||||
}
|
||||
|
||||
for result in self
|
||||
for (_, result) in self
|
||||
.tenant_for_shards_api(
|
||||
attached,
|
||||
|tenant_shard_id, client| async move {
|
||||
@@ -5881,7 +5881,7 @@ impl Service {
|
||||
}
|
||||
}
|
||||
|
||||
for result in self
|
||||
for (_, result) in self
|
||||
.tenant_for_shards_api(
|
||||
secondary,
|
||||
|tenant_shard_id, client| async move {
|
||||
@@ -8768,7 +8768,7 @@ impl Service {
|
||||
)
|
||||
.await;
|
||||
|
||||
for ((tenant_shard_id, node, optimization), secondary_status) in
|
||||
for ((tenant_shard_id, node, optimization), (_, secondary_status)) in
|
||||
want_secondary_status.into_iter().zip(results.into_iter())
|
||||
{
|
||||
match secondary_status {
|
||||
|
||||
Reference in New Issue
Block a user