mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-14 11:40:38 +00:00
pageserver: handle shutdown cleanly in layer download API (#10598)
## Problem This API is used in tests and occasionally for support. It cast all errors to 500. That can cause a failure on the log checks: https://neon-github-public-dev.s3.amazonaws.com/reports/main/13056992876/index.html#suites/ad9c266207b45eafe19909d1020dd987/683a7031d877f3db/ ## Summary of changes - Avoid using generic anyhow::Error for layer downloads - Map shutdown cases to 503 in http route
This commit is contained in:
@@ -1472,7 +1472,13 @@ async fn layer_download_handler(
|
||||
let downloaded = timeline
|
||||
.download_layer(&layer_name)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
.map_err(|e| match e {
|
||||
tenant::storage_layer::layer::DownloadError::TimelineShutdown
|
||||
| tenant::storage_layer::layer::DownloadError::DownloadCancelled => {
|
||||
ApiError::ShuttingDown
|
||||
}
|
||||
other => ApiError::InternalServerError(other.into()),
|
||||
})?;
|
||||
|
||||
match downloaded {
|
||||
Some(true) => json_response(StatusCode::OK, ()),
|
||||
|
||||
@@ -340,7 +340,7 @@ impl Layer {
|
||||
/// Download the layer if evicted.
|
||||
///
|
||||
/// Will not error when the layer is already downloaded.
|
||||
pub(crate) async fn download(&self) -> anyhow::Result<()> {
|
||||
pub(crate) async fn download(&self) -> Result<(), DownloadError> {
|
||||
self.0.get_or_maybe_download(true, None).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2028,8 +2028,16 @@ impl Timeline {
|
||||
pub(crate) async fn download_layer(
|
||||
&self,
|
||||
layer_file_name: &LayerName,
|
||||
) -> anyhow::Result<Option<bool>> {
|
||||
let Some(layer) = self.find_layer(layer_file_name).await? else {
|
||||
) -> Result<Option<bool>, super::storage_layer::layer::DownloadError> {
|
||||
let Some(layer) = self
|
||||
.find_layer(layer_file_name)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
layer_manager::Shutdown => {
|
||||
super::storage_layer::layer::DownloadError::TimelineShutdown
|
||||
}
|
||||
})?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ from fixtures.pageserver.utils import (
|
||||
)
|
||||
from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
|
||||
from fixtures.utils import query_scalar, wait_until
|
||||
from urllib3 import Retry
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Any
|
||||
@@ -676,16 +677,14 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu
|
||||
"compaction_period": "0s",
|
||||
}
|
||||
)
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
# Disable retries, because we'll hit code paths that can give us
|
||||
# 503 and want to see that directly
|
||||
client = env.pageserver.http_client(retries=Retry(status=0))
|
||||
|
||||
failpoint = "before-downloading-layer-stream-pausable"
|
||||
client.configure_failpoints((failpoint, "pause"))
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*downloading failed, possibly for shutdown.*",
|
||||
]
|
||||
)
|
||||
|
||||
info = client.layer_map_info(env.initial_tenant, env.initial_timeline)
|
||||
assert len(info.delta_layers()) == 1
|
||||
|
||||
@@ -720,13 +719,9 @@ def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBu
|
||||
|
||||
client.configure_failpoints((failpoint, "off"))
|
||||
|
||||
with pytest.raises(
|
||||
PageserverApiException, match="downloading failed, possibly for shutdown"
|
||||
):
|
||||
with pytest.raises(PageserverApiException, match="Shutting down"):
|
||||
download.result()
|
||||
|
||||
env.pageserver.assert_log_contains(".*downloading failed, possibly for shutdown.*")
|
||||
|
||||
detach.result()
|
||||
|
||||
client.configure_failpoints((failpoint, "pause"))
|
||||
|
||||
Reference in New Issue
Block a user