pageserver: imrpove error parity between read paths

This commit is contained in:
Vlad Lazar
2024-04-17 16:44:26 +01:00
parent e489fd7a1d
commit 2cd9b3cab5
3 changed files with 23 additions and 2 deletions

View File

@@ -336,6 +336,12 @@ impl Layer {
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, &self.0, ctx)
.instrument(tracing::debug_span!("get_values_reconstruct_data", layer=%self))
.await
.map_err(|err| match err {
GetVectoredError::Other(err) => GetVectoredError::Other(
err.context(format!("get_value_reconstruct_data for layer {self}")),
),
err => err,
})
}
/// Download the layer if evicted.

View File

@@ -969,6 +969,16 @@ impl Timeline {
{
return Err(GetVectoredError::MissingKey(key))
}
Err(Other(err))
if err
.chain()
.any(|cause| cause.to_string().contains("layer loading failed")) =>
{
// The intent here is to achieve error parity with the vectored read path.
// When vectored read fails to load a layer it fails the whole read, hence
// we mimic this behaviour here to keep the validation happy.
return Err(GetVectoredError::Other(err))
}
_ => {
values.insert(key, block);
key = key.next();

View File

@@ -17,11 +17,16 @@ from fixtures.types import TenantId, TimelineId
# Test restarting page server, while safekeeper and compute node keep
# running.
def test_local_corruption(neon_env_builder: NeonEnvBuilder):
if neon_env_builder.pageserver_get_impl == "vectored":
reconstruct_function_name = "get_values_reconstruct_data"
else:
reconstruct_function_name = "get_value_reconstruct_data"
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[
".*get_value_reconstruct_data for layer .*",
f".*{reconstruct_function_name} for layer .*",
".*could not find data for key.*",
".*is not active. Current state: Broken.*",
".*will not become active. Current state: Broken.*",
@@ -84,7 +89,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
# (We don't check layer file contents on startup, when loading the timeline)
#
# This will change when we implement checksums for layers
with pytest.raises(Exception, match="get_value_reconstruct_data for layer ") as err:
with pytest.raises(Exception, match=f"{reconstruct_function_name} for layer ") as err:
pg2.start()
log.info(
f"As expected, compute startup failed for timeline {tenant2}/{timeline2} with corrupt layers: {err}"