mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-14 19:50:38 +00:00
pageserver: eliminate CalculateSyntheticSizeError::LsnNotFound (test_metric_collection flake) (#8065)
## Problem

```
ERROR synthetic_size_worker: failed to calculate synthetic size for tenant ae449af30216ac56d2c1173f894b1122: Could not find size at 0/218CA70 in timeline d8da32b5e3e0bf18cfdb560f9de29638
```

e.g. https://neon-github-public-dev.s3.amazonaws.com/reports/main/9518948590/index.html#/testresult/30a6d1e2471d2775

This test had allow lists but was disrupted by https://github.com/neondatabase/neon/pull/8051. In that PR, I had kept an error path in `fill_logical_sizes` that covered the case where we couldn't find sizes for some of the segments. However, that path could only be hit when some Timeline was shut down concurrently with a synthetic size calculation, so it makes sense to just leave the segment's size as `None` in this case: the subsequent size calculations do not assume it is `Some`.

## Summary of changes

- Remove `CalculateSyntheticSizeError::LsnNotFound` and just proceed in the case where we used to return it
- Remove defunct allow list entries in `test_metric_collection`
This commit is contained in:
@@ -60,10 +60,6 @@ pub(crate) enum CalculateSyntheticSizeError {
|
||||
#[error(transparent)]
|
||||
Fatal(anyhow::Error),
|
||||
|
||||
/// The LSN we are trying to calculate a size at no longer exists at the point we query it
|
||||
#[error("Could not find size at {lsn} in timeline {timeline_id}")]
|
||||
LsnNotFound { timeline_id: TimelineId, lsn: Lsn },
|
||||
|
||||
/// Tenant shut down while calculating size
|
||||
#[error("Cancelled")]
|
||||
Cancelled,
|
||||
@@ -375,9 +371,8 @@ pub(super) async fn gather_inputs(
|
||||
|
||||
/// Augment 'segments' with logical sizes
|
||||
///
|
||||
/// this will probably conflict with on-demand downloaded layers, or at least force them all
|
||||
/// to be downloaded
|
||||
///
|
||||
/// This will leave segments' sizes as None if the Timeline associated with the segment is deleted concurrently
|
||||
/// (i.e. we cannot read its logical size at a particular LSN).
|
||||
async fn fill_logical_sizes(
|
||||
timelines: &[Arc<Timeline>],
|
||||
segments: &mut [SegmentMeta],
|
||||
@@ -498,8 +493,6 @@ async fn fill_logical_sizes(
|
||||
|
||||
if let Some(Some(size)) = sizes_needed.get(&(timeline_id, lsn)) {
|
||||
seg.segment.size = Some(*size);
|
||||
} else {
|
||||
return Err(CalculateSyntheticSizeError::LsnNotFound { timeline_id, lsn });
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -75,9 +75,6 @@ def test_metric_collection(
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*metrics endpoint refused the sent metrics*",
|
||||
# we have a fast rate of calculation, these can happen at shutdown
|
||||
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
|
||||
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
|
||||
".*metrics_collection: failed to upload to S3: Failed to upload data of length .* to storage path.*",
|
||||
]
|
||||
)
|
||||
@@ -238,9 +235,6 @@ def test_metric_collection_cleans_up_tempfile(
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*metrics endpoint refused the sent metrics*",
|
||||
# we have a fast rate of calculation, these can happen at shutdown
|
||||
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
|
||||
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user