pageserver: fix read path max lsn bug (#7007)

## Summary of changes
The problem it fixes is when `request_lsn` is `u64::MAX-1` the
`cont_lsn` becomes `u64::MAX` which is the same as `prev_lsn` which
stops the loop.

Closes https://github.com/neondatabase/neon/issues/6812
This commit is contained in:
Jure Bajic
2024-03-12 17:32:47 +01:00
committed by GitHub
parent 7ae8364b0b
commit bac06ea1ac
2 changed files with 35 additions and 17 deletions

View File

@@ -4625,10 +4625,7 @@ mod tests {
drop(guard);
// Pick a big LSN such that we query over all the changes.
// Technically, u64::MAX - 1 is the largest LSN supported by the read path,
// but there seems to be a bug on the non-vectored search path which surfaces
// in that case.
let reads_lsn = Lsn(u64::MAX - 1000);
let reads_lsn = Lsn(u64::MAX - 1);
for read in reads {
info!("Doing vectored read on {:?}", read);
@@ -5145,4 +5142,23 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn test_read_at_max_lsn() -> anyhow::Result<()> {
let harness = TenantHarness::create("test_read_at_max_lsn")?;
let (tenant, ctx) = harness.load().await;
let tline = tenant
.create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)
.await?;
let lsn = Lsn(0x10);
bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;
let test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
let read_lsn = Lsn(u64::MAX - 1);
assert!(tline.get(test_key, read_lsn, &ctx).await.is_ok());
Ok(())
}
}

View File

@@ -2478,7 +2478,7 @@ impl Timeline {
// 'prev_lsn' tracks the last LSN that we were at in our search. It's used
// to check that each iteration make some progress, to break infinite
// looping if something goes wrong.
let mut prev_lsn = Lsn(u64::MAX);
let mut prev_lsn = None;
let mut result = ValueReconstructResult::Continue;
let mut cont_lsn = Lsn(request_lsn.0 + 1);
@@ -2498,18 +2498,20 @@ impl Timeline {
MATERIALIZED_PAGE_CACHE_HIT.inc_by(1);
return Ok(traversal_path);
}
if prev_lsn <= cont_lsn {
// Didn't make any progress in last iteration. Error out to avoid
// getting stuck in the loop.
return Err(layer_traversal_error(format!(
"could not find layer with more data for key {} at LSN {}, request LSN {}, ancestor {}",
key,
Lsn(cont_lsn.0 - 1),
request_lsn,
timeline.ancestor_lsn
), traversal_path));
if let Some(prev) = prev_lsn {
if prev <= cont_lsn {
// Didn't make any progress in last iteration. Error out to avoid
// getting stuck in the loop.
return Err(layer_traversal_error(format!(
"could not find layer with more data for key {} at LSN {}, request LSN {}, ancestor {}",
key,
Lsn(cont_lsn.0 - 1),
request_lsn,
timeline.ancestor_lsn
), traversal_path));
}
}
prev_lsn = cont_lsn;
prev_lsn = Some(cont_lsn);
}
ValueReconstructResult::Missing => {
return Err(layer_traversal_error(
@@ -2539,7 +2541,7 @@ impl Timeline {
timeline_owned = timeline.get_ready_ancestor_timeline(ctx).await?;
timeline = &*timeline_owned;
prev_lsn = Lsn(u64::MAX);
prev_lsn = None;
continue 'outer;
}