Fix a few cases where request beyond end of rel would error out.

Currently, we return an all-zeros page if you request a block beyond end of
a relation. That has been implemented in LayeredTimeline::materialize_page,
so that if Layer::get_page_reconstruct_data returns Missing, it returns
and all-zeros page.

However InMemoryLayer and DeltaLayer would return Continue, not Missing,
in that case, and materialize_page would try to find the predecessor
layer. If there was a preceding image layer, then everything would still
work, but if there wasn't, it would return a "could not find predecessor
of layer" error. Fix that in InMemoryLayer and DeltaLayer, making them
check the size of the relation and return Missing in that case.

This is hard to reproduce at the moment, but it happened quickly with
pgbench when I modified InMemoryLayer::write_to_disk so that it didn't
always create a new ImageLayer.
This commit is contained in:
Heikki Linnakangas
2021-12-09 17:46:48 +02:00
parent 8388e14bbd
commit f3f059c1f8
5 changed files with 88 additions and 14 deletions

View File

@@ -1737,8 +1737,8 @@ impl LayeredTimeline {
// We landed on the same layer again. Shouldn't happen, but if it does,
// don't get stuck in an infinite loop.
bail!(
"could not find predecessor layer of segment {} at {}",
seg.rel,
"could not find predecessor of layer {} at {}, layer returned its own LSN",
layer_ref.filename().display(),
cont_lsn
);
}
@@ -1748,8 +1748,8 @@ impl LayeredTimeline {
continue;
} else {
bail!(
"could not find predecessor layer of segment {} at {}",
seg.rel,
"could not find predecessor of layer {} at {}",
layer_ref.filename().display(),
cont_lsn
);
}

View File

@@ -41,7 +41,7 @@ use crate::layered_repository::blob::BlobWriter;
use crate::layered_repository::filename::{DeltaFileName, PathOrConf};
use crate::layered_repository::page_versions::PageVersions;
use crate::layered_repository::storage_layer::{
Layer, PageReconstructData, PageReconstructResult, PageVersion, SegmentTag,
Layer, PageReconstructData, PageReconstructResult, PageVersion, SegmentTag, RELISH_SEG_SIZE,
};
use crate::virtual_file::VirtualFile;
use crate::waldecoder;
@@ -150,6 +150,19 @@ pub struct DeltaLayerInner {
relsizes: VecMap<Lsn, u32>,
}
impl DeltaLayerInner {
fn get_seg_size(&self, lsn: Lsn) -> Result<u32> {
let slice = self
.relsizes
.slice_range((Included(&Lsn(0)), Included(&lsn)));
if let Some((_entry_lsn, entry)) = slice.last() {
Ok(*entry)
} else {
Err(anyhow::anyhow!("could not find seg size in delta layer"))
}
}
}
impl Layer for DeltaLayer {
fn get_tenant_id(&self) -> ZTenantId {
self.tenantid
@@ -244,6 +257,15 @@ impl Layer for DeltaLayer {
}
}
// If we didn't find any records for this, check if the request is beyond EOF
if need_image
&& reconstruct_data.records.is_empty()
&& self.seg.rel.is_blocky()
&& blknum - self.seg.segno * RELISH_SEG_SIZE >= inner.get_seg_size(lsn)?
{
return Ok(PageReconstructResult::Missing(self.start_lsn));
}
// release metadata lock and close the file
}
@@ -266,15 +288,7 @@ impl Layer for DeltaLayer {
// Scan the BTreeMap backwards, starting from the given entry.
let inner = self.load()?;
let slice = inner
.relsizes
.slice_range((Included(&Lsn(0)), Included(&lsn)));
if let Some((_entry_lsn, entry)) = slice.last() {
Ok(*entry)
} else {
Err(anyhow::anyhow!("could not find seg size in delta layer"))
}
inner.get_seg_size(lsn)
}
/// Does this segment exist at given LSN?

View File

@@ -162,6 +162,7 @@ impl Layer for ImageLayer {
let buf = match &inner.image_type {
ImageType::Blocky { num_blocks } => {
// Check if the request is beyond EOF
if base_blknum >= *num_blocks {
return Ok(PageReconstructResult::Missing(lsn));
}

View File

@@ -193,6 +193,16 @@ impl Layer for InMemoryLayer {
}
}
}
// If we didn't find any records for this, check if the request is beyond EOF
if need_image
&& reconstruct_data.records.is_empty()
&& self.seg.rel.is_blocky()
&& blknum - self.seg.segno * RELISH_SEG_SIZE >= self.get_seg_size(lsn)?
{
return Ok(PageReconstructResult::Missing(self.start_lsn));
}
// release lock on 'inner'
}

View File

@@ -948,6 +948,55 @@ mod tests {
Ok(())
}
#[test]
fn test_read_beyond_eof() -> Result<()> {
let harness = RepoHarness::create("test_read_beyond_eof")?;
let repo = harness.load();
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
make_some_layers(&tline, Lsn(0x20))?;
{
let writer = tline.writer();
writer.put_page_image(
TESTREL_A,
0,
Lsn(0x60),
TEST_IMG(&format!("foo blk 0 at {}", Lsn(0x50))),
)?;
writer.advance_last_record_lsn(Lsn(0x60));
}
// Test read before rel creation. Should error out.
assert!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x10)).is_err());
// Read block beyond end of relation at different points in time.
// These reads should fall into different delta, image, and in-memory layers.
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x20))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x25))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x30))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x35))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x40))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x45))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x50))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x55))?, ZERO_PAGE);
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 1, Lsn(0x60))?, ZERO_PAGE);
// Test on an in-memory layer with no preceding layer
{
let writer = tline.writer();
writer.put_page_image(
TESTREL_B,
0,
Lsn(0x70),
TEST_IMG(&format!("foo blk 0 at {}", Lsn(0x70))),
)?;
writer.advance_last_record_lsn(Lsn(0x70));
}
assert_eq!(tline.get_page_at_lsn(TESTREL_B, 1, Lsn(0x70))?, ZERO_PAGE);
Ok(())
}
#[test]
fn corrupt_metadata() -> Result<()> {
const TEST_NAME: &str = "corrupt_metadata";