Compare commits

...

3 Commits

Author SHA1 Message Date
John Spray
422310c19b slightly more efficient 2025-02-17 22:43:36 +01:00
John Spray
77b1fd40b5 wip prototype 2025-02-17 22:37:46 +01:00
John Spray
a8f59f851d Revert "tests: broaden allow-list for #10720 workaround (#10807)"
This reverts commit ae463f366b.
2025-02-17 22:08:03 +01:00
5 changed files with 63 additions and 5 deletions

View File

@@ -570,8 +570,12 @@ impl LayerMap {
self.historic.iter()
}
/// Iterate over the historic layers via `self.historic.riter()`.
///
/// NOTE(review): presumably this yields layers in reverse order (newest LSNs first) and
/// panics if the underlying coverage still has buffered updates, as the buffered
/// coverage's `riter` does -- confirm against the type of `self.historic`.
pub fn riter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
self.historic.riter()
}
/// Get a ref counted pointer for the first in memory layer that matches the provided predicate.
pub fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<Arc<InMemoryLayer>>
pub(crate) fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<Arc<InMemoryLayer>>
where
Pred: FnMut(&Arc<InMemoryLayer>) -> bool,
{
@@ -900,6 +904,24 @@ impl LayerMap {
Ok(())
}
/// Find an image layer whose LSN is at or after `lsn`, searching newest layers first.
///
/// Walks the historic layers in reverse order and returns the first non-delta layer for which
/// `image_layer_lsn() >= lsn`. The walk stops, returning `None`, as soon as it visits a layer
/// whose LSN range starts below `lsn`, or when the layers are exhausted.
///
/// Efficiency: in the common case, where `lsn` is the start of an ephemeral layer, only the
/// end of the underlying btreemap is visited. For an arbitrary LSN this function is O(N) --
/// so don't use it like that.
pub(crate) fn get_newest_image_after(&self, lsn: Lsn) -> Option<Arc<PersistentLayerDesc>> {
    // TODO: an efficient equivalent, this is a crude placeholder
    let mut newest_first = self.riter_historic_layers();
    loop {
        // Running out of layers means there is no matching image layer.
        let candidate = newest_first.next()?;

        // `image_layer_lsn()` is only consulted for non-delta layers.
        if !candidate.is_delta() && candidate.image_layer_lsn() >= lsn {
            return Some(candidate);
        }

        if candidate.lsn_range.start < lsn {
            // We are past the layers that could possibly intersect with the requested bound
            return None;
        }
    }
}
/// `read_points` represent the tip of a timeline and any branch points, i.e. the places
/// where we expect to serve reads.
///

View File

@@ -509,6 +509,18 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
self.layers.values().cloned()
}
/// Iterate over all the layers in reverse order (newest LSNs first), yielding clones.
///
/// # Panics
///
/// Panics with "rebuild pls" if `self.buffer` is not empty.
pub fn riter(&self) -> impl '_ + Iterator<Item = Value> {
    // NOTE: this could be served without rebuilding first,
    // but nothing needs that for now.
    assert!(self.buffer.is_empty(), "rebuild pls");

    // TODO: is cloned() really needed?
    let newest_first = self.layers.values().rev();
    newest_first.cloned()
}
/// Return a reference to a queryable map, assuming all updates
/// have already been processed using self.rebuild()
pub fn get(&self) -> anyhow::Result<&HistoricLayerCoverage<Value>> {

View File

@@ -3775,6 +3775,8 @@ impl Timeline {
let mut completed_keyspace = KeySpace::default();
let mut image_covered_keyspace = KeySpaceRandomAccum::new();
let mut in_memory_layers_considered = Vec::new();
// Prevent GC from progressing while visiting the current timeline.
// If we are GC-ing because a new image layer was added while traversing
// the timeline, then it will remove layers that are required for fulfilling
@@ -3810,12 +3812,34 @@ impl Timeline {
let in_memory_layer = layers.find_in_memory_layer(|l| {
let start_lsn = l.get_lsn_range().start;
cont_lsn > start_lsn
!in_memory_layers_considered.contains(&start_lsn) && cont_lsn > start_lsn
});
match in_memory_layer {
Some(l) => {
let lsn_range = l.get_lsn_range().start..cont_lsn;
in_memory_layers_considered.push(l.get_lsn_range().start);
// Search for image layers that overlap with the in-memory layer: this is rare but permitted, and
// we must bound the `lsn_range` of this layer to avoid skipping past the image layer.
// TODO: a narrower search that only hits on image layers matching `unmapped_keyspace`
let lsn_range = if let Some(image) =
layers.get_newest_image_after(l.get_lsn_range().start)
{
// Note that this does not guarantee serving a read from an image layer, just that we will
// not skip considering the image layer in our Fringe. We can still end up doing walredo work
// in spite of the presence of an image layer, if the inmemory layers we visit contain enough
// information to fully construct a page. For example:
// - ephemeral layer contains I1, D1, D2, <LSN X>
// - image layer at LSN X contains image equal to I2
// - we will end up doing a walredo of I1+D1+D2, rather than reading from the image layer
//
// This is not a problem for correctness, and is rare enough that the wasted time doing walredo
// doesn't matter.
image.get_lsn_range().start + 1..cont_lsn
} else {
l.get_lsn_range().start..cont_lsn
};
fringe.update(
ReadableLayer::InMemoryLayer(l),
unmapped_keyspace.clone(),

View File

@@ -1821,7 +1821,7 @@ def test_sharding_gc(
# TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
ps.allowed_errors.extend(
[
".*could not find data for key.*",
".*could not find data for key 020000000000000000000000000000000000.*",
".*could not ingest record.*",
]
)

View File

@@ -318,7 +318,7 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_
# TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
ps.allowed_errors.extend(
[
".*could not find data for key.*",
".*could not find data for key 020000000000000000000000000000000000.*",
".*could not ingest record.*",
]
)