mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
Filter obsolete layer files from an older generation from heatmap
This commit is contained in:
@@ -1580,7 +1580,7 @@ impl TenantShard {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
pub(crate) async fn preload(
|
pub(crate) async fn preload(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
remote_storage: &GenericRemoteStorage,
|
remote_storage: &GenericRemoteStorage,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ use self::eviction_task::EvictionTaskTimelineState;
|
|||||||
use self::logical_size::LogicalSize;
|
use self::logical_size::LogicalSize;
|
||||||
use self::walreceiver::{WalReceiver, WalReceiverConf};
|
use self::walreceiver::{WalReceiver, WalReceiverConf};
|
||||||
use super::remote_timeline_client::RemoteTimelineClient;
|
use super::remote_timeline_client::RemoteTimelineClient;
|
||||||
use super::remote_timeline_client::index::{GcCompactionState, IndexPart};
|
use super::remote_timeline_client::index::{GcCompactionState, IndexPart, LayerFileMetadata};
|
||||||
use super::secondary::heatmap::HeatMapLayer;
|
use super::secondary::heatmap::HeatMapLayer;
|
||||||
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
|
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
|
||||||
use super::tasks::log_compaction_error;
|
use super::tasks::log_compaction_error;
|
||||||
@@ -4205,6 +4205,12 @@ impl Timeline {
|
|||||||
let desc: PersistentLayerDesc = hl.name.clone().into();
|
let desc: PersistentLayerDesc = hl.name.clone().into();
|
||||||
let layer = guard.try_get_from_key(&desc.key())?;
|
let layer = guard.try_get_from_key(&desc.key())?;
|
||||||
|
|
||||||
|
// Make sure the layer in the old heatmap is the same generation one as in the layer
|
||||||
|
// map otherwise we can in some edge case keep old obsolete layers in the heatmap.
|
||||||
|
if layer.metadata().generation != hl.metadata.generation {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
if layer.visibility() == LayerVisibilityHint::Covered {
|
if layer.visibility() == LayerVisibilityHint::Covered {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@@ -6474,6 +6480,7 @@ pub struct DeltaLayerTestDesc {
|
|||||||
pub lsn_range: Range<Lsn>,
|
pub lsn_range: Range<Lsn>,
|
||||||
pub key_range: Range<Key>,
|
pub key_range: Range<Key>,
|
||||||
pub data: Vec<(Key, Lsn, Value)>,
|
pub data: Vec<(Key, Lsn, Value)>,
|
||||||
|
pub resident: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -6491,12 +6498,22 @@ impl DeltaLayerTestDesc {
|
|||||||
lsn_range,
|
lsn_range,
|
||||||
key_range,
|
key_range,
|
||||||
data,
|
data,
|
||||||
|
// Default test code creates resident layers.
|
||||||
|
resident: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_with_inferred_key_range(
|
pub fn new_with_inferred_key_range(
|
||||||
lsn_range: Range<Lsn>,
|
lsn_range: Range<Lsn>,
|
||||||
data: Vec<(Key, Lsn, Value)>,
|
data: Vec<(Key, Lsn, Value)>,
|
||||||
|
) -> Self {
|
||||||
|
Self::new_with_inferred_key_range_and_resident_state(lsn_range, data, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_with_inferred_key_range_and_resident_state(
|
||||||
|
lsn_range: Range<Lsn>,
|
||||||
|
data: Vec<(Key, Lsn, Value)>,
|
||||||
|
resident: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let key_min = data.iter().map(|(key, _, _)| key).min().unwrap();
|
let key_min = data.iter().map(|(key, _, _)| key).min().unwrap();
|
||||||
let key_max = data.iter().map(|(key, _, _)| key).max().unwrap();
|
let key_max = data.iter().map(|(key, _, _)| key).max().unwrap();
|
||||||
@@ -6504,6 +6521,7 @@ impl DeltaLayerTestDesc {
|
|||||||
key_range: (*key_min)..(key_max.next()),
|
key_range: (*key_min)..(key_max.next()),
|
||||||
lsn_range,
|
lsn_range,
|
||||||
data,
|
data,
|
||||||
|
resident
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7505,6 +7523,30 @@ impl Timeline {
|
|||||||
check_start_lsn: Option<Lsn>,
|
check_start_lsn: Option<Lsn>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
|
||||||
|
if !deltas.resident {
|
||||||
|
// Don't need to bother creating an on-disk file, we just want the metadata for a non-resident layer.
|
||||||
|
let delta_layer = Layer::for_evicted(
|
||||||
|
self.conf,
|
||||||
|
self,
|
||||||
|
deltas.layer_name(),
|
||||||
|
LayerFileMetadata {
|
||||||
|
generation: self.generation,
|
||||||
|
shard: self.get_shard_index(),
|
||||||
|
file_size: 1024,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
info!("force created non-resident delta layer {}", deltas.layer_name());
|
||||||
|
{
|
||||||
|
let mut guard = self.layers.write(LayerManagerLockHolder::Testing).await;
|
||||||
|
guard
|
||||||
|
.open_mut()
|
||||||
|
.unwrap()
|
||||||
|
.force_insert_optionally_resident_layer(delta_layer);
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let last_record_lsn = self.get_last_record_lsn();
|
let last_record_lsn = self.get_last_record_lsn();
|
||||||
deltas
|
deltas
|
||||||
.data
|
.data
|
||||||
@@ -8263,6 +8305,148 @@ mod tests {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_heatmap_generation_removes_layers_from_old_generation() {
|
||||||
|
use std::time::SystemTime;
|
||||||
|
use utils::generation::Generation;
|
||||||
|
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||||
|
use crate::tenant::secondary::heatmap::{HeatMapLayer, HeatMapTimeline as HeatMapTimelineStruct};
|
||||||
|
|
||||||
|
let harness = TenantHarness::create("heatmaheatmap_genereation_removes_layers_from_old_generationp_generation").await.unwrap();
|
||||||
|
|
||||||
|
// Create your existing layer descriptions
|
||||||
|
let covered_delta = DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||||
|
Lsn(0x10)..Lsn(0x20),
|
||||||
|
vec![(
|
||||||
|
Key::from_hex("620000000033333333444444445500000000").unwrap(),
|
||||||
|
Lsn(0x11),
|
||||||
|
Value::Image(test_img("foo")),
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
// This visible layer is non-resident on disk. This is important to reproduce the failure as
|
||||||
|
// a resident file will take priority over the previous heatmap even without the fix for
|
||||||
|
// this issue.
|
||||||
|
let visible_delta = DeltaLayerTestDesc::new_with_inferred_key_range_and_resident_state(
|
||||||
|
Lsn(0x10)..Lsn(0x20),
|
||||||
|
vec![(
|
||||||
|
Key::from_hex("720000000033333333444444445500000000").unwrap(),
|
||||||
|
Lsn(0x11),
|
||||||
|
Value::Image(test_img("foo")),
|
||||||
|
)],
|
||||||
|
false, // Non-resident
|
||||||
|
);
|
||||||
|
let l0_delta = DeltaLayerTestDesc::new(
|
||||||
|
Lsn(0x20)..Lsn(0x30),
|
||||||
|
Key::from_hex("000000000000000000000000000000000000").unwrap()
|
||||||
|
..Key::from_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF").unwrap(),
|
||||||
|
vec![(
|
||||||
|
Key::from_hex("720000000033333333444444445500000000").unwrap(),
|
||||||
|
Lsn(0x25),
|
||||||
|
Value::Image(test_img("foo")),
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
let delta_layers = vec![
|
||||||
|
covered_delta.clone(),
|
||||||
|
visible_delta.clone(),
|
||||||
|
l0_delta.clone(),
|
||||||
|
];
|
||||||
|
|
||||||
|
let image_layer = (
|
||||||
|
Lsn(0x40),
|
||||||
|
vec![(
|
||||||
|
Key::from_hex("620000000033333333444444445500000000").unwrap(),
|
||||||
|
test_img("bar"),
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
let image_layers = vec![image_layer];
|
||||||
|
|
||||||
|
let (tenant, ctx) = harness.load().await;
|
||||||
|
|
||||||
|
// Create timeline with current generation (0xdeadbeef by default)
|
||||||
|
let timeline = tenant
|
||||||
|
.create_test_timeline_with_layers(
|
||||||
|
TimelineId::generate(),
|
||||||
|
Lsn(0x10),
|
||||||
|
PgMajorVersion::PG14,
|
||||||
|
&ctx,
|
||||||
|
Vec::new(), // in-memory layers
|
||||||
|
delta_layers,
|
||||||
|
image_layers,
|
||||||
|
Lsn(0x100),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Now create a previous heatmap with the visible_delta layer from an older generation
|
||||||
|
let current_layer_metadata = LayerFileMetadata {
|
||||||
|
generation: timeline.generation,
|
||||||
|
shard: timeline.get_shard_index(),
|
||||||
|
file_size: 1024,
|
||||||
|
};
|
||||||
|
let old_generation = Generation::new(0x12345678); // Older than 0xdeadbeef
|
||||||
|
let old_layer_metadata = LayerFileMetadata {
|
||||||
|
generation: old_generation,
|
||||||
|
shard: timeline.get_shard_index(),
|
||||||
|
file_size: 1024,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create heatmap layers that reference the same keys but with old generation
|
||||||
|
let prev_heatmap_layers = vec![
|
||||||
|
HeatMapLayer::new(
|
||||||
|
covered_delta.layer_name(),
|
||||||
|
current_layer_metadata.clone(),
|
||||||
|
SystemTime::now(),
|
||||||
|
false, // not cold
|
||||||
|
),
|
||||||
|
HeatMapLayer::new(
|
||||||
|
visible_delta.layer_name(),
|
||||||
|
// Visible delta layer is from an older generation in heatmap
|
||||||
|
old_layer_metadata.clone(),
|
||||||
|
SystemTime::now(),
|
||||||
|
false, // not cold
|
||||||
|
),
|
||||||
|
HeatMapLayer::new(
|
||||||
|
l0_delta.layer_name(),
|
||||||
|
current_layer_metadata.clone(),
|
||||||
|
SystemTime::now(),
|
||||||
|
false, // not cold
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
|
// Create the previous heatmap with old generation layers
|
||||||
|
let prev_heatmap = HeatMapTimelineStruct::new(timeline.timeline_id, prev_heatmap_layers);
|
||||||
|
|
||||||
|
// Set the previous heatmap on the timeline
|
||||||
|
timeline
|
||||||
|
.previous_heatmap
|
||||||
|
.store(Some(Arc::new(PreviousHeatmap::Active {
|
||||||
|
heatmap: prev_heatmap,
|
||||||
|
read_at: std::time::Instant::now(),
|
||||||
|
end_lsn: None,
|
||||||
|
})));
|
||||||
|
|
||||||
|
// Layer visibility is an input to heatmap generation, so refresh it first
|
||||||
|
timeline.update_layer_visibility().await.unwrap();
|
||||||
|
|
||||||
|
// Generate a new heatmap - this should filter out the old generation layers
|
||||||
|
let heatmap = timeline
|
||||||
|
.generate_heatmap()
|
||||||
|
.await
|
||||||
|
.expect("Infallible while timeline is not shut down");
|
||||||
|
|
||||||
|
assert_eq!(heatmap.timeline_id, timeline.timeline_id);
|
||||||
|
|
||||||
|
// Verify that layers exist but they should be the current generation ones,
|
||||||
|
// not the old generation ones from previous_heatmap
|
||||||
|
for layer in heatmap.all_layers() {
|
||||||
|
assert_eq!(
|
||||||
|
layer.metadata.generation,
|
||||||
|
timeline.generation,
|
||||||
|
"Heatmap should only contain current generation layers, not old ones"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn two_layer_eviction_attempts_at_the_same_time() {
|
async fn two_layer_eviction_attempts_at_the_same_time() {
|
||||||
let harness = TenantHarness::create("two_layer_eviction_attempts_at_the_same_time")
|
let harness = TenantHarness::create("two_layer_eviction_attempts_at_the_same_time")
|
||||||
|
|||||||
@@ -129,6 +129,16 @@ pub(super) fn reconcile(
|
|||||||
// Construct Decisions for layers that are found locally, if they're in remote metadata. Otherwise
|
// Construct Decisions for layers that are found locally, if they're in remote metadata. Otherwise
|
||||||
// construct DismissedLayers to get rid of them.
|
// construct DismissedLayers to get rid of them.
|
||||||
for (layer_name, local_metadata) in local_layers {
|
for (layer_name, local_metadata) in local_layers {
|
||||||
|
// FIXME: This should probably take generation into account. Currently it's possible to have
|
||||||
|
// an old generation file on disk while a newer one with same name is in index (because
|
||||||
|
// primary just split shard) and we miss that here. We are saved by the check below because
|
||||||
|
// the file size is very likely to be different (and if it isn't then the file contents will
|
||||||
|
// probably be the same anyway in case of shard split), but it's confusing that this logic
|
||||||
|
// doesn't account for name collisions from older generations. Ideally, we should consider a
|
||||||
|
// local file from an older generation than the one in the index to be a different file and
|
||||||
|
// return `DismissedLayer::LocalOnly` if generations don't match. Right now though,
|
||||||
|
// layer_name has the generation part stripped so we'd need to re-parse the generation from
|
||||||
|
// the file name here or back in scan_timeline_dir and add it to LocalLayerFileMetadata.
|
||||||
let Some(remote_metadata) = index_part.layer_metadata.get(&layer_name) else {
|
let Some(remote_metadata) = index_part.layer_metadata.get(&layer_name) else {
|
||||||
result.push((layer_name, Err(DismissedLayer::LocalOnly(local_metadata))));
|
result.push((layer_name, Err(DismissedLayer::LocalOnly(local_metadata))));
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@@ -645,8 +645,13 @@ impl OpenLayerManager {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) fn force_insert_layer(&mut self, layer: ResidentLayer) {
|
pub(crate) fn force_insert_layer(&mut self, layer: ResidentLayer) {
|
||||||
|
self.force_insert_optionally_resident_layer(layer.as_ref().clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn force_insert_optionally_resident_layer(&mut self, layer: Layer) {
|
||||||
let mut updates = self.layer_map.batch_update();
|
let mut updates = self.layer_map.batch_update();
|
||||||
Self::insert_historic_layer(layer.as_ref().clone(), &mut updates, &mut self.layer_fmgr);
|
Self::insert_historic_layer(layer, &mut updates, &mut self.layer_fmgr);
|
||||||
updates.flush()
|
updates.flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user