Merge 2025-04-09 main commit 'd11f23a3419a5b8eef62bc5736a4dd9d413bdab8' into yuchen/direct-io-delta-image-layer-write

This commit is contained in:
Christian Schwarz
2025-04-11 16:18:59 +02:00
2 changed files with 72 additions and 52 deletions

View File

@@ -115,7 +115,7 @@ use crate::pgdatadir_mapping::{
use crate::task_mgr::TaskKind;
use crate::tenant::config::AttachmentMode;
use crate::tenant::gc_result::GcResult;
use crate::tenant::layer_map::{LayerMap, SearchResult};
use crate::tenant::layer_map::LayerMap;
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::storage_layer::delta_layer::DeltaEntry;
use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
@@ -4104,12 +4104,6 @@ impl Timeline {
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<TimelineVisitOutcome, GetVectoredError> {
let mut unmapped_keyspace = keyspace.clone();
let mut fringe = LayerFringe::new();
let mut completed_keyspace = KeySpace::default();
let mut image_covered_keyspace = KeySpaceRandomAccum::new();
// Prevent GC from progressing while visiting the current timeline.
// If we are GC-ing because a new image layer was added while traversing
// the timeline, then it will remove layers that are required for fulfilling
@@ -4120,11 +4114,44 @@ impl Timeline {
// See `compaction::compact_with_gc` for why we need this.
let _guard = timeline.gc_compaction_layer_update_lock.read().await;
loop {
// Initialize the fringe
let mut fringe = {
let mut fringe = LayerFringe::new();
let guard = timeline.layers.read().await;
guard.update_search_fringe(&keyspace, cont_lsn, &mut fringe)?;
fringe
};
let mut completed_keyspace = KeySpace::default();
let mut image_covered_keyspace = KeySpaceRandomAccum::new();
while let Some((layer_to_read, keyspace_to_read, lsn_range)) = fringe.next_layer() {
if cancel.is_cancelled() {
return Err(GetVectoredError::Cancelled);
}
if let Some(ref mut read_path) = reconstruct_state.read_path {
read_path.record_layer_visit(&layer_to_read, &keyspace_to_read, &lsn_range);
}
// Visit the layer and plan IOs for it
let next_cont_lsn = lsn_range.start;
layer_to_read
.get_values_reconstruct_data(
keyspace_to_read.clone(),
lsn_range,
reconstruct_state,
ctx,
)
.await?;
let mut unmapped_keyspace = keyspace_to_read;
cont_lsn = next_cont_lsn;
reconstruct_state.on_layer_visited(&layer_to_read);
let (keys_done_last_step, keys_with_image_coverage) =
reconstruct_state.consume_done_keys();
unmapped_keyspace.remove_overlapping_with(&keys_done_last_step);
@@ -4135,31 +4162,15 @@ impl Timeline {
image_covered_keyspace.add_range(keys_with_image_coverage);
}
// Query the layer map for the next layers to read.
//
// Do not descent any further if the last layer we visited
// completed all keys in the keyspace it inspected. This is not
// required for correctness, but avoids visiting extra layers
// which turns out to be a perf bottleneck in some cases.
if !unmapped_keyspace.is_empty() {
let guard = timeline.layers.read().await;
let layers = guard.layer_map()?;
for range in unmapped_keyspace.ranges.iter() {
let results = layers.range_search(range.clone(), cont_lsn);
results
.found
.into_iter()
.map(|(SearchResult { layer, lsn_floor }, keyspace_accum)| {
(
guard.upgrade(layer),
keyspace_accum.to_keyspace(),
lsn_floor..cont_lsn,
)
})
.for_each(|(layer, keyspace, lsn_range)| {
fringe.update(layer, keyspace, lsn_range)
});
}
guard.update_search_fringe(&unmapped_keyspace, cont_lsn, &mut fringe)?;
// It's safe to drop the layer map lock after planning the next round of reads.
// The fringe keeps readable handles for the layers which are safe to read even
@@ -4173,28 +4184,6 @@ impl Timeline {
// at two different time points.
drop(guard);
}
if let Some((layer_to_read, keyspace_to_read, lsn_range)) = fringe.next_layer() {
if let Some(ref mut read_path) = reconstruct_state.read_path {
read_path.record_layer_visit(&layer_to_read, &keyspace_to_read, &lsn_range);
}
let next_cont_lsn = lsn_range.start;
layer_to_read
.get_values_reconstruct_data(
keyspace_to_read.clone(),
lsn_range,
reconstruct_state,
ctx,
)
.await?;
unmapped_keyspace = keyspace_to_read;
cont_lsn = next_cont_lsn;
reconstruct_state.on_layer_visited(&layer_to_read);
} else {
break;
}
}
Ok(TimelineVisitOutcome {

View File

@@ -3,17 +3,18 @@ use std::sync::Arc;
use anyhow::{Context, bail, ensure};
use itertools::Itertools;
use pageserver_api::keyspace::KeySpace;
use pageserver_api::shard::TenantShardId;
use tokio_util::sync::CancellationToken;
use tracing::trace;
use utils::id::TimelineId;
use utils::lsn::{AtomicLsn, Lsn};
use super::{ReadableLayer, TimelineWriterState};
use super::{LayerFringe, ReadableLayer, TimelineWriterState};
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::metrics::TimelineMetrics;
use crate::tenant::layer_map::{BatchedUpdates, LayerMap};
use crate::tenant::layer_map::{BatchedUpdates, LayerMap, SearchResult};
use crate::tenant::storage_layer::{
AsLayerDesc, InMemoryLayer, Layer, LayerVisibilityHint, PersistentLayerDesc,
PersistentLayerKey, ReadableLayerWeak, ResidentLayer,
@@ -38,7 +39,7 @@ impl Default for LayerManager {
}
impl LayerManager {
pub(crate) fn upgrade(&self, weak: ReadableLayerWeak) -> ReadableLayer {
fn upgrade(&self, weak: ReadableLayerWeak) -> ReadableLayer {
match weak {
ReadableLayerWeak::PersistentLayer(desc) => {
ReadableLayer::PersistentLayer(self.get_from_desc(&desc))
@@ -147,6 +148,36 @@ impl LayerManager {
self.layers().keys().cloned().collect_vec()
}
/// Update the [`LayerFringe`] of a read request
///
/// Take a key space at a given LSN and query the layer map below each range
/// of the key space to find the next layers to visit.
pub(crate) fn update_search_fringe(
&self,
keyspace: &KeySpace,
cont_lsn: Lsn,
fringe: &mut LayerFringe,
) -> Result<(), Shutdown> {
let map = self.layer_map()?;
for range in keyspace.ranges.iter() {
let results = map.range_search(range.clone(), cont_lsn);
results
.found
.into_iter()
.map(|(SearchResult { layer, lsn_floor }, keyspace_accum)| {
(
self.upgrade(layer),
keyspace_accum.to_keyspace(),
lsn_floor..cont_lsn,
)
})
.for_each(|(layer, keyspace, lsn_range)| fringe.update(layer, keyspace, lsn_range));
}
Ok(())
}
fn layers(&self) -> &HashMap<PersistentLayerKey, Layer> {
use LayerManager::*;
match self {