mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-05 06:20:37 +00:00
feat(pageserver): preempt image layer generation if L0 piles up (#10572)
## Problem Image layer generation could block L0 compactions for a long time. ## Summary of changes * Refactored the return value of `create_image_layers_for_*` functions to make it self-explainable. * Preempt image layer generation in `Try` mode if L0 piles up. Note that we might potentially run into a state that only the beginning part of the keyspace gets image coverage. In that case, we either need to implement something to prioritize some keyspaces with image coverage, or tune the image_creation_threshold to ensure that the frequency of image creation could keep up with L0 compaction. --------- Signed-off-by: Alex Chi Z <chi@neon.tech> Co-authored-by: Erik Grinaker <erik@neon.tech>
This commit is contained in:
@@ -388,6 +388,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u8>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_check_threshold' as integer")?,
|
||||
image_creation_preempt_threshold: settings
|
||||
.remove("image_creation_preempt_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
|
||||
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.remove("walreceiver_connect_timeout")
|
||||
|
||||
@@ -323,6 +323,10 @@ pub struct TenantConfigToml {
|
||||
// Expresed in multiples of checkpoint distance.
|
||||
pub image_layer_creation_check_threshold: u8,
|
||||
|
||||
// How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
|
||||
// Set to 0 to disable preemption.
|
||||
pub image_creation_preempt_threshold: usize,
|
||||
|
||||
/// The length for an explicit LSN lease request.
|
||||
/// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
|
||||
#[serde(with = "humantime_serde")]
|
||||
@@ -547,6 +551,10 @@ pub mod tenant_conf_defaults {
|
||||
// Relevant: https://github.com/neondatabase/neon/issues/3394
|
||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
|
||||
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
|
||||
// want to enable this feature.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
|
||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
||||
@@ -605,6 +613,7 @@ impl Default for TenantConfigToml {
|
||||
lazy_slru_download: false,
|
||||
timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
|
||||
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
|
||||
image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
|
||||
lsn_lease_length: LsnLease::DEFAULT_LENGTH,
|
||||
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
|
||||
timeline_offloading: false,
|
||||
|
||||
@@ -498,6 +498,8 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub image_layer_creation_check_threshold: FieldPatch<u8>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub image_creation_preempt_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lsn_lease_length: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lsn_lease_length_for_ts: FieldPatch<String>,
|
||||
@@ -544,6 +546,7 @@ pub struct TenantConfig {
|
||||
pub lazy_slru_download: Option<bool>,
|
||||
pub timeline_get_throttle: Option<ThrottleConfig>,
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
pub image_creation_preempt_threshold: Option<usize>,
|
||||
pub lsn_lease_length: Option<String>,
|
||||
pub lsn_lease_length_for_ts: Option<String>,
|
||||
pub timeline_offloading: Option<bool>,
|
||||
@@ -581,6 +584,7 @@ impl TenantConfig {
|
||||
mut lazy_slru_download,
|
||||
mut timeline_get_throttle,
|
||||
mut image_layer_creation_check_threshold,
|
||||
mut image_creation_preempt_threshold,
|
||||
mut lsn_lease_length,
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
@@ -635,6 +639,9 @@ impl TenantConfig {
|
||||
patch
|
||||
.image_layer_creation_check_threshold
|
||||
.apply(&mut image_layer_creation_check_threshold);
|
||||
patch
|
||||
.image_creation_preempt_threshold
|
||||
.apply(&mut image_creation_preempt_threshold);
|
||||
patch.lsn_lease_length.apply(&mut lsn_lease_length);
|
||||
patch
|
||||
.lsn_lease_length_for_ts
|
||||
@@ -679,6 +686,7 @@ impl TenantConfig {
|
||||
lazy_slru_download,
|
||||
timeline_get_throttle,
|
||||
image_layer_creation_check_threshold,
|
||||
image_creation_preempt_threshold,
|
||||
lsn_lease_length,
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
|
||||
|
||||
@@ -5486,6 +5486,9 @@ pub(crate) mod harness {
|
||||
image_layer_creation_check_threshold: Some(
|
||||
tenant_conf.image_layer_creation_check_threshold,
|
||||
),
|
||||
image_creation_preempt_threshold: Some(
|
||||
tenant_conf.image_creation_preempt_threshold,
|
||||
),
|
||||
lsn_lease_length: Some(tenant_conf.lsn_lease_length),
|
||||
lsn_lease_length_for_ts: Some(tenant_conf.lsn_lease_length_for_ts),
|
||||
timeline_offloading: Some(tenant_conf.timeline_offloading),
|
||||
|
||||
@@ -357,6 +357,9 @@ pub struct TenantConfOpt {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_creation_preempt_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(with = "humantime_serde")]
|
||||
#[serde(default)]
|
||||
@@ -453,6 +456,9 @@ impl TenantConfOpt {
|
||||
image_layer_creation_check_threshold: self
|
||||
.image_layer_creation_check_threshold
|
||||
.unwrap_or(global_conf.image_layer_creation_check_threshold),
|
||||
image_creation_preempt_threshold: self
|
||||
.image_creation_preempt_threshold
|
||||
.unwrap_or(global_conf.image_creation_preempt_threshold),
|
||||
lsn_lease_length: self
|
||||
.lsn_lease_length
|
||||
.unwrap_or(global_conf.lsn_lease_length),
|
||||
@@ -504,6 +510,7 @@ impl TenantConfOpt {
|
||||
mut lazy_slru_download,
|
||||
mut timeline_get_throttle,
|
||||
mut image_layer_creation_check_threshold,
|
||||
mut image_creation_preempt_threshold,
|
||||
mut lsn_lease_length,
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
@@ -578,6 +585,9 @@ impl TenantConfOpt {
|
||||
patch
|
||||
.image_layer_creation_check_threshold
|
||||
.apply(&mut image_layer_creation_check_threshold);
|
||||
patch
|
||||
.image_creation_preempt_threshold
|
||||
.apply(&mut image_creation_preempt_threshold);
|
||||
patch
|
||||
.lsn_lease_length
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
@@ -626,6 +636,7 @@ impl TenantConfOpt {
|
||||
lazy_slru_download,
|
||||
timeline_get_throttle,
|
||||
image_layer_creation_check_threshold,
|
||||
image_creation_preempt_threshold,
|
||||
lsn_lease_length,
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
|
||||
@@ -689,6 +700,7 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
lazy_slru_download: value.lazy_slru_download,
|
||||
timeline_get_throttle: value.timeline_get_throttle,
|
||||
image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
|
||||
image_creation_preempt_threshold: value.image_creation_preempt_threshold,
|
||||
lsn_lease_length: value.lsn_lease_length.map(humantime),
|
||||
lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime),
|
||||
timeline_offloading: value.timeline_offloading,
|
||||
|
||||
@@ -166,6 +166,10 @@ impl BatchLayerWriter {
|
||||
// END: catch every error and do the recovery in the above section
|
||||
Ok(generated_layers)
|
||||
}
|
||||
|
||||
pub fn pending_layer_num(&self) -> usize {
|
||||
self.generated_layer_writers.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// An image writer that takes images and produces multiple image layers.
|
||||
|
||||
@@ -189,6 +189,14 @@ pub enum ImageLayerCreationMode {
|
||||
Initial,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub enum LastImageLayerCreationStatus {
|
||||
Incomplete, // TODO: record the last key being processed
|
||||
Complete,
|
||||
#[default]
|
||||
Initial,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ImageLayerCreationMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
@@ -347,6 +355,8 @@ pub struct Timeline {
|
||||
// garbage collecting data that is still needed by the child timelines.
|
||||
pub(crate) gc_info: std::sync::RwLock<GcInfo>,
|
||||
|
||||
pub(crate) last_image_layer_creation_status: ArcSwap<LastImageLayerCreationStatus>,
|
||||
|
||||
// It may change across major versions so for simplicity
|
||||
// keep it after running initdb for a timeline.
|
||||
// It is needed in checks when we want to error on some operations
|
||||
@@ -936,9 +946,16 @@ pub(crate) enum ShutdownMode {
|
||||
Hard,
|
||||
}
|
||||
|
||||
struct ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: Option<ImageLayerWriter>,
|
||||
next_start_key: Key,
|
||||
enum ImageLayerCreationOutcome {
|
||||
/// We generated an image layer
|
||||
Generated {
|
||||
unfinished_image_layer: ImageLayerWriter,
|
||||
},
|
||||
/// The key range is empty
|
||||
Empty,
|
||||
/// (Only used in metadata image layer creation), after reading the metadata keys, we decide to skip
|
||||
/// the image layer creation.
|
||||
Skip,
|
||||
}
|
||||
|
||||
/// Public interface functions
|
||||
@@ -2349,6 +2366,18 @@ impl Timeline {
|
||||
)
|
||||
}
|
||||
|
||||
fn get_image_creation_preempt_threshold(&self) -> usize {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
.image_creation_preempt_threshold
|
||||
.unwrap_or(
|
||||
self.conf
|
||||
.default_tenant_conf
|
||||
.image_creation_preempt_threshold,
|
||||
)
|
||||
}
|
||||
|
||||
/// Resolve the effective WAL receiver protocol to use for this tenant.
|
||||
///
|
||||
/// Priority order is:
|
||||
@@ -2499,6 +2528,10 @@ impl Timeline {
|
||||
|
||||
gc_info: std::sync::RwLock::new(GcInfo::default()),
|
||||
|
||||
last_image_layer_creation_status: ArcSwap::new(Arc::new(
|
||||
LastImageLayerCreationStatus::default(),
|
||||
)),
|
||||
|
||||
latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
|
||||
initdb_lsn: metadata.initdb_lsn(),
|
||||
|
||||
@@ -4042,15 +4075,20 @@ impl Timeline {
|
||||
}
|
||||
|
||||
let mut layers_to_upload = Vec::new();
|
||||
layers_to_upload.extend(
|
||||
self.create_image_layers(
|
||||
let (generated_image_layers, is_complete) = self
|
||||
.create_image_layers(
|
||||
&partitions,
|
||||
self.initdb_lsn,
|
||||
ImageLayerCreationMode::Initial,
|
||||
ctx,
|
||||
LastImageLayerCreationStatus::Initial,
|
||||
)
|
||||
.await?,
|
||||
.await?;
|
||||
debug_assert!(
|
||||
matches!(is_complete, LastImageLayerCreationStatus::Complete),
|
||||
"init image generation mode must fully cover the keyspace"
|
||||
);
|
||||
layers_to_upload.extend(generated_image_layers);
|
||||
|
||||
(layers_to_upload, None)
|
||||
} else {
|
||||
@@ -4370,7 +4408,6 @@ impl Timeline {
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
img_range: Range<Key>,
|
||||
start: Key,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {
|
||||
let mut wrote_keys = false;
|
||||
@@ -4458,26 +4495,23 @@ impl Timeline {
|
||||
lsn
|
||||
},
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: Some(image_layer_writer),
|
||||
next_start_key: img_range.end,
|
||||
Ok(ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer: image_layer_writer,
|
||||
})
|
||||
} else {
|
||||
// Special case: the image layer may be empty if this is a sharded tenant and the
|
||||
// partition does not cover any keys owned by this shard. In this case, to ensure
|
||||
// we don't leave gaps between image layers, leave `start` where it is, so that the next
|
||||
// layer we write will cover the key range that we just scanned.
|
||||
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: start,
|
||||
})
|
||||
Ok(ImageLayerCreationOutcome::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an image layer for metadata keys. This function produces one image layer for all metadata
|
||||
/// keys for now. Because metadata keys cannot exceed basebackup size limit, the image layer for it
|
||||
/// would not be too large to fit in a single image layer.
|
||||
///
|
||||
/// Creating image layers for metadata keys are different from relational keys. Firstly, instead of
|
||||
/// iterating each key and get an image for each of them, we do a `vectored_get` scan over the sparse
|
||||
/// keyspace to get all images in one run. Secondly, we use a different image layer generation metrics
|
||||
/// for metadata keys than relational keys, which is the number of delta files visited during the scan.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn create_image_layer_for_metadata_keys(
|
||||
self: &Arc<Self>,
|
||||
@@ -4487,12 +4521,13 @@ impl Timeline {
|
||||
ctx: &RequestContext,
|
||||
img_range: Range<Key>,
|
||||
mode: ImageLayerCreationMode,
|
||||
start: Key,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<ImageLayerCreationOutcome, CreateImageLayersError> {
|
||||
// Metadata keys image layer creation.
|
||||
let mut reconstruct_state = ValuesReconstructState::new(io_concurrency);
|
||||
let begin = Instant::now();
|
||||
// Directly use `get_vectored_impl` to skip the max_vectored_read_key limit check. Note that the keyspace should
|
||||
// not contain too many keys, otherwise this takes a lot of memory.
|
||||
let data = self
|
||||
.get_vectored_impl(partition.clone(), lsn, &mut reconstruct_state, ctx)
|
||||
.await?;
|
||||
@@ -4517,10 +4552,7 @@ impl Timeline {
|
||||
);
|
||||
|
||||
if !trigger_generation && mode == ImageLayerCreationMode::Try {
|
||||
return Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: img_range.end,
|
||||
});
|
||||
return Ok(ImageLayerCreationOutcome::Skip);
|
||||
}
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(CreateImageLayersError::Cancelled);
|
||||
@@ -4551,20 +4583,12 @@ impl Timeline {
|
||||
lsn
|
||||
}
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: Some(image_layer_writer),
|
||||
next_start_key: img_range.end,
|
||||
Ok(ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer: image_layer_writer,
|
||||
})
|
||||
} else {
|
||||
// Special case: the image layer may be empty if this is a sharded tenant and the
|
||||
// partition does not cover any keys owned by this shard. In this case, to ensure
|
||||
// we don't leave gaps between image layers, leave `start` where it is, so that the next
|
||||
// layer we write will cover the key range that we just scanned.
|
||||
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: start,
|
||||
})
|
||||
Ok(ImageLayerCreationOutcome::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4620,6 +4644,8 @@ impl Timeline {
|
||||
decision
|
||||
}
|
||||
|
||||
/// Returns the image layers generated and an enum indicating whether the process is fully completed.
|
||||
/// true = we have generate all image layers, false = we preempt the process for L0 compaction.
|
||||
#[tracing::instrument(skip_all, fields(%lsn, %mode))]
|
||||
async fn create_image_layers(
|
||||
self: &Arc<Timeline>,
|
||||
@@ -4627,7 +4653,8 @@ impl Timeline {
|
||||
lsn: Lsn,
|
||||
mode: ImageLayerCreationMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Vec<ResidentLayer>, CreateImageLayersError> {
|
||||
last_status: LastImageLayerCreationStatus,
|
||||
) -> Result<(Vec<ResidentLayer>, LastImageLayerCreationStatus), CreateImageLayersError> {
|
||||
let timer = self.metrics.create_images_time_histo.start_timer();
|
||||
|
||||
// We need to avoid holes between generated image layers.
|
||||
@@ -4641,10 +4668,23 @@ impl Timeline {
|
||||
// image layers <100000000..100000099> and <200000000..200000199> are not completely covering it.
|
||||
let mut start = Key::MIN;
|
||||
|
||||
let check_for_image_layers = self.should_check_if_image_layers_required(lsn);
|
||||
let check_for_image_layers = if let LastImageLayerCreationStatus::Incomplete = last_status {
|
||||
info!(
|
||||
"resuming image layer creation: last_status={:?}",
|
||||
last_status
|
||||
);
|
||||
true
|
||||
} else {
|
||||
self.should_check_if_image_layers_required(lsn)
|
||||
};
|
||||
|
||||
let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;
|
||||
|
||||
let mut all_generated = true;
|
||||
|
||||
let mut partition_processed = 0;
|
||||
let total_partitions = partitioning.parts.len();
|
||||
|
||||
for partition in partitioning.parts.iter() {
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(CreateImageLayersError::Cancelled);
|
||||
@@ -4717,17 +4757,13 @@ impl Timeline {
|
||||
.map_err(|_| CreateImageLayersError::Cancelled)?,
|
||||
);
|
||||
|
||||
let ImageLayerCreationOutcome {
|
||||
unfinished_image_layer,
|
||||
next_start_key,
|
||||
} = if !compact_metadata {
|
||||
let outcome = if !compact_metadata {
|
||||
self.create_image_layer_for_rel_blocks(
|
||||
partition,
|
||||
image_layer_writer,
|
||||
lsn,
|
||||
ctx,
|
||||
img_range.clone(),
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?
|
||||
@@ -4739,18 +4775,58 @@ impl Timeline {
|
||||
ctx,
|
||||
img_range.clone(),
|
||||
mode,
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
start = next_start_key;
|
||||
if let Some(unfinished_image_layer) = unfinished_image_layer {
|
||||
batch_image_writer.add_unfinished_image_writer(
|
||||
match outcome {
|
||||
ImageLayerCreationOutcome::Empty => {
|
||||
// No data in this partition, so we don't need to create an image layer (for now).
|
||||
// The next image layer should cover this key range, so we don't advance the `start`
|
||||
// key.
|
||||
}
|
||||
ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer,
|
||||
img_range,
|
||||
lsn,
|
||||
);
|
||||
} => {
|
||||
batch_image_writer.add_unfinished_image_writer(
|
||||
unfinished_image_layer,
|
||||
img_range.clone(),
|
||||
lsn,
|
||||
);
|
||||
// The next image layer should be generated right after this one.
|
||||
start = img_range.end;
|
||||
}
|
||||
ImageLayerCreationOutcome::Skip => {
|
||||
// We don't need to create an image layer for this partition.
|
||||
// The next image layer should NOT cover this range, otherwise
|
||||
// the keyspace becomes empty (reads don't go past image layers).
|
||||
start = img_range.end;
|
||||
}
|
||||
}
|
||||
|
||||
partition_processed += 1;
|
||||
|
||||
if let ImageLayerCreationMode::Try = mode {
|
||||
// We have at least made some progress
|
||||
if batch_image_writer.pending_layer_num() >= 1 {
|
||||
// The `Try` mode is currently only used on the compaction path. We want to avoid
|
||||
// image layer generation taking too long time and blocking L0 compaction. So in this
|
||||
// mode, we also inspect the current number of L0 layers and skip image layer generation
|
||||
// if there are too many of them.
|
||||
let num_of_l0_layers = {
|
||||
let layers = self.layers.read().await;
|
||||
layers.layer_map()?.level0_deltas().len()
|
||||
};
|
||||
let image_preempt_threshold = self.get_image_creation_preempt_threshold()
|
||||
* self.get_compaction_threshold();
|
||||
if image_preempt_threshold != 0 && num_of_l0_layers >= image_preempt_threshold {
|
||||
tracing::info!(
|
||||
"preempt image layer generation at {start} at {lsn}: too many L0 layers {num_of_l0_layers}",
|
||||
);
|
||||
all_generated = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4765,14 +4841,35 @@ impl Timeline {
|
||||
.open_mut()?
|
||||
.track_new_image_layers(&image_layers, &self.metrics);
|
||||
drop_wlock(guard);
|
||||
timer.stop_and_record();
|
||||
let duration = timer.stop_and_record();
|
||||
|
||||
// Creating image layers may have caused some previously visible layers to be covered
|
||||
if !image_layers.is_empty() {
|
||||
self.update_layer_visibility().await?;
|
||||
}
|
||||
|
||||
Ok(image_layers)
|
||||
let total_layer_size = image_layers
|
||||
.iter()
|
||||
.map(|l| l.metadata().file_size)
|
||||
.sum::<u64>();
|
||||
|
||||
info!(
|
||||
"created {} image layers ({} bytes) in {}s, processed {} out of {} partitions",
|
||||
image_layers.len(),
|
||||
total_layer_size,
|
||||
duration.as_secs_f64(),
|
||||
partition_processed,
|
||||
total_partitions
|
||||
);
|
||||
|
||||
Ok((
|
||||
image_layers,
|
||||
if all_generated {
|
||||
LastImageLayerCreationStatus::Complete
|
||||
} else {
|
||||
LastImageLayerCreationStatus::Incomplete
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
/// Wait until the background initial logical size calculation is complete, or
|
||||
|
||||
@@ -11,7 +11,7 @@ use std::sync::Arc;
|
||||
use super::layer_manager::LayerManager;
|
||||
use super::{
|
||||
CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode,
|
||||
RecordedDuration, Timeline,
|
||||
LastImageLayerCreationStatus, RecordedDuration, Timeline,
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
@@ -709,7 +709,7 @@ impl Timeline {
|
||||
.extend(sparse_partitioning.into_dense().parts);
|
||||
|
||||
// 3. Create new image layers for partitions that have been modified "enough".
|
||||
let image_layers = self
|
||||
let (image_layers, outcome) = self
|
||||
.create_image_layers(
|
||||
&partitioning,
|
||||
lsn,
|
||||
@@ -722,10 +722,22 @@ impl Timeline {
|
||||
ImageLayerCreationMode::Try
|
||||
},
|
||||
&image_ctx,
|
||||
self.last_image_layer_creation_status
|
||||
.load()
|
||||
.as_ref()
|
||||
.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
self.last_image_layer_creation_status
|
||||
.store(Arc::new(outcome.clone()));
|
||||
|
||||
self.upload_new_image_layers(image_layers)?;
|
||||
if let LastImageLayerCreationStatus::Incomplete = outcome {
|
||||
// Yield and do not do any other kind of compaction.
|
||||
info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
|
||||
return Ok(true);
|
||||
}
|
||||
partitioning.parts.len()
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -3232,11 +3244,7 @@ impl TimelineAdaptor {
|
||||
ranges: self.get_keyspace(key_range, lsn, ctx).await?,
|
||||
};
|
||||
// TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly
|
||||
let start = Key::MIN;
|
||||
let ImageLayerCreationOutcome {
|
||||
unfinished_image_layer,
|
||||
next_start_key: _,
|
||||
} = self
|
||||
let outcome = self
|
||||
.timeline
|
||||
.create_image_layer_for_rel_blocks(
|
||||
&keyspace,
|
||||
@@ -3244,13 +3252,15 @@ impl TimelineAdaptor {
|
||||
lsn,
|
||||
ctx,
|
||||
key_range.clone(),
|
||||
start,
|
||||
IoConcurrency::sequential(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(image_layer_writer) = unfinished_image_layer {
|
||||
let (desc, path) = image_layer_writer.finish(ctx).await?;
|
||||
if let ImageLayerCreationOutcome::Generated {
|
||||
unfinished_image_layer,
|
||||
} = outcome
|
||||
{
|
||||
let (desc, path) = unfinished_image_layer.finish(ctx).await?;
|
||||
let image_layer =
|
||||
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;
|
||||
self.new_images.push(image_layer);
|
||||
|
||||
@@ -184,6 +184,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"gc_compaction_enabled": True,
|
||||
"gc_compaction_initial_threshold_kb": 1024000,
|
||||
"gc_compaction_ratio_percent": 200,
|
||||
"image_creation_preempt_threshold": 5,
|
||||
}
|
||||
|
||||
vps_http = env.storage_controller.pageserver_api()
|
||||
|
||||
Reference in New Issue
Block a user