feat(pageserver): revisit error types for gc-compaction (#11082)

## Problem

part of https://github.com/neondatabase/neon/issues/9114

We used `anyhow::Error` everywhere, and it's time to fix that.

## Summary of changes

* Make sure that cancel errors are correctly propagated as
CompactionError::ShuttingDown.
* Skip all the trigger computation work if gc_cutoff is not generated
yet.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-03-05 10:57:38 -05:00
committed by GitHub
parent 2d45522fa6
commit 9cdc8c0e6c

View File

@@ -15,7 +15,7 @@ use super::{
Timeline,
};
use anyhow::{Context, anyhow, bail};
use anyhow::{Context, anyhow};
use bytes::Bytes;
use enumset::EnumSet;
use fail::fail_point;
@@ -234,6 +234,12 @@ impl GcCompactionQueue {
// it enough in staging yet.
return Ok(());
}
if timeline.get_gc_compaction_watermark() == Lsn::INVALID {
// If the gc watermark is not set, we don't need to trigger auto compaction.
// This check is the same as in `gc_compaction_split_jobs` but we don't log
// here and we can also skip the computation of the trigger condition earlier.
return Ok(());
}
let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else {
// Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure
@@ -357,8 +363,7 @@ impl GcCompactionQueue {
GcCompactJob::from_compact_options(options.clone()),
options.sub_compaction_max_job_size_mb,
)
.await
.map_err(CompactionError::Other)?;
.await?;
if jobs.is_empty() {
info!("no jobs to run, skipping scheduled compaction task");
self.notify_and_unblock(id);
@@ -825,9 +830,7 @@ impl Timeline {
.flags
.contains(CompactFlags::EnhancedGcBottomMostCompaction)
{
self.compact_with_gc(cancel, options, ctx)
.await
.map_err(CompactionError::Other)?;
self.compact_with_gc(cancel, options, ctx).await?;
return Ok(CompactionOutcome::Done);
}
@@ -2345,12 +2348,19 @@ impl Timeline {
async fn check_compaction_space(
self: &Arc<Self>,
layer_selection: &[Layer],
) -> anyhow::Result<()> {
let available_space = self.check_available_space().await?;
) -> Result<(), CompactionError> {
let available_space = self
.check_available_space()
.await
.map_err(CompactionError::Other)?;
let mut remote_layer_size = 0;
let mut all_layer_size = 0;
for layer in layer_selection {
let needs_download = layer.needs_download().await?;
let needs_download = layer
.needs_download()
.await
.context("failed to check if layer needs download")
.map_err(CompactionError::Other)?;
if needs_download.is_some() {
remote_layer_size += layer.layer_desc().file_size;
}
@@ -2359,14 +2369,14 @@ impl Timeline {
let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */
if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space
{
return Err(anyhow!(
return Err(CompactionError::Other(anyhow!(
"not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}",
available_space,
allocated_space,
all_layer_size,
remote_layer_size,
all_layer_size + remote_layer_size
));
)));
}
Ok(())
}
@@ -2397,7 +2407,7 @@ impl Timeline {
self: &Arc<Self>,
job: GcCompactJob,
sub_compaction_max_job_size_mb: Option<u64>,
) -> anyhow::Result<Vec<GcCompactJob>> {
) -> Result<Vec<GcCompactJob>, CompactionError> {
let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
job.compact_lsn_range.end
} else {
@@ -2548,7 +2558,7 @@ impl Timeline {
cancel: &CancellationToken,
options: CompactOptions,
ctx: &RequestContext,
) -> anyhow::Result<()> {
) -> Result<(), CompactionError> {
let sub_compaction = options.sub_compaction;
let job = GcCompactJob::from_compact_options(options.clone());
if sub_compaction {
@@ -2580,7 +2590,7 @@ impl Timeline {
cancel: &CancellationToken,
job: GcCompactJob,
ctx: &RequestContext,
) -> anyhow::Result<()> {
) -> Result<(), CompactionError> {
// Block other compaction/GC tasks from running for now. GC-compaction could run along
// with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
// Note that we already acquired the compaction lock when the outer `compact` function gets called.
@@ -2588,8 +2598,7 @@ impl Timeline {
let gc_lock = async {
tokio::select! {
guard = self.gc_lock.lock() => Ok(guard),
// TODO: refactor to CompactionError to correctly pass cancelled error
_ = cancel.cancelled() => Err(anyhow!("cancelled")),
_ = cancel.cancelled() => Err(CompactionError::ShuttingDown),
}
};
@@ -2810,10 +2819,10 @@ impl Timeline {
.map(|layer| layer.layer_desc().layer_name())
.collect_vec();
if let Some(err) = check_valid_layermap(&layer_names) {
bail!(
return Err(CompactionError::Other(anyhow!(
"gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss",
err
);
)));
}
// The maximum LSN we are processing in this compaction loop
let end_lsn = job_desc
@@ -2828,11 +2837,24 @@ impl Timeline {
let mut total_downloaded_size = 0;
let mut total_layer_size = 0;
for layer in &job_desc.selected_layers {
if layer.needs_download().await?.is_some() {
if layer
.needs_download()
.await
.context("failed to check if layer needs download")
.map_err(CompactionError::Other)?
.is_some()
{
total_downloaded_size += layer.layer_desc().file_size;
}
total_layer_size += layer.layer_desc().file_size;
let resident_layer = layer.download_and_keep_resident(ctx).await?;
if cancel.is_cancelled() {
return Err(CompactionError::ShuttingDown);
}
let resident_layer = layer
.download_and_keep_resident(ctx)
.await
.context("failed to download and keep resident layer")
.map_err(CompactionError::Other)?;
downloaded_layers.push(resident_layer);
}
info!(
@@ -2843,19 +2865,33 @@ impl Timeline {
);
for resident_layer in &downloaded_layers {
if resident_layer.layer_desc().is_delta() {
let layer = resident_layer.get_as_delta(ctx).await?;
let layer = resident_layer
.get_as_delta(ctx)
.await
.context("failed to get delta layer")
.map_err(CompactionError::Other)?;
delta_layers.push(layer);
} else {
let layer = resident_layer.get_as_image(ctx).await?;
let layer = resident_layer
.get_as_image(ctx)
.await
.context("failed to get image layer")
.map_err(CompactionError::Other)?;
image_layers.push(layer);
}
}
let (dense_ks, sparse_ks) = self.collect_gc_compaction_keyspace().await?;
let (dense_ks, sparse_ks) = self
.collect_gc_compaction_keyspace()
.await
.context("failed to collect gc compaction keyspace")
.map_err(CompactionError::Other)?;
let mut merge_iter = FilterIterator::create(
MergeIterator::create(&delta_layers, &image_layers, ctx),
dense_ks,
sparse_ks,
)?;
)
.context("failed to create filter iterator")
.map_err(CompactionError::Other)?;
// Step 2: Produce images+deltas.
let mut accumulated_values = Vec::new();
@@ -2874,7 +2910,9 @@ impl Timeline {
self.get_compaction_target_size(),
ctx,
)
.await?,
.await
.context("failed to create image layer writer")
.map_err(CompactionError::Other)?,
)
} else {
None
@@ -2887,7 +2925,9 @@ impl Timeline {
lowest_retain_lsn..end_lsn,
self.get_compaction_target_size(),
)
.await?;
.await
.context("failed to create delta layer writer")
.map_err(CompactionError::Other)?;
#[derive(Default)]
struct RewritingLayers {
@@ -2927,9 +2967,14 @@ impl Timeline {
// the key and LSN range are determined. However, to keep things simple here, we still
// create this writer, and discard the writer in the end.
while let Some(((key, lsn, val), desc)) = merge_iter.next_with_trace().await? {
while let Some(((key, lsn, val), desc)) = merge_iter
.next_with_trace()
.await
.context("failed to get next key-value pair")
.map_err(CompactionError::Other)?
{
if cancel.is_cancelled() {
return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
return Err(CompactionError::ShuttingDown);
}
if self.shard_identity.is_key_disposable(&key) {
// If this shard does not need to store this key, simply skip it.
@@ -2960,7 +3005,9 @@ impl Timeline {
desc.lsn_range.clone(),
ctx,
)
.await?,
.await
.context("failed to create delta layer writer")
.map_err(CompactionError::Other)?,
);
}
rewriter.before.as_mut().unwrap()
@@ -2975,14 +3022,20 @@ impl Timeline {
desc.lsn_range.clone(),
ctx,
)
.await?,
.await
.context("failed to create delta layer writer")
.map_err(CompactionError::Other)?,
);
}
rewriter.after.as_mut().unwrap()
} else {
unreachable!()
};
rewriter.put_value(key, lsn, val, ctx).await?;
rewriter
.put_value(key, lsn, val, ctx)
.await
.context("failed to put value")
.map_err(CompactionError::Other)?;
continue;
}
match val {
@@ -3005,9 +3058,13 @@ impl Timeline {
&job_desc.retain_lsns_below_horizon,
COMPACTION_DELTA_THRESHOLD,
get_ancestor_image(self, *last_key, ctx, has_data_below, lowest_retain_lsn)
.await?,
.await
.context("failed to get ancestor image")
.map_err(CompactionError::Other)?,
)
.await?;
.await
.context("failed to generate key retention")
.map_err(CompactionError::Other)?;
retention
.pipe_to(
*last_key,
@@ -3016,7 +3073,9 @@ impl Timeline {
&mut stat,
ctx,
)
.await?;
.await
.context("failed to pipe to delta layer writer")
.map_err(CompactionError::Other)?;
accumulated_values.clear();
*last_key = key;
accumulated_values.push((key, lsn, val));
@@ -3034,9 +3093,14 @@ impl Timeline {
job_desc.gc_cutoff,
&job_desc.retain_lsns_below_horizon,
COMPACTION_DELTA_THRESHOLD,
get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn).await?,
get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn)
.await
.context("failed to get ancestor image")
.map_err(CompactionError::Other)?,
)
.await?;
.await
.context("failed to generate key retention")
.map_err(CompactionError::Other)?;
retention
.pipe_to(
last_key,
@@ -3045,7 +3109,9 @@ impl Timeline {
&mut stat,
ctx,
)
.await?;
.await
.context("failed to pipe to delta layer writer")
.map_err(CompactionError::Other)?;
// end: move the above part to the loop body
let mut rewrote_delta_layers = Vec::new();
@@ -3053,13 +3119,23 @@ impl Timeline {
if let Some(delta_writer_before) = writers.before {
let (desc, path) = delta_writer_before
.finish(job_desc.compaction_key_range.start, ctx)
.await?;
let layer = Layer::finish_creating(self.conf, self, desc, &path)?;
.await
.context("failed to finish delta layer writer")
.map_err(CompactionError::Other)?;
let layer = Layer::finish_creating(self.conf, self, desc, &path)
.context("failed to finish creating delta layer")
.map_err(CompactionError::Other)?;
rewrote_delta_layers.push(layer);
}
if let Some(delta_writer_after) = writers.after {
let (desc, path) = delta_writer_after.finish(key.key_range.end, ctx).await?;
let layer = Layer::finish_creating(self.conf, self, desc, &path)?;
let (desc, path) = delta_writer_after
.finish(key.key_range.end, ctx)
.await
.context("failed to finish delta layer writer")
.map_err(CompactionError::Other)?;
let layer = Layer::finish_creating(self.conf, self, desc, &path)
.context("failed to finish creating delta layer")
.map_err(CompactionError::Other)?;
rewrote_delta_layers.push(layer);
}
}
@@ -3074,7 +3150,9 @@ impl Timeline {
let end_key = job_desc.compaction_key_range.end;
writer
.finish_with_discard_fn(self, ctx, end_key, discard)
.await?
.await
.context("failed to finish image layer writer")
.map_err(CompactionError::Other)?
} else {
drop(writer);
Vec::new()
@@ -3086,7 +3164,9 @@ impl Timeline {
let produced_delta_layers = if !dry_run {
delta_layer_writer
.finish_with_discard_fn(self, ctx, discard)
.await?
.await
.context("failed to finish delta layer writer")
.map_err(CompactionError::Other)?
} else {
drop(delta_layer_writer);
Vec::new()
@@ -3166,7 +3246,9 @@ impl Timeline {
&layer.layer_desc().key_range,
&job_desc.compaction_key_range,
) {
bail!("violated constraint: image layer outside of compaction key range");
return Err(CompactionError::Other(anyhow!(
"violated constraint: image layer outside of compaction key range"
)));
}
if !fully_contains(
&job_desc.compaction_key_range,
@@ -3181,7 +3263,9 @@ impl Timeline {
info!(
"gc-compaction statistics: {}",
serde_json::to_string(&stat)?
serde_json::to_string(&stat)
.context("failed to serialize gc-compaction statistics")
.map_err(CompactionError::Other)?
);
if dry_run {
@@ -3220,10 +3304,10 @@ impl Timeline {
// the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are
// in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails.
if let Some(err) = check_valid_layermap(&final_layers) {
bail!(
return Err(CompactionError::Other(anyhow!(
"gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss",
err
);
)));
}
// Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only
@@ -3275,7 +3359,9 @@ impl Timeline {
// find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should
// be batched into `schedule_compaction_update`.
let disk_consistent_lsn = self.disk_consistent_lsn.load();
self.schedule_uploads(disk_consistent_lsn, None)?;
self.schedule_uploads(disk_consistent_lsn, None)
.context("failed to schedule uploads")
.map_err(CompactionError::Other)?;
// If a layer gets rewritten throughout gc-compaction, we need to keep that layer only in `compact_to` instead
// of `compact_from`.
let compact_from = {