From 9cdc8c0e6c7adf9bf31ec3cff6f8a978833e528a Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 5 Mar 2025 10:57:38 -0500 Subject: [PATCH] feat(pageserver): revisit error types for gc-compaction (#11082) ## Problem part of https://github.com/neondatabase/neon/issues/9114 We used anyhow::Error everywhere and it's time to fix. ## Summary of changes * Make sure that cancel errors are correctly propagated as CompactionError::ShuttingDown. * Skip all the trigger computation work if gc_cutoff is not generated yet. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 182 ++++++++++++++----- 1 file changed, 134 insertions(+), 48 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 76c28e11ab..17f7d96e5e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -15,7 +15,7 @@ use super::{ Timeline, }; -use anyhow::{Context, anyhow, bail}; +use anyhow::{Context, anyhow}; use bytes::Bytes; use enumset::EnumSet; use fail::fail_point; @@ -234,6 +234,12 @@ impl GcCompactionQueue { // it enough in staging yet. return Ok(()); } + if timeline.get_gc_compaction_watermark() == Lsn::INVALID { + // If the gc watermark is not set, we don't need to trigger auto compaction. + // This check is the same as in `gc_compaction_split_jobs` but we don't log + // here and we can also skip the computation of the trigger condition earlier. + return Ok(()); + } let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else { // Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure @@ -357,8 +363,7 @@ impl GcCompactionQueue { GcCompactJob::from_compact_options(options.clone()), options.sub_compaction_max_job_size_mb, ) - .await - .map_err(CompactionError::Other)?; + .await?; if jobs.is_empty() { info!("no jobs to run, skipping scheduled compaction task"); self.notify_and_unblock(id); @@ -825,9 +830,7 @@ impl Timeline { .flags .contains(CompactFlags::EnhancedGcBottomMostCompaction) { - self.compact_with_gc(cancel, options, ctx) - .await - .map_err(CompactionError::Other)?; + self.compact_with_gc(cancel, options, ctx).await?; return Ok(CompactionOutcome::Done); } @@ -2345,12 +2348,19 @@ impl Timeline { async fn check_compaction_space( self: &Arc, layer_selection: &[Layer], - ) -> anyhow::Result<()> { - let available_space = self.check_available_space().await?; + ) -> Result<(), CompactionError> { + let available_space = self + .check_available_space() + .await + .map_err(CompactionError::Other)?; let mut remote_layer_size = 0; let mut all_layer_size = 0; for layer in layer_selection { - let needs_download = layer.needs_download().await?; + let needs_download = layer + .needs_download() + .await + .context("failed to check if layer needs download") + .map_err(CompactionError::Other)?; if needs_download.is_some() { remote_layer_size += layer.layer_desc().file_size; } @@ -2359,14 +2369,14 @@ impl Timeline { let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */ if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space { - return Err(anyhow!( + return Err(CompactionError::Other(anyhow!( "not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", available_space, allocated_space, all_layer_size, remote_layer_size, all_layer_size + remote_layer_size - )); + ))); } Ok(()) } @@ -2397,7 +2407,7 @@ impl Timeline { self: &Arc, job: GcCompactJob, sub_compaction_max_job_size_mb: Option, - ) -> anyhow::Result> { + ) -> Result, CompactionError> { let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX { job.compact_lsn_range.end } else { @@ -2548,7 +2558,7 @@ impl Timeline { cancel: &CancellationToken, options: CompactOptions, ctx: &RequestContext, - ) -> anyhow::Result<()> { + ) -> Result<(), CompactionError> { let sub_compaction = options.sub_compaction; let job = GcCompactJob::from_compact_options(options.clone()); if sub_compaction { @@ -2580,7 +2590,7 @@ impl Timeline { cancel: &CancellationToken, job: GcCompactJob, ctx: &RequestContext, - ) -> anyhow::Result<()> { + ) -> Result<(), CompactionError> { // Block other compaction/GC tasks from running for now. GC-compaction could run along // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // Note that we already acquired the compaction lock when the outer `compact` function gets called. @@ -2588,8 +2598,7 @@ impl Timeline { let gc_lock = async { tokio::select! { guard = self.gc_lock.lock() => Ok(guard), - // TODO: refactor to CompactionError to correctly pass cancelled error - _ = cancel.cancelled() => Err(anyhow!("cancelled")), + _ = cancel.cancelled() => Err(CompactionError::ShuttingDown), } }; @@ -2810,10 +2819,10 @@ impl Timeline { .map(|layer| layer.layer_desc().layer_name()) .collect_vec(); if let Some(err) = check_valid_layermap(&layer_names) { - bail!( + return Err(CompactionError::Other(anyhow!( "gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err - ); + ))); } // The maximum LSN we are processing in this compaction loop let end_lsn = job_desc @@ -2828,11 +2837,24 @@ impl Timeline { let mut total_downloaded_size = 0; let mut total_layer_size = 0; for layer in &job_desc.selected_layers { - if layer.needs_download().await?.is_some() { + if layer + .needs_download() + .await + .context("failed to check if layer needs download") + .map_err(CompactionError::Other)? + .is_some() + { total_downloaded_size += layer.layer_desc().file_size; } total_layer_size += layer.layer_desc().file_size; - let resident_layer = layer.download_and_keep_resident(ctx).await?; + if cancel.is_cancelled() { + return Err(CompactionError::ShuttingDown); + } + let resident_layer = layer + .download_and_keep_resident(ctx) + .await + .context("failed to download and keep resident layer") + .map_err(CompactionError::Other)?; downloaded_layers.push(resident_layer); } info!( @@ -2843,19 +2865,33 @@ impl Timeline { ); for resident_layer in &downloaded_layers { if resident_layer.layer_desc().is_delta() { - let layer = resident_layer.get_as_delta(ctx).await?; + let layer = resident_layer + .get_as_delta(ctx) + .await + .context("failed to get delta layer") + .map_err(CompactionError::Other)?; delta_layers.push(layer); } else { - let layer = resident_layer.get_as_image(ctx).await?; + let layer = resident_layer + .get_as_image(ctx) + .await + .context("failed to get image layer") + .map_err(CompactionError::Other)?; image_layers.push(layer); } } - let (dense_ks, sparse_ks) = self.collect_gc_compaction_keyspace().await?; + let (dense_ks, sparse_ks) = self + .collect_gc_compaction_keyspace() + .await + .context("failed to collect gc compaction keyspace") + .map_err(CompactionError::Other)?; let mut merge_iter = FilterIterator::create( MergeIterator::create(&delta_layers, &image_layers, ctx), dense_ks, sparse_ks, - )?; + ) + .context("failed to create filter iterator") + .map_err(CompactionError::Other)?; // Step 2: Produce images+deltas. let mut accumulated_values = Vec::new(); @@ -2874,7 +2910,9 @@ impl Timeline { self.get_compaction_target_size(), ctx, ) - .await?, + .await + .context("failed to create image layer writer") + .map_err(CompactionError::Other)?, ) } else { None @@ -2887,7 +2925,9 @@ impl Timeline { lowest_retain_lsn..end_lsn, self.get_compaction_target_size(), ) - .await?; + .await + .context("failed to create delta layer writer") + .map_err(CompactionError::Other)?; #[derive(Default)] struct RewritingLayers { @@ -2927,9 +2967,14 @@ impl Timeline { // the key and LSN range are determined. However, to keep things simple here, we still // create this writer, and discard the writer in the end. - while let Some(((key, lsn, val), desc)) = merge_iter.next_with_trace().await? { + while let Some(((key, lsn, val), desc)) = merge_iter + .next_with_trace() + .await + .context("failed to get next key-value pair") + .map_err(CompactionError::Other)? + { if cancel.is_cancelled() { - return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error + return Err(CompactionError::ShuttingDown); } if self.shard_identity.is_key_disposable(&key) { // If this shard does not need to store this key, simply skip it. @@ -2960,7 +3005,9 @@ impl Timeline { desc.lsn_range.clone(), ctx, ) - .await?, + .await + .context("failed to create delta layer writer") + .map_err(CompactionError::Other)?, ); } rewriter.before.as_mut().unwrap() @@ -2975,14 +3022,20 @@ impl Timeline { desc.lsn_range.clone(), ctx, ) - .await?, + .await + .context("failed to create delta layer writer") + .map_err(CompactionError::Other)?, ); } rewriter.after.as_mut().unwrap() } else { unreachable!() }; - rewriter.put_value(key, lsn, val, ctx).await?; + rewriter + .put_value(key, lsn, val, ctx) + .await + .context("failed to put value") + .map_err(CompactionError::Other)?; continue; } match val { @@ -3005,9 +3058,13 @@ impl Timeline { &job_desc.retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, get_ancestor_image(self, *last_key, ctx, has_data_below, lowest_retain_lsn) - .await?, + .await + .context("failed to get ancestor image") + .map_err(CompactionError::Other)?, ) - .await?; + .await + .context("failed to generate key retention") + .map_err(CompactionError::Other)?; retention .pipe_to( *last_key, @@ -3016,7 +3073,9 @@ impl Timeline { &mut stat, ctx, ) - .await?; + .await + .context("failed to pipe to delta layer writer") + .map_err(CompactionError::Other)?; accumulated_values.clear(); *last_key = key; accumulated_values.push((key, lsn, val)); @@ -3034,9 +3093,14 @@ impl Timeline { job_desc.gc_cutoff, &job_desc.retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, - get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn).await?, + get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn) + .await + .context("failed to get ancestor image") + .map_err(CompactionError::Other)?, ) - .await?; + .await + .context("failed to generate key retention") + .map_err(CompactionError::Other)?; retention .pipe_to( last_key, @@ -3045,7 +3109,9 @@ impl Timeline { &mut stat, ctx, ) - .await?; + .await + .context("failed to pipe to delta layer writer") + .map_err(CompactionError::Other)?; // end: move the above part to the loop body let mut rewrote_delta_layers = Vec::new(); @@ -3053,13 +3119,23 @@ impl Timeline { if let Some(delta_writer_before) = writers.before { let (desc, path) = delta_writer_before .finish(job_desc.compaction_key_range.start, ctx) - .await?; - let layer = Layer::finish_creating(self.conf, self, desc, &path)?; + .await + .context("failed to finish delta layer writer") + .map_err(CompactionError::Other)?; + let layer = Layer::finish_creating(self.conf, self, desc, &path) + .context("failed to finish creating delta layer") + .map_err(CompactionError::Other)?; rewrote_delta_layers.push(layer); } if let Some(delta_writer_after) = writers.after { - let (desc, path) = delta_writer_after.finish(key.key_range.end, ctx).await?; - let layer = Layer::finish_creating(self.conf, self, desc, &path)?; + let (desc, path) = delta_writer_after + .finish(key.key_range.end, ctx) + .await + .context("failed to finish delta layer writer") + .map_err(CompactionError::Other)?; + let layer = Layer::finish_creating(self.conf, self, desc, &path) + .context("failed to finish creating delta layer") + .map_err(CompactionError::Other)?; rewrote_delta_layers.push(layer); } } @@ -3074,7 +3150,9 @@ impl Timeline { let end_key = job_desc.compaction_key_range.end; writer .finish_with_discard_fn(self, ctx, end_key, discard) - .await? + .await + .context("failed to finish image layer writer") + .map_err(CompactionError::Other)? } else { drop(writer); Vec::new() @@ -3086,7 +3164,9 @@ impl Timeline { let produced_delta_layers = if !dry_run { delta_layer_writer .finish_with_discard_fn(self, ctx, discard) - .await? + .await + .context("failed to finish delta layer writer") + .map_err(CompactionError::Other)? } else { drop(delta_layer_writer); Vec::new() @@ -3166,7 +3246,9 @@ impl Timeline { &layer.layer_desc().key_range, &job_desc.compaction_key_range, ) { - bail!("violated constraint: image layer outside of compaction key range"); + return Err(CompactionError::Other(anyhow!( + "violated constraint: image layer outside of compaction key range" + ))); } if !fully_contains( &job_desc.compaction_key_range, @@ -3181,7 +3263,9 @@ impl Timeline { info!( "gc-compaction statistics: {}", - serde_json::to_string(&stat)? + serde_json::to_string(&stat) + .context("failed to serialize gc-compaction statistics") + .map_err(CompactionError::Other)? ); if dry_run { @@ -3220,10 +3304,10 @@ impl Timeline { // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails. if let Some(err) = check_valid_layermap(&final_layers) { - bail!( + return Err(CompactionError::Other(anyhow!( "gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err - ); + ))); } // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only @@ -3275,7 +3359,9 @@ impl Timeline { // find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should // be batched into `schedule_compaction_update`. let disk_consistent_lsn = self.disk_consistent_lsn.load(); - self.schedule_uploads(disk_consistent_lsn, None)?; + self.schedule_uploads(disk_consistent_lsn, None) + .context("failed to schedule uploads") + .map_err(CompactionError::Other)?; // If a layer gets rewritten throughout gc-compaction, we need to keep that layer only in `compact_to` instead // of `compact_from`. let compact_from = {