mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 15:02:56 +00:00
fix(pageserver): log compaction errors with timeline ids (#11231)
## Problem

Makes it easier to debug.

## Summary of changes

Log compaction errors with timeline ids.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -268,7 +268,7 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
error_run += 1;
|
||||
let backoff =
|
||||
exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS);
|
||||
log_compaction_error(&err, error_run, backoff, cancel.is_cancelled());
|
||||
log_compaction_error(&err, Some((error_run, backoff)), cancel.is_cancelled());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -281,10 +281,9 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
}
|
||||
}
|
||||
|
||||
fn log_compaction_error(
|
||||
pub(crate) fn log_compaction_error(
|
||||
err: &CompactionError,
|
||||
error_count: u32,
|
||||
sleep_duration: Duration,
|
||||
retry_info: Option<(u32, Duration)>,
|
||||
task_cancelled: bool,
|
||||
) {
|
||||
use CompactionError::*;
|
||||
@@ -318,14 +317,26 @@ fn log_compaction_error(
|
||||
}
|
||||
};
|
||||
|
||||
match level {
|
||||
Level::ERROR => {
|
||||
error!("Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}")
|
||||
if let Some((error_count, sleep_duration)) = retry_info {
|
||||
match level {
|
||||
Level::ERROR => {
|
||||
error!(
|
||||
"Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}"
|
||||
)
|
||||
}
|
||||
Level::INFO => {
|
||||
info!(
|
||||
"Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}"
|
||||
)
|
||||
}
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
Level::INFO => {
|
||||
info!("Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}")
|
||||
} else {
|
||||
match level {
|
||||
Level::ERROR => error!("Compaction failed: {err:#}"),
|
||||
Level::INFO => info!("Compaction failed: {err:#}"),
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
level => unimplemented!("unexpected level {level:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -89,6 +89,7 @@ use super::remote_timeline_client::index::{GcCompactionState, IndexPart};
|
||||
use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
|
||||
use super::secondary::heatmap::HeatMapLayer;
|
||||
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
|
||||
use super::tasks::log_compaction_error;
|
||||
use super::upload_queue::NotInitialized;
|
||||
use super::{
|
||||
AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded,
|
||||
@@ -1856,18 +1857,23 @@ impl Timeline {
|
||||
flags: EnumSet<CompactFlags>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
self.compact_with_options(
|
||||
cancel,
|
||||
CompactOptions {
|
||||
flags,
|
||||
compact_key_range: None,
|
||||
compact_lsn_range: None,
|
||||
sub_compaction: false,
|
||||
sub_compaction_max_job_size_mb: None,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
let res = self
|
||||
.compact_with_options(
|
||||
cancel,
|
||||
CompactOptions {
|
||||
flags,
|
||||
compact_key_range: None,
|
||||
compact_lsn_range: None,
|
||||
sub_compaction: false,
|
||||
sub_compaction_max_job_size_mb: None,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
if let Err(err) = &res {
|
||||
log_compaction_error(err, None, cancel.is_cancelled());
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
/// Outermost timeline compaction operation; downloads needed layers.
|
||||
|
||||
@@ -56,6 +56,7 @@ use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
||||
use crate::tenant::storage_layer::{
|
||||
AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState,
|
||||
};
|
||||
use crate::tenant::tasks::log_compaction_error;
|
||||
use crate::tenant::timeline::{
|
||||
DeltaLayerWriter, ImageLayerCreationOutcome, ImageLayerWriter, IoConcurrency, Layer,
|
||||
ResidentLayer, drop_rlock,
|
||||
@@ -440,6 +441,20 @@ impl GcCompactionQueue {
|
||||
ctx: &RequestContext,
|
||||
gc_block: &GcBlock,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
let res = self.iteration_inner(cancel, ctx, gc_block, timeline).await;
|
||||
if let Err(err) = &res {
|
||||
log_compaction_error(err, None, cancel.is_cancelled());
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
async fn iteration_inner(
|
||||
&self,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
gc_block: &GcBlock,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<CompactionOutcome, CompactionError> {
|
||||
let Ok(_one_op_at_a_time_guard) = self.consumer_lock.try_lock() else {
|
||||
return Err(CompactionError::AlreadyRunning(
|
||||
|
||||
Reference in New Issue
Block a user