mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-04 14:00:38 +00:00
Skip unnecessary shared state updates in safekeepers (#7851)
I looked at the metrics from https://github.com/neondatabase/neon/pull/7768 on staging and it seems that manager does too many iterations. This is probably caused by background job `remove_wal.rs` which iterates over all timelines and tries to remove WAL and persist control file. This causes shared state updates and wakes up the manager. The fix is to skip notifying about the updates if nothing was updated.
This commit is contained in:
committed by
GitHub
parent
58e31fe098
commit
8f3c316bae
@@ -827,10 +827,10 @@ where
|
||||
|
||||
/// Persist control file if there is something to save and enough time
|
||||
/// passed after the last save.
|
||||
pub async fn maybe_persist_inmem_control_file(&mut self) -> Result<()> {
|
||||
pub async fn maybe_persist_inmem_control_file(&mut self) -> Result<bool> {
|
||||
const CF_SAVE_INTERVAL: Duration = Duration::from_secs(300);
|
||||
if self.state.pers.last_persist_at().elapsed() < CF_SAVE_INTERVAL {
|
||||
return Ok(());
|
||||
return Ok(false);
|
||||
}
|
||||
let need_persist = self.state.inmem.commit_lsn > self.state.commit_lsn
|
||||
|| self.state.inmem.backup_lsn > self.state.backup_lsn
|
||||
@@ -840,7 +840,7 @@ where
|
||||
self.state.flush().await?;
|
||||
trace!("saved control file: {CF_SAVE_INTERVAL:?} passed");
|
||||
}
|
||||
Ok(())
|
||||
Ok(need_persist)
|
||||
}
|
||||
|
||||
/// Handle request to append WAL.
|
||||
|
||||
@@ -104,11 +104,16 @@ pub type ReadGuardSharedState<'a> = RwLockReadGuard<'a, SharedState>;
|
||||
pub struct WriteGuardSharedState<'a> {
|
||||
tli: Arc<Timeline>,
|
||||
guard: RwLockWriteGuard<'a, SharedState>,
|
||||
skip_update: bool,
|
||||
}
|
||||
|
||||
impl<'a> WriteGuardSharedState<'a> {
|
||||
fn new(tli: Arc<Timeline>, guard: RwLockWriteGuard<'a, SharedState>) -> Self {
|
||||
WriteGuardSharedState { tli, guard }
|
||||
WriteGuardSharedState {
|
||||
tli,
|
||||
guard,
|
||||
skip_update: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,10 +154,12 @@ impl<'a> Drop for WriteGuardSharedState<'a> {
|
||||
}
|
||||
});
|
||||
|
||||
// send notification about shared state update
|
||||
self.tli.shared_state_version_tx.send_modify(|old| {
|
||||
*old += 1;
|
||||
});
|
||||
if !self.skip_update {
|
||||
// send notification about shared state update
|
||||
self.tli.shared_state_version_tx.send_modify(|old| {
|
||||
*old += 1;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -802,7 +809,11 @@ impl Timeline {
|
||||
|
||||
// update last_removed_segno
|
||||
let mut shared_state = self.write_shared_state().await;
|
||||
shared_state.last_removed_segno = horizon_segno;
|
||||
if shared_state.last_removed_segno != horizon_segno {
|
||||
shared_state.last_removed_segno = horizon_segno;
|
||||
} else {
|
||||
shared_state.skip_update = true;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -811,11 +822,10 @@ impl Timeline {
|
||||
/// to date so that storage nodes restart doesn't cause many pageserver ->
|
||||
/// safekeeper reconnections.
|
||||
pub async fn maybe_persist_control_file(self: &Arc<Self>) -> Result<()> {
|
||||
self.write_shared_state()
|
||||
.await
|
||||
.sk
|
||||
.maybe_persist_inmem_control_file()
|
||||
.await
|
||||
let mut guard = self.write_shared_state().await;
|
||||
let changed = guard.sk.maybe_persist_inmem_control_file().await?;
|
||||
guard.skip_update = !changed;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gather timeline data for metrics.
|
||||
|
||||
Reference in New Issue
Block a user