Skip unnecessary shared state updates in safekeepers (#7851)

I looked at the metrics from
https://github.com/neondatabase/neon/pull/7768 on staging and it seems
that manager does too many iterations. This is probably caused by
background job `remove_wal.rs` which iterates over all timelines and
tries to remove WAL and persist control file. This causes shared state
updates and wakes up the manager. The fix is to skip notifying about the
updates if nothing was updated.
This commit is contained in:
Arthur Petukhovsky
2024-05-23 09:45:24 +01:00
committed by GitHub
parent 58e31fe098
commit 8f3c316bae
2 changed files with 24 additions and 14 deletions

View File

@@ -827,10 +827,10 @@ where
/// Persist control file if there is something to save and enough time
/// passed after the last save.
pub async fn maybe_persist_inmem_control_file(&mut self) -> Result<()> {
pub async fn maybe_persist_inmem_control_file(&mut self) -> Result<bool> {
const CF_SAVE_INTERVAL: Duration = Duration::from_secs(300);
if self.state.pers.last_persist_at().elapsed() < CF_SAVE_INTERVAL {
return Ok(());
return Ok(false);
}
let need_persist = self.state.inmem.commit_lsn > self.state.commit_lsn
|| self.state.inmem.backup_lsn > self.state.backup_lsn
@@ -840,7 +840,7 @@ where
self.state.flush().await?;
trace!("saved control file: {CF_SAVE_INTERVAL:?} passed");
}
Ok(())
Ok(need_persist)
}
/// Handle request to append WAL.

View File

@@ -104,11 +104,16 @@ pub type ReadGuardSharedState<'a> = RwLockReadGuard<'a, SharedState>;
pub struct WriteGuardSharedState<'a> {
tli: Arc<Timeline>,
guard: RwLockWriteGuard<'a, SharedState>,
skip_update: bool,
}
impl<'a> WriteGuardSharedState<'a> {
fn new(tli: Arc<Timeline>, guard: RwLockWriteGuard<'a, SharedState>) -> Self {
WriteGuardSharedState { tli, guard }
WriteGuardSharedState {
tli,
guard,
skip_update: false,
}
}
}
@@ -149,10 +154,12 @@ impl<'a> Drop for WriteGuardSharedState<'a> {
}
});
// send notification about shared state update
self.tli.shared_state_version_tx.send_modify(|old| {
*old += 1;
});
if !self.skip_update {
// send notification about shared state update
self.tli.shared_state_version_tx.send_modify(|old| {
*old += 1;
});
}
}
}
@@ -802,7 +809,11 @@ impl Timeline {
// update last_removed_segno
let mut shared_state = self.write_shared_state().await;
shared_state.last_removed_segno = horizon_segno;
if shared_state.last_removed_segno != horizon_segno {
shared_state.last_removed_segno = horizon_segno;
} else {
shared_state.skip_update = true;
}
Ok(())
}
@@ -811,11 +822,10 @@ impl Timeline {
/// to date so that storage nodes restart doesn't cause many pageserver ->
/// safekeeper reconnections.
pub async fn maybe_persist_control_file(self: &Arc<Self>) -> Result<()> {
self.write_shared_state()
.await
.sk
.maybe_persist_inmem_control_file()
.await
let mut guard = self.write_shared_state().await;
let changed = guard.sk.maybe_persist_inmem_control_file().await?;
guard.skip_update = !changed;
Ok(())
}
/// Gather timeline data for metrics.