Don't keep FDs open on cancelled timelines in safekeepers.

Since PR #3300 we don't remove timelines completely until next restart, so this
prevents leakage.

fixes https://github.com/neondatabase/neon/issues/3336
This commit is contained in:
Arseny Sher
2023-01-16 17:22:23 +04:00
committed by Arseny Sher
parent bce4233d3a
commit 84ffdc8b4f
2 changed files with 18 additions and 8 deletions

View File

@@ -466,7 +466,7 @@ impl Timeline {
Ok(_) => Ok(()),
Err(e) => {
// Bootstrap failed, cancel timeline and remove timeline directory.
self.cancel();
self.cancel(shared_state);
if let Err(fs_err) = std::fs::remove_dir_all(&self.timeline_dir) {
warn!(
@@ -487,20 +487,23 @@ impl Timeline {
shared_state: &mut MutexGuard<SharedState>,
) -> Result<(bool, bool)> {
let was_active = shared_state.active;
self.cancel();
self.cancel(shared_state);
let dir_existed = delete_dir(&self.timeline_dir)?;
Ok((dir_existed, was_active))
}
/// Cancel timeline to prevent further usage. Background tasks will stop
/// eventually after receiving cancellation signal.
fn cancel(&self) {
info!("Timeline {} is cancelled", self.ttid);
fn cancel(&self, shared_state: &mut MutexGuard<SharedState>) {
info!("timeline {} is cancelled", self.ttid);
let _ = self.cancellation_tx.send(true);
let res = self.wal_backup_launcher_tx.blocking_send(self.ttid);
if let Err(e) = res {
error!("Failed to send stop signal to wal_backup_launcher: {}", e);
}
// Close associated FDs. Nobody will be able to touch timeline data once
// it is cancelled, so WAL storage won't be opened again.
shared_state.sk.wal_store.close();
}
/// Returns if timeline is cancelled.
@@ -537,10 +540,6 @@ impl Timeline {
/// De-register compute connection, shutting down timeline activity if
/// pageserver doesn't need catchup.
pub fn on_compute_disconnect(&self) -> Result<()> {
if self.is_cancelled() {
bail!(TimelineError::Cancelled(self.ttid));
}
let is_wal_backup_action_pending: bool;
{
let mut shared_state = self.write_shared_state();

View File

@@ -55,6 +55,12 @@ pub trait Storage {
/// that without timeline lock.
fn remove_up_to(&self) -> Box<dyn Fn(XLogSegNo) -> Result<()>>;
/// Release resources associated with the storage -- technically, close FDs.
/// Currently we don't remove timelines until restart (#3146), so need to
/// spare descriptors. This would be useful for temporary tli detach as
/// well.
fn close(&mut self) {}
/// Get metrics for this timeline.
fn get_metrics(&self) -> WalStorageMetrics;
}
@@ -401,6 +407,11 @@ impl Storage for PhysicalStorage {
})
}
fn close(&mut self) {
// close happens in destructor
let _open_file = self.file.take();
}
fn get_metrics(&self) -> WalStorageMetrics {
self.metrics.clone()
}