mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 15:02:56 +00:00
pageserver: respond to multiple shutdown signals (#9982)
## Problem The Pageserver signal handler would only respond to a single signal and initiate shutdown. Subsequent signals were ignored. This meant that a `SIGQUIT` sent after a `SIGTERM` had no effect (e.g. in the case of a slow or stalled shutdown). The `test_runner` uses this to force shutdown if graceful shutdown is slow. Touches #9740. ## Summary of changes Keep responding to signals after the initial shutdown signal has been received. Arguably, the `test_runner` should also use `SIGKILL` rather than `SIGQUIT` in this case, but it seems reasonable to respond to `SIGQUIT` regardless.
This commit is contained in:
@@ -636,45 +636,59 @@ fn start_pageserver(
|
||||
tokio::net::TcpListener::from_std(pageserver_listener).context("create tokio listener")?
|
||||
});
|
||||
|
||||
let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard());
|
||||
|
||||
// All started up! Now just sit and wait for shutdown signal.
|
||||
BACKGROUND_RUNTIME.block_on(async move {
|
||||
let signal_token = CancellationToken::new();
|
||||
let signal_cancel = signal_token.child_token();
|
||||
|
||||
{
|
||||
BACKGROUND_RUNTIME.block_on(async move {
|
||||
// Spawn signal handlers. Runs in a loop since we want to be responsive to multiple signals
|
||||
// even after triggering shutdown (e.g. a SIGQUIT after a slow SIGTERM shutdown). See:
|
||||
// https://github.com/neondatabase/neon/issues/9740.
|
||||
tokio::spawn(async move {
|
||||
let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt()).unwrap();
|
||||
let mut sigterm = tokio::signal::unix::signal(SignalKind::terminate()).unwrap();
|
||||
let mut sigquit = tokio::signal::unix::signal(SignalKind::quit()).unwrap();
|
||||
let signal = tokio::select! {
|
||||
_ = sigquit.recv() => {
|
||||
info!("Got signal SIGQUIT. Terminating in immediate shutdown mode",);
|
||||
std::process::exit(111);
|
||||
|
||||
loop {
|
||||
let signal = tokio::select! {
|
||||
_ = sigquit.recv() => {
|
||||
info!("Got signal SIGQUIT. Terminating in immediate shutdown mode.");
|
||||
std::process::exit(111);
|
||||
}
|
||||
_ = sigint.recv() => "SIGINT",
|
||||
_ = sigterm.recv() => "SIGTERM",
|
||||
};
|
||||
|
||||
if !signal_token.is_cancelled() {
|
||||
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode.");
|
||||
signal_token.cancel();
|
||||
} else {
|
||||
info!("Got signal {signal}. Already shutting down.");
|
||||
}
|
||||
_ = sigint.recv() => { "SIGINT" },
|
||||
_ = sigterm.recv() => { "SIGTERM" },
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode",);
|
||||
// Wait for cancellation signal and shut down the pageserver.
|
||||
//
|
||||
// This cancels the `shutdown_pageserver` cancellation tree. Right now that tree doesn't
|
||||
// reach very far, and `task_mgr` is used instead. The plan is to change that over time.
|
||||
signal_cancel.cancelled().await;
|
||||
|
||||
// This cancels the `shutdown_pageserver` cancellation tree.
|
||||
// Right now that tree doesn't reach very far, and `task_mgr` is used instead.
|
||||
// The plan is to change that over time.
|
||||
shutdown_pageserver.take();
|
||||
pageserver::shutdown_pageserver(
|
||||
http_endpoint_listener,
|
||||
page_service,
|
||||
consumption_metrics_tasks,
|
||||
disk_usage_eviction_task,
|
||||
&tenant_manager,
|
||||
background_purges,
|
||||
deletion_queue.clone(),
|
||||
secondary_controller_tasks,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
unreachable!()
|
||||
})
|
||||
}
|
||||
shutdown_pageserver.cancel();
|
||||
pageserver::shutdown_pageserver(
|
||||
http_endpoint_listener,
|
||||
page_service,
|
||||
consumption_metrics_tasks,
|
||||
disk_usage_eviction_task,
|
||||
&tenant_manager,
|
||||
background_purges,
|
||||
deletion_queue.clone(),
|
||||
secondary_controller_tasks,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
unreachable!();
|
||||
})
|
||||
}
|
||||
|
||||
async fn create_remote_storage_client(
|
||||
|
||||
Reference in New Issue
Block a user