Files
neon/libs/utils/src/sync/gate.rs
John Spray 3c560d27a8 pageserver: implement secondary-mode downloads (#6123)
Follows on from #6050 , in which we upload heatmaps. Secondary locations
will now poll those heatmaps and download layers mentioned in the
heatmap.

TODO:
- [X] ~Unify/reconcile stats for behind-schedule execution with
warn_when_period_overrun
(https://github.com/neondatabase/neon/pull/6050#discussion_r1426560695)~
- [x] Give downloads their own concurrency config independent of uploads

Deferred optimizations:
- https://github.com/neondatabase/neon/issues/6199
- https://github.com/neondatabase/neon/issues/6200

Eviction will be the next PR:
- #5342
2024-01-05 12:29:20 +00:00

179 lines
6.1 KiB
Rust

use std::{sync::Arc, time::Duration};
/// Gates are a concurrency helper, primarily used for implementing safe shutdown.
///
/// Users of a resource call `enter()` to acquire a GateGuard, and the owner of
/// the resource calls `close()` when they want to ensure that all holders of guards
/// have released them, and that no future guards will be issued.
pub struct Gate {
/// Each caller of enter() takes one unit from the semaphore. In close(), we
/// take all the units to ensure all GateGuards are destroyed.
sem: Arc<tokio::sync::Semaphore>,
/// For observability only: a name that will be used to log warnings if a particular
/// gate is holding up shutdown
name: String,
}
impl std::fmt::Debug for Gate {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Gate<{}>", self.name)
}
}
/// RAII guard for a [`Gate`]: as long as this exists, calls to [`Gate::close`] will
/// not complete.
#[derive(Debug)]
pub struct GateGuard(tokio::sync::OwnedSemaphorePermit);
/// Observability helper: every `warn_period`, emit a log warning that we're still waiting on this gate
async fn warn_if_stuck<Fut: std::future::Future>(
fut: Fut,
name: &str,
warn_period: std::time::Duration,
) -> <Fut as std::future::Future>::Output {
let started = std::time::Instant::now();
let mut fut = std::pin::pin!(fut);
let mut warned = false;
let ret = loop {
match tokio::time::timeout(warn_period, &mut fut).await {
Ok(ret) => break ret,
Err(_) => {
tracing::warn!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"still waiting, taking longer than expected..."
);
warned = true;
}
}
};
// If we emitted a warning for slowness, also emit a message when we complete, so that
// someone debugging a shutdown can know for sure whether we have moved past this operation.
if warned {
tracing::info!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"completed, after taking longer than expected"
)
}
ret
}
#[derive(Debug)]
pub enum GateError {
GateClosed,
}
impl Gate {
const MAX_UNITS: u32 = u32::MAX;
pub fn new(name: String) -> Self {
Self {
sem: Arc::new(tokio::sync::Semaphore::new(Self::MAX_UNITS as usize)),
name,
}
}
/// Acquire a guard that will prevent close() calls from completing. If close()
/// was already called, this will return an error which should be interpreted
/// as "shutting down".
///
/// This function would typically be used from e.g. request handlers. While holding
/// the guard returned from this function, it is important to respect a CancellationToken
/// to avoid blocking close() indefinitely: typically types that contain a Gate will
/// also contain a CancellationToken.
pub fn enter(&self) -> Result<GateGuard, GateError> {
self.sem
.clone()
.try_acquire_owned()
.map(GateGuard)
.map_err(|_| GateError::GateClosed)
}
/// Types with a shutdown() method and a gate should call this method at the
/// end of shutdown, to ensure that all GateGuard holders are done.
///
/// This will wait for all guards to be destroyed. For this to complete promptly, it is
/// important that the holders of such guards are respecting a CancellationToken which has
/// been cancelled before entering this function.
pub async fn close(&self) {
warn_if_stuck(self.do_close(), &self.name, Duration::from_millis(1000)).await
}
/// Check if [`Self::close()`] has finished waiting for all [`Self::enter()`] users to finish. This
/// is usually analoguous for "Did shutdown finish?" for types that include a Gate, whereas checking
/// the CancellationToken on such types is analogous to "Did shutdown start?"
pub fn close_complete(&self) -> bool {
self.sem.is_closed()
}
async fn do_close(&self) {
tracing::debug!(gate = self.name, "Closing Gate...");
match self.sem.acquire_many(Self::MAX_UNITS).await {
Ok(_units) => {
// While holding all units, close the semaphore. All subsequent calls to enter() will fail.
self.sem.close();
}
Err(_) => {
// Semaphore closed: we are the only function that can do this, so it indicates a double-call.
// This is legal. Timeline::shutdown for example is not protected from being called more than
// once.
tracing::debug!(gate = self.name, "Double close")
}
}
tracing::debug!(gate = self.name, "Closed Gate.")
}
}
#[cfg(test)]
mod tests {
use futures::FutureExt;
use super::*;
#[tokio::test]
async fn test_idle_gate() {
// Having taken no gates, we should not be blocked in close
let gate = Gate::new("test".to_string());
gate.close().await;
// If a guard is dropped before entering, close should not be blocked
let gate = Gate::new("test".to_string());
let guard = gate.enter().unwrap();
drop(guard);
gate.close().await;
// Entering a closed guard fails
gate.enter().expect_err("enter should fail after close");
}
#[tokio::test]
async fn test_busy_gate() {
let gate = Gate::new("test".to_string());
let guard = gate.enter().unwrap();
let mut close_fut = std::pin::pin!(gate.close());
// Close should be blocked
assert!(close_fut.as_mut().now_or_never().is_none());
// Attempting to enter() should fail, even though close isn't done yet.
gate.enter()
.expect_err("enter should fail after entering close");
drop(guard);
// Guard is gone, close should finish
assert!(close_fut.as_mut().now_or_never().is_some());
// Attempting to enter() is still forbidden
gate.enter().expect_err("enter should fail finishing close");
}
}