mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
Fix defusing race between Tenant::shutdown and offload_timeline (#10150)
There is a race condition between `Tenant::shutdown`'s `defuse_for_drop` loop and `offload_timeline`, where timeline offloading can insert into a tenant that is in the process of shutting down, in fact so far progressed that the `defuse_for_drop` has already been called. This prevents warn log lines of the form: ``` offloaded timeline <hash> was dropped without having cleaned it up at the ancestor ``` The solution piggybacks on the `offloaded_timelines` lock: both the defuse loop and the offloaded timeline insertion need to acquire the lock, and we know that the defuse loop only runs after the tenant has set its `TenantState` to `Stopping`. So if we hold the `offloaded_timelines` lock, and know that the `TenantState` is not `Stopping`, then we know that the defuse loop has not ran yet, and holding the lock ensures that it doesn't start running while we are inserting the offloaded timeline. Fixes #10070
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use pageserver_api::models::TenantState;
|
||||
|
||||
use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
|
||||
use super::Timeline;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
@@ -70,6 +72,15 @@ pub(crate) async fn offload_timeline(
|
||||
|
||||
{
|
||||
let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
|
||||
if matches!(
|
||||
tenant.current_state(),
|
||||
TenantState::Stopping { .. } | TenantState::Broken { .. }
|
||||
) {
|
||||
// Cancel the operation if the tenant is shutting down. Do this while the
|
||||
// timelines_offloaded lock is held to prevent a race with Tenant::shutdown
|
||||
// for defusing the lock
|
||||
return Err(OffloadError::Cancelled);
|
||||
}
|
||||
offloaded_timelines.insert(
|
||||
timeline.timeline_id,
|
||||
Arc::new(
|
||||
|
||||
Reference in New Issue
Block a user