From 5be8d38a63dc6b761200a7fd8f2dd748b8de0b13 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 12 Sep 2023 11:23:46 +0200 Subject: [PATCH] fix deadlock around TENANTS (#5285) The sequence that can lead to a deadlock: 1. DELETE request gets all the way to `tenant.shutdown(progress, false).await.is_err() ` , while holding TENANTS.read() 2. POST request for tenant creation comes in, calls `tenant_map_insert`, it does `let mut guard = TENANTS.write().await;` 3. Something that `tenant.shutdown()` needs to wait for needs a `TENANTS.read().await`. The only case identified in exhaustive manual scanning of the code base is this one: Imitate size access does `get_tenant().await`, which does `TENANTS.read().await` under the hood. In the above case (1) waits for (3), (3)'s read-lock request is queued behind (2)'s write-lock, and (2) waits for (1). Deadlock. I made a reproducer/proof-that-above-hypothesis-holds in https://github.com/neondatabase/neon/pull/5281 , but, it's not ready for merge yet and we want the fix _now_. fixes https://github.com/neondatabase/neon/issues/5284 --- pageserver/src/tenant/mgr.rs | 2 ++ .../src/tenant/timeline/eviction_task.rs | 21 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 0cf5c36b87..74faee1115 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -503,6 +503,8 @@ pub enum GetTenantError { /// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query. /// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants. +/// +/// This method is cancel-safe. pub async fn get_tenant( tenant_id: TenantId, active_only: bool, diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 3e407dda57..39f0d03a01 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -328,9 +328,24 @@ impl Timeline { // Make one of the tenant's timelines draw the short straw and run the calculation. // The others wait until the calculation is done so that they take into account the // imitated accesses that the winner made. - let Ok(tenant) = crate::tenant::mgr::get_tenant(self.tenant_id, true).await else { - // likely, we're shutting down - return ControlFlow::Break(()); + // + // It is critical we are responsive to cancellation here. Otherwise, we deadlock with + // tenant deletion (holds TENANTS in read mode) any other task that attempts to + // acquire TENANTS in write mode before we here call get_tenant. + // See https://github.com/neondatabase/neon/issues/5284. + let res = tokio::select! { + _ = cancel.cancelled() => { + return ControlFlow::Break(()); + } + res = crate::tenant::mgr::get_tenant(self.tenant_id, true) => { + res + } + }; + let tenant = match res { + Ok(t) => t, + Err(_) => { + return ControlFlow::Break(()); + } }; let mut state = tenant.eviction_task_tenant_state.lock().await; match state.last_layer_access_imitation {