From 9b7a8e67a4ccd0957afd46d857d81374126fb255 Mon Sep 17 00:00:00 2001 From: Dmitry Rodionov Date: Tue, 12 Apr 2022 23:57:33 +0300 Subject: [PATCH] fix deadlock in upload_timeline_checkpoint It originated from the fact that we were calling fetch_full_index without releasing the read guard, and fetch_full_index tries to acquire the read lock again. For a plain mutex this is already a deadlock; for an RW lock the deadlock was caused by an attempt to acquire write access later in the code while still holding an active read guard further up the stack. This is sort of a band-aid because Kirill plans to change this code during removal of the archiving mechanism. --- .../src/remote_storage/storage_sync/upload.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pageserver/src/remote_storage/storage_sync/upload.rs b/pageserver/src/remote_storage/storage_sync/upload.rs index f955e04474..7b6d58a661 100644 --- a/pageserver/src/remote_storage/storage_sync/upload.rs +++ b/pageserver/src/remote_storage/storage_sync/upload.rs @@ -1,6 +1,6 @@ //! Timeline synchronization logic to compress and upload to the remote storage all new timeline files from the checkpoints. 
-use std::{borrow::Cow, collections::BTreeSet, path::PathBuf, sync::Arc}; +use std::{collections::BTreeSet, path::PathBuf, sync::Arc}; use tracing::{debug, error, warn}; @@ -46,13 +46,21 @@ pub(super) async fn upload_timeline_checkpoint< let index_read = index.read().await; let remote_timeline = match index_read.timeline_entry(&sync_id) { - None => None, + None => { + drop(index_read); + None + } Some(entry) => match entry.inner() { - TimelineIndexEntryInner::Full(remote_timeline) => Some(Cow::Borrowed(remote_timeline)), + TimelineIndexEntryInner::Full(remote_timeline) => { + let r = Some(remote_timeline.clone()); + drop(index_read); + r + } TimelineIndexEntryInner::Description(_) => { + drop(index_read); debug!("Found timeline description for the given ids, downloading the full index"); match fetch_full_index(remote_assets.as_ref(), &timeline_dir, sync_id).await { - Ok(remote_timeline) => Some(Cow::Owned(remote_timeline)), + Ok(remote_timeline) => Some(remote_timeline), Err(e) => { error!("Failed to download full timeline index: {:?}", e); sync_queue::push(SyncTask::new( @@ -82,7 +90,6 @@ pub(super) async fn upload_timeline_checkpoint< let already_uploaded_files = remote_timeline .map(|timeline| timeline.stored_files(&timeline_dir)) .unwrap_or_default(); - drop(index_read); match try_upload_checkpoint( config,