From b09d68633510bdb12b017fb01ac055ffe7298833 Mon Sep 17 00:00:00 2001
From: Joonas Koivunen
Date: Sat, 9 Mar 2024 15:09:08 +0200
Subject: [PATCH] fix: on-demand downloads can outlive timeline shutdown (#7051)

## Problem

Before this PR, it was possible for on-demand downloads to be started after
`Timeline::shutdown()`. For example, we have observed a
walreceiver-connection-handler-initiated on-demand download that was started
after the final `task_mgr::shutdown_tasks()` call in `Timeline::shutdown()`.

The underlying issue is that `task_mgr::shutdown_tasks()` isn't sticky, i.e.,
new tasks can be spawned during or after `task_mgr::shutdown_tasks()`.

Cc: https://github.com/neondatabase/neon/issues/4175 in lieu of a more
specific issue for task_mgr; we already decided we want to get rid of it
anyway.

Original investigation:
https://neondb.slack.com/archives/C033RQ5SPDH/p1709824952465949

## Changes

- enter the timeline gate while downloading
- use the timeline cancellation token for cancelling the download, thereby
  fixing #7054

(A minimal illustrative sketch of the resulting gate-plus-cancellation
pattern is appended after the diff.)

Entering the gate might also resolve the recent "kept the gate from closing"
warnings seen in staging.
---
 libs/remote_storage/tests/test_real_s3.rs    | 26 +++++++++++--------
 pageserver/src/task_mgr.rs                   |  3 ---
 pageserver/src/tenant/storage_layer/layer.rs | 27 ++++++++------------
 test_runner/regress/test_tenant_delete.py    |  2 ++
 test_runner/regress/test_timeline_delete.py  |  4 ++-
 5 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs
index e927b40e80..d8b9824d99 100644
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -17,6 +17,7 @@ use remote_storage::{
 };
 use test_context::test_context;
 use test_context::AsyncTestContext;
+use tokio::io::AsyncBufReadExt;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 
@@ -484,32 +485,33 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
     ))
     .unwrap();
 
-    let len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
+    let file_len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
 
     {
-        let mut stream = ctx
+        let stream = ctx
             .client
             .download(&path, &cancel)
             .await
             .expect("download succeeds")
             .download_stream;
 
-        let first = stream
-            .next()
-            .await
-            .expect("should have the first blob")
-            .expect("should have succeeded");
+        let mut reader = std::pin::pin!(tokio_util::io::StreamReader::new(stream));
 
-        tracing::info!(len = first.len(), "downloaded first chunk");
+        let first = reader.fill_buf().await.expect("should have the first blob");
+
+        let len = first.len();
+        tracing::info!(len, "downloaded first chunk");
 
         assert!(
-            first.len() < len,
+            first.len() < file_len,
             "uploaded file is too small, we downloaded all on first chunk"
         );
 
+        reader.consume(len);
+
         cancel.cancel();
 
-        let next = stream.next().await.expect("stream should have more");
+        let next = reader.fill_buf().await;
 
         let e = next.expect_err("expected an error, but got a chunk?");
 
@@ -520,6 +522,10 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
                 .is_some_and(|e| matches!(e, DownloadError::Cancelled)),
             "{inner:?}"
         );
+
+        let e = DownloadError::from(e);
+
+        assert!(matches!(e, DownloadError::Cancelled), "{e:?}");
     }
 
     let cancel = CancellationToken::new();
diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs
index adaa55c179..275a72c0b0 100644
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -272,9 +272,6 @@ pub enum TaskKind {
     // Task that uploads a file to remote storage
     RemoteUploadTask,
 
-    // Task that downloads a file from remote storage
-    RemoteDownloadTask,
-
     // task that handles the initial downloading of all tenants
     InitialLoad,
 
diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs
index 6c46b83622..aabb13b15c 100644
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -880,23 +880,18 @@ impl LayerInner {
     ) -> Result {
         debug_assert_current_span_has_tenant_and_timeline_id();
 
-        let task_name = format!("download layer {}", self);
-
         let (tx, rx) = tokio::sync::oneshot::channel();
 
-        // this is sadly needed because of task_mgr::shutdown_tasks, otherwise we cannot
-        // block tenant::mgr::remove_tenant_from_memory.
-        let this: Arc = self.clone();
-        crate::task_mgr::spawn(
-            &tokio::runtime::Handle::current(),
-            crate::task_mgr::TaskKind::RemoteDownloadTask,
-            Some(self.desc.tenant_shard_id),
-            Some(self.desc.timeline_id),
-            &task_name,
-            false,
-            async move {
+        let guard = timeline
+            .gate
+            .enter()
+            .map_err(|_| DownloadError::DownloadCancelled)?;
+
+        tokio::task::spawn(async move {
+
+            let _guard = guard;
             let client = timeline
                 .remote_client
@@ -906,7 +901,7 @@ impl LayerInner {
             let result = client.download_layer_file(
                 &this.desc.filename(),
                 &this.metadata(),
-                &crate::task_mgr::shutdown_token()
+                &timeline.cancel
             )
             .await;
@@ -929,7 +924,6 @@ impl LayerInner {
                     tokio::select! {
                         _ = tokio::time::sleep(backoff) => {},
-                        _ = crate::task_mgr::shutdown_token().cancelled_owned() => {},
                         _ = timeline.cancel.cancelled() => {},
                     };
@@ -959,11 +953,10 @@ impl LayerInner {
                     }
                 }
             }
-
-            Ok(())
         }
         .in_current_span(),
         );
+
         match rx.await {
             Ok((Ok(()), permit)) => {
                 if let Some(reason) = self
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index c4b4e5fb77..52de889084 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -190,6 +190,8 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
             # So by ignoring these instead of waiting for empty upload queue
             # we execute more distinct code paths.
             '.*stopping left-over name="remote upload".*',
+            # an on-demand download is cancelled by shutdown
+            ".*initial size calculation failed: downloading failed, possibly for shutdown",
         ]
     )
diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py
index 795110d90b..96a5cc491a 100644
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -213,7 +213,9 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
             # This happens when timeline remains are cleaned up during loading
             ".*Timeline dir entry become invalid.*",
             # In one of the branches we poll for tenant to become active. Polls can generate this log message:
-            f".*Tenant {env.initial_tenant} is not active*",
+            f".*Tenant {env.initial_tenant} is not active.*",
+            # an on-demand download is cancelled by shutdown
+            ".*initial size calculation failed: downloading failed, possibly for shutdown",
         ]
     )
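
As an illustration of the shutdown pattern the patch moves to, here is a
minimal, self-contained sketch: a download may only start while the timeline's
gate can still be entered, the spawned task holds a gate guard for its whole
lifetime, and it is cancelled through the timeline's own `CancellationToken`
rather than `task_mgr`'s global shutdown token. `Gate`, `Timeline`, and
`spawn_download` below are simplified stand-ins built on `tokio` primitives,
not the pageserver's actual types.

```rust
// Illustrative sketch only; assumed dependencies:
// tokio = { version = "1", features = ["full"] }, tokio-util = "0.7".
use std::sync::Arc;
use std::time::Duration;

use tokio::sync::{OwnedSemaphorePermit, Semaphore, TryAcquireError};
use tokio_util::sync::CancellationToken;

const GATE_PERMITS: u32 = 1024;

/// Stand-in for the timeline gate: `enter()` hands out a guard while the gate
/// is open; `close()` waits until every guard is dropped and then refuses
/// further entries, which makes shutdown sticky.
struct Gate {
    permits: Arc<Semaphore>,
}

impl Gate {
    fn new() -> Self {
        Gate {
            permits: Arc::new(Semaphore::new(GATE_PERMITS as usize)),
        }
    }

    fn enter(&self) -> Result<OwnedSemaphorePermit, TryAcquireError> {
        self.permits.clone().try_acquire_owned()
    }

    async fn close(&self) {
        // Wait for every outstanding guard to come back, then keep the
        // permits so any later `enter()` fails.
        self.permits
            .acquire_many(GATE_PERMITS)
            .await
            .expect("gate semaphore is never closed")
            .forget();
    }
}

/// Trimmed-down "timeline": only the two shutdown primitives the patch uses.
struct Timeline {
    gate: Gate,
    cancel: CancellationToken,
}

impl Timeline {
    /// Start a background "download" only if the gate can still be entered;
    /// the task holds the guard for its whole lifetime and reacts to the
    /// timeline's own cancellation token, not a global task_mgr token.
    fn spawn_download(
        self: Arc<Self>,
    ) -> Result<tokio::task::JoinHandle<Result<(), &'static str>>, TryAcquireError> {
        let guard = self.gate.enter()?;
        Ok(tokio::task::spawn(async move {
            let _guard = guard; // released when the task finishes
            tokio::select! {
                // stands in for the actual layer file download
                _ = tokio::time::sleep(Duration::from_secs(10)) => Ok(()),
                _ = self.cancel.cancelled() => Err("downloading failed, possibly for shutdown"),
            }
        }))
    }

    /// Cancel in-flight downloads, then wait for all gate guards to drop.
    /// After this returns, `spawn_download()` can no longer start new work.
    async fn shutdown(&self) {
        self.cancel.cancel();
        self.gate.close().await;
    }
}

#[tokio::main]
async fn main() {
    let timeline = Arc::new(Timeline {
        gate: Gate::new(),
        cancel: CancellationToken::new(),
    });

    // A download started before shutdown holds a gate guard ...
    let handle = Arc::clone(&timeline)
        .spawn_download()
        .expect("gate is still open");

    // ... so shutdown cancels it and waits for the guard to be released.
    timeline.shutdown().await;
    assert!(handle.await.unwrap().is_err());

    // Once shutdown has begun, new downloads are refused instead of spawned.
    assert!(Arc::clone(&timeline).spawn_download().is_err());
}
```

Because the gate guard is acquired before the task is spawned, there is no
window in which `shutdown()` has completed yet a download can still start,
which is the race the walreceiver-initiated download hit with
`task_mgr::shutdown_tasks()`.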