fix: on-demand downloads can outlive timeline shutdown (#7051)

## Problem

Before this PR, it was possible that on-demand downloads were started
after `Timeline::shutdown()`.

For example, we have observed a walreceiver-connection-handler-initiated
on-demand download that was started after `Timeline::shutdown()`s final
`task_mgr::shutdown_tasks()` call.

The underlying issue is that `task_mgr::shutdown_tasks()` isn't sticky,
i.e., new tasks can be spawned during or after
`task_mgr::shutdown_tasks()`.

Cc: https://github.com/neondatabase/neon/issues/4175 in lieu of a more
specific issue for task_mgr. We already decided we want to get rid of it
anyways.

Original investigation:
https://neondb.slack.com/archives/C033RQ5SPDH/p1709824952465949

## Changes

- enter gate while downloading
- use timeline cancellation token for cancelling download

thereby, fixes #7054

Entering the gate might also remove recent "kept the gate from closing"
in staging.
This commit is contained in:
Joonas Koivunen
2024-03-09 15:09:08 +02:00
committed by GitHub
parent 74d24582cf
commit b09d686335
5 changed files with 31 additions and 31 deletions

View File

@@ -17,6 +17,7 @@ use remote_storage::{
};
use test_context::test_context;
use test_context::AsyncTestContext;
use tokio::io::AsyncBufReadExt;
use tokio_util::sync::CancellationToken;
use tracing::info;
@@ -484,32 +485,33 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
))
.unwrap();
let len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
let file_len = upload_large_enough_file(&ctx.client, &path, &cancel).await;
{
let mut stream = ctx
let stream = ctx
.client
.download(&path, &cancel)
.await
.expect("download succeeds")
.download_stream;
let first = stream
.next()
.await
.expect("should have the first blob")
.expect("should have succeeded");
let mut reader = std::pin::pin!(tokio_util::io::StreamReader::new(stream));
tracing::info!(len = first.len(), "downloaded first chunk");
let first = reader.fill_buf().await.expect("should have the first blob");
let len = first.len();
tracing::info!(len, "downloaded first chunk");
assert!(
first.len() < len,
first.len() < file_len,
"uploaded file is too small, we downloaded all on first chunk"
);
reader.consume(len);
cancel.cancel();
let next = stream.next().await.expect("stream should have more");
let next = reader.fill_buf().await;
let e = next.expect_err("expected an error, but got a chunk?");
@@ -520,6 +522,10 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
.is_some_and(|e| matches!(e, DownloadError::Cancelled)),
"{inner:?}"
);
let e = DownloadError::from(e);
assert!(matches!(e, DownloadError::Cancelled), "{e:?}");
}
let cancel = CancellationToken::new();