test: no missed wakeups, cancellation and timeout flow to downloads (#7863)
I suspected a wakeup could be lost with `remote_storage::support::DownloadStream` if the cancellation and inner stream wakeups happen simultaneously: the next poll would return only the cancellation error, without setting a new wakeup.

There is in fact no lost wakeup, because the single future producing the cancellation error is consumed when its value is ready, and a new future is created for the *next* value; the new future is always polled. Similarly, if only `Stream::poll_next` is used after a `Some(_)` value has been yielded, there is no reason to expect a wakeup for the *(N+1)th* stream value to be registered already: `Stream::poll_next` will be called again when the next value is wanted. A test is added to show that the above holds.

Additionally, there was a question of whether these cancellations and timeouts flow to attached or secondary tenant downloads. A test is added to show that this does, in fact, happen.

Lastly, a warning message is now logged when a download stream is polled after a timeout or cancellation error (currently unexpected), so that we can rule it out while troubleshooting.
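To make the waker reasoning concrete: whenever a future or stream returns `Poll::Pending`, the `Future`/`Stream` contract requires it to have registered the current waker first, so re-polling a kept-alive cancellation future can never miss a concurrent `cancel()`. Below is a minimal, self-contained sketch of that pattern; it is illustrative only, not the `DownloadStream` code itself, and assumes `tokio` and `tokio-util` as dependencies.

```rust
use std::future::Future;
use std::pin::pin;
use std::task::Poll;

use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();

    // One cancellation future lives across polls; re-polling it after a
    // Pending re-registers the *current* waker, per the Future contract.
    let mut cancelled = pin!(cancel.cancelled());
    let combined = std::future::poll_fn(move |cx| {
        if cancelled.as_mut().poll(cx).is_ready() {
            return Poll::Ready("cancelled");
        }
        // A real stream adapter would poll its inner stream here; a Pending
        // from the inner stream likewise registers cx.waker() first.
        Poll::Pending
    });

    cancel.cancel();
    assert_eq!(combined.await, "cancelled");
}
```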
@@ -78,6 +78,10 @@ where
                 let e = Err(std::io::Error::from(e));
                 return Poll::Ready(Some(e));
             }
+        } else {
+            // this would be perfectly valid behaviour for doing a graceful completion on the
+            // download for example, but not one we expect to do right now.
+            tracing::warn!("continuing polling after having cancelled or timeouted");
         }
 
         this.inner.poll_next(cx)
@@ -89,13 +93,22 @@ where
 }
 
 /// Fires only on the first cancel or timeout, not on both.
-pub(crate) async fn cancel_or_timeout(
+pub(crate) fn cancel_or_timeout(
     timeout: Duration,
     cancel: CancellationToken,
-) -> TimeoutOrCancel {
-    tokio::select! {
-        _ = tokio::time::sleep(timeout) => TimeoutOrCancel::Timeout,
-        _ = cancel.cancelled() => TimeoutOrCancel::Cancel,
+) -> impl std::future::Future<Output = TimeoutOrCancel> + 'static {
+    // futures are lazy, they don't do anything before being polled.
+    //
+    // "precalculate" the wanted deadline before returning the future, so that we can use pause
+    // failpoint to trigger a timeout in test.
+    let deadline = tokio::time::Instant::now() + timeout;
+    async move {
+        tokio::select! {
+            _ = tokio::time::sleep_until(deadline) => TimeoutOrCancel::Timeout,
+            _ = cancel.cancelled() => {
+                TimeoutOrCancel::Cancel
+            },
+        }
     }
 }
 
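A side note on the "futures are lazy" comment above: capturing the deadline before returning the future means that time spent before the first poll, for example while the pausable failpoint holds the download task, already counts against the timeout. The following is a minimal sketch of that property under tokio's paused test clock; the `cancel_or_timeout` shape mirrors the hunk above, while the test name is illustrative and tokio's `test-util` feature is assumed.

```rust
use std::time::Duration;

use tokio_util::sync::CancellationToken;

#[derive(Debug, PartialEq)]
enum TimeoutOrCancel {
    Timeout,
    Cancel,
}

// Same shape as the patched cancel_or_timeout above: the deadline is
// computed when the function is called, not when the future is first polled.
fn cancel_or_timeout(
    timeout: Duration,
    cancel: CancellationToken,
) -> impl std::future::Future<Output = TimeoutOrCancel> + 'static {
    let deadline = tokio::time::Instant::now() + timeout;
    async move {
        tokio::select! {
            _ = tokio::time::sleep_until(deadline) => TimeoutOrCancel::Timeout,
            _ = cancel.cancelled() => TimeoutOrCancel::Cancel,
        }
    }
}

#[tokio::test(start_paused = true)]
async fn deadline_counts_from_call_not_first_poll() {
    let cancel = CancellationToken::new();
    let fut = cancel_or_timeout(Duration::from_secs(1), cancel);

    // Simulate time passing before anyone polls the future, as happens
    // while the pausable failpoint holds the download task.
    tokio::time::advance(Duration::from_secs(2)).await;

    // The deadline was captured at the call, so the timeout fires on the
    // first poll; a lazy `sleep(timeout)` would only start counting here.
    assert_eq!(fut.await, TimeoutOrCancel::Timeout);
}
```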
@@ -172,4 +185,31 @@ mod tests {
             _ = tokio::time::sleep(Duration::from_secs(121)) => {},
         }
     }
+
+    #[tokio::test]
+    async fn notified_but_pollable_after() {
+        let inner = futures::stream::once(futures::future::ready(Ok(bytes::Bytes::from_static(
+            b"hello world",
+        ))));
+        let timeout = Duration::from_secs(120);
+        let cancel = CancellationToken::new();
+
+        cancel.cancel();
+        let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);
+        let mut stream = std::pin::pin!(stream);
+
+        let next = stream.next().await;
+        let ioe = next.unwrap().unwrap_err();
+        assert!(
+            matches!(
+                ioe.get_ref().unwrap().downcast_ref::<DownloadError>(),
+                Some(&DownloadError::Cancelled)
+            ),
+            "{ioe:?}"
+        );
+
+        let next = stream.next().await;
+        let bytes = next.unwrap().unwrap();
+        assert_eq!(&b"hello world"[..], bytes);
+    }
 }
@@ -28,6 +28,7 @@ use crate::TEMP_FILE_SUFFIX;
 use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
 use utils::crashsafe::path_with_suffix_extension;
 use utils::id::{TenantId, TimelineId};
+use utils::pausable_failpoint;
 
 use super::index::{IndexPart, LayerFileMetadata};
 use super::{
@@ -152,6 +153,8 @@ async fn download_object<'a>(
 
     let download = storage.download(src_path, cancel).await?;
 
+    pausable_failpoint!("before-downloading-layer-stream-pausable");
+
     let mut buf_writer =
         tokio::io::BufWriter::with_capacity(super::BUFFER_SIZE, destination_file);
 
@@ -199,6 +202,8 @@ async fn download_object<'a>(
 
     let mut download = storage.download(src_path, cancel).await?;
 
+    pausable_failpoint!("before-downloading-layer-stream-pausable");
+
     // TODO: use vectored write (writev) once supported by tokio-epoll-uring.
     // There's chunks_vectored() on the stream.
     let (bytes_amount, destination_file) = async {
@@ -1000,7 +1000,7 @@ impl<'a> TenantDownloader<'a> {
                 layer.name,
                 layer.metadata.file_size
             );
-            let downloaded_bytes = match download_layer_file(
+            let downloaded_bytes = download_layer_file(
                 self.conf,
                 self.remote_storage,
                 *tenant_shard_id,
@@ -1011,8 +1011,9 @@ impl<'a> TenantDownloader<'a> {
                 &self.secondary_state.cancel,
                 ctx,
             )
-            .await
-            {
+            .await;
+
+            let downloaded_bytes = match downloaded_bytes {
                 Ok(bytes) => bytes,
                 Err(DownloadError::NotFound) => {
                     // A heatmap might be out of date and refer to a layer that doesn't exist any more.
@@ -334,8 +334,11 @@ where
 
         let tenant_shard_id = job.get_tenant_shard_id();
         let barrier = if let Some(barrier) = self.get_running(tenant_shard_id) {
-            tracing::info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
-                "Command already running, waiting for it");
+            tracing::info!(
+                tenant_id=%tenant_shard_id.tenant_id,
+                shard_id=%tenant_shard_id.shard_slug(),
+                "Command already running, waiting for it"
+            );
             barrier
         } else {
             let running = self.spawn_now(job);
@@ -171,6 +171,8 @@ class S3Storage:
     """Is this MOCK_S3 (false) or REAL_S3 (true)"""
     real: bool
     endpoint: Optional[str] = None
+    """formatting deserialized with humantime crate, for example "1s"."""
+    custom_timeout: Optional[str] = None
 
     def access_env_vars(self) -> Dict[str, str]:
         if self.aws_profile is not None:
@@ -208,6 +210,9 @@ class S3Storage:
         if self.endpoint is not None:
             rv["endpoint"] = self.endpoint
 
+        if self.custom_timeout is not None:
+            rv["timeout"] = self.custom_timeout
+
         return rv
 
     def to_toml_inline_table(self) -> str:
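As the docstring above notes, the `custom_timeout` string is deserialized on the Rust side with the humantime crate. A minimal sketch of what "1s" means there, assuming the `humantime` crate as a dependency:

```rust
use std::time::Duration;

fn main() {
    // "1s" in the remote storage config corresponds to a one-second Duration.
    let timeout = humantime::parse_duration("1s").unwrap();
    assert_eq!(timeout, Duration::from_secs(1));
}
```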
@@ -3,8 +3,10 @@
 
 import time
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from typing import Any, DefaultDict, Dict, Tuple
 
+import pytest
 from fixtures.common_types import Lsn
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
@@ -13,7 +15,7 @@ from fixtures.neon_fixtures import (
     last_flush_lsn_upload,
     wait_for_last_flush_lsn,
 )
-from fixtures.pageserver.http import PageserverHttpClient
+from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
 from fixtures.pageserver.utils import (
     assert_tenant_state,
     wait_for_last_record_lsn,
@@ -21,7 +23,7 @@ from fixtures.pageserver.utils import (
     wait_for_upload_queue_empty,
     wait_until_tenant_active,
 )
-from fixtures.remote_storage import RemoteStorageKind
+from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
 from fixtures.utils import query_scalar, wait_until
 
 
@@ -656,5 +658,200 @@ def test_compaction_downloads_on_demand_with_image_creation(neon_env_builder: NeonEnvBuilder):
     assert dict(kinds_after) == {"Delta": 4, "Image": 1}
 
 
+def test_layer_download_cancelled_by_config_location(neon_env_builder: NeonEnvBuilder):
+    """
+    Demonstrates that tenant shutdown will cancel on-demand download and secondary doing warmup.
+    """
+    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
+
+    # turn off background tasks so that they don't interfere with the downloads
+    env = neon_env_builder.init_start(
+        initial_tenant_conf={
+            "gc_period": "0s",
+            "compaction_period": "0s",
+        }
+    )
+    client = env.pageserver.http_client()
+    failpoint = "before-downloading-layer-stream-pausable"
+    client.configure_failpoints((failpoint, "pause"))
+
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*downloading failed, possibly for shutdown.*",
+        ]
+    )
+
+    info = client.layer_map_info(env.initial_tenant, env.initial_timeline)
+    assert len(info.delta_layers()) == 1
+
+    layer = info.delta_layers()[0]
+
+    client.tenant_heatmap_upload(env.initial_tenant)
+
+    # evict the initdb layer so we can download it
+    client.evict_layer(env.initial_tenant, env.initial_timeline, layer.layer_file_name)
+
+    with ThreadPoolExecutor(max_workers=2) as exec:
+        download = exec.submit(
+            client.download_layer,
+            env.initial_tenant,
+            env.initial_timeline,
+            layer.layer_file_name,
+        )
+
+        _, offset = wait_until(
+            20, 0.5, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}")
+        )
+
+        location_conf = {"mode": "Detached", "tenant_conf": {}}
+        # assume detach removes the layers
+        detach = exec.submit(client.tenant_location_conf, env.initial_tenant, location_conf)
+
+        _, offset = wait_until(
+            20,
+            0.5,
+            lambda: env.pageserver.assert_log_contains(
+                "closing is taking longer than expected", offset
+            ),
+        )
+
+        client.configure_failpoints((failpoint, "off"))
+
+        with pytest.raises(
+            PageserverApiException, match="downloading failed, possibly for shutdown"
+        ):
+            download.result()
+
+        env.pageserver.assert_log_contains(".*downloading failed, possibly for shutdown.*")
+
+        detach.result()
+
+        client.configure_failpoints((failpoint, "pause"))
+
+        _, offset = wait_until(
+            20,
+            0.5,
+            lambda: env.pageserver.assert_log_contains(f"cfg failpoint: {failpoint} pause", offset),
+        )
+
+        location_conf = {
+            "mode": "Secondary",
+            "secondary_conf": {"warm": True},
+            "tenant_conf": {},
+        }
+
+        client.tenant_location_conf(env.initial_tenant, location_conf)
+
+        warmup = exec.submit(client.tenant_secondary_download, env.initial_tenant, wait_ms=30000)
+
+        _, offset = wait_until(
+            20,
+            0.5,
+            lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}", offset),
+        )
+
+        client.configure_failpoints((failpoint, "off"))
+        location_conf = {"mode": "Detached", "tenant_conf": {}}
+        client.tenant_location_conf(env.initial_tenant, location_conf)
+
+        client.configure_failpoints((failpoint, "off"))
+
+        # here we have nothing in the log, but we see that the warmup and conf location update worked
+        warmup.result()
+
+
+def test_layer_download_timeouted(neon_env_builder: NeonEnvBuilder):
+    """
+    Pause using a pausable_failpoint longer than the client timeout to simulate the timeout happening.
+    """
+    neon_env_builder.enable_pageserver_remote_storage(s3_storage())
+    assert isinstance(neon_env_builder.pageserver_remote_storage, S3Storage)
+    neon_env_builder.pageserver_remote_storage.custom_timeout = "1s"
+
+    # turn off background tasks so that they don't interfere with the downloads
+    env = neon_env_builder.init_start(
+        initial_tenant_conf={
+            "gc_period": "0s",
+            "compaction_period": "0s",
+        }
+    )
+    client = env.pageserver.http_client()
+    failpoint = "before-downloading-layer-stream-pausable"
+    client.configure_failpoints((failpoint, "pause"))
+
+    info = client.layer_map_info(env.initial_tenant, env.initial_timeline)
+    assert len(info.delta_layers()) == 1
+
+    layer = info.delta_layers()[0]
+
+    client.tenant_heatmap_upload(env.initial_tenant)
+
+    # evict so we can download it
+    client.evict_layer(env.initial_tenant, env.initial_timeline, layer.layer_file_name)
+
+    with ThreadPoolExecutor(max_workers=2) as exec:
+        download = exec.submit(
+            client.download_layer,
+            env.initial_tenant,
+            env.initial_timeline,
+            layer.layer_file_name,
+        )
+
+        _, offset = wait_until(
+            20, 0.5, lambda: env.pageserver.assert_log_contains(f"at failpoint {failpoint}")
+        )
+        # ensure enough time while paused to trip the timeout
+        time.sleep(2)
+
+        client.configure_failpoints((failpoint, "off"))
+        download.result()
+
+        _, offset = env.pageserver.assert_log_contains(
+            ".*failed, will retry \\(attempt 0\\): timeout.*"
+        )
+        _, offset = env.pageserver.assert_log_contains(".*succeeded after [0-9]+ retries.*", offset)
+
+        client.evict_layer(env.initial_tenant, env.initial_timeline, layer.layer_file_name)
+
+        client.configure_failpoints((failpoint, "pause"))
+
+        # capture the next offset for a new synchronization with the failpoint
+        _, offset = wait_until(
+            20,
+            0.5,
+            lambda: env.pageserver.assert_log_contains(f"cfg failpoint: {failpoint} pause", offset),
+        )
+
+        location_conf = {
+            "mode": "Secondary",
+            "secondary_conf": {"warm": True},
+            "tenant_conf": {},
+        }
+
+        client.tenant_location_conf(
+            env.initial_tenant,
+            location_conf,
+        )
+
+        started = time.time()
+
+        warmup = exec.submit(client.tenant_secondary_download, env.initial_tenant, wait_ms=30000)
+        # ensure enough time while paused to trip the timeout
+        time.sleep(2)
+
+        client.configure_failpoints((failpoint, "off"))
+
+        warmup.result()
+
+        elapsed = time.time() - started
+
+        _, offset = env.pageserver.assert_log_contains(
+            ".*failed, will retry \\(attempt 0\\): timeout.*", offset
+        )
+        _, offset = env.pageserver.assert_log_contains(".*succeeded after [0-9]+ retries.*", offset)
+
+        assert elapsed < 30, f"too long passed: {elapsed=}"
+
+
 def stringify(conf: Dict[str, Any]) -> Dict[str, str]:
     return dict(map(lambda x: (x[0], str(x[1])), conf.items()))