mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-09 06:22:57 +00:00
Handle NotInitialized::ShuttingDown error in shard split (#8506)
There is a race condition between timeline shutdown and the split task.
Timeline shutdown first shuts down the upload queue, and only then fires
the cancellation token. A parallel running timeline split operation
might thus encounter a cancelled upload queue before the cancellation
token is fired, and print a noisy error.
Fix this by mapping `anyhow::Error{ NotInitialized::ShuttingDown }) to
`FlushLayerError::Cancelled` instead of `FlushLayerError::Other(_)`.
Fixes #8496
This commit is contained in:
@@ -137,7 +137,7 @@ use self::layer_manager::LayerManager;
|
||||
use self::logical_size::LogicalSize;
|
||||
use self::walreceiver::{WalReceiver, WalReceiverConf};
|
||||
|
||||
use super::config::TenantConf;
|
||||
use super::{config::TenantConf, upload_queue::NotInitialized};
|
||||
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
|
||||
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
|
||||
use super::{remote_timeline_client::RemoteTimelineClient, storage_layer::ReadableLayer};
|
||||
@@ -642,7 +642,13 @@ impl FlushLayerError {
|
||||
// When crossing from generic anyhow errors to this error type, we explicitly check
|
||||
// for timeline cancellation to avoid logging inoffensive shutdown errors as warn/err.
|
||||
fn from_anyhow(timeline: &Timeline, err: anyhow::Error) -> Self {
|
||||
if timeline.cancel.is_cancelled() {
|
||||
let cancelled = timeline.cancel.is_cancelled()
|
||||
// The upload queue might have been shut down before the official cancellation of the timeline.
|
||||
|| err
|
||||
.downcast_ref::<NotInitialized>()
|
||||
.map(NotInitialized::is_stopping)
|
||||
.unwrap_or_default();
|
||||
if cancelled {
|
||||
Self::Cancelled
|
||||
} else {
|
||||
Self::Other(Arc::new(err))
|
||||
|
||||
Reference in New Issue
Block a user