Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-07 21:42:56 +00:00
storage_controller: fix node flap detach race (#10298)
## Problem

The observed state removal may race with the inline updates of the observed state done from `Service::node_activate_reconcile`. This was intended to work as follows:

1. Detaches performed while the node is unavailable remove the entry from the observed state.
2. `Service::node_activate_reconcile` diffs the locations returned by the pageserver against the observed state and detaches in-line when required.

## Summary of changes

This PR removes step (1) and lets background reconciliations deal with the mismatch between the intent and observed state. A follow-up will attempt to remove `Service::node_activate_reconcile` altogether.

Closes https://github.com/neondatabase/neon/issues/10253
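To make the behaviour change concrete, here is a minimal, self-contained sketch of the idea. The `ShardState`, `Location`, `detach_offline`, and `reconcile` names are hypothetical simplifications, not the storage controller's actual types: the point is only that detaching from an offline node now changes the intent while leaving the observed entry in place, and a later background reconciliation resolves the intent/observed mismatch instead of the activation-time inline diff.

```rust
use std::collections::HashMap;

// Hypothetical, simplified stand-ins for the controller's intent/observed state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Location {
    Attached,
    Detached,
}

#[derive(Default)]
struct ShardState {
    // What we want each node to be doing with this shard.
    intent: HashMap<u64, Location>,
    // What we last believed each node was actually doing.
    observed: HashMap<u64, Location>,
}

impl ShardState {
    /// Detach from a node that is currently unavailable.
    ///
    /// Before this change the observed entry was dropped here, racing with the
    /// inline updates performed on node activation. Now only the intent changes.
    fn detach_offline(&mut self, node_id: u64) {
        self.intent.insert(node_id, Location::Detached);
        // Deliberately no `self.observed.remove(&node_id)` here.
    }

    /// A background reconciliation pass: detach wherever the observed state
    /// still says "attached" but the intent says "detached".
    fn reconcile(&mut self) -> Vec<u64> {
        let mut detached = Vec::new();
        for (node_id, wanted) in &self.intent {
            if *wanted == Location::Detached
                && self.observed.get(node_id) == Some(&Location::Attached)
            {
                // The real reconciler would issue a location_config request to
                // the pageserver once the node is reachable again.
                detached.push(*node_id);
            }
        }
        for node_id in &detached {
            self.observed.insert(*node_id, Location::Detached);
        }
        detached
    }
}

fn main() {
    let mut shard = ShardState::default();
    shard.intent.insert(1, Location::Attached);
    shard.observed.insert(1, Location::Attached);

    // Node 1 goes offline and the shard is detached while it is away.
    shard.detach_offline(1);

    // Later, a background reconciliation notices intent != observed and detaches.
    assert_eq!(shard.reconcile(), vec![1]);
    println!("observed after reconcile: {:?}", shard.observed);
}
```

The hunks below show the affected regions in `impl Reconciler` and `impl Service`, including new pausable failpoints (presumably used by tests that reproduce the race).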
@@ -14,7 +14,6 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
use utils::backoff::exponential_backoff;
use utils::failpoint_support;
use utils::generation::Generation;
use utils::id::{NodeId, TimelineId};
use utils::lsn::Lsn;
@@ -212,11 +211,12 @@ impl Reconciler {
        lazy: bool,
    ) -> Result<(), ReconcileError> {
        if !node.is_available() && config.mode == LocationConfigMode::Detached {
            // Attempts to detach from offline nodes may be imitated without doing I/O: a node which is offline
            // will get fully reconciled wrt the shard's intent state when it is reactivated, irrespective of
            // what we put into `observed`, in [`crate::service::Service::node_activate_reconcile`]
            tracing::info!("Node {node} is unavailable during detach: proceeding anyway, it will be detached on next activation");
            self.observed.locations.remove(&node.get_id());
            // [`crate::service::Service::node_activate_reconcile`] will update the observed state
            // when the node comes back online. At that point, the intent and observed states will
            // be mismatched and a background reconciliation will detach.
            tracing::info!(
                "Node {node} is unavailable during detach: proceeding anyway, it will be detached via background reconciliation"
            );
            return Ok(());
        }

@@ -749,6 +749,8 @@ impl Reconciler {
        };

        if increment_generation {
            pausable_failpoint!("reconciler-pre-increment-generation");

            let generation = self
                .persistence
                .increment_generation(self.tenant_shard_id, node.get_id())
@@ -824,7 +826,7 @@ impl Reconciler {
                .handle_detach(self.tenant_shard_id, self.shard.stripe_size);
        }

        failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue");
        pausable_failpoint!("reconciler-epilogue");

        Ok(())
    }

@@ -83,6 +83,7 @@ use utils::{
    generation::Generation,
    http::error::ApiError,
    id::{NodeId, TenantId, TimelineId},
    pausable_failpoint,
    sync::gate::Gate,
};

@@ -1024,6 +1025,8 @@ impl Service {
                )
                .await;

            pausable_failpoint!("heartbeat-pre-node-state-configure");

            // This is the code path for geniune availability transitions (i.e node
            // goes unavailable and/or comes back online).
            let res = self
@@ -2492,6 +2495,7 @@ impl Service {
        // Persist updates
        // Ordering: write to the database before applying changes in-memory, so that
        // we will not appear time-travel backwards on a restart.

        let mut schedule_context = ScheduleContext::default();
        for ShardUpdate {
            tenant_shard_id,