mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-17 05:00:38 +00:00
storcon: do not update observed state on node activation (#11155)
## Problem When a node becomes active, we query its locations and update the observed state in-place. This can race with the observed state updates done when processing reconcile results. ## Summary of changes The argument for this reconciliation step is that is reduces the need for background reconciliations. I don't think is actually true anymore. There's two cases. 1. Restart of node after drain. Usually the node does not go through the offline state here, so observed locations were not marked as none. In any case, there should be a handful of shards max on the node since we've just drained it. 2. Node comes back online after failure or network partition. When the node is marked offline, we reschedule everything away from it. When it later becomes active, the previous observed location is extraneous and requires a reconciliation anyway. Closes https://github.com/neondatabase/neon/issues/11148
This commit is contained in:
@@ -2004,21 +2004,41 @@ impl Service {
|
||||
tracing::info!("Loaded {} LocationConfigs", configs.tenant_shards.len());
|
||||
|
||||
let mut cleanup = Vec::new();
|
||||
let mut mismatched_locations = 0;
|
||||
{
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
|
||||
for (tenant_shard_id, observed_loc) in configs.tenant_shards {
|
||||
for (tenant_shard_id, reported) in configs.tenant_shards {
|
||||
let Some(tenant_shard) = locked.tenants.get_mut(&tenant_shard_id) else {
|
||||
cleanup.push(tenant_shard_id);
|
||||
continue;
|
||||
};
|
||||
tenant_shard
|
||||
|
||||
let on_record = &mut tenant_shard
|
||||
.observed
|
||||
.locations
|
||||
.insert(node.get_id(), ObservedStateLocation { conf: observed_loc });
|
||||
.entry(node.get_id())
|
||||
.or_insert_with(|| ObservedStateLocation { conf: None })
|
||||
.conf;
|
||||
|
||||
// If the location reported by the node does not match our observed state,
|
||||
// then we mark it as uncertain and let the background reconciliation loop
|
||||
// deal with it.
|
||||
//
|
||||
// Note that this also covers net new locations reported by the node.
|
||||
if *on_record != reported {
|
||||
mismatched_locations += 1;
|
||||
*on_record = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if mismatched_locations > 0 {
|
||||
tracing::info!(
|
||||
"Set observed state to None for {mismatched_locations} mismatched locations"
|
||||
);
|
||||
}
|
||||
|
||||
for tenant_shard_id in cleanup {
|
||||
tracing::info!("Detaching {tenant_shard_id}");
|
||||
match node
|
||||
|
||||
Reference in New Issue
Block a user