diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index a0ea9b90dc..669eeffa32 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2005,6 +2005,10 @@ async fn put_tenant_location_config_handler( let state = get_state(&request); let conf = state.conf; + fail::fail_point!("put-location-conf-handler", |_| { + Err(ApiError::ResourceUnavailable("failpoint".into())) + }); + // The `Detached` state is special, it doesn't upsert a tenant, it removes // its local disk content and drops it from memory. if let LocationConfigMode::Detached = request_data.config.mode { diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index d1590ec75e..ff5a3831cd 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -981,6 +981,7 @@ impl Reconciler { )); } + let mut first_err = None; for (node, conf) in changes { if self.cancel.is_cancelled() { return Err(ReconcileError::Cancel); @@ -990,7 +991,12 @@ impl Reconciler { // shard _available_ (the attached location), and configuring secondary locations // can be done lazily when the node becomes available (via background reconciliation). if node.is_available() { - self.location_config(&node, conf, None, false).await?; + let res = self.location_config(&node, conf, None, false).await; + if let Err(err) = res { + if first_err.is_none() { + first_err = Some(err); + } + } } else { // If the node is unavailable, we skip and consider the reconciliation successful: this // is a common case where a pageserver is marked unavailable: we demote a location on @@ -1002,6 +1008,10 @@ impl Reconciler { } } + if let Some(err) = first_err { + return Err(err); + } + // The condition below identifies a detach. We must have no attached intent and // must have been attached to something previously. Pass this information to // the [`ComputeHook`] such that it can update its tenant-wide state. diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 8f5efe8ac4..4c011033af 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1530,10 +1530,19 @@ impl Service { // so that waiters will see the correct error after waiting. tenant.set_last_error(result.sequence, e); - // Skip deletions on reconcile failures - let upsert_deltas = - deltas.filter(|delta| matches!(delta, ObservedStateDelta::Upsert(_))); - tenant.apply_observed_deltas(upsert_deltas); + // If the reconciliation failed, don't clear the observed state for places where we + // detached. Instead, mark the observed state as uncertain. + let failed_reconcile_deltas = deltas.map(|delta| { + if let ObservedStateDelta::Delete(node_id) = delta { + ObservedStateDelta::Upsert(Box::new(( + node_id, + ObservedStateLocation { conf: None }, + ))) + } else { + delta + } + }); + tenant.apply_observed_deltas(failed_reconcile_deltas); } } diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 9986c1f24a..a6f05fe8ad 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -4959,3 +4959,49 @@ def test_storage_controller_forward_404(neon_env_builder: NeonEnvBuilder): env.storage_controller.configure_failpoints( ("reconciler-live-migrate-post-generation-inc", "off") ) + + +def test_re_attach_with_stuck_secondary(neon_env_builder: NeonEnvBuilder): + """ + This test assumes that the secondary location cannot be configured for whatever reason. 
+    It then detaches and re-attaches the tenant and, finally, checks
+    for observed state consistency by attempting to create a timeline.
+
+    See LKB-204 for more details.
+    """
+
+    neon_env_builder.num_pageservers = 2
+
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    env.storage_controller.allowed_errors.append(".*failpoint.*")
+
+    tenant_id, _ = env.create_tenant(shard_count=1, placement_policy='{"Attached":1}')
+    env.storage_controller.reconcile_until_idle()
+
+    locations = env.storage_controller.locate(tenant_id)
+    assert len(locations) == 1
+    primary: int = locations[0]["node_id"]
+
+    not_primary = [ps.id for ps in env.pageservers if ps.id != primary]
+    assert len(not_primary) == 1
+    secondary = not_primary[0]
+
+    env.get_pageserver(secondary).http_client().configure_failpoints(
+        ("put-location-conf-handler", "return(1)")
+    )
+
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
+
+    with pytest.raises(Exception, match="failpoint"):
+        env.storage_controller.reconcile_all()
+
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": {"Attached": 1}})
+
+    with pytest.raises(Exception, match="failpoint"):
+        env.storage_controller.reconcile_all()
+
+    env.storage_controller.pageserver_api().timeline_create(
+        pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=TimelineId.generate()
+    )
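
Reviewer notes

The new `put-location-conf-handler` failpoint in `routes.rs` is what the test arms with `"return(1)"`. Here is a minimal sketch of how the `fail` crate wires a failpoint like this together, assuming `fail = { version = "0.5", features = ["failpoints"] }` in Cargo.toml; the pageserver's real handler, error types, and failpoint plumbing differ:

```rust
// Stand-in for a request handler guarded by a failpoint. When the failpoint
// is unset, the macro is a no-op and the normal path runs; when it is set to
// a `return(...)` action, the closure's value is returned early instead.
fn handler() -> Result<String, String> {
    fail::fail_point!("put-location-conf-handler", |_| {
        // The closure receives the action's payload as Option<String>;
        // like the diff above, we ignore it and return a fixed error.
        Err("failpoint".to_string())
    });
    Ok("configured".to_string())
}

fn main() {
    // Failpoint not configured: the handler succeeds.
    assert_eq!(handler(), Ok("configured".to_string()));

    // "return(1)" triggers the closure; the payload "1" is ignored by `|_|`.
    // This mirrors what the test's configure_failpoints(...) call does over HTTP.
    fail::cfg("put-location-conf-handler", "return(1)").unwrap();
    assert_eq!(handler(), Err("failpoint".to_string()));
}
```

Note that `fail_point!` compiles to nothing unless the crate's `failpoints` feature is enabled, which is why failpoints are safe to leave in production code paths.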
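
The `service.rs` hunk is the heart of the fix: previously, `Delete` deltas were filtered out entirely on a failed reconcile, so the observed state kept whatever conf it last had for those locations even though they may have been partially detached. Now each `Delete` is downgraded to an `Upsert` with `conf: None`, marking the location's state as unknown so a later reconcile re-probes it rather than trusting stale state. Below is a self-contained sketch of that rewrite; the `u64` node ids and `String` confs are illustrative stand-ins, not the real `storage_controller` types:

```rust
#[derive(Debug)]
struct ObservedStateLocation {
    // None means "we don't know what is on this node"; a later reconcile
    // must re-query the pageserver before relying on this entry.
    conf: Option<String>,
}

#[derive(Debug)]
enum ObservedStateDelta {
    Upsert(Box<(u64, ObservedStateLocation)>),
    Delete(u64),
}

/// On a failed reconcile, keep upserts as-is but downgrade deletes to
/// "uncertain" upserts (conf: None) instead of dropping them, so the
/// location is neither forgotten nor assumed to still hold its old conf.
fn rewrite_failed_reconcile_deltas(
    deltas: impl Iterator<Item = ObservedStateDelta>,
) -> impl Iterator<Item = ObservedStateDelta> {
    deltas.map(|delta| match delta {
        ObservedStateDelta::Delete(node_id) => ObservedStateDelta::Upsert(Box::new((
            node_id,
            ObservedStateLocation { conf: None },
        ))),
        other => other,
    })
}

fn main() {
    let deltas = vec![
        ObservedStateDelta::Upsert(Box::new((
            1,
            ObservedStateLocation { conf: Some("attached".to_string()) },
        ))),
        ObservedStateDelta::Delete(2),
    ];
    // The upsert for node 1 passes through; the delete for node 2 becomes
    // Upsert((2, ObservedStateLocation { conf: None })).
    for delta in rewrite_failed_reconcile_deltas(deltas.into_iter()) {
        println!("{delta:?}");
    }
}
```

This is what lets the re-attach in the test succeed: after the failed detach, the controller treats the secondary's state as uncertain instead of believing its pre-detach conf, and the `reconciler.rs` change (`first_err`) ensures all location configs are still attempted before the error is surfaced.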