attachment_service: /attach-hook: correctly handle detach (#6433)

Before this patch, we would update the `tenant_state.intent` in memory
but not persist the detachment to disk.

I noticed this in https://github.com/neondatabase/neon/pull/6214 where
we stop, then restart, the attachment service.
This commit is contained in:
Christian Schwarz
2024-01-22 19:27:05 +01:00
committed by GitHub
parent 93572a3e99
commit 205b6111e6
3 changed files with 22 additions and 12 deletions

View File

@@ -184,7 +184,7 @@ impl Persistence {
pub(crate) async fn increment_generation(
&self,
tenant_shard_id: TenantShardId,
node_id: Option<NodeId>,
node_id: NodeId,
) -> anyhow::Result<Generation> {
let (write, gen) = {
let mut locked = self.state.lock().unwrap();
@@ -192,14 +192,9 @@ impl Persistence {
anyhow::bail!("Tried to increment generation of unknown shard");
};
// If we're called with a None pageserver, we need only update the generation
// record to disassociate it with this pageserver, not actually increment the number, as
// the increment is guaranteed to happen the next time this tenant is attached.
if node_id.is_some() {
shard.generation += 1;
}
shard.generation += 1;
shard.generation_pageserver = Some(node_id);
shard.generation_pageserver = node_id;
let gen = Generation::new(shard.generation);
(locked.save(), gen)
};
@@ -208,6 +203,19 @@ impl Persistence {
Ok(gen)
}
pub(crate) async fn detach(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
let write = {
let mut locked = self.state.lock().unwrap();
let Some(shard) = locked.tenants.get_mut(&tenant_shard_id) else {
anyhow::bail!("Tried to increment generation of unknown shard");
};
shard.generation_pageserver = None;
locked.save()
};
write.commit().await?;
Ok(())
}
pub(crate) async fn re_attach(
&self,
node_id: NodeId,

View File

@@ -296,7 +296,7 @@ impl Reconciler {
// Increment generation before attaching to new pageserver
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, Some(dest_ps_id))
.increment_generation(self.tenant_shard_id, dest_ps_id)
.await?;
let dest_conf = build_location_config(
@@ -395,7 +395,7 @@ impl Reconciler {
// as locations with unknown (None) observed state.
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, Some(node_id))
.increment_generation(self.tenant_shard_id, node_id)
.await?;
wanted_conf.generation = self.generation.into();
tracing::info!("Observed configuration requires update.");

View File

@@ -362,13 +362,14 @@ impl Service {
);
}
let new_generation = if attach_req.node_id.is_some() {
let new_generation = if let Some(req_node_id) = attach_req.node_id {
Some(
self.persistence
.increment_generation(attach_req.tenant_shard_id, attach_req.node_id)
.increment_generation(attach_req.tenant_shard_id, req_node_id)
.await?,
)
} else {
self.persistence.detach(attach_req.tenant_shard_id).await?;
None
};
@@ -407,6 +408,7 @@ impl Service {
"attach_hook: tenant {} set generation {:?}, pageserver {}",
attach_req.tenant_shard_id,
tenant_state.generation,
// TODO: this is an odd number of 0xf's
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);