storcon: track pageserver availability zone (#8852)

## Problem
In order to build AZ aware scheduling, the storage controller needs to
know what AZ pageservers are in.

Related https://github.com/neondatabase/neon/issues/8848

## Summary of changes
This patch set adds a new nullable column to the `nodes` table:
`availability_zone_id`. The node registration
request is extended to include the AZ id (pageservers already have this
in their `metadata.json` file).

If the node is already registered, then we update the persistent and
in-memory state with the provided AZ.
Otherwise, we add the node with the AZ to begin with.

A couple of assumptions are made here:
1. Pageserver AZ ids are stable
2. AZ ids do not change over time

Once all pageservers have a configured AZ, we can remove the `Option`
wrappers in the code and make the database column non-nullable.
This commit is contained in:
Vlad Lazar
2024-08-28 18:23:55 +01:00
committed by GitHub
parent a889a49e06
commit 793b5061ec
11 changed files with 143 additions and 19 deletions

View File

@@ -36,6 +36,8 @@ pub(crate) struct Node {
listen_pg_addr: String,
listen_pg_port: u16,
availability_zone_id: Option<String>,
// This cancellation token means "stop any RPCs in flight to this node, and don't start
// any more". It is not related to process shutdown.
#[serde(skip)]
@@ -61,6 +63,10 @@ impl Node {
self.id
}
/// Borrow this node's availability zone id, if one is set.
///
/// Returns `None` when `availability_zone_id` holds no value; otherwise the
/// stored string is returned as a `&str` without cloning.
pub(crate) fn get_availability_zone_id(&self) -> Option<&str> {
    // Bail out with `None` when the field is empty, then view the `String` as a slice.
    let az_id = self.availability_zone_id.as_ref()?;
    Some(az_id.as_str())
}
pub(crate) fn get_scheduling(&self) -> NodeSchedulingPolicy {
self.scheduling
}
@@ -72,7 +78,18 @@ impl Node {
/// Does this registration request match `self`? This is used when deciding whether a registration
/// request should be allowed to update an existing record with the same node ID.
pub(crate) fn registration_match(&self, register_req: &NodeRegisterRequest) -> bool {
self.id == register_req.node_id
let az_ids_match = {
match (
self.availability_zone_id.as_deref(),
register_req.availability_zone_id.as_deref(),
) {
(Some(current_az), Some(register_req_az)) => current_az == register_req_az,
_ => true,
}
};
az_ids_match
&& self.id == register_req.node_id
&& self.listen_http_addr == register_req.listen_http_addr
&& self.listen_http_port == register_req.listen_http_port
&& self.listen_pg_addr == register_req.listen_pg_addr
@@ -173,6 +190,7 @@ impl Node {
listen_http_port: u16,
listen_pg_addr: String,
listen_pg_port: u16,
availability_zone_id: Option<String>,
) -> Self {
Self {
id,
@@ -182,6 +200,7 @@ impl Node {
listen_pg_port,
scheduling: NodeSchedulingPolicy::Active,
availability: NodeAvailability::Offline,
availability_zone_id,
cancel: CancellationToken::new(),
}
}
@@ -194,6 +213,7 @@ impl Node {
listen_http_port: self.listen_http_port as i32,
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port as i32,
availability_zone_id: self.availability_zone_id.clone(),
}
}
@@ -208,6 +228,7 @@ impl Node {
listen_http_port: np.listen_http_port as u16,
listen_pg_addr: np.listen_pg_addr,
listen_pg_port: np.listen_pg_port as u16,
availability_zone_id: np.availability_zone_id,
cancel: CancellationToken::new(),
}
}