storcon: use https for pageservers (#10759)

## Problem

The storage controller uses insecure HTTP for the pageserver API.

Closes: https://github.com/neondatabase/cloud/issues/23734
Closes: https://github.com/neondatabase/cloud/issues/24091

## Summary of changes

- Add an optional `listen_https_port` field to the storage controller's Node
state and its API (RegisterNode/ListNodes/etc).
- Allow updating `listen_https_port` on node registration, so that the https
port can be rolled out gradually across all nodes (sketched below).
- Add a `use_https_pageserver_api` CLI option to the storage controller to
enable https.
- Pageserver doesn't support https for now and always reports
`https_port=None`. This will be addressed in a follow-up PR.
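
For illustration, here's a minimal sketch of the rollout story (the request type and its fields are taken from this PR; the host, ports, AZ value, and the `utils::id::NodeId` import are assumptions). While `--use-https-pageserver-api` is unset, nodes registering without an https port keep being addressed over http; once the flag is set, registering without one fails with a precondition error:

```rust
use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};
use utils::id::NodeId;

// Sketch: what a pageserver reports today, before https support lands.
fn example_register_request() -> NodeRegisterRequest {
    NodeRegisterRequest {
        node_id: NodeId(1),
        listen_http_addr: "pageserver-1.local".to_string(),
        listen_http_port: 9898,
        // Always None for now; becomes Some(port) in the follow-up PR.
        listen_https_port: None,
        listen_pg_addr: "pageserver-1.local".to_string(),
        listen_pg_port: 6400,
        availability_zone_id: AvailabilityZone("az-1".to_string()),
    }
}

// When the same node later re-registers with listen_https_port: Some(...),
// the controller classifies it as NeedUpdate (not Mismatched) and persists
// the new port; see need_update() in the diff below.
```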
Author: Dmitrii Kovalkov
Committed by: GitHub, 2025-02-20 21:16:04 +04:00
Commit: e808e9432a (parent: 7c7180a79d)
13 changed files with 231 additions and 31 deletions

---

@@ -0,0 +1 @@
ALTER TABLE nodes DROP listen_https_port;

---

@@ -0,0 +1 @@
ALTER TABLE nodes ADD listen_https_port INTEGER;

---

@@ -126,6 +126,10 @@ struct Cli {
#[arg(long)]
long_reconcile_threshold: Option<humantime::Duration>,
// Flag to use https for requests to pageserver API.
#[arg(long, default_value = "false")]
use_https_pageserver_api: bool,
}
enum StrictMode {
@@ -321,6 +325,7 @@ async fn async_main() -> anyhow::Result<()> {
address_for_peers: args.address_for_peers,
start_as_candidate: args.start_as_candidate,
http_service_port: args.listen.port() as i32,
use_https_pageserver_api: args.use_https_pageserver_api,
};
// Validate that we can connect to the database

---

@@ -1,5 +1,6 @@
use std::{str::FromStr, time::Duration};
use anyhow::anyhow;
use pageserver_api::{
controller_api::{
AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest,
@@ -32,12 +33,16 @@ pub(crate) struct Node {
listen_http_addr: String,
listen_http_port: u16,
listen_https_port: Option<u16>,
listen_pg_addr: String,
listen_pg_port: u16,
availability_zone_id: AvailabilityZone,
// Flag from storcon's config to use https for pageserver admin API.
// Invariant: if |true|, listen_https_port should contain a value.
use_https: bool,
// This cancellation token means "stop any RPCs in flight to this node, and don't start
// any more". It is not related to process shutdown.
#[serde(skip)]
@@ -56,7 +61,16 @@ pub(crate) enum AvailabilityTransition {
impl Node {
pub(crate) fn base_url(&self) -> String {
format!("http://{}:{}", self.listen_http_addr, self.listen_http_port)
if self.use_https {
format!(
"https://{}:{}",
self.listen_http_addr,
self.listen_https_port
.expect("https port should be specified if use_https is on")
)
} else {
format!("http://{}:{}", self.listen_http_addr, self.listen_http_port)
}
}
pub(crate) fn get_id(&self) -> NodeId {
@@ -82,11 +96,20 @@ impl Node {
self.id == register_req.node_id
&& self.listen_http_addr == register_req.listen_http_addr
&& self.listen_http_port == register_req.listen_http_port
// Note: listen_https_port may change. See [`Self::need_update`] for more details.
// && self.listen_https_port == register_req.listen_https_port
&& self.listen_pg_addr == register_req.listen_pg_addr
&& self.listen_pg_port == register_req.listen_pg_port
&& self.availability_zone_id == register_req.availability_zone_id
}
// Do we need to update an existing record in DB on this registration request?
pub(crate) fn need_update(&self, register_req: &NodeRegisterRequest) -> bool {
// listen_https_port is checked here because it may change during migration to https.
// After migration, this check may be moved to registration_match.
self.listen_https_port != register_req.listen_https_port
}
/// For a shard located on this node, populate a response object
/// with this node's address information.
pub(crate) fn shard_location(&self, shard_id: TenantShardId) -> TenantLocateResponseShard {
@@ -95,6 +118,7 @@ impl Node {
node_id: self.id,
listen_http_addr: self.listen_http_addr.clone(),
listen_http_port: self.listen_http_port,
listen_https_port: self.listen_https_port,
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port,
}
@@ -175,25 +199,34 @@ impl Node {
}
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn new(
id: NodeId,
listen_http_addr: String,
listen_http_port: u16,
listen_https_port: Option<u16>,
listen_pg_addr: String,
listen_pg_port: u16,
availability_zone_id: AvailabilityZone,
-    ) -> Self {
-        Self {
+        use_https: bool,
+    ) -> anyhow::Result<Self> {
+        if use_https && listen_https_port.is_none() {
+            return Err(anyhow!("https is enabled, but node has no https port"));
+        }
+        Ok(Self {
id,
listen_http_addr,
listen_http_port,
listen_https_port,
listen_pg_addr,
listen_pg_port,
scheduling: NodeSchedulingPolicy::Active,
availability: NodeAvailability::Offline,
availability_zone_id,
use_https,
cancel: CancellationToken::new(),
-        }
+        })
}
pub(crate) fn to_persistent(&self) -> NodePersistence {
@@ -202,14 +235,19 @@ impl Node {
scheduling_policy: self.scheduling.into(),
listen_http_addr: self.listen_http_addr.clone(),
listen_http_port: self.listen_http_port as i32,
listen_https_port: self.listen_https_port.map(|x| x as i32),
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port as i32,
availability_zone_id: self.availability_zone_id.0.clone(),
}
}
-    pub(crate) fn from_persistent(np: NodePersistence) -> Self {
-        Self {
+    pub(crate) fn from_persistent(np: NodePersistence, use_https: bool) -> anyhow::Result<Self> {
+        if use_https && np.listen_https_port.is_none() {
+            return Err(anyhow!("https is enabled, but node has no https port"));
+        }
+        Ok(Self {
id: NodeId(np.node_id as u64),
// At startup we consider a node offline until proven otherwise.
availability: NodeAvailability::Offline,
@@ -217,11 +255,13 @@ impl Node {
.expect("Bad scheduling policy in DB"),
listen_http_addr: np.listen_http_addr,
listen_http_port: np.listen_http_port as u16,
listen_https_port: np.listen_https_port.map(|x| x as u16),
listen_pg_addr: np.listen_pg_addr,
listen_pg_port: np.listen_pg_port as u16,
availability_zone_id: AvailabilityZone(np.availability_zone_id),
use_https,
cancel: CancellationToken::new(),
-        }
+        })
}
/// Wrapper for issuing requests to pageserver management API: takes care of generic
@@ -285,8 +325,9 @@ impl Node {
warn_threshold,
max_retries,
&format!(
"Call to node {} ({}:{}) management API",
self.id, self.listen_http_addr, self.listen_http_port
"Call to node {} ({}) management API",
self.id,
self.base_url(),
),
cancel,
)
@@ -302,6 +343,7 @@ impl Node {
availability_zone_id: self.availability_zone_id.0.clone(),
listen_http_addr: self.listen_http_addr.clone(),
listen_http_port: self.listen_http_port,
listen_https_port: self.listen_https_port,
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port,
}

---

@@ -375,18 +375,23 @@ impl Persistence {
Ok(nodes)
}
-    pub(crate) async fn update_node(
+    pub(crate) async fn update_node<V>(
        &self,
        input_node_id: NodeId,
-        input_scheduling: NodeSchedulingPolicy,
-    ) -> DatabaseResult<()> {
+        values: V,
+    ) -> DatabaseResult<()>
+    where
+        V: diesel::AsChangeset<Target = crate::schema::nodes::table> + Clone + Send + Sync,
+        V::Changeset: diesel::query_builder::QueryFragment<diesel::pg::Pg> + Send, // valid Postgres SQL
+    {
use crate::schema::nodes::dsl::*;
let updated = self
.with_measured_conn(DatabaseOperation::UpdateNode, move |conn| {
let values = values.clone();
Box::pin(async move {
let updated = diesel::update(nodes)
.filter(node_id.eq(input_node_id.0 as i64))
-                        .set((scheduling_policy.eq(String::from(input_scheduling)),))
+                        .set(values)
.execute(conn)
.await?;
Ok(updated)
@@ -403,6 +408,32 @@ impl Persistence {
}
}
pub(crate) async fn update_node_scheduling_policy(
&self,
input_node_id: NodeId,
input_scheduling: NodeSchedulingPolicy,
) -> DatabaseResult<()> {
use crate::schema::nodes::dsl::*;
self.update_node(
input_node_id,
scheduling_policy.eq(String::from(input_scheduling)),
)
.await
}
pub(crate) async fn update_node_on_registration(
&self,
input_node_id: NodeId,
input_https_port: Option<u16>,
) -> DatabaseResult<()> {
use crate::schema::nodes::dsl::*;
self.update_node(
input_node_id,
listen_https_port.eq(input_https_port.map(|x| x as i32)),
)
.await
}
/// At startup, load the high level state for shards, such as their config + policy. This will
/// be enriched at runtime with state discovered on pageservers.
///
@@ -1452,6 +1483,7 @@ pub(crate) struct NodePersistence {
pub(crate) listen_pg_addr: String,
pub(crate) listen_pg_port: i32,
pub(crate) availability_zone_id: String,
pub(crate) listen_https_port: Option<i32>,
}
/// Tenant metadata health status that are stored durably.
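
A note on the `update_node<V>` refactor above: it now accepts any diesel changeset targeting the `nodes` table, and the `Clone` bound is there because the changeset is re-applied if the measured-connection closure retries. A hypothetical further wrapper (not part of this PR, shown only to illustrate the generic API) would follow the same pattern as `update_node_on_registration`:

```rust
// Hypothetical, for illustration only: updating another column through the
// same generic update_node() path. A diesel `col.eq(value)` expression
// implements AsChangeset, so it can be passed straight through.
pub(crate) async fn update_node_pg_port(
    &self,
    input_node_id: NodeId,
    input_pg_port: u16,
) -> DatabaseResult<()> {
    use crate::schema::nodes::dsl::*;
    self.update_node(input_node_id, listen_pg_port.eq(input_pg_port as i32))
        .await
}
```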

---

@@ -930,13 +930,16 @@ pub(crate) mod test_utils {
NodeId(i),
format!("httphost-{i}"),
80 + i as u16,
None,
format!("pghost-{i}"),
5432 + i as u16,
az_iter
.next()
.cloned()
.unwrap_or(AvailabilityZone("test-az".to_string())),
-        );
+            false,
+        )
+        .unwrap();
node.set_availability(NodeAvailability::Active(test_utilization::simple(0, 0)));
assert!(node.is_available());
node

---

@@ -26,6 +26,7 @@ diesel::table! {
listen_pg_addr -> Varchar,
listen_pg_port -> Int4,
availability_zone_id -> Varchar,
listen_https_port -> Nullable<Int4>,
}
}

---

@@ -399,6 +399,8 @@ pub struct Config {
pub http_service_port: i32,
pub long_reconcile_threshold: Duration,
pub use_https_pageserver_api: bool,
}
impl From<DatabaseError> for ApiError {
@@ -1401,8 +1403,8 @@ impl Service {
.list_nodes()
.await?
.into_iter()
-            .map(Node::from_persistent)
-            .collect::<Vec<_>>();
+            .map(|x| Node::from_persistent(x, config.use_https_pageserver_api))
+            .collect::<anyhow::Result<Vec<Node>>>()?;
let nodes: HashMap<NodeId, Node> = nodes.into_iter().map(|n| (n.get_id(), n)).collect();
tracing::info!("Loaded {} nodes from database.", nodes.len());
metrics::METRICS_REGISTRY
@@ -1501,10 +1503,13 @@ impl Service {
NodeId(node_id as u64),
"".to_string(),
123,
None,
"".to_string(),
123,
AvailabilityZone("test_az".to_string()),
-            );
+                false,
+            )
+            .unwrap();
scheduler.node_upsert(&node);
}
@@ -5907,8 +5912,10 @@ impl Service {
)
.await;
#[derive(PartialEq)]
enum RegistrationStatus {
-            Matched,
+            UpToDate,
+            NeedUpdate,
Mismatched,
New,
}
@@ -5917,7 +5924,11 @@ impl Service {
let locked = self.inner.read().unwrap();
if let Some(node) = locked.nodes.get(&register_req.node_id) {
if node.registration_match(&register_req) {
-                    RegistrationStatus::Matched
+                    if node.need_update(&register_req) {
+                        RegistrationStatus::NeedUpdate
+                    } else {
+                        RegistrationStatus::UpToDate
+                    }
} else {
RegistrationStatus::Mismatched
}
@@ -5927,9 +5938,9 @@ impl Service {
};
match registration_status {
-            RegistrationStatus::Matched => {
+            RegistrationStatus::UpToDate => {
                tracing::info!(
-                    "Node {} re-registered with matching address",
+                    "Node {} re-registered with matching address and is up to date",
register_req.node_id
);
@@ -5947,7 +5958,7 @@ impl Service {
"Node is already registered with different address".to_string(),
));
}
-            RegistrationStatus::New => {
+            RegistrationStatus::New | RegistrationStatus::NeedUpdate => {
// fallthrough
}
}
@@ -5976,6 +5987,16 @@ impl Service {
));
}
if self.config.use_https_pageserver_api && register_req.listen_https_port.is_none() {
return Err(ApiError::PreconditionFailed(
format!(
"Node {} has no https port, but use_https is enabled",
register_req.node_id
)
.into(),
));
}
// Ordering: we must persist the new node _before_ adding it to in-memory state.
// This ensures that before we use it for anything or expose it via any external
// API, it is guaranteed to be available after a restart.
@@ -5983,13 +6004,29 @@ impl Service {
register_req.node_id,
register_req.listen_http_addr,
register_req.listen_http_port,
register_req.listen_https_port,
register_req.listen_pg_addr,
register_req.listen_pg_port,
register_req.availability_zone_id.clone(),
self.config.use_https_pageserver_api,
);
let new_node = match new_node {
Ok(new_node) => new_node,
Err(error) => return Err(ApiError::InternalServerError(error)),
};
// TODO: idempotency if the node already exists in the database
-        self.persistence.insert_node(&new_node).await?;
+        match registration_status {
RegistrationStatus::New => self.persistence.insert_node(&new_node).await?,
RegistrationStatus::NeedUpdate => {
self.persistence
.update_node_on_registration(
register_req.node_id,
register_req.listen_https_port,
)
.await?
}
_ => unreachable!("Other statuses have been processed earlier"),
}
let mut locked = self.inner.write().unwrap();
let mut new_nodes = (*locked.nodes).clone();
@@ -6004,12 +6041,24 @@ impl Service {
.storage_controller_pageserver_nodes
.set(locked.nodes.len() as i64);
-        tracing::info!(
-            "Registered pageserver {} ({}), now have {} pageservers",
-            register_req.node_id,
-            register_req.availability_zone_id,
-            locked.nodes.len()
-        );
match registration_status {
RegistrationStatus::New => {
tracing::info!(
"Registered pageserver {} ({}), now have {} pageservers",
register_req.node_id,
register_req.availability_zone_id,
locked.nodes.len()
);
}
RegistrationStatus::NeedUpdate => {
tracing::info!(
"Re-registered and updated node {} ({})",
register_req.node_id,
register_req.availability_zone_id,
);
}
_ => unreachable!("Other statuses have been processed earlier"),
}
Ok(())
}
@@ -6027,7 +6076,9 @@ impl Service {
if let Some(scheduling) = scheduling {
// Scheduling is a persistent part of Node: we must write updates to the database before
// applying them in memory
-            self.persistence.update_node(node_id, scheduling).await?;
+            self.persistence
+                .update_node_scheduling_policy(node_id, scheduling)
+                .await?;
}
// If we're activating a node, then before setting it active we must reconcile any shard locations