From aeb53fea94556ce7b8b24fa26fe2eec857fd44a8 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Fri, 21 Mar 2025 17:43:38 +0400 Subject: [PATCH] storage: support multiple SSL CA certificates (#11341) ## Problem - We need to support multiple SSL CA certificates for graceful root CA certificate rotation. - Closes: https://github.com/neondatabase/cloud/issues/25971 ## Summary of changes - Parses `ssl_ca_file` as a pem bundle, which may contain multiple certificates. Single pem cert is a valid pem bundle, so the change is backward compatible. --- control_plane/src/pageserver.rs | 6 +++--- control_plane/src/storage_controller.rs | 13 ++++++++----- control_plane/storcon_cli/src/main.rs | 12 ++++++------ pageserver/src/config.rs | 10 +++++----- pageserver/src/controller_upcall_client.rs | 2 +- safekeeper/src/bin/safekeeper.rs | 10 +++++----- safekeeper/src/http/routes.rs | 2 +- safekeeper/src/lib.rs | 4 ++-- safekeeper/src/pull_timeline.rs | 4 ++-- safekeeper/tests/walproposer_sim/safekeeper.rs | 2 +- storage_controller/src/main.rs | 10 +++++----- storage_controller/src/service.rs | 4 ++-- 12 files changed, 41 insertions(+), 38 deletions(-) diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 8a93ea5349..172e00e8bd 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -51,13 +51,13 @@ impl PageServerNode { parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr"); let port = port.unwrap_or(5432); - let ssl_ca_cert = env.ssl_ca_cert_path().map(|ssl_ca_file| { + let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| { let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist"); - Certificate::from_pem(&buf).expect("CA certificate should be valid") + Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid") }); let mut http_client = reqwest::Client::builder(); - if let Some(ssl_ca_cert) = ssl_ca_cert { + for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() { http_client = http_client.add_root_certificate(ssl_ca_cert); } let http_client = http_client diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 626be268d9..bb81999da7 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -17,7 +17,7 @@ use pageserver_api::models::{TenantConfigRequest, TimelineCreateRequest, Timelin use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api::ResponseErrorMessageExt; use postgres_backend::AuthType; -use reqwest::Method; +use reqwest::{Certificate, Method}; use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; use tokio::process::Command; @@ -143,11 +143,14 @@ impl StorageController { } }; - let mut http_client = reqwest::Client::builder(); - if let Some(ssl_ca_file) = env.ssl_ca_cert_path() { + let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| { let buf = std::fs::read(ssl_ca_file).expect("SSL CA file should exist"); - let cert = reqwest::Certificate::from_pem(&buf).expect("SSL CA file should be valid"); - http_client = http_client.add_root_certificate(cert); + Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid") + }); + + let mut http_client = reqwest::Client::builder(); + for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() { + http_client = http_client.add_root_certificate(ssl_ca_cert); } let http_client = http_client .build() diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index eb75f300fa..c503697acc 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -20,7 +20,7 @@ use pageserver_api::models::{ }; use pageserver_api::shard::{ShardStripeSize, TenantShardId}; use pageserver_client::mgmt_api::{self}; -use reqwest::{Method, StatusCode, Url}; +use reqwest::{Certificate, Method, StatusCode, Url}; use storage_controller_client::control_api::Client; use utils::id::{NodeId, TenantId, TimelineId}; @@ -274,7 +274,7 @@ struct Cli { jwt: Option, #[arg(long)] - /// Trusted root CA certificate to use in https APIs. + /// Trusted root CA certificates to use in https APIs. ssl_ca_file: Option, #[command(subcommand)] @@ -387,16 +387,16 @@ async fn main() -> anyhow::Result<()> { let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone()); - let ssl_ca_cert = match &cli.ssl_ca_file { + let ssl_ca_certs = match &cli.ssl_ca_file { Some(ssl_ca_file) => { let buf = tokio::fs::read(ssl_ca_file).await?; - Some(reqwest::Certificate::from_pem(&buf)?) + Certificate::from_pem_bundle(&buf)? } - None => None, + None => Vec::new(), }; let mut http_client = reqwest::Client::builder(); - if let Some(ssl_ca_cert) = ssl_ca_cert { + for ssl_ca_cert in ssl_ca_certs { http_client = http_client.add_root_certificate(ssl_ca_cert); } let http_client = http_client.build()?; diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 8f05daf5f5..c336f22f8e 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -65,8 +65,8 @@ pub struct PageServerConf { /// Period to reload certificate and private key from files. /// Default: 60s. pub ssl_cert_reload_period: Duration, - /// Trusted root CA certificate to use in https APIs. - pub ssl_ca_cert: Option, + /// Trusted root CA certificates to use in https APIs. + pub ssl_ca_certs: Vec, /// Current availability zone. Used for traffic metrics. pub availability_zone: Option, @@ -481,12 +481,12 @@ impl PageServerConf { validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false), load_previous_heatmap: load_previous_heatmap.unwrap_or(true), generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true), - ssl_ca_cert: match ssl_ca_file { + ssl_ca_certs: match ssl_ca_file { Some(ssl_ca_file) => { let buf = std::fs::read(ssl_ca_file)?; - Some(Certificate::from_pem(&buf)?) + Certificate::from_pem_bundle(&buf)? } - None => None, + None => Vec::new(), }, }; diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs index b472da6eec..fd5fbfcba9 100644 --- a/pageserver/src/controller_upcall_client.rs +++ b/pageserver/src/controller_upcall_client.rs @@ -76,7 +76,7 @@ impl StorageControllerUpcallClient { client = client.default_headers(headers); } - if let Some(ssl_ca_cert) = &conf.ssl_ca_cert { + for ssl_ca_cert in &conf.ssl_ca_certs { client = client.add_root_certificate(ssl_ca_cert.clone()); } diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index d9b1b76a4c..6ce43815a6 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -217,7 +217,7 @@ struct Args { /// Period to reload certificate and private key from files. #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_SSL_CERT_RELOAD_PERIOD)] pub ssl_cert_reload_period: Duration, - /// Trusted root CA certificate to use in https APIs. + /// Trusted root CA certificates to use in https APIs. #[arg(long)] ssl_ca_file: Option, } @@ -353,13 +353,13 @@ async fn main() -> anyhow::Result<()> { } }; - let ssl_ca_cert = match args.ssl_ca_file.as_ref() { + let ssl_ca_certs = match args.ssl_ca_file.as_ref() { Some(ssl_ca_file) => { tracing::info!("Using ssl root CA file: {ssl_ca_file:?}"); let buf = tokio::fs::read(ssl_ca_file).await?; - Some(Certificate::from_pem(&buf)?) + Certificate::from_pem_bundle(&buf)? } - None => None, + None => Vec::new(), }; let conf = Arc::new(SafeKeeperConf { @@ -398,7 +398,7 @@ async fn main() -> anyhow::Result<()> { ssl_key_file: args.ssl_key_file, ssl_cert_file: args.ssl_cert_file, ssl_cert_reload_period: args.ssl_cert_reload_period, - ssl_ca_cert, + ssl_ca_certs, }); // initialize sentry if SENTRY_DSN is provided diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 3299d77545..b264fe8a1c 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -235,7 +235,7 @@ async fn timeline_pull_handler(mut request: Request) -> Result, + pub ssl_ca_certs: Vec, } impl SafeKeeperConf { @@ -169,7 +169,7 @@ impl SafeKeeperConf { ssl_key_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_KEY_FILE), ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE), ssl_cert_reload_period: Duration::from_secs(60), - ssl_ca_cert: None, + ssl_ca_certs: Vec::new(), } } } diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index dab8142dfb..653b084ad8 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -393,7 +393,7 @@ pub struct DebugDumpResponse { pub async fn handle_request( request: PullTimelineRequest, sk_auth_token: Option, - ssl_ca_cert: Option, + ssl_ca_certs: Vec, global_timelines: Arc, ) -> Result { let existing_tli = global_timelines.get(TenantTimelineId::new( @@ -405,7 +405,7 @@ pub async fn handle_request( } let mut http_client = reqwest::Client::builder(); - if let Some(ssl_ca_cert) = ssl_ca_cert { + for ssl_ca_cert in ssl_ca_certs { http_client = http_client.add_root_certificate(ssl_ca_cert); } let http_client = http_client.build()?; diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 65dfa64512..58913537aa 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -183,7 +183,7 @@ pub fn run_server(os: NodeOs, disk: Arc) -> Result<()> { ssl_key_file: Utf8PathBuf::from(""), ssl_cert_file: Utf8PathBuf::from(""), ssl_cert_reload_period: Duration::ZERO, - ssl_ca_cert: None, + ssl_ca_certs: Vec::new(), }; let mut global = GlobalMap::new(disk, conf.clone())?; diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 5fcf66b464..ed93aff877 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -200,7 +200,7 @@ struct Cli { /// Period to reload certificate and private key from files. #[arg(long, default_value = DEFAULT_SSL_CERT_RELOAD_PERIOD)] ssl_cert_reload_period: humantime::Duration, - /// Trusted root CA certificate to use in https APIs. + /// Trusted root CA certificates to use in https APIs. #[arg(long)] ssl_ca_file: Option, } @@ -376,13 +376,13 @@ async fn async_main() -> anyhow::Result<()> { } } - let ssl_ca_cert = match args.ssl_ca_file.as_ref() { + let ssl_ca_certs = match args.ssl_ca_file.as_ref() { Some(ssl_ca_file) => { tracing::info!("Using ssl root CA file: {ssl_ca_file:?}"); let buf = tokio::fs::read(ssl_ca_file).await?; - Some(Certificate::from_pem(&buf)?) + Certificate::from_pem_bundle(&buf)? } - None => None, + None => Vec::new(), }; let config = Config { @@ -425,7 +425,7 @@ async fn async_main() -> anyhow::Result<()> { start_as_candidate: args.start_as_candidate, use_https_pageserver_api: args.use_https_pageserver_api, use_https_safekeeper_api: args.use_https_safekeeper_api, - ssl_ca_cert, + ssl_ca_certs, timelines_onto_safekeepers: args.timelines_onto_safekeepers, }; diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index c956c1dd1c..af99d67440 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -445,7 +445,7 @@ pub struct Config { pub use_https_safekeeper_api: bool, - pub ssl_ca_cert: Option, + pub ssl_ca_certs: Vec, pub timelines_onto_safekeepers: bool, } @@ -1668,7 +1668,7 @@ impl Service { // // The bug has been fixed in hyper v1, so keep alive may be enabled only after we migrate to hyper1. http_client = http_client.pool_max_idle_per_host(0); - if let Some(ssl_ca_cert) = &config.ssl_ca_cert { + for ssl_ca_cert in &config.ssl_ca_certs { http_client = http_client.add_root_certificate(ssl_ca_cert.clone()); } let http_client = http_client.build()?;