storcon + safekeeper + scrubber: propagate root CA certs everywhere (#11418)

## Problem
There are some places in the code where we create `reqwest::Client`
without providing SSL CA certs from `ssl_ca_file`. These will break
after we enable TLS everywhere.
- Part of https://github.com/neondatabase/cloud/issues/22686

## Summary of changes
- Support `ssl_ca_file` in storage scrubber.
- Add `use_https_safekeeper_api` option to safekeeper to use https for
peer requests.
- Propagate SSL CA certs to storage_controller/client, storcon's
ComputeHook, PeerClient and maybe_forward.
This commit is contained in:
Dmitrii Kovalkov
2025-04-04 10:30:48 +04:00
committed by GitHub
parent 497116b76d
commit 181af302b5
21 changed files with 121 additions and 39 deletions

View File

@@ -385,8 +385,6 @@ where
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let cli = Cli::parse(); let cli = Cli::parse();
let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone());
let ssl_ca_certs = match &cli.ssl_ca_file { let ssl_ca_certs = match &cli.ssl_ca_file {
Some(ssl_ca_file) => { Some(ssl_ca_file) => {
let buf = tokio::fs::read(ssl_ca_file).await?; let buf = tokio::fs::read(ssl_ca_file).await?;
@@ -401,9 +399,11 @@ async fn main() -> anyhow::Result<()> {
} }
let http_client = http_client.build()?; let http_client = http_client.build()?;
let storcon_client = Client::new(http_client.clone(), cli.api.clone(), cli.jwt.clone());
let mut trimmed = cli.api.to_string(); let mut trimmed = cli.api.to_string();
trimmed.pop(); trimmed.pop();
let vps_client = mgmt_api::Client::new(http_client, trimmed, cli.jwt.as_deref()); let vps_client = mgmt_api::Client::new(http_client.clone(), trimmed, cli.jwt.as_deref());
match cli.command { match cli.command {
Command::NodeRegister { Command::NodeRegister {
@@ -1056,7 +1056,7 @@ async fn main() -> anyhow::Result<()> {
const DEFAULT_MIGRATE_CONCURRENCY: usize = 8; const DEFAULT_MIGRATE_CONCURRENCY: usize = 8;
let mut stream = futures::stream::iter(moves) let mut stream = futures::stream::iter(moves)
.map(|mv| { .map(|mv| {
let client = Client::new(cli.api.clone(), cli.jwt.clone()); let client = Client::new(http_client.clone(), cli.api.clone(), cli.jwt.clone());
async move { async move {
client client
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>( .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(

View File

@@ -71,6 +71,7 @@ pub struct PeerInfo {
pub ts: Instant, pub ts: Instant,
pub pg_connstr: String, pub pg_connstr: String,
pub http_connstr: String, pub http_connstr: String,
pub https_connstr: Option<String>,
} }
pub type FullTransactionId = u64; pub type FullTransactionId = u64;
@@ -261,6 +262,8 @@ pub struct SkTimelineInfo {
pub safekeeper_connstr: Option<String>, pub safekeeper_connstr: Option<String>,
#[serde(default)] #[serde(default)]
pub http_connstr: Option<String>, pub http_connstr: Option<String>,
#[serde(default)]
pub https_connstr: Option<String>,
// Minimum of all active RO replicas flush LSN // Minimum of all active RO replicas flush LSN
#[serde(default = "lsn_invalid")] #[serde(default = "lsn_invalid")]
pub standby_horizon: Lsn, pub standby_horizon: Lsn,

View File

@@ -32,9 +32,15 @@ impl Client {
let Some(ref base_url) = conf.import_pgdata_upcall_api else { let Some(ref base_url) = conf.import_pgdata_upcall_api else {
anyhow::bail!("import_pgdata_upcall_api is not configured") anyhow::bail!("import_pgdata_upcall_api is not configured")
}; };
let mut http_client = reqwest::Client::builder();
for cert in &conf.ssl_ca_certs {
http_client = http_client.add_root_certificate(cert.clone());
}
let http_client = http_client.build()?;
Ok(Self { Ok(Self {
base_url: base_url.to_string(), base_url: base_url.to_string(),
client: reqwest::Client::new(), client: http_client,
cancel, cancel,
authorization_header: conf authorization_header: conf
.import_pgdata_upcall_api_token .import_pgdata_upcall_api_token

View File

@@ -219,7 +219,10 @@ struct Args {
pub ssl_cert_reload_period: Duration, pub ssl_cert_reload_period: Duration,
/// Trusted root CA certificates to use in https APIs. /// Trusted root CA certificates to use in https APIs.
#[arg(long)] #[arg(long)]
ssl_ca_file: Option<Utf8PathBuf>, pub ssl_ca_file: Option<Utf8PathBuf>,
/// Flag to use https for requests to peer's safekeeper API.
#[arg(long)]
pub use_https_safekeeper_api: bool,
} }
// Like PathBufValueParser, but allows empty string. // Like PathBufValueParser, but allows empty string.
@@ -399,6 +402,7 @@ async fn main() -> anyhow::Result<()> {
ssl_cert_file: args.ssl_cert_file, ssl_cert_file: args.ssl_cert_file,
ssl_cert_reload_period: args.ssl_cert_reload_period, ssl_cert_reload_period: args.ssl_cert_reload_period,
ssl_ca_certs, ssl_ca_certs,
use_https_safekeeper_api: args.use_https_safekeeper_api,
}); });
// initialize sentry if SENTRY_DSN is provided // initialize sentry if SENTRY_DSN is provided

View File

@@ -536,6 +536,7 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
peer_horizon_lsn: sk_info.peer_horizon_lsn.0, peer_horizon_lsn: sk_info.peer_horizon_lsn.0,
safekeeper_connstr: sk_info.safekeeper_connstr.unwrap_or_else(|| "".to_owned()), safekeeper_connstr: sk_info.safekeeper_connstr.unwrap_or_else(|| "".to_owned()),
http_connstr: sk_info.http_connstr.unwrap_or_else(|| "".to_owned()), http_connstr: sk_info.http_connstr.unwrap_or_else(|| "".to_owned()),
https_connstr: sk_info.https_connstr,
backup_lsn: sk_info.backup_lsn.0, backup_lsn: sk_info.backup_lsn.0,
local_start_lsn: sk_info.local_start_lsn.0, local_start_lsn: sk_info.local_start_lsn.0,
availability_zone: None, availability_zone: None,

View File

@@ -121,6 +121,7 @@ pub struct SafeKeeperConf {
pub ssl_cert_file: Utf8PathBuf, pub ssl_cert_file: Utf8PathBuf,
pub ssl_cert_reload_period: Duration, pub ssl_cert_reload_period: Duration,
pub ssl_ca_certs: Vec<Certificate>, pub ssl_ca_certs: Vec<Certificate>,
pub use_https_safekeeper_api: bool,
} }
impl SafeKeeperConf { impl SafeKeeperConf {
@@ -170,6 +171,7 @@ impl SafeKeeperConf {
ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE), ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE),
ssl_cert_reload_period: Duration::from_secs(60), ssl_cert_reload_period: Duration::from_secs(60),
ssl_ca_certs: Vec::new(), ssl_ca_certs: Vec::new(),
use_https_safekeeper_api: false,
} }
} }
} }

View File

@@ -176,6 +176,7 @@ pub struct Donor {
pub flush_lsn: Lsn, pub flush_lsn: Lsn,
pub pg_connstr: String, pub pg_connstr: String,
pub http_connstr: String, pub http_connstr: String,
pub https_connstr: Option<String>,
} }
impl From<&PeerInfo> for Donor { impl From<&PeerInfo> for Donor {
@@ -186,6 +187,7 @@ impl From<&PeerInfo> for Donor {
flush_lsn: p.flush_lsn, flush_lsn: p.flush_lsn,
pg_connstr: p.pg_connstr.clone(), pg_connstr: p.pg_connstr.clone(),
http_connstr: p.http_connstr.clone(), http_connstr: p.http_connstr.clone(),
https_connstr: p.https_connstr.clone(),
} }
} }
} }
@@ -236,11 +238,33 @@ async fn recover(
conf: &SafeKeeperConf, conf: &SafeKeeperConf,
) -> anyhow::Result<String> { ) -> anyhow::Result<String> {
// Learn donor term switch history to figure out starting point. // Learn donor term switch history to figure out starting point.
let client = reqwest::Client::new();
let mut client = reqwest::Client::builder();
for cert in &conf.ssl_ca_certs {
client = client.add_root_certificate(cert.clone());
}
let client = client
.build()
.context("Failed to build http client for recover")?;
let url = if conf.use_https_safekeeper_api {
if let Some(https_connstr) = donor.https_connstr.as_ref() {
format!("https://{https_connstr}")
} else {
anyhow::bail!(
"cannot recover from donor {}: \
https is enabled, but https_connstr is not specified",
donor.sk_id
);
}
} else {
format!("http://{}", donor.http_connstr)
};
let timeline_info: TimelineStatus = client let timeline_info: TimelineStatus = client
.get(format!( .get(format!(
"http://{}/v1/tenant/{}/timeline/{}", "{}/v1/tenant/{}/timeline/{}",
donor.http_connstr, tli.ttid.tenant_id, tli.ttid.timeline_id url, tli.ttid.tenant_id, tli.ttid.timeline_id
)) ))
.send() .send()
.await? .await?

View File

@@ -50,6 +50,7 @@ fn peer_info_from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> Peer
local_start_lsn: Lsn(sk_info.local_start_lsn), local_start_lsn: Lsn(sk_info.local_start_lsn),
pg_connstr: sk_info.safekeeper_connstr.clone(), pg_connstr: sk_info.safekeeper_connstr.clone(),
http_connstr: sk_info.http_connstr.clone(), http_connstr: sk_info.http_connstr.clone(),
https_connstr: sk_info.https_connstr.clone(),
ts, ts,
} }
} }
@@ -363,6 +364,7 @@ impl SharedState {
.to_owned() .to_owned()
.unwrap_or(conf.listen_pg_addr.clone()), .unwrap_or(conf.listen_pg_addr.clone()),
http_connstr: conf.listen_http_addr.to_owned(), http_connstr: conf.listen_http_addr.to_owned(),
https_connstr: conf.listen_https_addr.to_owned(),
backup_lsn: self.sk.state().inmem.backup_lsn.0, backup_lsn: self.sk.state().inmem.backup_lsn.0,
local_start_lsn: self.sk.state().local_start_lsn.0, local_start_lsn: self.sk.state().local_start_lsn.0,
availability_zone: conf.availability_zone.clone(), availability_zone: conf.availability_zone.clone(),

View File

@@ -184,6 +184,7 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
ssl_cert_file: Utf8PathBuf::from(""), ssl_cert_file: Utf8PathBuf::from(""),
ssl_cert_reload_period: Duration::ZERO, ssl_cert_reload_period: Duration::ZERO,
ssl_ca_certs: Vec::new(), ssl_ca_certs: Vec::new(),
use_https_safekeeper_api: false,
}; };
let mut global = GlobalMap::new(disk, conf.clone())?; let mut global = GlobalMap::new(disk, conf.clone())?;

View File

@@ -141,6 +141,7 @@ async fn publish(client: Option<BrokerClientChannel>, n_keys: u64) {
peer_horizon_lsn: 5, peer_horizon_lsn: 5,
safekeeper_connstr: "zenith-1-sk-1.local:7676".to_owned(), safekeeper_connstr: "zenith-1-sk-1.local:7676".to_owned(),
http_connstr: "zenith-1-sk-1.local:7677".to_owned(), http_connstr: "zenith-1-sk-1.local:7677".to_owned(),
https_connstr: Some("zenith-1-sk-1.local:7678".to_owned()),
local_start_lsn: 0, local_start_lsn: 0,
availability_zone: None, availability_zone: None,
standby_horizon: 0, standby_horizon: 0,

View File

@@ -45,8 +45,10 @@ message SafekeeperTimelineInfo {
uint64 standby_horizon = 14; uint64 standby_horizon = 14;
// A connection string to use for WAL receiving. // A connection string to use for WAL receiving.
string safekeeper_connstr = 10; string safekeeper_connstr = 10;
// HTTP endpoint connection string // HTTP endpoint connection string.
string http_connstr = 13; string http_connstr = 13;
// HTTPS endpoint connection string.
optional string https_connstr = 15;
// Availability zone of a safekeeper. // Availability zone of a safekeeper.
optional string availability_zone = 11; optional string availability_zone = 11;
} }

View File

@@ -764,6 +764,7 @@ mod tests {
peer_horizon_lsn: 5, peer_horizon_lsn: 5,
safekeeper_connstr: "neon-1-sk-1.local:7676".to_owned(), safekeeper_connstr: "neon-1-sk-1.local:7676".to_owned(),
http_connstr: "neon-1-sk-1.local:7677".to_owned(), http_connstr: "neon-1-sk-1.local:7677".to_owned(),
https_connstr: Some("neon-1-sk-1.local:7678".to_owned()),
local_start_lsn: 0, local_start_lsn: 0,
availability_zone: None, availability_zone: None,
standby_horizon: 0, standby_horizon: 0,

View File

@@ -10,13 +10,11 @@ pub struct Client {
} }
impl Client { impl Client {
pub fn new(base_url: Url, jwt_token: Option<String>) -> Self { pub fn new(http_client: reqwest::Client, base_url: Url, jwt_token: Option<String>) -> Self {
Self { Self {
base_url, base_url,
jwt_token, jwt_token,
client: reqwest::ClientBuilder::new() client: http_client,
.build()
.expect("Failed to construct http client"),
} }
} }

View File

@@ -4,6 +4,7 @@ use std::error::Error as _;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use anyhow::Context;
use control_plane::endpoint::{ComputeControlPlane, EndpointStatus}; use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
use control_plane::local_env::LocalEnv; use control_plane::local_env::LocalEnv;
use futures::StreamExt; use futures::StreamExt;
@@ -364,25 +365,28 @@ pub(crate) struct ShardUpdate<'a> {
} }
impl ComputeHook { impl ComputeHook {
pub(super) fn new(config: Config) -> Self { pub(super) fn new(config: Config) -> anyhow::Result<Self> {
let authorization_header = config let authorization_header = config
.control_plane_jwt_token .control_plane_jwt_token
.clone() .clone()
.map(|jwt| format!("Bearer {}", jwt)); .map(|jwt| format!("Bearer {}", jwt));
let client = reqwest::ClientBuilder::new() let mut client = reqwest::ClientBuilder::new().timeout(NOTIFY_REQUEST_TIMEOUT);
.timeout(NOTIFY_REQUEST_TIMEOUT) for cert in &config.ssl_ca_certs {
client = client.add_root_certificate(cert.clone());
}
let client = client
.build() .build()
.expect("Failed to construct HTTP client"); .context("Failed to build http client for compute hook")?;
Self { Ok(Self {
state: Default::default(), state: Default::default(),
config, config,
authorization_header, authorization_header,
neon_local_lock: Default::default(), neon_local_lock: Default::default(),
api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY), api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY),
client, client,
} })
} }
/// For test environments: use neon_local's LocalEnv to update compute /// For test environments: use neon_local's LocalEnv to update compute

View File

@@ -1744,19 +1744,17 @@ async fn maybe_forward(req: Request<Body>) -> ForwardOutcome {
// Use [`RECONCILE_TIMEOUT`] as the max amount of time a request should block for and // Use [`RECONCILE_TIMEOUT`] as the max amount of time a request should block for and
// include some leeway to get the timeout for proxied requests. // include some leeway to get the timeout for proxied requests.
const PROXIED_REQUEST_TIMEOUT: Duration = Duration::from_secs(RECONCILE_TIMEOUT.as_secs() + 10); const PROXIED_REQUEST_TIMEOUT: Duration = Duration::from_secs(RECONCILE_TIMEOUT.as_secs() + 10);
let client = reqwest::ClientBuilder::new()
.timeout(PROXIED_REQUEST_TIMEOUT)
.build();
let client = match client {
Ok(client) => client,
Err(err) => {
return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
"Failed to build leader client for forwarding while in stepped down state: {err}"
))));
}
};
let request: reqwest::Request = match convert_request(req, &client, leader.address).await { let client = state.service.get_http_client().clone();
let request: reqwest::Request = match convert_request(
req,
&client,
leader.address,
PROXIED_REQUEST_TIMEOUT,
)
.await
{
Ok(r) => r, Ok(r) => r,
Err(err) => { Err(err) => {
return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!( return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
@@ -1814,6 +1812,7 @@ async fn convert_request(
req: hyper::Request<Body>, req: hyper::Request<Body>,
client: &reqwest::Client, client: &reqwest::Client,
to_address: String, to_address: String,
timeout: Duration,
) -> Result<reqwest::Request, ApiError> { ) -> Result<reqwest::Request, ApiError> {
use std::str::FromStr; use std::str::FromStr;
@@ -1868,6 +1867,7 @@ async fn convert_request(
.request(method, uri) .request(method, uri)
.headers(headers) .headers(headers)
.body(body) .body(body)
.timeout(timeout)
.build() .build()
.map_err(|err| { .map_err(|err| {
ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}")) ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))

View File

@@ -110,7 +110,20 @@ impl Leadership {
) -> Option<GlobalObservedState> { ) -> Option<GlobalObservedState> {
tracing::info!("Sending step down request to {leader:?}"); tracing::info!("Sending step down request to {leader:?}");
let mut http_client = reqwest::Client::builder();
for cert in &self.config.ssl_ca_certs {
http_client = http_client.add_root_certificate(cert.clone());
}
let http_client = match http_client.build() {
Ok(http_client) => http_client,
Err(err) => {
tracing::error!("Failed to build client for leader step-down request: {err}");
return None;
}
};
let client = PeerClient::new( let client = PeerClient::new(
http_client,
Uri::try_from(leader.address.as_str()).expect("Failed to build leader URI"), Uri::try_from(leader.address.as_str()).expect("Failed to build leader URI"),
self.config.peer_jwt_token.clone(), self.config.peer_jwt_token.clone(),
); );

View File

@@ -59,11 +59,11 @@ impl ResponseErrorMessageExt for reqwest::Response {
pub(crate) struct GlobalObservedState(pub(crate) HashMap<TenantShardId, ObservedState>); pub(crate) struct GlobalObservedState(pub(crate) HashMap<TenantShardId, ObservedState>);
impl PeerClient { impl PeerClient {
pub(crate) fn new(uri: Uri, jwt: Option<String>) -> Self { pub(crate) fn new(http_client: reqwest::Client, uri: Uri, jwt: Option<String>) -> Self {
Self { Self {
uri, uri,
jwt, jwt,
client: reqwest::Client::new(), client: http_client,
} }
} }

View File

@@ -1711,7 +1711,7 @@ impl Service {
))), ))),
config: config.clone(), config: config.clone(),
persistence, persistence,
compute_hook: Arc::new(ComputeHook::new(config.clone())), compute_hook: Arc::new(ComputeHook::new(config.clone())?),
result_tx, result_tx,
heartbeater_ps, heartbeater_ps,
heartbeater_sk, heartbeater_sk,

View File

@@ -295,8 +295,8 @@ pub struct ControllerClientConfig {
} }
impl ControllerClientConfig { impl ControllerClientConfig {
pub fn build_client(self) -> control_api::Client { pub fn build_client(self, http_client: reqwest::Client) -> control_api::Client {
control_api::Client::new(self.controller_api, Some(self.controller_jwt)) control_api::Client::new(http_client, self.controller_api, Some(self.controller_jwt))
} }
} }

View File

@@ -3,7 +3,7 @@ use camino::Utf8PathBuf;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use pageserver_api::controller_api::{MetadataHealthUpdateRequest, MetadataHealthUpdateResponse}; use pageserver_api::controller_api::{MetadataHealthUpdateRequest, MetadataHealthUpdateResponse};
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
use reqwest::{Method, Url}; use reqwest::{Certificate, Method, Url};
use storage_controller_client::control_api; use storage_controller_client::control_api;
use storage_scrubber::garbage::{PurgeMode, find_garbage, purge_garbage}; use storage_scrubber::garbage::{PurgeMode, find_garbage, purge_garbage};
use storage_scrubber::pageserver_physical_gc::{GcMode, pageserver_physical_gc}; use storage_scrubber::pageserver_physical_gc::{GcMode, pageserver_physical_gc};
@@ -41,6 +41,10 @@ struct Cli {
/// If set to true, the scrubber will exit with error code on fatal error. /// If set to true, the scrubber will exit with error code on fatal error.
#[arg(long, default_value_t = false)] #[arg(long, default_value_t = false)]
exit_code: bool, exit_code: bool,
/// Trusted root CA certificates to use in https APIs.
#[arg(long)]
ssl_ca_file: Option<Utf8PathBuf>,
} }
#[derive(Subcommand, Debug)] #[derive(Subcommand, Debug)]
@@ -146,13 +150,28 @@ async fn main() -> anyhow::Result<()> {
tracing::info!("version: {}, build_tag {}", GIT_VERSION, BUILD_TAG); tracing::info!("version: {}, build_tag {}", GIT_VERSION, BUILD_TAG);
let ssl_ca_certs = match cli.ssl_ca_file.as_ref() {
Some(ssl_ca_file) => {
tracing::info!("Using ssl root CA file: {ssl_ca_file:?}");
let buf = tokio::fs::read(ssl_ca_file).await?;
Certificate::from_pem_bundle(&buf)?
}
None => Vec::new(),
};
let mut http_client = reqwest::Client::builder();
for cert in ssl_ca_certs {
http_client = http_client.add_root_certificate(cert);
}
let http_client = http_client.build()?;
let controller_client = cli.controller_api.map(|controller_api| { let controller_client = cli.controller_api.map(|controller_api| {
ControllerClientConfig { ControllerClientConfig {
controller_api, controller_api,
// Default to no key: this is a convenience when working in a development environment // Default to no key: this is a convenience when working in a development environment
controller_jwt: cli.controller_jwt.unwrap_or("".to_owned()), controller_jwt: cli.controller_jwt.unwrap_or("".to_owned()),
} }
.build_client() .build_client(http_client)
}); });
match cli.command { match cli.command {

View File

@@ -1318,6 +1318,7 @@ class NeonEnv:
"http_port": port.http, "http_port": port.http,
"https_port": port.https, "https_port": port.https,
"sync": config.safekeepers_enable_fsync, "sync": config.safekeepers_enable_fsync,
"use_https_safekeeper_api": config.use_https_safekeeper_api,
} }
if config.auth_enabled: if config.auth_enabled:
sk_cfg["auth_enabled"] = True sk_cfg["auth_enabled"] = True