storcon + safekeeper + scrubber: propagate root CA certs everywhere (#11418)

## Problem
There are some places in the code where we create a `reqwest::Client`
without providing the SSL CA certs from `ssl_ca_file`. These clients will
break after we enable TLS everywhere.
- Part of https://github.com/neondatabase/cloud/issues/22686

## Summary of changes
- Support `ssl_ca_file` in storage scrubber.
- Add `use_https_safekeeper_api` option to safekeeper to use HTTPS for
peer requests.
- Propagate SSL CA certs to `storage_controller/client`, storcon's
`ComputeHook`, `PeerClient` and `maybe_forward`.
This commit is contained in:
Dmitrii Kovalkov
2025-04-04 10:30:48 +04:00
committed by GitHub
parent 497116b76d
commit 181af302b5
21 changed files with 121 additions and 39 deletions

View File

@@ -4,6 +4,7 @@ use std::error::Error as _;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Context;
use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
use control_plane::local_env::LocalEnv;
use futures::StreamExt;
@@ -364,25 +365,28 @@ pub(crate) struct ShardUpdate<'a> {
}
impl ComputeHook {
pub(super) fn new(config: Config) -> Self {
pub(super) fn new(config: Config) -> anyhow::Result<Self> {
let authorization_header = config
.control_plane_jwt_token
.clone()
.map(|jwt| format!("Bearer {}", jwt));
let client = reqwest::ClientBuilder::new()
.timeout(NOTIFY_REQUEST_TIMEOUT)
let mut client = reqwest::ClientBuilder::new().timeout(NOTIFY_REQUEST_TIMEOUT);
for cert in &config.ssl_ca_certs {
client = client.add_root_certificate(cert.clone());
}
let client = client
.build()
.expect("Failed to construct HTTP client");
.context("Failed to build http client for compute hook")?;
Self {
Ok(Self {
state: Default::default(),
config,
authorization_header,
neon_local_lock: Default::default(),
api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY),
client,
}
})
}
/// For test environments: use neon_local's LocalEnv to update compute

View File

@@ -1744,19 +1744,17 @@ async fn maybe_forward(req: Request<Body>) -> ForwardOutcome {
// Use [`RECONCILE_TIMEOUT`] as the max amount of time a request should block for and
// include some leeway to get the timeout for proxied requests.
const PROXIED_REQUEST_TIMEOUT: Duration = Duration::from_secs(RECONCILE_TIMEOUT.as_secs() + 10);
let client = reqwest::ClientBuilder::new()
.timeout(PROXIED_REQUEST_TIMEOUT)
.build();
let client = match client {
Ok(client) => client,
Err(err) => {
return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
"Failed to build leader client for forwarding while in stepped down state: {err}"
))));
}
};
let request: reqwest::Request = match convert_request(req, &client, leader.address).await {
let client = state.service.get_http_client().clone();
let request: reqwest::Request = match convert_request(
req,
&client,
leader.address,
PROXIED_REQUEST_TIMEOUT,
)
.await
{
Ok(r) => r,
Err(err) => {
return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
@@ -1814,6 +1812,7 @@ async fn convert_request(
req: hyper::Request<Body>,
client: &reqwest::Client,
to_address: String,
timeout: Duration,
) -> Result<reqwest::Request, ApiError> {
use std::str::FromStr;
@@ -1868,6 +1867,7 @@ async fn convert_request(
.request(method, uri)
.headers(headers)
.body(body)
.timeout(timeout)
.build()
.map_err(|err| {
ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))

View File

@@ -110,7 +110,20 @@ impl Leadership {
) -> Option<GlobalObservedState> {
tracing::info!("Sending step down request to {leader:?}");
let mut http_client = reqwest::Client::builder();
for cert in &self.config.ssl_ca_certs {
http_client = http_client.add_root_certificate(cert.clone());
}
let http_client = match http_client.build() {
Ok(http_client) => http_client,
Err(err) => {
tracing::error!("Failed to build client for leader step-down request: {err}");
return None;
}
};
let client = PeerClient::new(
http_client,
Uri::try_from(leader.address.as_str()).expect("Failed to build leader URI"),
self.config.peer_jwt_token.clone(),
);

View File

@@ -59,11 +59,11 @@ impl ResponseErrorMessageExt for reqwest::Response {
pub(crate) struct GlobalObservedState(pub(crate) HashMap<TenantShardId, ObservedState>);
impl PeerClient {
pub(crate) fn new(uri: Uri, jwt: Option<String>) -> Self {
pub(crate) fn new(http_client: reqwest::Client, uri: Uri, jwt: Option<String>) -> Self {
Self {
uri,
jwt,
client: reqwest::Client::new(),
client: http_client,
}
}

View File

@@ -1711,7 +1711,7 @@ impl Service {
))),
config: config.clone(),
persistence,
compute_hook: Arc::new(ComputeHook::new(config.clone())),
compute_hook: Arc::new(ComputeHook::new(config.clone())?),
result_tx,
heartbeater_ps,
heartbeater_sk,