storcon: add https API (#11239)

## Problem

Pageservers use unencrypted HTTP requests for storage controller API.

- Closes: https://github.com/neondatabase/cloud/issues/25524

## Summary of changes

- Replace hyper0::server::Server with http_utils::server::Server in
storage controller.
- Add HTTPS handler for storage controller API.
- Support `ssl_ca_file` in pageserver.
This commit is contained in:
Dmitrii Kovalkov
2025-03-20 12:22:02 +04:00
committed by GitHub
parent c6f5a58d3b
commit 9bf59989db
14 changed files with 234 additions and 112 deletions

View File

@@ -4,7 +4,9 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, anyhow};
use camino::Utf8PathBuf;
use clap::Parser;
use futures::future::OptionFuture;
use hyper0::Uri;
use metrics::BuildInfo;
use metrics::launch_timestamp::LaunchTimestamp;
@@ -39,13 +41,28 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[unsafe(export_name = "malloc_conf")]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
const DEFAULT_SSL_KEY_FILE: &str = "server.key";
const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
#[command(arg_required_else_help(true))]
#[clap(group(
clap::ArgGroup::new("listen-addresses")
.required(true)
.multiple(true)
.args(&["listen", "listen_https"]),
))]
struct Cli {
/// Host and port to listen on, like `127.0.0.1:1234`
/// Host and port to listen HTTP on, like `127.0.0.1:1234`.
/// At least one of ["listen", "listen_https"] should be specified.
// TODO: Make this option dev-only when https is out everywhere.
#[arg(short, long)]
listen: std::net::SocketAddr,
listen: Option<std::net::SocketAddr>,
/// Host and port to listen HTTPS on, like `127.0.0.1:1234`.
/// At least one of ["listen", "listen_https"] should be specified.
#[arg(long)]
listen_https: Option<std::net::SocketAddr>,
/// Public key for JWT authentication of clients
#[arg(long)]
@@ -157,6 +174,12 @@ struct Cli {
#[arg(long, default_value = "false")]
use_https_safekeeper_api: bool,
/// Path to a file with certificate's private key for https API.
#[arg(long, default_value = DEFAULT_SSL_KEY_FILE)]
ssl_key_file: Utf8PathBuf,
/// Path to a file with a X509 certificate for https API.
#[arg(long, default_value = DEFAULT_SSL_CERT_FILE)]
ssl_cert_file: Utf8PathBuf,
/// Trusted root CA certificate to use in https APIs.
#[arg(long)]
ssl_ca_file: Option<PathBuf>,
@@ -283,11 +306,10 @@ async fn async_main() -> anyhow::Result<()> {
let args = Cli::parse();
tracing::info!(
"version: {}, launch_timestamp: {}, build_tag {}, listening on {}",
"version: {}, launch_timestamp: {}, build_tag {}",
GIT_VERSION,
launch_ts.to_string(),
BUILD_TAG,
args.listen
);
let build_info = BuildInfo {
@@ -378,7 +400,6 @@ async fn async_main() -> anyhow::Result<()> {
.unwrap_or(LONG_RECONCILE_THRESHOLD_DEFAULT),
address_for_peers: args.address_for_peers,
start_as_candidate: args.start_as_candidate,
http_service_port: args.listen.port() as i32,
use_https_pageserver_api: args.use_https_pageserver_api,
use_https_safekeeper_api: args.use_https_safekeeper_api,
ssl_ca_cert,
@@ -392,28 +413,52 @@ async fn async_main() -> anyhow::Result<()> {
let service = Service::spawn(config, persistence.clone()).await?;
let http_listener = tcp_listener::bind(args.listen)?;
let auth = secrets
.public_key
.map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
let router = make_router(service.clone(), auth, build_info)
.build()
.map_err(|err| anyhow!(err))?;
let router_service = http_utils::RouterService::new(router).unwrap();
let http_service =
Arc::new(http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow!(err))?);
let api_shutdown = CancellationToken::new();
// Start HTTP server
let server_shutdown = CancellationToken::new();
let server = hyper0::Server::from_tcp(http_listener)?
.serve(router_service)
.with_graceful_shutdown({
let server_shutdown = server_shutdown.clone();
async move {
server_shutdown.cancelled().await;
}
});
tracing::info!("Serving on {0}", args.listen);
let server_task = tokio::task::spawn(server);
let http_server_task: OptionFuture<_> = match args.listen {
Some(http_addr) => {
let http_listener = tcp_listener::bind(http_addr)?;
let http_server =
http_utils::server::Server::new(Arc::clone(&http_service), http_listener, None)?;
tracing::info!("Serving HTTP on {}", http_addr);
Some(tokio::task::spawn(http_server.serve(api_shutdown.clone())))
}
None => None,
}
.into();
// Start HTTPS server
let https_server_task: OptionFuture<_> = match args.listen_https {
Some(https_addr) => {
let https_listener = tcp_listener::bind(https_addr)?;
let certs = http_utils::tls_certs::load_cert_chain(args.ssl_cert_file.as_path())?;
let key = http_utils::tls_certs::load_private_key(args.ssl_key_file.as_path())?;
let server_config = rustls::ServerConfig::builder()
.with_no_client_auth()
.with_single_cert(certs, key)?;
let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config));
let https_server =
http_utils::server::Server::new(http_service, https_listener, Some(tls_acceptor))?;
tracing::info!("Serving HTTPS on {}", https_addr);
Some(tokio::task::spawn(https_server.serve(api_shutdown.clone())))
}
None => None,
}
.into();
let chaos_task = args.chaos_interval.map(|interval| {
let service = service.clone();
@@ -437,28 +482,41 @@ async fn async_main() -> anyhow::Result<()> {
let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?;
let mut sigquit = tokio::signal::unix::signal(SignalKind::quit())?;
let mut sigterm = tokio::signal::unix::signal(SignalKind::terminate())?;
tokio::pin!(http_server_task, https_server_task);
tokio::select! {
_ = sigint.recv() => {},
_ = sigterm.recv() => {},
_ = sigquit.recv() => {},
Some(err) = &mut http_server_task => {
panic!("HTTP server task failed: {err:#?}");
}
Some(err) = &mut https_server_task => {
panic!("HTTPS server task failed: {err:#?}");
}
}
tracing::info!("Terminating on signal");
// Stop HTTP server first, so that we don't have to service requests
// Stop HTTP and HTTPS servers first, so that we don't have to service requests
// while shutting down Service.
server_shutdown.cancel();
match tokio::time::timeout(Duration::from_secs(5), server_task).await {
Ok(Ok(_)) => {
tracing::info!("Joined HTTP server task");
}
Ok(Err(e)) => {
tracing::error!("Error joining HTTP server task: {e}")
}
Err(_) => {
tracing::warn!("Timed out joining HTTP server task");
// We will fall through and shut down the service anyway, any request handlers
// in flight will experience cancellation & their clients will see a torn connection.
}
api_shutdown.cancel();
// If the deadline is exceeded, we will fall through and shut down the service anyway,
// any request handlers in flight will experience cancellation & their clients will
// see a torn connection.
let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
match tokio::time::timeout_at(deadline, http_server_task).await {
Ok(Some(Ok(_))) => tracing::info!("Joined HTTP server task"),
Ok(Some(Err(e))) => tracing::error!("Error joining HTTP server task: {e}"),
Ok(None) => {} // HTTP is disabled.
Err(_) => tracing::warn!("Timed out joining HTTP server task"),
}
match tokio::time::timeout_at(deadline, https_server_task).await {
Ok(Some(Ok(_))) => tracing::info!("Joined HTTPS server task"),
Ok(Some(Err(e))) => tracing::error!("Error joining HTTPS server task: {e}"),
Ok(None) => {} // HTTPS is disabled.
Err(_) => tracing::warn!("Timed out joining HTTPS server task"),
}
// If we were injecting chaos, stop that so that we're not calling into Service while it shuts down

View File

@@ -412,8 +412,6 @@ pub struct Config {
pub start_as_candidate: bool,
pub http_service_port: i32,
pub long_reconcile_threshold: Duration,
pub use_https_pageserver_api: bool,