feat(compute_ctl): use TLS if configured (#10972)

Closes: https://github.com/neondatabase/cloud/issues/22998

If control-plane reports that TLS should be used, load the certificates
(and watch for updates), make sure postgres use them, and detects
updates.

Procedure:
1. Load certificates
2. Reconfigure postgres/pgbouncer
3. Loop on a timer until certificates have loaded
4. Go to 1

Notes:
1. We only run this procedure if requested on startup by control plane.
2. We needed to compile pgbouncer with openssl enabled
3. Postgres doesn't allow tls keys to be globally accessible - must be
read only to the postgres user. I couldn't convince the autoscaling team
to let me put this logic into the VM settings, so instead compute_ctl
will copy the keys to be read-only by postgres.
4. To mitigate a race condition, we also verify that the key matches the
cert.
This commit is contained in:
Conrad Ludgate
2025-03-13 15:03:22 +00:00
committed by GitHub
parent b2286f5bcb
commit 3dec117572
24 changed files with 427 additions and 87 deletions

View File

@@ -26,6 +26,7 @@ fail.workspace = true
flate2.workspace = true
futures.workspace = true
http.workspace = true
indexmap.workspace = true
jsonwebtoken.workspace = true
metrics.workspace = true
nix.workspace = true
@@ -34,16 +35,19 @@ num_cpus.workspace = true
once_cell.workspace = true
opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
p256 = { version = "0.13", features = ["pem"] }
postgres.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["json"] }
ring = "0.17"
serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
signal-hook.workspace = true
spki = { version = "0.7.3", features = ["std"] }
tar.workspace = true
tower.workspace = true
tower-http.workspace = true
reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
@@ -57,6 +61,7 @@ thiserror.workspace = true
url.workspace = true
uuid.workspace = true
walkdir.workspace = true
x509-cert = { version = "0.2.5" }
postgres_initdb.workspace = true
compute_api.workspace = true

View File

@@ -41,6 +41,7 @@ use crate::rsyslog::configure_audit_rsyslog;
use crate::spec::*;
use crate::swap::resize_swap;
use crate::sync_sk::{check_if_synced, ping_safekeeper};
use crate::tls::watch_cert_for_changes;
use crate::{config, extension_server, local_proxy};
pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
@@ -112,6 +113,7 @@ pub struct ComputeNode {
// key: ext_archive_name, value: started download time, download_completed?
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
pub compute_ctl_config: ComputeCtlConfig,
}
// store some metrics about download size that might impact startup time
@@ -135,8 +137,6 @@ pub struct ComputeState {
/// passed by the control plane with a /configure HTTP request.
pub pspec: Option<ParsedSpec>,
pub compute_ctl_config: ComputeCtlConfig,
/// If the spec is passed by a /configure request, 'startup_span' is the
/// /configure request's tracing span. The main thread enters it when it
/// processes the compute startup, so that the compute startup is considered
@@ -160,7 +160,6 @@ impl ComputeState {
last_active: None,
error: None,
pspec: None,
compute_ctl_config: ComputeCtlConfig::default(),
startup_span: None,
metrics: ComputeMetrics::default(),
}
@@ -314,7 +313,6 @@ impl ComputeNode {
let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?;
new_state.pspec = Some(pspec);
}
new_state.compute_ctl_config = compute_ctl_config;
Ok(ComputeNode {
params,
@@ -323,6 +321,7 @@ impl ComputeNode {
state: Mutex::new(new_state),
state_changed: Condvar::new(),
ext_download_progress: RwLock::new(HashMap::new()),
compute_ctl_config,
})
}
@@ -345,7 +344,7 @@ impl ComputeNode {
// requests while configuration is still in progress.
crate::http::server::Server::External {
port: this.params.external_http_port,
jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(),
config: this.compute_ctl_config.clone(),
compute_id: this.params.compute_id.clone(),
}
.launch(&this);
@@ -524,6 +523,16 @@ impl ComputeNode {
// Collect all the tasks that must finish here
let mut pre_tasks = tokio::task::JoinSet::new();
// Make sure TLS certificates are properly loaded and in the right place.
if self.compute_ctl_config.tls.is_some() {
let this = self.clone();
pre_tasks.spawn(async move {
this.watch_cert_for_changes().await;
Ok::<(), anyhow::Error>(())
});
}
// If there are any remote extensions in shared_preload_libraries, start downloading them
if pspec.spec.remote_extensions.is_some() {
let (this, spec) = (self.clone(), pspec.spec.clone());
@@ -579,11 +588,13 @@ impl ComputeNode {
if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
info!("tuning pgbouncer");
let pgbouncer_settings = pgbouncer_settings.clone();
let tls_config = self.compute_ctl_config.tls.clone();
// Spawn a background task to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
let _handle = tokio::spawn(async move {
let res = tune_pgbouncer(pgbouncer_settings).await;
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
// Continue with the startup anyway
@@ -1105,9 +1116,10 @@ impl ComputeNode {
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(
&pgdata_path.join("postgresql.conf"),
pgdata_path,
&pspec.spec,
self.params.internal_http_port,
&self.compute_ctl_config.tls,
)?;
// Syncing safekeepers is only safe with primary nodes: if a primary
@@ -1489,11 +1501,13 @@ impl ComputeNode {
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
info!("tuning pgbouncer");
let pgbouncer_settings = pgbouncer_settings.clone();
let tls_config = self.compute_ctl_config.tls.clone();
// Spawn a background task to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
tokio::spawn(async move {
let res = tune_pgbouncer(pgbouncer_settings).await;
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
}
@@ -1505,7 +1519,8 @@ impl ComputeNode {
// Spawn a background task to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let mut local_proxy = local_proxy.clone();
local_proxy.tls = self.compute_ctl_config.tls.clone();
tokio::spawn(async move {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
@@ -1515,8 +1530,12 @@ impl ComputeNode {
// Write new config
let pgdata_path = Path::new(&self.params.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?;
config::write_postgres_conf(
pgdata_path,
&spec,
self.params.internal_http_port,
&self.compute_ctl_config.tls,
)?;
if !spec.skip_pg_catalog_updates {
let max_concurrent_connections = spec.reconfigure_concurrency;
@@ -1587,6 +1606,56 @@ impl ComputeNode {
Ok(())
}
pub async fn watch_cert_for_changes(self: Arc<Self>) {
// update status on cert renewal
if let Some(tls_config) = &self.compute_ctl_config.tls {
let tls_config = tls_config.clone();
// wait until the cert exists.
let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
tokio::task::spawn_blocking(move || {
let handle = tokio::runtime::Handle::current();
'cert_update: loop {
// let postgres/pgbouncer/local_proxy know the new cert/key exists.
// we need to wait until it's configurable first.
let mut state = self.state.lock().unwrap();
'status_update: loop {
match state.status {
// let's update the state to config pending
ComputeStatus::ConfigurationPending | ComputeStatus::Running => {
state.set_status(
ComputeStatus::ConfigurationPending,
&self.state_changed,
);
break 'status_update;
}
// exit loop
ComputeStatus::Failed
| ComputeStatus::TerminationPending
| ComputeStatus::Terminated => break 'cert_update,
// wait
ComputeStatus::Init
| ComputeStatus::Configuration
| ComputeStatus::Empty => {
state = self.state_changed.wait(state).unwrap();
}
}
}
drop(state);
// wait for a new certificate update
if handle.block_on(cert_watch.changed()).is_err() {
break;
}
}
});
}
}
/// Update the `last_active` in the shared state, but ensure that it's a more recent one.
pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
let mut state = self.state.lock().unwrap();

View File

@@ -6,11 +6,13 @@ use std::io::Write;
use std::io::prelude::*;
use std::path::Path;
use compute_api::responses::TlsConfig;
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
use crate::pg_helpers::{
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
};
use crate::tls::{self, SERVER_CRT, SERVER_KEY};
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
@@ -38,10 +40,12 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(
path: &Path,
pgdata_path: &Path,
spec: &ComputeSpec,
extension_server_port: u16,
tls_config: &Option<TlsConfig>,
) -> Result<()> {
let path = pgdata_path.join("postgresql.conf");
// File::create() destroys the file content if it exists.
let mut file = File::create(path)?;
@@ -86,6 +90,20 @@ pub fn write_postgres_conf(
)?;
}
// tls
if let Some(tls_config) = tls_config {
writeln!(file, "ssl = on")?;
// postgres requires the keyfile to be in a secure file,
// currently too complicated to ensure that at the VM level,
// so we just copy them to another file instead. :shrug:
tls::update_key_path_blocking(pgdata_path, tls_config);
// these are the default, but good to be explicit.
writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
}
// Locales
if cfg!(target_os = "macos") {
writeln!(file, "lc_messages='C'")?;

View File

@@ -8,8 +8,8 @@ use axum::Router;
use axum::middleware::{self};
use axum::response::IntoResponse;
use axum::routing::{get, post};
use compute_api::responses::ComputeCtlConfig;
use http::StatusCode;
use jsonwebtoken::jwk::JwkSet;
use tokio::net::TcpListener;
use tower::ServiceBuilder;
use tower_http::{
@@ -41,7 +41,7 @@ pub enum Server {
},
External {
port: u16,
jwks: JwkSet,
config: ComputeCtlConfig,
compute_id: String,
},
}
@@ -79,7 +79,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
router
}
Server::External {
jwks, compute_id, ..
config, compute_id, ..
} => {
let unauthenticated_router =
Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
@@ -95,7 +95,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
.route("/terminate", post(terminate::terminate))
.layer(AsyncRequireAuthorizationLayer::new(Authorize::new(
compute_id.clone(),
jwks.clone(),
config.jwks.clone(),
)));
router

View File

@@ -26,3 +26,4 @@ pub mod spec;
mod spec_apply;
pub mod swap;
pub mod sync_sk;
pub mod tls;

View File

@@ -10,8 +10,10 @@ use std::str::FromStr;
use std::time::{Duration, Instant};
use anyhow::{Result, bail};
use compute_api::responses::TlsConfig;
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
use futures::StreamExt;
use indexmap::IndexMap;
use ini::Ini;
use notify::{RecursiveMode, Watcher};
use postgres::config::Config;
@@ -406,7 +408,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {
/// Update pgbouncer.ini with provided options
fn update_pgbouncer_ini(
pgbouncer_config: HashMap<String, String>,
pgbouncer_config: IndexMap<String, String>,
pgbouncer_ini_path: &str,
) -> Result<()> {
let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
@@ -427,7 +429,10 @@ fn update_pgbouncer_ini(
/// Tune pgbouncer.
/// 1. Apply new config using pgbouncer admin console
/// 2. Add new values to pgbouncer.ini to preserve them after restart
pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
pub async fn tune_pgbouncer(
mut pgbouncer_config: IndexMap<String, String>,
tls_config: Option<TlsConfig>,
) -> Result<()> {
let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
// for VMs use pgbouncer specific way to connect to
// pgbouncer admin console without password
@@ -473,19 +478,21 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result
}
};
// Apply new config
for (option_name, value) in pgbouncer_config.iter() {
let query = format!("SET {}={}", option_name, value);
// keep this log line for debugging purposes
info!("Applying pgbouncer setting change: {}", query);
if let Some(tls_config) = tls_config {
// pgbouncer starts in a half-ok state if it cannot find these files.
// It will default to client_tls_sslmode=deny, which causes proxy to error.
// There is a small window at startup where these files don't yet exist in the VM.
// Best to wait until it exists.
loop {
if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {
break;
}
tokio::time::sleep(Duration::from_millis(500)).await
}
if let Err(err) = client.simple_query(&query).await {
// Don't fail on error, just print it into log
error!(
"Failed to apply pgbouncer setting change: {}, {}",
query, err
);
};
pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path);
pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path);
pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string());
}
// save values to pgbouncer.ini
@@ -501,6 +508,13 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result
};
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
info!("Applying pgbouncer setting change");
if let Err(err) = client.simple_query("RELOAD").await {
// Don't fail on error, just print it into log
error!("Failed to apply pgbouncer setting change, {err}",);
};
Ok(())
}

118
compute_tools/src/tls.rs Normal file
View File

@@ -0,0 +1,118 @@
use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
use anyhow::{Context, Result, bail};
use compute_api::responses::TlsConfig;
use ring::digest;
use spki::ObjectIdentifier;
use spki::der::{Decode, PemReader};
use x509_cert::Certificate;
#[derive(Clone, Copy)]
pub struct CertDigest(digest::Digest);
pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
let mut digest = compute_digest(&cert_path).await;
let (tx, rx) = tokio::sync::watch::channel(digest);
tokio::spawn(async move {
while !tx.is_closed() {
let new_digest = compute_digest(&cert_path).await;
if digest.0.as_ref() != new_digest.0.as_ref() {
digest = new_digest;
_ = tx.send(digest);
}
tokio::time::sleep(Duration::from_secs(60)).await
}
});
rx
}
async fn compute_digest(cert_path: &str) -> CertDigest {
loop {
match try_compute_digest(cert_path).await {
Ok(d) => break d,
Err(e) => {
tracing::error!("could not read cert file {e:?}");
tokio::time::sleep(Duration::from_secs(1)).await
}
}
}
}
async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
let data = tokio::fs::read(cert_path).await?;
// sha256 is extremely collision resistent. can safely assume the digest to be unique
Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
}
pub const SERVER_CRT: &str = "server.crt";
pub const SERVER_KEY: &str = "server.key";
pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
loop {
match try_update_key_path_blocking(pg_data, tls_config) {
Ok(()) => break,
Err(e) => {
tracing::error!("could not create key file {e:?}");
std::thread::sleep(Duration::from_secs(1))
}
}
}
}
// Postgres requires the keypath be "secure". This means
// 1. Owned by the postgres user.
// 2. Have permission 600.
fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
let key = std::fs::read_to_string(&tls_config.key_path)?;
let crt = std::fs::read_to_string(&tls_config.cert_path)?;
// to mitigate a race condition during renewal.
verify_key_cert(&key, &crt)?;
let mut key_file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.mode(0o600)
.open(pg_data.join(SERVER_KEY))?;
let mut crt_file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.mode(0o600)
.open(pg_data.join(SERVER_CRT))?;
key_file.write_all(key.as_bytes())?;
crt_file.write_all(crt.as_bytes())?;
Ok(())
}
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2");
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
.context("decode cert")?;
match cert.signature_algorithm.oid {
ECDSA_WITH_SHA256 => {
let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
let a = key.public_key().to_sec1_bytes();
let b = cert
.tbs_certificate
.subject_public_key_info
.subject_public_key
.raw_bytes();
if *a != *b {
bail!("private key file does not match certificate")
}
}
_ => bail!("unknown TLS key type"),
}
Ok(())
}