mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-10 23:12:54 +00:00
refactor TLS processing. Only use blocking-IO, split out the loading of certificates from the updating of certificates
This commit is contained in:
@@ -57,7 +57,6 @@ use crate::rsyslog::{
|
||||
use crate::spec::*;
|
||||
use crate::swap::resize_swap;
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
use crate::tls::watch_cert_for_changes;
|
||||
use crate::{config, extension_server, local_proxy};
|
||||
|
||||
pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
|
||||
@@ -844,9 +843,8 @@ impl ComputeNode {
|
||||
// Make sure TLS certificates are properly loaded and in the right place.
|
||||
if self.compute_ctl_config.tls.is_some() {
|
||||
let this = self.clone();
|
||||
pre_tasks.spawn(async move {
|
||||
this.watch_cert_for_changes().await;
|
||||
|
||||
pre_tasks.spawn_blocking(|| {
|
||||
this.watch_cert_for_changes();
|
||||
Ok::<(), anyhow::Error>(())
|
||||
});
|
||||
}
|
||||
@@ -2187,16 +2185,20 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn watch_cert_for_changes(self: Arc<Self>) {
|
||||
pub fn watch_cert_for_changes(self: Arc<Self>) {
|
||||
// update status on cert renewal
|
||||
if let Some(tls_config) = &self.compute_ctl_config.tls {
|
||||
let tls_config = tls_config.clone();
|
||||
|
||||
// wait until the cert exists.
|
||||
let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
|
||||
let mut digest = crate::tls::compute_digest(&tls_config.cert_path);
|
||||
info!(
|
||||
cert_path = tls_config.cert_path,
|
||||
key_path = tls_config.key_path,
|
||||
"TLS certificates found"
|
||||
);
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
'cert_update: loop {
|
||||
// let postgres/pgbouncer/local_proxy know the new cert/key exists.
|
||||
// we need to wait until it's configurable first.
|
||||
@@ -2233,11 +2235,13 @@ impl ComputeNode {
|
||||
drop(state);
|
||||
|
||||
// wait for a new certificate update
|
||||
if handle.block_on(cert_watch.changed()).is_err() {
|
||||
error!("certificate renewal monitoring task closed unexpectedly");
|
||||
break;
|
||||
}
|
||||
info!("TLS certificates renewed");
|
||||
digest = crate::tls::wait_until_cert_changed(digest, &tls_config.cert_path);
|
||||
|
||||
info!(
|
||||
cert_path = tls_config.cert_path,
|
||||
key_path = tls_config.key_path,
|
||||
"TLS certificates renewed",
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -184,7 +184,8 @@ pub fn write_postgres_conf(
|
||||
// postgres requires the keyfile to be in a secure file,
|
||||
// currently too complicated to ensure that at the VM level,
|
||||
// so we just copy them to another file instead. :shrug:
|
||||
tls::update_key_path_blocking(pgdata_path, tls_config);
|
||||
let keys = tls::load_certs_blocking(tls_config);
|
||||
tls::update_key_path_blocking(pgdata_path, &keys)?;
|
||||
|
||||
// these are the default, but good to be explicit.
|
||||
writeln!(file, "ssl_cert_file = '{SERVER_CRT}'")?;
|
||||
|
||||
@@ -8,37 +8,39 @@ use x509_cert::Certificate;
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct CertDigest(digest::Digest);
|
||||
|
||||
pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
|
||||
let mut digest = compute_digest(&cert_path).await;
|
||||
let (tx, rx) = tokio::sync::watch::channel(digest);
|
||||
tokio::spawn(async move {
|
||||
while !tx.is_closed() {
|
||||
let new_digest = compute_digest(&cert_path).await;
|
||||
if digest.0.as_ref() != new_digest.0.as_ref() {
|
||||
digest = new_digest;
|
||||
_ = tx.send(digest);
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(60)).await
|
||||
}
|
||||
});
|
||||
rx
|
||||
impl PartialEq for CertDigest {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.as_ref() == other.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
async fn compute_digest(cert_path: &str) -> CertDigest {
|
||||
pub fn wait_until_cert_changed(digest: CertDigest, cert_path: &str) -> CertDigest {
|
||||
loop {
|
||||
match try_compute_digest(cert_path).await {
|
||||
let new_digest = compute_digest(cert_path);
|
||||
if digest != new_digest {
|
||||
break new_digest;
|
||||
}
|
||||
|
||||
// Wait a while before checking the certificates.
|
||||
// We renew on a daily basis, so there's no rush.
|
||||
std::thread::sleep(Duration::from_secs(60));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_digest(cert_path: &str) -> CertDigest {
|
||||
loop {
|
||||
match try_compute_digest(cert_path) {
|
||||
Ok(d) => break d,
|
||||
Err(e) => {
|
||||
tracing::error!("could not read cert file {e:?}");
|
||||
tokio::time::sleep(Duration::from_secs(1)).await
|
||||
std::thread::sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
|
||||
let data = tokio::fs::read(cert_path).await?;
|
||||
fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
|
||||
let data = std::fs::read(cert_path)?;
|
||||
// sha256 is extremely collision resistent. can safely assume the digest to be unique
|
||||
Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
|
||||
}
|
||||
@@ -46,28 +48,37 @@ async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
|
||||
pub const SERVER_CRT: &str = "server.crt";
|
||||
pub const SERVER_KEY: &str = "server.key";
|
||||
|
||||
pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
|
||||
pub struct KeyPair {
|
||||
crt: String,
|
||||
key: String,
|
||||
}
|
||||
|
||||
pub fn load_certs_blocking(tls_config: &TlsConfig) -> KeyPair {
|
||||
loop {
|
||||
match try_update_key_path_blocking(pg_data, tls_config) {
|
||||
Ok(()) => break,
|
||||
match try_load_certs_blocking(tls_config) {
|
||||
Ok(key_pair) => break key_pair,
|
||||
Err(e) => {
|
||||
tracing::error!(error = ?e, "could not create key file");
|
||||
tracing::error!(error = ?e, "could not load certs");
|
||||
std::thread::sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Postgres requires the keypath be "secure". This means
|
||||
// 1. Owned by the postgres user.
|
||||
// 2. Have permission 600.
|
||||
fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
|
||||
fn try_load_certs_blocking(tls_config: &TlsConfig) -> Result<KeyPair> {
|
||||
let key = std::fs::read_to_string(&tls_config.key_path)?;
|
||||
let crt = std::fs::read_to_string(&tls_config.cert_path)?;
|
||||
|
||||
// to mitigate a race condition during renewal.
|
||||
verify_key_cert(&key, &crt)?;
|
||||
|
||||
Ok(KeyPair { key, crt })
|
||||
}
|
||||
|
||||
// Postgres requires the keypath be "secure". This means
|
||||
// 1. Owned by the postgres user.
|
||||
// 2. Have permission 600.
|
||||
pub fn update_key_path_blocking(pg_data: &Path, key_pair: &KeyPair) -> Result<()> {
|
||||
let mut key_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
@@ -82,8 +93,13 @@ fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Resul
|
||||
.mode(0o600)
|
||||
.open(pg_data.join(SERVER_CRT))?;
|
||||
|
||||
key_file.write_all(key.as_bytes())?;
|
||||
crt_file.write_all(crt.as_bytes())?;
|
||||
// NOTE: We currently ensure that an explicit reload does not happen during TLS renewal, but
|
||||
// there's a chance that postgres/pgbouncer/local_proxy reloads implicitly halfway between
|
||||
// these writes. This could allow them to reads the wrong keys to the wrong certs.
|
||||
// There doesn't seem to be any way to prevent that. However, we will issue a reload shortly
|
||||
// after which should at least correct it.
|
||||
key_file.write_all(key_pair.key.as_bytes())?;
|
||||
crt_file.write_all(key_pair.crt.as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user