Compare commits

...

4 Commits

Author SHA1 Message Date
Conrad Ludgate
87c793f58c log spec json and return parse error
dont cancel e2e test early
2024-10-04 11:37:48 +01:00
Conrad Ludgate
52a7d780ad test 2024-10-04 10:32:18 +01:00
Conrad Ludgate
2255a8ebac minor changes to local_proxy 2024-10-04 09:18:43 +01:00
Conrad Ludgate
e109d5aac0 add local_proxy to computespec 2024-10-04 09:16:21 +01:00
9 changed files with 176 additions and 68 deletions

View File

@@ -33,7 +33,7 @@ jobs:
github-event-name: ${{ github.event_name }}
cancel-previous-e2e-tests:
needs: [ check-permissions ]
needs: [ check-permissions, promote-images, tag ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
@@ -518,7 +518,7 @@ jobs:
trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }}
needs: [ check-permissions, promote-images, tag ]
needs: [ check-permissions, promote-images, tag, cancel-previous-e2e-tests ]
uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit

View File

@@ -11,6 +11,7 @@ testing = []
[dependencies]
anyhow.workspace = true
# camino.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true

View File

@@ -264,68 +264,72 @@ async fn handle_configure_request(
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let spec = request.spec;
match serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
Ok(request) => {
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
};
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
// XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error:
// ```
// error: future cannot be sent between threads safely
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
Ok(())
})
.await
.unwrap()?;
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
} else {
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
}
}
Ok(())
})
.await
.unwrap()?;
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
}
Err(err) => {
error!("could not parse spec: {spec_raw}");
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
}
}
}

View File

@@ -15,6 +15,7 @@ pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
// pub mod local_proxy;
pub mod lsn_lease;
mod migration;
pub mod monitor;

View File

@@ -0,0 +1,56 @@
//! Local Proxy is a feature of our BaaS Neon Authorize project.
//!
//! Local Proxy validates JWTs and manages the pg_session_jwt extension.
//! It also maintains a connection pool to postgres.
use anyhow::{Context, Result};
use camino::Utf8Path;
use compute_api::spec::LocalProxySpec;
use nix::sys::signal::Signal;
use utils::pid_file::{self, PidFileRead};
pub fn configure(local_proxy: &LocalProxySpec) -> Result<()> {
write_local_proxy_conf("/etc/local_proxy/config.json".as_ref(), local_proxy)?;
notify_local_proxy("/etc/local_proxy/pid".as_ref())?;
Ok(())
}
/// Create or completely rewrite configuration file specified by `path`
fn write_local_proxy_conf(path: &Utf8Path, local_proxy: &LocalProxySpec) -> Result<()> {
let config =
serde_json::to_string_pretty(local_proxy).context("serializing LocalProxySpec to json")?;
std::fs::write(path, config).with_context(|| format!("writing {path}"))?;
Ok(())
}
/// Notify local proxy about a new config file.
fn notify_local_proxy(path: &Utf8Path) -> Result<()> {
match pid_file::read(path)? {
// if the file doesn't exist, or isn't locked, local_proxy isn't running
// and will naturally pick up our config later
PidFileRead::NotExist | PidFileRead::NotHeldByAnyProcess(_) => {}
PidFileRead::LockedByOtherProcess(pid) => {
// From the pid_file docs:
//
// > 1. The other process might exit at any time, turning the given PID stale.
// > 2. There is a small window in which `claim_for_current_process` has already
// > locked the file but not yet updates its contents. [`read`] will return
// > this variant here, but with the old file contents, i.e., a stale PID.
// >
// > The kernel is free to recycle PID once it has been `wait(2)`ed upon by
// > its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
// > system call on it, bears the risk of killing an unrelated process.
// > This is an inherent limitation of using pidfiles.
// > The only race-free solution is to have a supervisor-process with a lifetime
// > that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
//
// This is an ok risk as we only send a SIGHUP which likely won't actually
// kill the process, only reload config.
nix::sys::signal::kill(pid, Signal::SIGHUP).context("sending signal to local_proxy")?;
}
}
Ok(())
}

View File

@@ -599,6 +599,7 @@ impl Endpoint {
remote_extensions,
pgbouncer_settings: None,
shard_stripe_size: Some(shard_stripe_size),
local_proxy_config: None,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

View File

@@ -106,6 +106,11 @@ pub struct ComputeSpec {
// Stripe size for pageserver sharding, in pages
#[serde(default)]
pub shard_stripe_size: Option<usize>,
/// Local Proxy configuration used for JWT authentication
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub local_proxy_config: Option<LocalProxySpec>,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.

View File

@@ -109,7 +109,7 @@ struct SqlOverHttpArgs {
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let _logging_guard = proxy::logging::init().await?;
let _logging_guard = proxy::logging::init_local_proxy()?;
let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
@@ -138,7 +138,7 @@ async fn main() -> anyhow::Result<()> {
// in order to trigger the appropriate SIGHUP on config change.
//
// This also claims a "lock" that makes sure only one instance
// of local-proxy runs at a time.
// of local_proxy runs at a time.
let _process_guard = loop {
match pid_file::claim_for_current_process(&args.pid_path) {
Ok(guard) => break guard,
@@ -164,12 +164,6 @@ async fn main() -> anyhow::Result<()> {
16,
));
// write the process ID to a file so that compute-ctl can find our process later
// in order to trigger the appropriate SIGHUP on config change.
let pid = std::process::id();
info!("process running in PID {pid}");
std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
let mut maintenance_tasks = JoinSet::new();
let refresh_config_notify = Arc::new(Notify::new());
@@ -182,9 +176,9 @@ async fn main() -> anyhow::Result<()> {
// trigger the first config load **after** setting up the signal hook
// to avoid the race condition where:
// 1. No config file registered when local-proxy starts up
// 1. No config file registered when local_proxy starts up
// 2. The config file is written but the signal hook is not yet received
// 3. local-proxy completes startup but has no config loaded, despite there being a registerd config.
// 3. local_proxy completes startup but has no config loaded, despite there being a registerd config.
refresh_config_notify.notify_one();
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));

View File

@@ -1,6 +1,13 @@
use tracing::Subscriber;
use tracing_subscriber::{
filter::{EnvFilter, LevelFilter},
fmt::{
format::{Format, Full},
time::SystemTime,
FormatEvent, FormatFields,
},
prelude::*,
registry::LookupSpan,
};
/// Initialize logging and OpenTelemetry tracing and exporter.
@@ -33,6 +40,45 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
Ok(LoggingGuard)
}
/// Initialize logging for local_proxy with log prefix and no opentelemetry.
///
/// Logging can be configured using `RUST_LOG` environment variable.
pub fn init_local_proxy() -> anyhow::Result<LoggingGuard> {
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
.from_env_lossy();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_ansi(false)
.with_writer(std::io::stderr)
.event_format(LocalProxyFormatter(Format::default().with_target(false)));
tracing_subscriber::registry()
.with(env_filter)
.with(fmt_layer)
.try_init()?;
Ok(LoggingGuard)
}
pub struct LocalProxyFormatter(Format<Full, SystemTime>);
impl<S, N> FormatEvent<S, N> for LocalProxyFormatter
where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
&self,
ctx: &tracing_subscriber::fmt::FmtContext<'_, S, N>,
mut writer: tracing_subscriber::fmt::format::Writer<'_>,
event: &tracing::Event<'_>,
) -> std::fmt::Result {
writer.write_str("[local_proxy] ")?;
self.0.format_event(ctx, writer, event)
}
}
pub struct LoggingGuard;
impl Drop for LoggingGuard {