mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-28 00:23:00 +00:00
added changes from hadron configurator
This commit is contained in:
@@ -1,10 +1,16 @@
|
||||
use std::sync::Arc;
|
||||
use std::fs::File;
|
||||
use std::thread;
|
||||
use std::{path::Path, sync::Arc};
|
||||
|
||||
use anyhow::Result;
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::ComputeSpec;
|
||||
use tracing::{error, info, instrument};
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::{
|
||||
compute::{ComputeNode, ParsedSpec},
|
||||
spec::get_config_from_control_plane,
|
||||
};
|
||||
|
||||
#[instrument(skip_all)]
|
||||
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||
@@ -12,12 +18,27 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||
loop {
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
|
||||
// We have to re-check the status after re-acquiring the lock because it could be that
|
||||
// the status has changed while we were waiting for the lock, and we might not need to
|
||||
// wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
|
||||
// we are waiting for a condition variable that will never be signaled.
|
||||
if state.status != ComputeStatus::ConfigurationPending {
|
||||
state = compute.state_changed.wait(state).unwrap();
|
||||
if compute.params.lakebase_mode {
|
||||
/* BEGIN_HADRON */
|
||||
// RefreshConfiguration should only be used inside the loop
|
||||
assert_ne!(state.status, ComputeStatus::RefreshConfiguration);
|
||||
/* END_HADRON */
|
||||
|
||||
while state.status != ComputeStatus::ConfigurationPending
|
||||
&& state.status != ComputeStatus::RefreshConfigurationPending
|
||||
&& state.status != ComputeStatus::Failed
|
||||
{
|
||||
info!("configurator: compute status: {:?}, sleeping", state.status);
|
||||
state = compute.state_changed.wait(state).unwrap();
|
||||
}
|
||||
} else {
|
||||
// We have to re-check the status after re-acquiring the lock because it could be that
|
||||
// the status has changed while we were waiting for the lock, and we might not need to
|
||||
// wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
|
||||
// we are waiting for a condition variable that will never be signaled.
|
||||
if state.status != ComputeStatus::ConfigurationPending {
|
||||
state = compute.state_changed.wait(state).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// Re-check the status after waking up
|
||||
@@ -26,17 +47,146 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||
state.set_status(ComputeStatus::Configuration, &compute.state_changed);
|
||||
drop(state);
|
||||
|
||||
let mut new_status = ComputeStatus::Failed;
|
||||
let mut _new_status = ComputeStatus::Failed;
|
||||
if let Err(e) = compute.reconfigure() {
|
||||
error!("could not configure compute node: {}", e);
|
||||
// TODO(BRC-1726): Remove this panic once we fix the state machine to allow futher
|
||||
// configuration attempts after a failed configuration attempt.
|
||||
error!("Compute node exiting due to configuration failure.");
|
||||
std::process::exit(1);
|
||||
} else {
|
||||
new_status = ComputeStatus::Running;
|
||||
_new_status = ComputeStatus::Running;
|
||||
info!("compute node configured");
|
||||
}
|
||||
|
||||
// XXX: used to test that API is blocking
|
||||
// std::thread::sleep(std::time::Duration::from_millis(10000));
|
||||
|
||||
compute.set_status(_new_status);
|
||||
} else if state.status == ComputeStatus::RefreshConfigurationPending {
|
||||
info!(
|
||||
"compute node suspects its configuration is out of date, now refreshing configuration"
|
||||
);
|
||||
state.set_status(ComputeStatus::RefreshConfiguration, &compute.state_changed);
|
||||
// Drop the lock guard here to avoid holding the lock while downloading spec from the control plane / HCC.
|
||||
// This is the only thread that can move compute_ctl out of the `RefreshConfiguration` state, so it
|
||||
// is safe to drop the lock like this.
|
||||
drop(state);
|
||||
|
||||
let get_spec_result: anyhow::Result<Option<ComputeSpec>> =
|
||||
if let Some(sp) = &compute.params.spec_path_test_only {
|
||||
// This path is only to make testing easier. In production we always get the spec from the HCM.
|
||||
info!("reloading spec.json from path: {:?}", sp);
|
||||
let path = Path::new(sp);
|
||||
if let Ok(file) = File::open(path) {
|
||||
match serde_json::from_reader(file) {
|
||||
Ok(spec) => Ok(Some(spec)),
|
||||
Err(e) => {
|
||||
error!("could not parse spec file: {}", e);
|
||||
Err(anyhow::anyhow!("could not parse spec file: {}", e))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
error!("could not open spec file at path: {:?}", sp);
|
||||
Err(anyhow::anyhow!(
|
||||
"could not open spec file at path: {:?}",
|
||||
sp
|
||||
))
|
||||
}
|
||||
} else if let Some(control_plane_uri) = &compute.params.control_plane_uri {
|
||||
get_config_from_control_plane(control_plane_uri, &compute.params.compute_id).map(
|
||||
|(spec_opt, _)| {
|
||||
info!("got spec from control plane: {:?}", spec_opt);
|
||||
spec_opt
|
||||
},
|
||||
)
|
||||
} else {
|
||||
Err(anyhow::anyhow!("spec_path_test_only is not set"))
|
||||
};
|
||||
|
||||
// Parse any received ComputeSpec and transpose the result into a Result<Option<ParsedSpec>>.
|
||||
let parsed_spec_result: Result<Option<ParsedSpec>> = get_spec_result.and_then(|spec| {
|
||||
if let Some(spec) = spec {
|
||||
if let Ok(pspec) = ParsedSpec::try_from(spec) {
|
||||
Ok(Some(pspec))
|
||||
} else {
|
||||
Err(anyhow::anyhow!("could not parse spec"))
|
||||
}
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
});
|
||||
|
||||
let new_status: ComputeStatus;
|
||||
match parsed_spec_result {
|
||||
// Control plane (HCM) returned a spec and we were able to parse it.
|
||||
Ok(Some(pspec)) => {
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
// Defensive programming to make sure this thread is indeed the only one that can move the compute
|
||||
// node out of the `RefreshConfiguration` state. Would be nice if we can encode this invariant
|
||||
// into the type system.
|
||||
assert_eq!(state.status, ComputeStatus::RefreshConfiguration);
|
||||
|
||||
if state.pspec.as_ref().map(|ps| ps.pageserver_connstr.clone())
|
||||
== Some(pspec.pageserver_connstr.clone())
|
||||
{
|
||||
info!(
|
||||
"Refresh configuration: Retrieved spec is the same as the current spec. Waiting for control plane to update the spec before attempting reconfiguration."
|
||||
);
|
||||
state.status = ComputeStatus::Running;
|
||||
compute.state_changed.notify_all();
|
||||
drop(state);
|
||||
std::thread::sleep(std::time::Duration::from_secs(5));
|
||||
continue;
|
||||
}
|
||||
// state.pspec is consumed by compute.reconfigure() below. Note that compute.reconfigure() will acquire
|
||||
// the compute.state lock again so we need to have the lock guard go out of scope here. We could add a
|
||||
// "locked" variant of compute.reconfigure() that takes the lock guard as an argument to make this cleaner,
|
||||
// but it's not worth forking the codebase too much for this minor point alone right now.
|
||||
ComputeNode::set_spec(&compute.params, &mut state, pspec);
|
||||
}
|
||||
match compute.reconfigure() {
|
||||
Ok(_) => {
|
||||
info!("Refresh configuration: compute node configured");
|
||||
new_status = ComputeStatus::Running;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Refresh configuration: could not configure compute node: {}",
|
||||
e
|
||||
);
|
||||
// Set the compute node back to the `RefreshConfigurationPending` state if the configuration
|
||||
// was not successful. It should be okay to treat this situation the same as if the loop
|
||||
// hasn't executed yet as long as the detection side keeps notifying.
|
||||
new_status = ComputeStatus::RefreshConfigurationPending;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Control plane (HCM)'s response does not contain a spec. This is the "Empty" attachment case.
|
||||
Ok(None) => {
|
||||
info!(
|
||||
"Compute Manager signaled that this compute is no longer attached to any storage. Exiting."
|
||||
);
|
||||
// We just immediately terminate the whole compute_ctl in this case. It's not necessary to attempt a
|
||||
// clean shutdown as Postgres is probably not responding anyway (which is why we are in this refresh
|
||||
// configuration state).
|
||||
std::process::exit(1);
|
||||
}
|
||||
// Various error cases:
|
||||
// - The request to the control plane (HCM) either failed or returned a malformed spec.
|
||||
// - compute_ctl itself is configured incorrectly (e.g., compute_id is not set).
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Refresh configuration: error getting a parsed spec: {:?}",
|
||||
e
|
||||
);
|
||||
new_status = ComputeStatus::RefreshConfigurationPending;
|
||||
// We may be dealing with an overloaded HCM if we end up in this path. Backoff 5 seconds before
|
||||
// retrying to avoid hammering the HCM.
|
||||
std::thread::sleep(std::time::Duration::from_secs(5));
|
||||
}
|
||||
}
|
||||
compute.set_status(new_status);
|
||||
} else if state.status == ComputeStatus::Failed {
|
||||
info!("compute node is now in Failed state, exiting");
|
||||
|
||||
@@ -97,6 +97,8 @@ pub struct EndpointConf {
|
||||
reconfigure_concurrency: usize,
|
||||
drop_subscriptions_before_start: bool,
|
||||
features: Vec<ComputeFeature>,
|
||||
compute_id: String,
|
||||
instance_id: Option<String>,
|
||||
cluster: Option<Cluster>,
|
||||
compute_ctl_config: ComputeCtlConfig,
|
||||
privileged_role_name: Option<String>,
|
||||
@@ -199,6 +201,8 @@ impl ComputeControlPlane {
|
||||
mode: ComputeMode,
|
||||
grpc: bool,
|
||||
skip_pg_catalog_updates: bool,
|
||||
compute_id: &str,
|
||||
instance_id: Option<String>,
|
||||
drop_subscriptions_before_start: bool,
|
||||
privileged_role_name: Option<String>,
|
||||
) -> Result<Arc<Endpoint>> {
|
||||
@@ -236,6 +240,8 @@ impl ComputeControlPlane {
|
||||
grpc,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
compute_id: compute_id.to_owned(),
|
||||
instance_id: instance_id.clone(),
|
||||
cluster: None,
|
||||
compute_ctl_config: compute_ctl_config.clone(),
|
||||
privileged_role_name: privileged_role_name.clone(),
|
||||
@@ -258,6 +264,8 @@ impl ComputeControlPlane {
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
compute_id: compute_id.to_string(),
|
||||
instance_id: instance_id.clone(),
|
||||
cluster: None,
|
||||
compute_ctl_config,
|
||||
privileged_role_name,
|
||||
@@ -331,6 +339,13 @@ pub struct Endpoint {
|
||||
reconfigure_concurrency: usize,
|
||||
// Feature flags
|
||||
features: Vec<ComputeFeature>,
|
||||
|
||||
// The compute_id is used to identify the compute node in the cloud.
|
||||
compute_id: String,
|
||||
|
||||
// Hadron database instance id used for PG authentication and logs
|
||||
instance_id: Option<String>,
|
||||
|
||||
// Cluster settings
|
||||
cluster: Option<Cluster>,
|
||||
|
||||
@@ -395,6 +410,7 @@ pub struct EndpointStartArgs {
|
||||
pub autoprewarm: bool,
|
||||
pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
|
||||
pub dev: bool,
|
||||
pub pg_init_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
impl Endpoint {
|
||||
@@ -437,6 +453,8 @@ impl Endpoint {
|
||||
reconfigure_concurrency: conf.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
features: conf.features,
|
||||
compute_id: conf.compute_id,
|
||||
instance_id: conf.instance_id,
|
||||
cluster: conf.cluster,
|
||||
compute_ctl_config: conf.compute_ctl_config,
|
||||
privileged_role_name: conf.privileged_role_name,
|
||||
@@ -481,7 +499,7 @@ impl Endpoint {
|
||||
conf.append("restart_after_crash", "off");
|
||||
|
||||
// Load the 'neon' extension
|
||||
conf.append("shared_preload_libraries", "neon");
|
||||
conf.append("shared_preload_libraries", "neon, databricks_auth");
|
||||
|
||||
conf.append_line("");
|
||||
// Replication-related configurations, such as WAL sending
|
||||
@@ -785,6 +803,7 @@ impl Endpoint {
|
||||
shard_stripe_size: Some(args.shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: self.reconfigure_concurrency,
|
||||
databricks_settings: None,
|
||||
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
|
||||
audit_log_level: ComputeAudit::Disabled,
|
||||
logs_export_host: None::<String>,
|
||||
|
||||
@@ -173,6 +173,11 @@ pub enum ComputeStatus {
|
||||
TerminationPendingImmediate,
|
||||
// Terminated Postgres
|
||||
Terminated,
|
||||
// A spec refresh is being requested
|
||||
RefreshConfigurationPending,
|
||||
// A spec refresh is being applied. We cannot refresh configuration again until the current
|
||||
// refresh is done, i.e., signal_refresh_configuration() will return 500 error.
|
||||
RefreshConfiguration,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
@@ -185,6 +190,10 @@ impl Display for ComputeStatus {
|
||||
match self {
|
||||
ComputeStatus::Empty => f.write_str("empty"),
|
||||
ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
|
||||
ComputeStatus::RefreshConfigurationPending => {
|
||||
f.write_str("refresh-configuration-pending")
|
||||
}
|
||||
ComputeStatus::RefreshConfiguration => f.write_str("refresh-configuration"),
|
||||
ComputeStatus::Init => f.write_str("init"),
|
||||
ComputeStatus::Running => f.write_str("running"),
|
||||
ComputeStatus::Configuration => f.write_str("configuration"),
|
||||
@@ -285,10 +294,15 @@ pub struct TlsConfig {
|
||||
}
|
||||
|
||||
/// Response of the `/computes/{compute_id}/spec` control-plane API.
|
||||
/// This is not actually a compute API response, so consider moving
|
||||
/// to a different place.
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ControlPlaneConfigResponse {
|
||||
pub spec: Option<ComputeSpec>,
|
||||
pub status: ControlPlaneComputeStatus,
|
||||
// Hadron: Deserialize this field into a harmless default if
|
||||
// compute_ctl_config is not present for compatibility.
|
||||
#[serde(default)]
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
|
||||
2
vendor/postgres-v14
vendored
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: c9f9fdd011...4cacada8bd
2
vendor/postgres-v15
vendored
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: aaaeff2550...e5ee23d998
2
vendor/postgres-v16
vendored
2
vendor/postgres-v16
vendored
Submodule vendor/postgres-v16 updated: 9b9cb4b3e3...ad2b69b582
2
vendor/postgres-v17
vendored
2
vendor/postgres-v17
vendored
Submodule vendor/postgres-v17 updated: fa1788475e...ba750903a9
Reference in New Issue
Block a user