feat(compute): Allow specifying the reconfiguration concurrency (#10006)

## Problem

We need a higher concurrency during reconfiguration in case of many DBs,
but the instance is already running and used by the client. We can
easily get out of `max_connections` limit, and the current code won't
handle that.

## Summary of changes

Default to 1, but also allow control plane to override this value for
specific projects. It's also recommended to bump
`superuser_reserved_connections` += `reconfigure_concurrency` for such
projects to ensure that we always have enough spare connections for
reconfiguration process to succeed.

Quick workaround for neondatabase/cloud#17846
This commit is contained in:
Alexey Kondratov
2024-12-05 18:57:25 +01:00
committed by GitHub
parent db79304416
commit 13e8105740
3 changed files with 24 additions and 9 deletions

View File

@@ -1243,12 +1243,7 @@ impl ComputeNode {
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
config::write_postgres_conf(&postgresql_conf_path, &spec, self.http_port)?;
// TODO(ololobus): We need a concurrency during reconfiguration as well,
// but DB is already running and used by user. We can easily get out of
// `max_connections` limit, and the current code won't handle that.
// let compute_state = self.state.lock().unwrap().clone();
// let max_concurrent_connections = self.max_service_connections(&compute_state, &spec);
let max_concurrent_connections = 1;
let max_concurrent_connections = spec.reconfigure_concurrency;
// Temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring:

View File

@@ -618,6 +618,7 @@ impl Endpoint {
pgbouncer_settings: None,
shard_stripe_size: Some(shard_stripe_size),
local_proxy_config: None,
reconfigure_concurrency: 1,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

View File

@@ -19,6 +19,10 @@ pub type PgIdent = String;
/// String type alias representing Postgres extension version
pub type ExtVersion = String;
fn default_reconfigure_concurrency() -> usize {
1
}
/// Cluster spec or configuration represented as an optional number of
/// delta operations + final cluster state description.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
@@ -67,7 +71,7 @@ pub struct ComputeSpec {
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
/// An optinal hint that can be passed to speed up startup time if we know
/// An optional hint that can be passed to speed up startup time if we know
/// that no pg catalog mutations (like role creation, database creation,
/// extension creation) need to be done on the actual database to start.
#[serde(default)] // Default false
@@ -86,9 +90,7 @@ pub struct ComputeSpec {
// etc. GUCs in cluster.settings. TODO: Once the control plane has been
// updated to fill these fields, we can make these non optional.
pub tenant_id: Option<TenantId>,
pub timeline_id: Option<TimelineId>,
pub pageserver_connstring: Option<String>,
#[serde(default)]
@@ -113,6 +115,20 @@ pub struct ComputeSpec {
/// Local Proxy configuration used for JWT authentication
#[serde(default)]
pub local_proxy_config: Option<LocalProxySpec>,
/// Number of concurrent connections during the parallel RunInEachDatabase
/// phase of the apply config process.
///
/// We need a higher concurrency during reconfiguration in case of many DBs,
/// but instance is already running and used by client. We can easily get out of
/// `max_connections` limit, and the current code won't handle that.
///
/// Default is 1, but also allow control plane to override this value for specific
/// projects. It's also recommended to bump `superuser_reserved_connections` +=
/// `reconfigure_concurrency` for such projects to ensure that we always have
/// enough spare connections for reconfiguration process to succeed.
#[serde(default = "default_reconfigure_concurrency")]
pub reconfigure_concurrency: usize,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -315,6 +331,9 @@ mod tests {
// Features list defaults to empty vector.
assert!(spec.features.is_empty());
// Reconfigure concurrency defaults to 1.
assert_eq!(spec.reconfigure_concurrency, 1);
}
#[test]