mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-04 12:02:55 +00:00
compute_ctl: start pg in cgroup for vms (#4920)
Starts `postgres` in cgroup directly from `compute_ctl` instead of from `vm-builder`. This is required because the `vm-monitor` cannot be in the cgroup it is managing. Otherwise, it itself would be frozen when freezing the cgroup. Requires https://github.com/neondatabase/cloud/pull/6331, which adds the `AUTOSCALING` environment variable letting `compute_ctl` know to start `postgres` in the cgroup. Requires https://github.com/neondatabase/autoscaling/pull/468, which prevents `vm-builder` from starting the monitor and putting postgres in a cgroup. This will require a `VM_BUILDER_VERSION` bump.
This commit is contained in:
3
.github/workflows/build_and_test.yml
vendored
3
.github/workflows/build_and_test.yml
vendored
@@ -752,7 +752,7 @@ jobs:
|
||||
run:
|
||||
shell: sh -eu {0}
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.16.3
|
||||
VM_BUILDER_VERSION: v0.17.5
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -775,6 +775,7 @@ jobs:
|
||||
run: |
|
||||
./vm-builder \
|
||||
-enable-file-cache \
|
||||
-cgroup-uid=postgres \
|
||||
-src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
|
||||
-dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::io::BufRead;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
@@ -175,6 +176,27 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
}
|
||||
}
|
||||
|
||||
/// If we are a VM, returns a [`Command`] that will run in the `neon-postgres`
|
||||
/// cgroup. Otherwise returns the default `Command::new(cmd)`
|
||||
///
|
||||
/// This function should be used to start postgres, as it will start it in the
|
||||
/// neon-postgres cgroup if we are a VM. This allows autoscaling to control
|
||||
/// postgres' resource usage. The cgroup will exist in VMs because vm-builder
|
||||
/// creates it during the sysinit phase of its inittab.
|
||||
fn maybe_cgexec(cmd: &str) -> Command {
|
||||
// The cplane sets this env var for autoscaling computes.
|
||||
// use `var_os` so we don't have to worry about the variable being valid
|
||||
// unicode. Should never be an concern . . . but just in case
|
||||
if env::var_os("AUTOSCALING").is_some() {
|
||||
let mut command = Command::new("cgexec");
|
||||
command.args(["-g", "memory:neon-postgres"]);
|
||||
command.arg(cmd);
|
||||
command
|
||||
} else {
|
||||
Command::new(cmd)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
|
||||
/// that we give to customers
|
||||
fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
@@ -451,7 +473,7 @@ impl ComputeNode {
|
||||
pub fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
|
||||
let start_time = Utc::now();
|
||||
|
||||
let sync_handle = Command::new(&self.pgbin)
|
||||
let sync_handle = maybe_cgexec(&self.pgbin)
|
||||
.args(["--sync-safekeepers"])
|
||||
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
|
||||
.envs(if let Some(storage_auth_token) = &storage_auth_token {
|
||||
@@ -586,7 +608,7 @@ impl ComputeNode {
|
||||
|
||||
// Start postgres
|
||||
info!("starting postgres");
|
||||
let mut pg = Command::new(&self.pgbin)
|
||||
let mut pg = maybe_cgexec(&self.pgbin)
|
||||
.args(["-D", pgdata])
|
||||
.spawn()
|
||||
.expect("cannot start postgres process");
|
||||
@@ -614,7 +636,7 @@ impl ComputeNode {
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
|
||||
// Run postgres as a child process.
|
||||
let mut pg = Command::new(&self.pgbin)
|
||||
let mut pg = maybe_cgexec(&self.pgbin)
|
||||
.args(["-D", &self.pgdata])
|
||||
.envs(if let Some(storage_auth_token) = &storage_auth_token {
|
||||
vec![("NEON_AUTH_TOKEN", storage_auth_token)]
|
||||
|
||||
Reference in New Issue
Block a user