mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-24 06:39:58 +00:00
Run compute_ctl in a cgroup in VMs (#3577)
This commit is contained in:
@@ -21,3 +21,4 @@
|
|||||||
!workspace_hack/
|
!workspace_hack/
|
||||||
!neon_local/
|
!neon_local/
|
||||||
!scripts/ninstall.sh
|
!scripts/ninstall.sh
|
||||||
|
!vm-cgconfig.conf
|
||||||
|
|||||||
25
.github/workflows/build_and_test.yml
vendored
25
.github/workflows/build_and_test.yml
vendored
@@ -611,34 +611,31 @@ jobs:
|
|||||||
run:
|
run:
|
||||||
shell: sh -eu {0}
|
shell: sh -eu {0}
|
||||||
env:
|
env:
|
||||||
VM_INFORMANT_VERSION: 0.1.1
|
VM_BUILDER_VERSION: v0.4.6
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Downloading latest vm-builder
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v1
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Downloading vm-builder
|
||||||
run: |
|
run: |
|
||||||
curl -L https://github.com/neondatabase/neonvm/releases/latest/download/vm-builder -o vm-builder
|
curl -L https://github.com/neondatabase/neonvm/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
|
||||||
chmod +x vm-builder
|
chmod +x vm-builder
|
||||||
|
|
||||||
- name: Pulling compute-node image
|
- name: Pulling compute-node image
|
||||||
run: |
|
run: |
|
||||||
docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||||
|
|
||||||
- name: Downloading VM informant version ${{ env.VM_INFORMANT_VERSION }}
|
- name: Building VM compute-node rootfs
|
||||||
run: |
|
run: |
|
||||||
curl -fL https://github.com/neondatabase/autoscaling/releases/download/${{ env.VM_INFORMANT_VERSION }}/vm-informant -o vm-informant
|
docker build -t temp-vm-compute-node --build-arg SRC_IMAGE=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -f Dockerfile.vm-compute-node .
|
||||||
chmod +x vm-informant
|
|
||||||
|
|
||||||
- name: Adding VM informant to compute-node image
|
|
||||||
run: |
|
|
||||||
ID=$(docker create 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}})
|
|
||||||
docker cp vm-informant $ID:/bin/vm-informant
|
|
||||||
docker commit $ID temp-vm-compute-node
|
|
||||||
docker rm -f $ID
|
|
||||||
|
|
||||||
- name: Build vm image
|
- name: Build vm image
|
||||||
run: |
|
run: |
|
||||||
# note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
|
# note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
|
||||||
./vm-builder -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
./vm-builder -use-inittab -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||||
|
|
||||||
- name: Pushing vm-compute-node image
|
- name: Pushing vm-compute-node image
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
32
Dockerfile.vm-compute-node
Normal file
32
Dockerfile.vm-compute-node
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Note: this file *mostly* just builds on Dockerfile.compute-node
|
||||||
|
|
||||||
|
ARG SRC_IMAGE
|
||||||
|
ARG VM_INFORMANT_VERSION=v0.1.6
|
||||||
|
|
||||||
|
# Pull VM informant and set up inittab
|
||||||
|
FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
|
||||||
|
|
||||||
|
RUN set -e \
|
||||||
|
&& rm -f /etc/inittab \
|
||||||
|
&& touch /etc/inittab
|
||||||
|
|
||||||
|
ADD vm-cgconfig.conf /etc/cgconfig.conf
|
||||||
|
RUN set -e \
|
||||||
|
&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
|
||||||
|
&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant --auto-restart --cgroup=neon-postgres'" >> /etc/inittab
|
||||||
|
|
||||||
|
# Combine, starting from non-VM compute node image.
|
||||||
|
FROM $SRC_IMAGE as base
|
||||||
|
|
||||||
|
# Temporarily set user back to root so we can run apt update and adduser
|
||||||
|
USER root
|
||||||
|
RUN apt update && \
|
||||||
|
apt install --no-install-recommends -y \
|
||||||
|
cgroup-tools
|
||||||
|
RUN adduser vm-informant --disabled-password --no-create-home
|
||||||
|
USER postgres
|
||||||
|
|
||||||
|
COPY --from=informant /etc/inittab /etc/inittab
|
||||||
|
COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
|
||||||
|
|
||||||
|
ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]
|
||||||
@@ -44,7 +44,6 @@ use tracing::{error, info};
|
|||||||
|
|
||||||
use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
|
use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
|
||||||
use compute_tools::http::api::launch_http_server;
|
use compute_tools::http::api::launch_http_server;
|
||||||
use compute_tools::informant::spawn_vm_informant_if_present;
|
|
||||||
use compute_tools::logger::*;
|
use compute_tools::logger::*;
|
||||||
use compute_tools::monitor::launch_monitor;
|
use compute_tools::monitor::launch_monitor;
|
||||||
use compute_tools::params::*;
|
use compute_tools::params::*;
|
||||||
@@ -141,8 +140,6 @@ fn main() -> Result<()> {
|
|||||||
// requests, while configuration is still in progress.
|
// requests, while configuration is still in progress.
|
||||||
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
|
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
|
||||||
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
|
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
|
||||||
// Also spawn the thread responsible for handling the VM informant -- if it's present
|
|
||||||
let _vm_informant_handle = spawn_vm_informant_if_present().expect("cannot launch VM informant");
|
|
||||||
|
|
||||||
// Start Postgres
|
// Start Postgres
|
||||||
let mut delay_exit = false;
|
let mut delay_exit = false;
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
use std::path::Path;
|
|
||||||
use std::process;
|
|
||||||
use std::thread;
|
|
||||||
use std::time::Duration;
|
|
||||||
use tracing::{info, warn};
|
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
|
||||||
|
|
||||||
const VM_INFORMANT_PATH: &str = "/bin/vm-informant";
|
|
||||||
const RESTART_INFORMANT_AFTER_MILLIS: u64 = 5000;
|
|
||||||
|
|
||||||
/// Launch a thread to start the VM informant if it's present (and restart, on failure)
|
|
||||||
pub fn spawn_vm_informant_if_present() -> Result<Option<thread::JoinHandle<()>>> {
|
|
||||||
let exists = Path::new(VM_INFORMANT_PATH)
|
|
||||||
.try_exists()
|
|
||||||
.context("could not check if path exists")?;
|
|
||||||
|
|
||||||
if !exists {
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Some(
|
|
||||||
thread::Builder::new()
|
|
||||||
.name("run-vm-informant".into())
|
|
||||||
.spawn(move || run_informant())?,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn run_informant() -> ! {
|
|
||||||
let restart_wait = Duration::from_millis(RESTART_INFORMANT_AFTER_MILLIS);
|
|
||||||
|
|
||||||
info!("starting VM informant");
|
|
||||||
|
|
||||||
loop {
|
|
||||||
let mut cmd = process::Command::new(VM_INFORMANT_PATH);
|
|
||||||
// Block on subprocess:
|
|
||||||
let result = cmd.status();
|
|
||||||
|
|
||||||
match result {
|
|
||||||
Err(e) => warn!("failed to run VM informant at {VM_INFORMANT_PATH:?}: {e}"),
|
|
||||||
Ok(status) if !status.success() => {
|
|
||||||
warn!("{VM_INFORMANT_PATH} exited with code {status:?}, retrying")
|
|
||||||
}
|
|
||||||
Ok(_) => info!("{VM_INFORMANT_PATH} ended gracefully (unexpectedly). Retrying"),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait before retrying
|
|
||||||
thread::sleep(restart_wait);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -8,7 +8,6 @@ pub mod http;
|
|||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod logger;
|
pub mod logger;
|
||||||
pub mod compute;
|
pub mod compute;
|
||||||
pub mod informant;
|
|
||||||
pub mod monitor;
|
pub mod monitor;
|
||||||
pub mod params;
|
pub mod params;
|
||||||
pub mod pg_helpers;
|
pub mod pg_helpers;
|
||||||
|
|||||||
12
vm-cgconfig.conf
Normal file
12
vm-cgconfig.conf
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Configuration for cgroups in VM compute nodes
|
||||||
|
group neon-postgres {
|
||||||
|
perm {
|
||||||
|
admin {
|
||||||
|
uid = vm-informant;
|
||||||
|
}
|
||||||
|
task {
|
||||||
|
gid = users;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
memory {}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user