mirror of
https://github.com/neondatabase/neon.git
synced 2026-02-11 06:30:37 +00:00
Compare commits
8 Commits
conrad/ref
...
wip-neonvm
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4fb9bc4b01 | ||
|
|
5cb17cb384 | ||
|
|
9a5a7ebac2 | ||
|
|
7263076c17 | ||
|
|
6dbe4184f3 | ||
|
|
88595db586 | ||
|
|
7c39d90e98 | ||
|
|
2c680bad4d |
@@ -8,8 +8,10 @@
|
||||
!scripts/ninstall.sh
|
||||
!docker-compose/run-tests.sh
|
||||
|
||||
!.cargo/config.toml
|
||||
|
||||
# Directories
|
||||
!.cargo/
|
||||
#!.cargo
|
||||
!.config/
|
||||
!compute/
|
||||
!compute_tools/
|
||||
|
||||
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -1315,6 +1315,9 @@ dependencies = [
|
||||
"flate2",
|
||||
"futures",
|
||||
"http 1.1.0",
|
||||
"http-body-util",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
|
||||
@@ -5,6 +5,39 @@
|
||||
# We use Debian as the base for all the steps. The production images use Debian bookworm
|
||||
# for v17, and Debian bullseye for older PostgreSQL versions.
|
||||
#
|
||||
# This same Dockerfile can be used to build several kinds of target images:
|
||||
#
|
||||
# Target: compute-node
|
||||
# --------------------
|
||||
#
|
||||
# Contains compute_ctl, Postgres, extensions, pgbouncer, and metrics exporters.
|
||||
# Everything that's needed to provide the user-visible services of a compute
|
||||
# endpoint. The target produces a docker image that's suitable for running
|
||||
# compute_ctl in a docker container (compute_ctl is set as the entrypoint). The
|
||||
# other services like pgbouncer are not launched when you execute this
|
||||
# container, although the binaries are included in the image.
|
||||
#
|
||||
# When building old-style VM images with vm-builder, this is the input to
|
||||
# vm-builder. See the vm-compute-node-image job in the build_and_test.yml github
|
||||
# workflow for how that's done. For backwards-compatibility with the github
|
||||
# action and any other scripts lying around, this is the default target.
|
||||
#
|
||||
# Target: compute-node-bootable
|
||||
# -----------------------------
|
||||
#
|
||||
# Produces an image with systemd, and systemd configuration to run all the
|
||||
# services. This is suitable for running in a VM. For testing, it can also be
|
||||
# launched in a docker container with:
|
||||
#
|
||||
# docker run --name=compute-node --privileged neondatabase/compute-node-bootable:local /sbin/init
|
||||
#
|
||||
# Target: compute-node-neonvm-payload
|
||||
# -----------------------------------
|
||||
#
|
||||
# Processes 'compute-node-bootable' into a QCOW2 image, suitable for loading with
|
||||
# neonvm-guest
|
||||
#
|
||||
#
|
||||
# ## Intermediary layers
|
||||
#
|
||||
# build-tools: This contains Rust compiler toolchain and other tools needed at compile
|
||||
@@ -62,19 +95,6 @@
|
||||
# The configuration files for the metrics exporters are under etc/ directory. We use
|
||||
# a templating system to handle variations between different PostgreSQL versions,
|
||||
# building slightly different config files for each PostgreSQL version.
|
||||
#
|
||||
#
|
||||
# ## Final image
|
||||
#
|
||||
# The final image puts together the PostgreSQL binaries (pg-build), the compute tools
|
||||
# (compute-tools), all the extensions (all-extensions) and the extra components into
|
||||
# one image.
|
||||
#
|
||||
# VM image: The final image built by this dockerfile isn't actually the final image that
|
||||
# we use in computes VMs. There's an extra step that adds some files and makes other
|
||||
# small adjustments, and builds the QCOW2 filesystem image suitable for using in a VM.
|
||||
# That step is done by the 'vm-builder' tool. See the vm-compute-node-image job in the
|
||||
# build_and_test.yml github workflow for how that's done.
|
||||
|
||||
ARG PG_VERSION
|
||||
ARG REPOSITORY=neondatabase
|
||||
@@ -1624,7 +1644,19 @@ ENV BUILD_TAG=$BUILD_TAG
|
||||
|
||||
USER nonroot
|
||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||
COPY --chown=nonroot . .
|
||||
COPY --chown=nonroot Cargo.lock Cargo.toml rust-toolchain.toml .
|
||||
COPY .cargo .cargo
|
||||
COPY .config .config
|
||||
COPY compute_tools compute_tools
|
||||
COPY control_plane control_plane
|
||||
COPY libs libs
|
||||
COPY pageserver pageserver
|
||||
COPY proxy proxy
|
||||
COPY storage_scrubber storage_scrubber
|
||||
COPY safekeeper safekeeper
|
||||
COPY storage_broker storage_broker
|
||||
COPY storage_controller storage_controller
|
||||
COPY workspace_hack workspace_hack
|
||||
RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
|
||||
--mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \
|
||||
--mount=type=cache,uid=1000,target=/home/nonroot/target \
|
||||
@@ -1652,6 +1684,7 @@ RUN set -e \
|
||||
autoconf \
|
||||
automake \
|
||||
libevent-dev \
|
||||
libsystemd-dev \
|
||||
libtool \
|
||||
pkg-config \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
@@ -1662,7 +1695,7 @@ RUN set -e \
|
||||
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
|
||||
&& cd pgbouncer \
|
||||
&& ./autogen.sh \
|
||||
&& ./configure --prefix=/usr/local/pgbouncer --without-openssl \
|
||||
&& ./configure --prefix=/usr/local/pgbouncer --with-systemd --without-openssl \
|
||||
&& make -j $(nproc) dist_man_MANS= \
|
||||
&& make install dist_man_MANS=
|
||||
|
||||
@@ -1718,6 +1751,50 @@ RUN set -ex; \
|
||||
/tmp/awscliv2/aws/install; \
|
||||
rm -rf /tmp/awscliv2.zip /tmp/awscliv2
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "cgroup-tools"
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
# Build cgroup-tools
|
||||
#
|
||||
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
|
||||
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
|
||||
# requires cgroup v2, so we'll build cgroup-tools ourselves.
|
||||
#
|
||||
# At time of migration to bookworm (2024-10-09), debian has a version of libcgroup/cgroup-tools 2.0.2,
|
||||
# and it _probably_ can be used as-is. However, we'll build it ourselves to minimise the changeset
|
||||
# for debian version migration.
|
||||
#
|
||||
FROM debian:bookworm-slim as cgroup-tools
|
||||
ENV LIBCGROUP_VERSION=v2.0.3
|
||||
|
||||
RUN set -exu \
|
||||
&& apt update \
|
||||
&& apt install --no-install-recommends -y \
|
||||
git \
|
||||
ca-certificates \
|
||||
automake \
|
||||
cmake \
|
||||
make \
|
||||
gcc \
|
||||
byacc \
|
||||
flex \
|
||||
libtool \
|
||||
libpam0g-dev \
|
||||
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
|
||||
&& INSTALL_DIR="/libcgroup-install" \
|
||||
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
|
||||
&& cd libcgroup \
|
||||
# extracted from bootstrap.sh, with modified flags:
|
||||
&& (test -d m4 || mkdir m4) \
|
||||
&& autoreconf -fi \
|
||||
&& rm -rf autom4te.cache \
|
||||
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
|
||||
# actually build the thing...
|
||||
&& make install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Clean up postgres folder before inclusion
|
||||
@@ -1809,11 +1886,14 @@ ENV PGDATABASE=postgres
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Final layer
|
||||
# Put it all together into the final image
|
||||
# Target: compute-node
|
||||
#
|
||||
# Put it all together into the final 'compute-node' image. It can be executed directly
|
||||
# with docker, to run the 'compute_ctl'. The other services will not be launched in
|
||||
# that case.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $BASE_IMAGE_SHA
|
||||
FROM $BASE_IMAGE_SHA as compute-node-build
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
@@ -1870,7 +1950,6 @@ RUN apt update && \
|
||||
procps \
|
||||
ca-certificates \
|
||||
$VERSION_INSTALLS && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
|
||||
# Add user postgres
|
||||
@@ -1890,6 +1969,13 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
# aws cli is used by fast_import
|
||||
COPY --from=awscli /usr/local/aws-cli /usr/local/aws-cli
|
||||
|
||||
# locally built cgroup-tools
|
||||
COPY --from=cgroup-tools /libcgroup-install/bin/* /usr/local/bin/
|
||||
COPY --from=cgroup-tools /libcgroup-install/lib/* /usr/local/lib/
|
||||
COPY --from=cgroup-tools /libcgroup-install/sbin/* /usr/local/sbin/
|
||||
|
||||
COPY --chmod=0644 compute/etc/cgconfig.conf /etc/cgconfig.conf
|
||||
|
||||
# pgbouncer and its config
|
||||
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
|
||||
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
|
||||
@@ -1917,6 +2003,92 @@ COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neo
|
||||
# Make the libraries we built available
|
||||
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
|
||||
|
||||
FROM compute-node-build as compute-node
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
RUN apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# If this image is executed as a stand-alone docker container, these are used.
|
||||
ENV LANG=en_US.utf8
|
||||
USER postgres
|
||||
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Target: compute-node-bootable
|
||||
#
|
||||
# A "bootable" image which includes systemd, configured to launch all the services.
|
||||
#
|
||||
# For testing purposes, this can be run directly with docker:
|
||||
#
|
||||
# docker run --name=compute-node --privileged neondatabase/compute-node-bootable:local /sbin/init
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM compute-node-build as compute-node-bootable
|
||||
|
||||
# dbus is required so that you can "machinectl shell" into this when run in a systemd-nspawn
|
||||
# container
|
||||
RUN apt install --no-install-recommends -y \
|
||||
systemd \
|
||||
systemd-sysv \
|
||||
dbus && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
## copy systemd unit files for the services and enable them
|
||||
COPY compute/etc/systemd/ /etc/systemd
|
||||
RUN systemctl enable \
|
||||
systemd-networkd.service \
|
||||
pgbouncer \
|
||||
postgres_exporter sql_exporter sql_exporter-autoscaling \
|
||||
local_proxy \
|
||||
compute_ctl \
|
||||
chown-pgdata \
|
||||
make-cgroup-procs-writable \
|
||||
load-cgconfig.service
|
||||
|
||||
ENTRYPOINT ["/sbin/init"]
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Target: compute-node-neonvm-payload
|
||||
#
|
||||
# Contains 'compute-node-bootable', as a QCOW2 disk image, suitable for booting with
|
||||
# neonvm-guest
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
# Wrap the same in a QCOW2 image
|
||||
FROM debian:bookworm-slim AS compute-node-neonvm-payload-build
|
||||
ARG DISK_SIZE=5G
|
||||
# tools for qemu disk creation. procps is for sysctl, needed because neonvm-controller
|
||||
# launches this in an init container that runs sysctl.
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
qemu-utils \
|
||||
e2fsprogs \
|
||||
procps
|
||||
|
||||
COPY --from=compute-node-bootable / /rootdisk/
|
||||
|
||||
RUN set -e \
|
||||
&& mkfs.ext4 -L neonvm-payload -d /rootdisk /disk.raw ${DISK_SIZE} \
|
||||
&& qemu-img convert -f raw -O qcow2 -o cluster_size=2M,lazy_refcounts=on /disk.raw /neonvm-payload.qcow2
|
||||
|
||||
FROM debian:bookworm-slim AS compute-node-neonvm-payload

# procps is for sysctl, needed because neonvm-controller launches this in an init
# container that runs sysctl.
#
# The apt caches are cleaned up in the same RUN instruction as the install: files that
# are deleted in a later layer still take up space in the layer that created them.
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
    procps && \
    apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

COPY --from=compute-node-neonvm-payload-build /neonvm-payload.qcow2 /
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# make 'compute-node' the default target
|
||||
#
|
||||
#########################################################################################
|
||||
FROM compute-node
|
||||
|
||||
12
compute/etc/cgconfig.conf
Normal file
12
compute/etc/cgconfig.conf
Normal file
@@ -0,0 +1,12 @@
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
group neon-postgres {
|
||||
perm {
|
||||
admin {
|
||||
uid = postgres;
|
||||
}
|
||||
task {
|
||||
gid = users;
|
||||
}
|
||||
}
|
||||
memory {}
|
||||
}
|
||||
12
compute/etc/systemd/system/chown-pgdata.service
Normal file
12
compute/etc/systemd/system/chown-pgdata.service
Normal file
@@ -0,0 +1,12 @@
|
||||
# When running under neonvm-guest a separate disk is mounted to
|
||||
# /var/db/postgres/compute. Make it owned by the postgres user.
|
||||
[Unit]
|
||||
Description=Change owner of /var/db/postgres/compute to postgres
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=chown postgres:postgres /var/db/postgres/compute
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=compute_ctl.service
|
||||
16
compute/etc/systemd/system/compute_ctl.service
Normal file
16
compute/etc/systemd/system/compute_ctl.service
Normal file
@@ -0,0 +1,16 @@
|
||||
[Unit]
|
||||
Description=Neon PostgreSQL launcher tool
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=exec
|
||||
User=postgres
|
||||
# neonvm-runner mounts and populates this directory based on the k8s VM spec
|
||||
EnvironmentFile=/neonvm/runtime/command.env
|
||||
ExecStart=/usr/local/bin/compute_ctl $COMPUTE_CTL_ARGS
|
||||
Restart=on-failure
|
||||
Delegate=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
10
compute/etc/systemd/system/load-cgconfig.service
Normal file
10
compute/etc/systemd/system/load-cgconfig.service
Normal file
@@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=Create neonvm-postgres cgroup
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=cgconfigparser -l /etc/cgconfig.conf
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=compute_ctl.service
|
||||
13
compute/etc/systemd/system/local_proxy.service
Normal file
13
compute/etc/systemd/system/local_proxy.service
Normal file
@@ -0,0 +1,13 @@
|
||||
[Unit]
|
||||
Description=Neon local proxy
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=postgres
|
||||
ExecStart=/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432
|
||||
Restart=on-failure
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -0,0 +1,20 @@
|
||||
# Allow all users to move processes to/from the root cgroup.
|
||||
#
|
||||
# This is required in order to be able to 'cgexec' anything, if the entrypoint is not being run as
|
||||
# root, because moving tasks between one cgroup and another *requires write access to the
|
||||
# cgroup.procs file of the common ancestor*, and because the entrypoint isn't already in a cgroup,
|
||||
# any new tasks are automatically placed in the top-level cgroup.
|
||||
#
|
||||
# This *would* be bad for security, if we relied on cgroups for security; but instead because they
|
||||
# are just used for cooperative signaling, this should be mostly ok.
|
||||
|
||||
[Unit]
|
||||
Description=Allow all users to move processes to/from the root cgroup.
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=chmod go+w /sys/fs/cgroup/cgroup.procs
|
||||
RemainAfterExit=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=compute_ctl.service
|
||||
45
compute/etc/systemd/system/pgbouncer.service
Normal file
45
compute/etc/systemd/system/pgbouncer.service
Normal file
@@ -0,0 +1,45 @@
|
||||
# Example systemd service unit for PgBouncer
|
||||
#
|
||||
# - Adjust the paths in ExecStart for your installation.
|
||||
#
|
||||
# - For systemd 253 and later, PgBouncer supports Type=notify-reload
|
||||
# (instead of Type=notify with ExecReload= command).
|
||||
#
|
||||
# - The User setting requires careful consideration. PgBouncer needs
|
||||
# to be able to place a Unix-domain socket file where PostgreSQL
|
||||
# clients will look for it. In the olden days, this was in /tmp,
|
||||
# but systems using systemd now prefer something like
|
||||
# /var/run/postgresql/. But then some systems also lock down that
|
||||
# directory so that only the postgres user can write to it. That
|
||||
# means you need to either
|
||||
#
|
||||
# - run PgBouncer as the postgres user, or
|
||||
#
|
||||
# - create a separate user and add it to the postgres group and
|
||||
# make /var/run/postgresql/ group-writable, or
|
||||
#
|
||||
# - use systemd to create the sockets; see pgbouncer.socket nearby.
|
||||
#
|
||||
# For packagers and deployment systems, this requires some
|
||||
# coordination between the PgBouncer and the PostgreSQL
|
||||
# packages/components.
|
||||
#
|
||||
[Unit]
|
||||
Description=connection pooler for PostgreSQL
|
||||
Documentation=man:pgbouncer(1)
|
||||
Documentation=https://www.pgbouncer.org/
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
#Requires=pgbouncer.socket
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=postgres
|
||||
ExecStart=/usr/local/bin/pgbouncer /etc/pgbouncer.ini
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillSignal=SIGINT
|
||||
Restart=on-failure
|
||||
#LimitNOFILE=1024
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
14
compute/etc/systemd/system/postgres_exporter.service
Normal file
14
compute/etc/systemd/system/postgres_exporter.service
Normal file
@@ -0,0 +1,14 @@
|
||||
[Unit]
|
||||
Description=Postgres metrics exporter
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=nobody
|
||||
Environment=DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter"
|
||||
ExecStart=/bin/postgres_exporter --config.file=/etc/postgres_exporter.yml
|
||||
Restart=on-failure
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
13
compute/etc/systemd/system/sql_exporter-autoscaling.service
Normal file
13
compute/etc/systemd/system/sql_exporter-autoscaling.service
Normal file
@@ -0,0 +1,13 @@
|
||||
[Unit]
|
||||
Description=SQL metrics exporter (autoscaling)
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=nobody
|
||||
ExecStart=/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499
|
||||
Restart=on-failure
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
13
compute/etc/systemd/system/sql_exporter.service
Normal file
13
compute/etc/systemd/system/sql_exporter.service
Normal file
@@ -0,0 +1,13 @@
|
||||
[Unit]
|
||||
Description=SQL metrics exporter
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
User=nobody
|
||||
ExecStart=/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399
|
||||
Restart=on-failure
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -24,6 +24,9 @@ fail.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
http.workspace = true
|
||||
http-body-util.workspace = true
|
||||
hyper-util.workspace = true
|
||||
hyper.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
|
||||
@@ -47,9 +47,9 @@ use std::{thread, time::Duration};
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::http::server::Server;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use compute_tools::neonvmd_client::{resize_swap, set_disk_quota};
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
use signal_hook::{consts::SIGINT, iterator::Signals};
|
||||
use tracing::{error, info, warn};
|
||||
@@ -67,7 +67,6 @@ use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::spec::*;
|
||||
use compute_tools::swap::resize_swap;
|
||||
use rlimit::{setrlimit, Resource};
|
||||
use utils::failpoint_support;
|
||||
|
||||
@@ -147,6 +146,7 @@ struct Cli {
|
||||
#[arg(long, action = clap::ArgAction::SetTrue)]
|
||||
pub resize_swap_on_bind: bool,
|
||||
|
||||
/// This is no longer used for anything. It's kept for now just for backwards-compatibility.
|
||||
#[arg(long)]
|
||||
pub set_disk_quota_for_fs: Option<String>,
|
||||
|
||||
@@ -474,10 +474,8 @@ fn start_postgres(
|
||||
}
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
|
||||
if let Some(disk_quota_bytes) = disk_quota_bytes {
|
||||
match set_disk_quota(disk_quota_bytes) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
use anyhow::Context;
|
||||
|
||||
pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";
|
||||
|
||||
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
|
||||
/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
|
||||
pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
|
||||
let size_kb = size_bytes / 1024;
|
||||
// run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(DISK_QUOTA_BIN)
|
||||
.arg(size_kb.to_string())
|
||||
.arg(fs_mountpoint)
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => Err(anyhow::anyhow!("process exited with {status}")),
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| format!("could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`"))
|
||||
}
|
||||
@@ -11,7 +11,6 @@ pub mod http;
|
||||
pub mod logger;
|
||||
pub mod catalog;
|
||||
pub mod compute;
|
||||
pub mod disk_quota;
|
||||
pub mod extension_server;
|
||||
pub mod installed_extensions;
|
||||
pub mod local_proxy;
|
||||
@@ -19,9 +18,9 @@ pub mod lsn_lease;
|
||||
pub mod metrics;
|
||||
mod migration;
|
||||
pub mod monitor;
|
||||
pub mod neonvmd_client;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod spec;
|
||||
mod spec_apply;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
|
||||
102
compute_tools/src/neonvmd_client.rs
Normal file
102
compute_tools/src/neonvmd_client.rs
Normal file
@@ -0,0 +1,102 @@
|
||||
use anyhow::Context;
|
||||
use hyper::client::conn;
|
||||
use hyper::client::conn::http1::SendRequest;
|
||||
use hyper::{Request, StatusCode};
|
||||
use hyper_util::rt::TokioIo;
|
||||
use tracing::warn;
|
||||
|
||||
const NEONVM_DAEMON_CONTROL_SOCKET_PATH: &str = "/neonvm/run/neonvm-daemon-socket";
|
||||
|
||||
/// Open a connection to neonvm-daemon's control socket, prepare to send
|
||||
/// requests to it with hyper.
|
||||
async fn connect_neonvm_daemon<B>() -> anyhow::Result<SendRequest<B>>
|
||||
where
|
||||
B: hyper::body::Body + 'static + Send,
|
||||
B::Data: Send,
|
||||
B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
|
||||
{
|
||||
let mut attempts = 0;
|
||||
let stream = loop {
|
||||
match tokio::net::UnixStream::connect(NEONVM_DAEMON_CONTROL_SOCKET_PATH).await {
|
||||
Ok(stream) => break stream,
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound && attempts < 50 => {
|
||||
// Retry
|
||||
warn!("neonvm-daemon control socket not found, retrying...");
|
||||
attempts += 1;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
}
|
||||
Err(err) => Err(err).context("opening neonvm-daemon control socket")?,
|
||||
}
|
||||
};
|
||||
let io = TokioIo::new(stream);
|
||||
let (request_sender, connection) = conn::http1::handshake(io).await.unwrap();
|
||||
|
||||
// spawn a task to poll the connection and drive the HTTP state
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("Error in connection: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
Ok(request_sender)
|
||||
}
|
||||
|
||||
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let rt = tokio::runtime::Handle::current();
|
||||
rt.block_on(resize_swap_async(size_bytes))
|
||||
}
|
||||
|
||||
pub async fn resize_swap_async(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let mut neonvmd = connect_neonvm_daemon().await?;
|
||||
|
||||
// Passing 'once' causes neonvm-daemon to reject any future resize requests
|
||||
let request = Request::builder()
|
||||
.method("POST")
|
||||
.uri("/resize-swap-once")
|
||||
.header("Host", "localhost") // hyper requires Host, even though the server won't care
|
||||
.body(format!("{}", size_bytes))
|
||||
.unwrap();
|
||||
|
||||
let resp = neonvmd.send_request(request).await?;
|
||||
let status = resp.status();
|
||||
match status {
|
||||
StatusCode::OK => Ok(()),
|
||||
StatusCode::CONFLICT => {
|
||||
// 409 Conflict means that the swap was already resized. That happens if the
|
||||
// compute_ctl restarts within the VM. That's considered OK.
|
||||
warn!("Swap was already resized");
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"error resizing swap: {}",
|
||||
status.to_string()
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_disk_quota(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let rt = tokio::runtime::Handle::current();
|
||||
rt.block_on(set_disk_quota_async(size_bytes))
|
||||
}
|
||||
|
||||
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
|
||||
pub async fn set_disk_quota_async(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let mut neonvmd = connect_neonvm_daemon().await?;
|
||||
|
||||
let request = Request::builder()
|
||||
.method("POST")
|
||||
.uri("/set-disk-quota")
|
||||
.header("Host", "localhost") // hyper requires Host, even though the server won't care
|
||||
.body(format!("{}", size_bytes))
|
||||
.unwrap();
|
||||
|
||||
let resp = neonvmd.send_request(request).await?;
|
||||
let status = resp.status();
|
||||
match status {
|
||||
StatusCode::OK => Ok(()),
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"error setting disk quota: {}",
|
||||
status.to_string()
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use tracing::warn;
|
||||
|
||||
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
|
||||
|
||||
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
|
||||
// run `/neonvm/bin/resize-swap --once {size_bytes}`
|
||||
//
|
||||
// Passing '--once' causes resize-swap to delete itself after successful completion, which
|
||||
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
|
||||
// postgres is running.
|
||||
//
|
||||
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(RESIZE_SWAP_BIN)
|
||||
.arg("--once")
|
||||
.arg(size_bytes.to_string())
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => {
|
||||
// The command failed. Maybe it was because the resize-swap file doesn't exist?
|
||||
// The --once flag causes it to delete itself on success so we don't disable swap
|
||||
// while postgres is running; maybe this is fine.
|
||||
match Path::new(RESIZE_SWAP_BIN).try_exists() {
|
||||
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
|
||||
// The path doesn't exist; we're actually ok
|
||||
Ok(false) => {
|
||||
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| {
|
||||
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user