mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
## Problem `postgres_exporter` has database collector enabled by default and it doesn't filter out invalid databases, see06a553c816/collector/pg_database.go (L67)so if it hits one, it starts spamming logs ``` ERROR: [NEON_SMGR] [reqid d9700000018] could not read db size of db 705302 from page server at lsn 5/A2457EB0 ``` ## Summary of changes We don't use `pg_database_size_bytes` metric anyway, see5e19b3fd89/apps/base/compute-metrics/scrape-compute-pg-exporter-neon.yaml (L29)so just turn it off by passing `--no-collector.database`.
165 lines
7.2 KiB
YAML
165 lines
7.2 KiB
YAML
# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
|
|
---
|
|
commands:
|
|
- name: cgconfigparser
|
|
user: root
|
|
sysvInitAction: sysinit
|
|
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
|
|
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
|
|
# running it as root.
|
|
- name: chmod-resize-swap
|
|
user: root
|
|
sysvInitAction: sysinit
|
|
shell: 'chmod 711 /neonvm/bin/resize-swap'
|
|
- name: chmod-set-disk-quota
|
|
user: root
|
|
sysvInitAction: sysinit
|
|
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
|
|
- name: pgbouncer
|
|
user: postgres
|
|
sysvInitAction: respawn
|
|
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0'
|
|
- name: local_proxy
|
|
user: postgres
|
|
sysvInitAction: respawn
|
|
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
|
- name: postgres-exporter
|
|
user: nobody
|
|
sysvInitAction: respawn
|
|
# Turn off database collector (`--no-collector.database`), we don't use `pg_database_size_bytes` metric anyway, see
|
|
# https://github.com/neondatabase/flux-fleet/blob/5e19b3fd897667b70d9a7ad4aa06df0ca22b49ff/apps/base/compute-metrics/scrape-compute-pg-exporter-neon.yaml#L29
|
|
# but it's enabled by default and it doesn't filter out invalid databases, see
|
|
# https://github.com/prometheus-community/postgres_exporter/blob/06a553c8166512c9d9c5ccf257b0f9bba8751dbc/collector/pg_database.go#L67
|
|
# so if it hits one, it starts spamming logs
|
|
# ERROR: [NEON_SMGR] [reqid d9700000018] could not read db size of db 705302 from page server at lsn 5/A2457EB0
|
|
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --no-collector.database --config.file=/etc/postgres_exporter.yml'
|
|
- name: pgbouncer-exporter
|
|
user: postgres
|
|
sysvInitAction: respawn
|
|
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
|
|
- name: sql-exporter
|
|
user: nobody
|
|
sysvInitAction: respawn
|
|
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
|
|
- name: sql-exporter-autoscaling
|
|
user: nobody
|
|
sysvInitAction: respawn
|
|
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
|
# Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
|
|
# We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
|
|
# use a different path for the socket. The symlink actually points to our custom path.
|
|
- name: rsyslogd-socket-symlink
|
|
user: root
|
|
sysvInitAction: sysinit
|
|
shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
|
|
- name: rsyslogd
|
|
user: postgres
|
|
sysvInitAction: respawn
|
|
shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf'
|
|
shutdownHook: |
|
|
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
|
|
files:
|
|
- filename: compute_ctl-sudoers
|
|
content: |
|
|
# Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
|
|
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
|
|
# resolve host" log messages that they generate.
|
|
Defaults !fqdn
|
|
|
|
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
|
|
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
|
|
# regardless of hostname (ALL)
|
|
#
|
|
# Also allow it to shut down the VM. The fast_import job does that when it's finished.
|
|
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd
|
|
- filename: cgconfig.conf
|
|
content: |
|
|
# Configuration for cgroups in VM compute nodes
|
|
group neon-postgres {
|
|
perm {
|
|
admin {
|
|
uid = postgres;
|
|
}
|
|
task {
|
|
gid = users;
|
|
}
|
|
}
|
|
memory {}
|
|
}
|
|
# Create dummy rsyslog config, because it refuses to start without at least one action configured.
|
|
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
|
- filename: compute_rsyslog.conf
|
|
content: |
|
|
# Syslock.Name specifies a non-default pipe location that is writeable for the postgres user.
|
|
module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
|
|
|
|
*.* /dev/null
|
|
$IncludeConfig /etc/rsyslog.d/*.conf
|
|
build: |
|
|
# Build cgroup-tools
|
|
#
|
|
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
|
|
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
|
|
# requires cgroup v2, so we'll build cgroup-tools ourselves.
|
|
ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1
|
|
FROM debian@$BULLSEYE_SLIM_SHA as libcgroup-builder
|
|
ENV LIBCGROUP_VERSION=v2.0.3
|
|
|
|
RUN set -exu \
|
|
&& apt update \
|
|
&& apt install --no-install-recommends -y \
|
|
git \
|
|
ca-certificates \
|
|
automake \
|
|
cmake \
|
|
make \
|
|
gcc \
|
|
byacc \
|
|
flex \
|
|
libtool \
|
|
libpam0g-dev \
|
|
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
|
|
&& INSTALL_DIR="/libcgroup-install" \
|
|
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
|
|
&& cd libcgroup \
|
|
# extracted from bootstrap.sh, with modified flags:
|
|
&& (test -d m4 || mkdir m4) \
|
|
&& autoreconf -fi \
|
|
&& rm -rf autom4te.cache \
|
|
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
|
|
# actually build the thing...
|
|
&& make install
|
|
merge: |
|
|
# tweak nofile limits
|
|
RUN set -e \
|
|
&& echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
|
|
&& test ! -e /etc/security || ( \
|
|
echo '* - nofile 1048576' >>/etc/security/limits.conf \
|
|
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
|
|
)
|
|
|
|
# Allow postgres user (compute_ctl) to run swap resizer.
|
|
# Need to install sudo in order to allow this.
|
|
#
|
|
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
|
|
RUN set -e \
|
|
&& apt update \
|
|
&& apt install --no-install-recommends -y \
|
|
sudo \
|
|
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
|
COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers
|
|
|
|
COPY cgconfig.conf /etc/cgconfig.conf
|
|
|
|
RUN set -e \
|
|
&& chmod 0644 /etc/cgconfig.conf
|
|
|
|
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
|
|
RUN chmod 0666 /etc/compute_rsyslog.conf
|
|
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
|
|
|
|
|
|
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
|
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
|
|
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
|