diff --git a/compute/Dockerfile.compute-node b/compute/Dockerfile.compute-node index 6bf6fb650f..18c68c116a 100644 --- a/compute/Dockerfile.compute-node +++ b/compute/Dockerfile.compute-node @@ -280,7 +280,7 @@ FROM build-deps AS vector-pg-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY patches/pgvector.patch /pgvector.patch +COPY compute/patches/pgvector.patch /pgvector.patch # By default, pgvector Makefile uses `-march=native`. We don't want that, # because we build the images on different machines than where we run them. @@ -366,7 +366,7 @@ FROM build-deps AS rum-pg-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY patches/rum.patch /rum.patch +COPY compute/patches/rum.patch /rum.patch RUN case "${PG_VERSION}" in "v17") \ echo "v17 extensions are not supported yet. Quit" && exit 0;; \ @@ -1031,6 +1031,41 @@ FROM debian:bullseye-slim AS compute-tools-image COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl +######################################################################################### +# +# Layer "pgbouncer" +# +######################################################################################### + +FROM debian:bullseye-slim AS pgbouncer +RUN set -e \ + && apt-get update \ + && apt-get install -y \ + build-essential \ + git \ + libevent-dev \ + libtool \ + pkg-config + +# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) +ENV PGBOUNCER_TAG=pgbouncer_1_22_1 +RUN set -e \ + && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \ + && cd pgbouncer \ + && ./autogen.sh \ + && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \ + && make -j $(nproc) dist_man_MANS= \ + && make install dist_man_MANS= + +######################################################################################### +# +# Layers "postgres-exporter" and "sql-exporter" +# +######################################################################################### + +FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter +FROM burningalchemist/sql_exporter:0.13 AS sql-exporter + ######################################################################################### # # Clean up postgres folder before inclusion @@ -1078,7 +1113,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src COPY --from=rum-pg-build /rum.tar.gz /ext-src -COPY patches/rum.patch /ext-src +COPY compute/patches/rum.patch /ext-src #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src COPY --from=prefix-pg-build /prefix.tar.gz /ext-src @@ -1086,9 +1121,9 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src -COPY patches/pg_hint_plan.patch /ext-src +COPY compute/patches/pg_hint_plan.patch /ext-src COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src -COPY patches/pg_cron.patch /ext-src +COPY compute/patches/pg_cron.patch /ext-src #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src #COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src @@ -1097,7 +1132,7 
@@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src -COPY patches/pg_anon.patch /ext-src +COPY compute/patches/pg_anon.patch /ext-src COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src RUN case "${PG_VERSION}" in "v17") \ @@ -1160,9 +1195,23 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl +# pgbouncer and its config +COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer +COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini + +# Metrics exporter binaries and configuration files +COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter +COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter + +COPY --chmod=0644 compute/etc/sql_exporter.yml /etc/sql_exporter.yml +COPY --chmod=0644 compute/etc/neon_collector.yml /etc/neon_collector.yml +COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml +COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml + # Create remote extension download directory RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions + # Install: # libreadline8 for psql # libicu67, locales for collations (including ICU and plpgsql_check) diff --git a/compute/README.md b/compute/README.md index 981d290fc0..bb1e42ab53 100644 --- a/compute/README.md +++ b/compute/README.md @@ -8,6 +8,9 @@ vm-image-spec.yaml Instructions for vm-builder, to turn the compute-node image into corresponding vm-compute-node image. +etc/ + Configuration files included in /etc in the compute image + patches/ Some extensions need to be patched to work with Neon. This directory contains such patches. 
They are applied to the extension source code before building.
diff --git a/compute/etc/neon_collector.yml b/compute/etc/neon_collector.yml
new file mode 100644
index 0000000000..29be0958dd
--- /dev/null
+++ b/compute/etc/neon_collector.yml
@@ -0,0 +1,247 @@
+collector_name: neon_collector
+metrics:
+- metric_name: lfc_misses
+  type: gauge
+  help: 'lfc_misses'
+  key_labels:
+  values: [lfc_misses]
+  query: |
+    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
+
+- metric_name: lfc_used
+  type: gauge
+  help: 'LFC chunks used (chunk = 1MB)'
+  key_labels:
+  values: [lfc_used]
+  query: |
+    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
+
+- metric_name: lfc_hits
+  type: gauge
+  help: 'lfc_hits'
+  key_labels:
+  values: [lfc_hits]
+  query: |
+    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
+
+- metric_name: lfc_writes
+  type: gauge
+  help: 'lfc_writes'
+  key_labels:
+  values: [lfc_writes]
+  query: |
+    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
+
+- metric_name: lfc_cache_size_limit
+  type: gauge
+  help: 'LFC cache size limit in bytes'
+  key_labels:
+  values: [lfc_cache_size_limit]
+  query: |
+    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
+
+- metric_name: connection_counts
+  type: gauge
+  help: 'Connection counts'
+  key_labels:
+    - datname
+    - state
+  values: [count]
+  query: |
+    select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
+
+- metric_name: pg_stats_userdb
+  type: gauge
+  help: 'Stats for several oldest non-system dbs'
+  key_labels:
+    - datname
+  value_label: kind
+  values:
+    - db_size
+    - deadlocks
+    # Rows
+    - inserted
+    - updated
+    - deleted
+  # We export stats for 10 non-system databases. Without this limit
+  # it is too easy to abuse the system by creating lots of databases.
+  query: |
+    select pg_database_size(datname) as db_size, deadlocks,
+      tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
+      datname
+    from pg_stat_database
+    where datname IN (
+      select datname
+      from pg_database
+      where datname <> 'postgres' and not datistemplate
+      order by oid
+      limit 10
+    );
+
+- metric_name: max_cluster_size
+  type: gauge
+  help: 'neon.max_cluster_size setting'
+  key_labels:
+  values: [max_cluster_size]
+  query: |
+    select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
+
+- metric_name: db_total_size
+  type: gauge
+  help: 'Size of all databases'
+  key_labels:
+  values: [total]
+  query: |
+    select sum(pg_database_size(datname)) as total from pg_database;
+
+# DEPRECATED
+- metric_name: lfc_approximate_working_set_size
+  type: gauge
+  help: 'Approximate working set size in pages of 8192 bytes'
+  key_labels:
+  values: [approximate_working_set_size]
+  query: |
+    select neon.approximate_working_set_size(false) as approximate_working_set_size;
+
+- metric_name: lfc_approximate_working_set_size_windows
+  type: gauge
+  help: 'Approximate working set size in pages of 8192 bytes'
+  key_labels: [duration]
+  values: [size]
+  # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
+  # of durations in a pretty-printed form.
+ query: | + select + x as duration, + neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size + from + (values ('5m'),('15m'),('1h')) as t (x); + +- metric_name: compute_current_lsn + type: gauge + help: 'Current LSN of the database' + key_labels: + values: [lsn] + query: | + select + case + when pg_catalog.pg_is_in_recovery() + then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8 + else (pg_current_wal_lsn() - '0/0')::FLOAT8 + end as lsn; + +- metric_name: compute_receive_lsn + type: gauge + help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication' + key_labels: + values: [lsn] + query: | + SELECT + CASE + WHEN pg_catalog.pg_is_in_recovery() + THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8 + ELSE 0 + END AS lsn; + +- metric_name: replication_delay_bytes + type: gauge + help: 'Bytes between received and replayed LSN' + key_labels: + values: [replication_delay_bytes] + # We use a GREATEST call here because this calculation can be negative. + # The calculation is not atomic, meaning after we've gotten the receive + # LSN, the replay LSN may have advanced past the receive LSN we + # are using for the calculation. + query: | + SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes; + +- metric_name: replication_delay_seconds + type: gauge + help: 'Time since last LSN was replayed' + key_labels: + values: [replication_delay_seconds] + query: | + SELECT + CASE + WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 + ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())) + END AS replication_delay_seconds; + +- metric_name: checkpoints_req + type: gauge + help: 'Number of requested checkpoints' + key_labels: + values: [checkpoints_req] + query: | + SELECT checkpoints_req FROM pg_stat_bgwriter; + +- metric_name: checkpoints_timed + type: gauge + help: 'Number of scheduled checkpoints' + key_labels: + values: [checkpoints_timed] + query: | + SELECT checkpoints_timed FROM pg_stat_bgwriter; + +- metric_name: compute_logical_snapshot_files + type: gauge + help: 'Number of snapshot files in pg_logical/snapshot' + key_labels: + - timeline_id + values: [num_logical_snapshot_files] + query: | + SELECT + (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id, + -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These + -- temporary snapshot files are renamed to the actual snapshot files after they are + -- completely built. We only WAL-log the completely built snapshot files. + (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files; + +# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats. +# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly. + +# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad. 
+- metric_name: logical_slot_restart_lsn + type: gauge + help: 'restart_lsn of logical slots' + key_labels: + - slot_name + values: [restart_lsn] + query: | + select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn + from pg_replication_slots + where slot_type = 'logical'; + +- metric_name: compute_subscriptions_count + type: gauge + help: 'Number of logical replication subscriptions grouped by enabled/disabled' + key_labels: + - enabled + values: [subscriptions_count] + query: | + select subenabled::text as enabled, count(*) as subscriptions_count + from pg_subscription + group by subenabled; + +- metric_name: retained_wal + type: gauge + help: 'Retained WAL in inactive replication slots' + key_labels: + - slot_name + values: [retained_wal] + query: | + SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal + FROM pg_replication_slots + WHERE active = false; + +- metric_name: wal_is_lost + type: gauge + help: 'Whether or not the replication slot wal_status is lost' + key_labels: + - slot_name + values: [wal_is_lost] + query: | + SELECT slot_name, + CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost + FROM pg_replication_slots; + diff --git a/compute/etc/neon_collector_autoscaling.yml b/compute/etc/neon_collector_autoscaling.yml new file mode 100644 index 0000000000..5616264eba --- /dev/null +++ b/compute/etc/neon_collector_autoscaling.yml @@ -0,0 +1,55 @@ +collector_name: neon_collector_autoscaling +metrics: +- metric_name: lfc_misses + type: gauge + help: 'lfc_misses' + key_labels: + values: [lfc_misses] + query: | + select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; + +- metric_name: lfc_used + type: gauge + help: 'LFC chunks used (chunk = 1MB)' + key_labels: + values: [lfc_used] + query: | + select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; + +- metric_name: lfc_hits + type: gauge + help: 'lfc_hits' + key_labels: + values: [lfc_hits] + query: | + select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; + +- metric_name: lfc_writes + type: gauge + help: 'lfc_writes' + key_labels: + values: [lfc_writes] + query: | + select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; + +- metric_name: lfc_cache_size_limit + type: gauge + help: 'LFC cache size limit in bytes' + key_labels: + values: [lfc_cache_size_limit] + query: | + select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; + +- metric_name: lfc_approximate_working_set_size_windows + type: gauge + help: 'Approximate working set size in pages of 8192 bytes' + key_labels: [duration_seconds] + values: [size] + # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set + # size looking back 1..60 minutes, labeled with the number of minutes. 
+  query: |
+    select
+      x::text as duration_seconds,
+      neon.approximate_working_set_size_seconds(x) as size
+    from
+      (select generate_series * 60 as x from generate_series(1, 60)) as t (x);
diff --git a/compute/etc/pgbouncer.ini b/compute/etc/pgbouncer.ini
new file mode 100644
index 0000000000..cb994f961c
--- /dev/null
+++ b/compute/etc/pgbouncer.ini
@@ -0,0 +1,17 @@
+[databases]
+*=host=localhost port=5432 auth_user=cloud_admin
+[pgbouncer]
+listen_port=6432
+listen_addr=0.0.0.0
+auth_type=scram-sha-256
+auth_user=cloud_admin
+auth_dbname=postgres
+client_tls_sslmode=disable
+server_tls_sslmode=disable
+pool_mode=transaction
+max_client_conn=10000
+default_pool_size=64
+max_prepared_statements=0
+admin_users=postgres
+unix_socket_dir=/tmp/
+unix_socket_mode=0777
diff --git a/compute/etc/sql_exporter.yml b/compute/etc/sql_exporter.yml
new file mode 100644
index 0000000000..139d04468a
--- /dev/null
+++ b/compute/etc/sql_exporter.yml
@@ -0,0 +1,33 @@
+# Configuration for sql_exporter
+# Global defaults.
+global:
+  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
+  scrape_timeout: 10s
+  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
+  scrape_timeout_offset: 500ms
+  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
+  min_interval: 0s
+  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
+  # as will concurrent scrapes.
+  max_connections: 1
+  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
+  # always be the same as max_connections.
+  max_idle_connections: 1
+  # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
+  # If 0, connections are not closed due to a connection's age.
+  max_connection_lifetime: 5m
+
+# The target to monitor and the collectors to execute on it.
+target:
+  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
+  # the schema gets dropped or replaced to match the driver expected DSN format.
+  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
+
+  # Collectors (referenced by name) to execute on the target.
+  # Glob patterns are supported (see https://github.com/gobwas/glob for syntax).
+  collectors: [neon_collector]
+
+# Collector files specifies a list of globs. One collector definition is read from each matching file.
+# Glob patterns are supported (see https://github.com/gobwas/glob for syntax).
+collector_files:
+  - "neon_collector.yml"
diff --git a/compute/etc/sql_exporter_autoscaling.yml b/compute/etc/sql_exporter_autoscaling.yml
new file mode 100644
index 0000000000..044557233e
--- /dev/null
+++ b/compute/etc/sql_exporter_autoscaling.yml
@@ -0,0 +1,33 @@
+# Configuration for sql_exporter for autoscaling-agent
+# Global defaults.
+global:
+  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
+  scrape_timeout: 10s
+  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
+  scrape_timeout_offset: 500ms
+  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
+  min_interval: 0s
+  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
+  # as will concurrent scrapes.
+  max_connections: 1
+  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
+  # always be the same as max_connections.
+  max_idle_connections: 1
+  # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
+  # If 0, connections are not closed due to a connection's age.
+  max_connection_lifetime: 5m
+
+# The target to monitor and the collectors to execute on it.
+target:
+  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
+  # the schema gets dropped or replaced to match the driver expected DSN format.
+  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
+
+  # Collectors (referenced by name) to execute on the target.
+  # Glob patterns are supported (see https://github.com/gobwas/glob for syntax).
+  collectors: [neon_collector_autoscaling]
+
+# Collector files specifies a list of globs. One collector definition is read from each matching file.
+# Glob patterns are supported (see https://github.com/gobwas/glob for syntax).
+collector_files:
+  - "neon_collector_autoscaling.yml"
diff --git a/compute/vm-image-spec.yaml b/compute/vm-image-spec.yaml
index c94f95f447..0af44745e5 100644
--- a/compute/vm-image-spec.yaml
+++ b/compute/vm-image-spec.yaml
@@ -35,25 +35,6 @@ files:
     # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
     # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
     postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
-  - filename: pgbouncer.ini
-    content: |
-      [databases]
-      *=host=localhost port=5432 auth_user=cloud_admin
-      [pgbouncer]
-      listen_port=6432
-      listen_addr=0.0.0.0
-      auth_type=scram-sha-256
-      auth_user=cloud_admin
-      auth_dbname=postgres
-      client_tls_sslmode=disable
-      server_tls_sslmode=disable
-      pool_mode=transaction
-      max_client_conn=10000
-      default_pool_size=64
-      max_prepared_statements=0
-      admin_users=postgres
-      unix_socket_dir=/tmp/
-      unix_socket_mode=0777
   - filename: cgconfig.conf
     content: |
       # Configuration for cgroups in VM compute nodes
@@ -68,385 +49,6 @@ files:
        }
        memory {}
       }
-  - filename: sql_exporter.yml
-    content: |
-      # Configuration for sql_exporter
-      # Global defaults.
-      global:
-        # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
-        scrape_timeout: 10s
-        # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
-        scrape_timeout_offset: 500ms
-        # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
-        min_interval: 0s
-        # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
-        # as will concurrent scrapes.
-        max_connections: 1
-        # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
-        # always be the same as max_connections.
-        max_idle_connections: 1
-        # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
-        # If 0, connections are not closed due to a connection's age.
-        max_connection_lifetime: 5m
-
-      # The target to monitor and the collectors to execute on it.
-      target:
-        # Data source name always has a URI schema that matches the driver name. In some cases (e.g.
MySQL) - # the schema gets dropped or replaced to match the driver expected DSN format. - data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter' - - # Collectors (referenced by name) to execute on the target. - # Glob patterns are supported (see for syntax). - collectors: [neon_collector] - - # Collector files specifies a list of globs. One collector definition is read from each matching file. - # Glob patterns are supported (see for syntax). - collector_files: - - "neon_collector.yml" - - filename: sql_exporter_autoscaling.yml - content: | - # Configuration for sql_exporter for autoscaling-agent - # Global defaults. - global: - # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s. - scrape_timeout: 10s - # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first. - scrape_timeout_offset: 500ms - # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape. - min_interval: 0s - # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections, - # as will concurrent scrapes. - max_connections: 1 - # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should - # always be the same as max_connections. - max_idle_connections: 1 - # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. - # If 0, connections are not closed due to a connection's age. - max_connection_lifetime: 5m - - # The target to monitor and the collectors to execute on it. - target: - # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL) - # the schema gets dropped or replaced to match the driver expected DSN format. - data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling' - - # Collectors (referenced by name) to execute on the target. - # Glob patterns are supported (see for syntax). - collectors: [neon_collector_autoscaling] - - # Collector files specifies a list of globs. One collector definition is read from each matching file. - # Glob patterns are supported (see for syntax). 
- collector_files: - - "neon_collector_autoscaling.yml" - - filename: neon_collector.yml - content: | - collector_name: neon_collector - metrics: - - metric_name: lfc_misses - type: gauge - help: 'lfc_misses' - key_labels: - values: [lfc_misses] - query: | - select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; - - - metric_name: lfc_used - type: gauge - help: 'LFC chunks used (chunk = 1MB)' - key_labels: - values: [lfc_used] - query: | - select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; - - - metric_name: lfc_hits - type: gauge - help: 'lfc_hits' - key_labels: - values: [lfc_hits] - query: | - select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; - - - metric_name: lfc_writes - type: gauge - help: 'lfc_writes' - key_labels: - values: [lfc_writes] - query: | - select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; - - - metric_name: lfc_cache_size_limit - type: gauge - help: 'LFC cache size limit in bytes' - key_labels: - values: [lfc_cache_size_limit] - query: | - select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; - - - metric_name: connection_counts - type: gauge - help: 'Connection counts' - key_labels: - - datname - - state - values: [count] - query: | - select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state; - - - metric_name: pg_stats_userdb - type: gauge - help: 'Stats for several oldest non-system dbs' - key_labels: - - datname - value_label: kind - values: - - db_size - - deadlocks - # Rows - - inserted - - updated - - deleted - # We export stats for 10 non-system database. Without this limit - # it is too easy to abuse the system by creating lots of databases. - query: | - select pg_database_size(datname) as db_size, deadlocks, - tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted, - datname - from pg_stat_database - where datname IN ( - select datname - from pg_database - where datname <> 'postgres' and not datistemplate - order by oid - limit 10 - ); - - - metric_name: max_cluster_size - type: gauge - help: 'neon.max_cluster_size setting' - key_labels: - values: [max_cluster_size] - query: | - select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size'; - - - metric_name: db_total_size - type: gauge - help: 'Size of all databases' - key_labels: - values: [total] - query: | - select sum(pg_database_size(datname)) as total from pg_database; - - # DEPRECATED - - metric_name: lfc_approximate_working_set_size - type: gauge - help: 'Approximate working set size in pages of 8192 bytes' - key_labels: - values: [approximate_working_set_size] - query: | - select neon.approximate_working_set_size(false) as approximate_working_set_size; - - - metric_name: lfc_approximate_working_set_size_windows - type: gauge - help: 'Approximate working set size in pages of 8192 bytes' - key_labels: [duration] - values: [size] - # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection - # of durations in a pretty-printed form. 
- query: | - select - x as duration, - neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size - from - (values ('5m'),('15m'),('1h')) as t (x); - - - metric_name: compute_current_lsn - type: gauge - help: 'Current LSN of the database' - key_labels: - values: [lsn] - query: | - select - case - when pg_catalog.pg_is_in_recovery() - then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8 - else (pg_current_wal_lsn() - '0/0')::FLOAT8 - end as lsn; - - - metric_name: compute_receive_lsn - type: gauge - help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication' - key_labels: - values: [lsn] - query: | - SELECT - CASE - WHEN pg_catalog.pg_is_in_recovery() - THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8 - ELSE 0 - END AS lsn; - - - metric_name: replication_delay_bytes - type: gauge - help: 'Bytes between received and replayed LSN' - key_labels: - values: [replication_delay_bytes] - # We use a GREATEST call here because this calculation can be negative. - # The calculation is not atomic, meaning after we've gotten the receive - # LSN, the replay LSN may have advanced past the receive LSN we - # are using for the calculation. - query: | - SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes; - - - metric_name: replication_delay_seconds - type: gauge - help: 'Time since last LSN was replayed' - key_labels: - values: [replication_delay_seconds] - query: | - SELECT - CASE - WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 - ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())) - END AS replication_delay_seconds; - - - metric_name: checkpoints_req - type: gauge - help: 'Number of requested checkpoints' - key_labels: - values: [checkpoints_req] - query: | - SELECT checkpoints_req FROM pg_stat_bgwriter; - - - metric_name: checkpoints_timed - type: gauge - help: 'Number of scheduled checkpoints' - key_labels: - values: [checkpoints_timed] - query: | - SELECT checkpoints_timed FROM pg_stat_bgwriter; - - - metric_name: compute_logical_snapshot_files - type: gauge - help: 'Number of snapshot files in pg_logical/snapshot' - key_labels: - - timeline_id - values: [num_logical_snapshot_files] - query: | - SELECT - (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id, - -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These - -- temporary snapshot files are renamed to the actual snapshot files after they are - -- completely built. We only WAL-log the completely built snapshot files. - (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files; - - # In all the below metrics, we cast LSNs to floats because Prometheus only supports floats. - # It's probably fine because float64 can store integers from -2^53 to +2^53 exactly. - - # Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad. 
- - metric_name: logical_slot_restart_lsn - type: gauge - help: 'restart_lsn of logical slots' - key_labels: - - slot_name - values: [restart_lsn] - query: | - select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn - from pg_replication_slots - where slot_type = 'logical'; - - - metric_name: compute_subscriptions_count - type: gauge - help: 'Number of logical replication subscriptions grouped by enabled/disabled' - key_labels: - - enabled - values: [subscriptions_count] - query: | - select subenabled::text as enabled, count(*) as subscriptions_count - from pg_subscription - group by subenabled; - - - metric_name: retained_wal - type: gauge - help: 'Retained WAL in inactive replication slots' - key_labels: - - slot_name - values: [retained_wal] - query: | - SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal - FROM pg_replication_slots - WHERE active = false; - - - metric_name: wal_is_lost - type: gauge - help: 'Whether or not the replication slot wal_status is lost' - key_labels: - - slot_name - values: [wal_is_lost] - query: | - SELECT slot_name, - CASE - WHEN wal_status = 'lost' THEN 1 - ELSE 0 - END AS wal_is_lost - FROM pg_replication_slots; - - - filename: neon_collector_autoscaling.yml - content: | - collector_name: neon_collector_autoscaling - metrics: - - metric_name: lfc_misses - type: gauge - help: 'lfc_misses' - key_labels: - values: [lfc_misses] - query: | - select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; - - - metric_name: lfc_used - type: gauge - help: 'LFC chunks used (chunk = 1MB)' - key_labels: - values: [lfc_used] - query: | - select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; - - - metric_name: lfc_hits - type: gauge - help: 'lfc_hits' - key_labels: - values: [lfc_hits] - query: | - select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; - - - metric_name: lfc_writes - type: gauge - help: 'lfc_writes' - key_labels: - values: [lfc_writes] - query: | - select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; - - - metric_name: lfc_cache_size_limit - type: gauge - help: 'LFC cache size limit in bytes' - key_labels: - values: [lfc_cache_size_limit] - query: | - select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; - - - metric_name: lfc_approximate_working_set_size_windows - type: gauge - help: 'Approximate working set size in pages of 8192 bytes' - key_labels: [duration_seconds] - values: [size] - # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set - # size looking back 1..60 minutes, labeled with the number of minutes. - query: | - select - x::text as duration_seconds, - neon.approximate_working_set_size_seconds(x) as size - from - (select generate_series * 60 as x from generate_series(1, 60)) as t (x); build: | # Build cgroup-tools # @@ -480,32 +82,6 @@ build: | && CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \ # actually build the thing... 
&& make install - - FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter - - FROM burningalchemist/sql_exporter:0.13 AS sql-exporter - - # Build pgbouncer - # - FROM debian:bullseye-slim AS pgbouncer - RUN set -e \ - && apt-get update \ - && apt-get install -y \ - build-essential \ - git \ - libevent-dev \ - libtool \ - pkg-config - - # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) - ENV PGBOUNCER_TAG=pgbouncer_1_22_1 - RUN set -e \ - && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \ - && cd pgbouncer \ - && ./autogen.sh \ - && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \ - && make -j $(nproc) dist_man_MANS= \ - && make install dist_man_MANS= merge: | # tweak nofile limits RUN set -e \ @@ -527,24 +103,10 @@ merge: | COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap COPY cgconfig.conf /etc/cgconfig.conf - COPY pgbouncer.ini /etc/pgbouncer.ini - COPY sql_exporter.yml /etc/sql_exporter.yml - COPY neon_collector.yml /etc/neon_collector.yml - COPY sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml - COPY neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml RUN set -e \ - && chown postgres:postgres /etc/pgbouncer.ini \ - && chmod 0666 /etc/pgbouncer.ini \ - && chmod 0644 /etc/cgconfig.conf \ - && chmod 0644 /etc/sql_exporter.yml \ - && chmod 0644 /etc/neon_collector.yml \ - && chmod 0644 /etc/sql_exporter_autoscaling.yml \ - && chmod 0644 /etc/neon_collector_autoscaling.yml + && chmod 0644 /etc/cgconfig.conf COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ - COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter - COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter - COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
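A note on the pgbouncer stage moved into the Dockerfile above: the binary is linked statically (LDFLAGS=-static, --without-openssl) precisely so it can be COPY'd into the final image without dragging libevent along, and dist_man_MANS= skips manpage generation so python3/pandoc are not needed at build time. A minimal smoke test, assuming a shell inside the built compute image; the paths and ports come from the Dockerfile and compute/etc/pgbouncer.ini in this diff, and with auth_type=scram-sha-256 the admin-console connection will prompt for the postgres password however it is provisioned:

    # Confirm the static link: ldd reports "not a dynamic executable" and exits non-zero
    ldd /usr/local/bin/pgbouncer || true
    /usr/local/bin/pgbouncer --version

    # Run with the shipped config (-d daemonizes) and poke the admin console
    # over the transaction-pooling listener on port 6432 (admin_users=postgres)
    /usr/local/bin/pgbouncer -d /etc/pgbouncer.ini
    psql "host=127.0.0.1 port=6432 dbname=pgbouncer user=postgres" -c 'SHOW POOLS;'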
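The two sql_exporter configs are identical except for the DSN's application_name and the collector file they reference, which keeps the regular scrape and the autoscaling-agent scrape isolated from each other. A sketch of running both instances side by side, assuming the binary paths used in the Dockerfile; sql_exporter listens on :9399 by default, and the second port chosen here is an arbitrary illustrative value, not something this diff configures:

    /bin/sql_exporter -config.file=/etc/sql_exporter.yml &
    /bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml \
        -web.listen-address=:9499 &   # assumed free port for the second instance

    # Each instance should expose the gauges defined in its collector file
    curl -s http://127.0.0.1:9399/metrics | grep '^lfc_'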
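Since every metric in the collector files is plain SQL, a query can be debugged by hand against the compute before involving the exporter at all. For example, the windowed working-set-size query and the LSN-to-float cast discussed in the collector comments, using the cloud_admin connection string from the sql_exporter target:

    psql 'postgresql://cloud_admin@127.0.0.1:5432/postgres' <<'SQL'
    -- Human-readable working set sizes, as in neon_collector.yml
    select x as duration,
           neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
    from (values ('5m'),('15m'),('1h')) as t (x);

    -- pg_lsn subtraction yields a byte offset; float64 holds integers
    -- exactly up to 2^53, which is why the cast is considered safe
    select (pg_current_wal_lsn() - '0/0')::float8 as lsn_bytes;
    SQL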
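Finally, note the mechanics of the config move: the ownership and permission fixes that the vm-image-spec merge step used to apply with a RUN chmod/chown pass are now set at COPY time via BuildKit's --chmod/--chown flags. A quick way to verify the result inside the image; the stat format string assumes GNU coreutils, and the expected group for pgbouncer.ini depends on the postgres user's primary group:

    stat -c '%a %U:%G %n' /etc/pgbouncer.ini /etc/sql_exporter.yml /etc/neon_collector.yml
    # Expected from the Dockerfile above:
    #   666 postgres:... /etc/pgbouncer.ini   (COPY --chmod=0666 --chown=postgres)
    #   644 root:root    /etc/sql_exporter.yml and the other exporter configs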