From d6753e9ee4925166cced51cd1b1426fbee75ccad Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Mon, 22 Jul 2024 11:28:08 -0700 Subject: [PATCH] vm-image: Expose new LFC working set size metrics (#8298) In general, replace: * 'lfc_approximate_working_set_size' with * 'lfc_approximate_working_set_size_windows' For the "main" metrics that are actually scraped and used internally, the old one is just marked as deprecated. For the "autoscaling" metrics, we're not currently using the old one, so we can get away with just replacing it. Also, for the user-visible metrics we'll only store & expose a few different time windows, to avoid making the UI overly busy or bloating our internal metrics storage. But for the autoscaling-related scraper, we aren't storing the metrics, and it's useful to be able to programmatically operate on the trendline of how WSS increases (or doesn't!) with window size. So there, we can just output datapoints for each minute. Part of neondatabase/autoscaling#872 See also https://www.notion.so/neondatabase/cca38138fadd45eaa753d81b859490c6 --- vm-image-spec.yaml | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/vm-image-spec.yaml b/vm-image-spec.yaml index 3c446ecdea..224e9847f3 100644 --- a/vm-image-spec.yaml +++ b/vm-image-spec.yaml @@ -236,6 +236,7 @@ files: query: | select sum(pg_database_size(datname)) as total from pg_database; + # DEPRECATED - metric_name: lfc_approximate_working_set_size type: gauge help: 'Approximate working set size in pages of 8192 bytes' @@ -244,6 +245,20 @@ files: query: | select neon.approximate_working_set_size(false) as approximate_working_set_size; + - metric_name: lfc_approximate_working_set_size_windows + type: gauge + help: 'Approximate working set size in pages of 8192 bytes' + key_labels: [duration] + values: [size] + # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection + # of durations in a pretty-printed form. + query: | + select + x as duration, + neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size + from + (values ('5m'),('15m'),('1h')) as t (x); + - metric_name: current_lsn type: gauge help: 'Current LSN of the database' @@ -377,13 +392,19 @@ files: query: | select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; - - metric_name: lfc_approximate_working_set_size + - metric_name: lfc_approximate_working_set_size_windows type: gauge help: 'Approximate working set size in pages of 8192 bytes' - key_labels: - values: [approximate_working_set_size] + key_labels: [duration_seconds] + values: [size] + # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set + # size looking back 1..60 minutes, labeled with the number of minutes. query: | - select neon.approximate_working_set_size(false) as approximate_working_set_size; + select + x::text as duration_seconds, + neon.approximate_working_set_size_seconds(x) as size + from + (select generate_series * 60 as x from generate_series(1, 60)); build: | # Build cgroup-tools #