Merge remote-tracking branch 'origin/local-proxy-lazy-ext-install' into test-local-proxy-jwt-ext-install

Merge branch 'main' into local-proxy-lazy-ext-install
Merge remote-tracking branch 'origin/grants-endpoint' into test-local-proxy-jwt-ext-install
2026-02-28 23:10:37 +00:00 · 2024-10-17 12:39:22 +01:00 · 2024-10-17 12:38:51 +01:00 · 2024-10-17 11:52:56 +01:00 · 2024-10-17 11:50:12 +01:00 · 2024-10-17 13:38:24 +03:00
186 changed files with 3144 additions and 2166 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -120,6 +120,24 @@ jobs:
      - name: Run mypy to check types
        run: poetry run mypy .

+  # Fails when the Jsonnet sources under compute/ are not formatted the way
+  # jsonnetfmt would format them.
+  check-codestyle-jsonnet:
+    needs: [ check-permissions, build-build-tools-image ]
+    runs-on: [ self-hosted, small ]
+    # Run inside the image produced by build-build-tools-image, so the job uses
+    # the tools baked into that image rather than whatever is on the runner.
+    container:
+      image: ${{ needs.build-build-tools-image.outputs.image }}
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+      options: --init
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Check Jsonnet code formatting
+        # `jsonnetfmt-test` is the compute/Makefile target that runs
+        # `jsonnetfmt --test` over the Jsonnet files; it only checks the
+        # formatting and does not rewrite any file.
+        run: |
+          make -C compute jsonnetfmt-test
+
  # Check that the vendor/postgres-* submodules point to the
  # corresponding REL_*_STABLE_neon branches.
  check-submodules:
@@ -1082,7 +1100,6 @@ jobs:
        run: |
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false
-            gh workflow --repo neondatabase/azure run deploy.yml -f dockerTag=${{needs.tag.outputs.build-tag}}
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
              -f deployPgSniRouter=false \
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4648,9 +4648,10 @@ dependencies = [
 "camino-tempfile",
 "futures",
 "futures-util",
+ "http-body-util",
 "http-types",
 "humantime-serde",
- "hyper 0.14.30",
+ "hyper 1.4.1",
 "itertools 0.10.5",
 "metrics",
 "once_cell",
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -27,6 +27,7 @@ RUN set -e \
        gnupg \
        gzip \
        jq \
+        jsonnet \
        libcurl4-openssl-dev \
        libbz2-dev \
        libffi-dev \
--- a/Makefile
+++ b/Makefile
@@ -291,6 +291,7 @@ postgres-check: \
 # This doesn't remove the effects of 'configure'.
 .PHONY: clean
 clean: postgres-clean neon-pg-clean-ext
+	$(MAKE) -C compute clean
 	$(CARGO_CMD_PREFIX) cargo clean

 # This removes everything
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more informati
 ```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
 libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \
-libcurl4-openssl-dev openssl python3-poetry lsof libicu-dev
+libprotobuf-dev libcurl4-openssl-dev openssl python3-poetry lsof libicu-dev
 ```
 * On Fedora, these packages are needed:
 ```bash
--- a/compute/.gitignore
+++ b/compute/.gitignore
@@ -0,0 +1,5 @@
+# sql_exporter config files generated from Jsonnet
+etc/neon_collector.yml
+etc/neon_collector_autoscaling.yml
+etc/sql_exporter.yml
+etc/sql_exporter_autoscaling.yml
--- a/compute/Dockerfile.compute-node
+++ b/compute/Dockerfile.compute-node
@@ -18,13 +18,14 @@ RUN case $DEBIAN_VERSION in \
      # Version-specific installs for Bullseye (PG14-PG16):
      # The h3_pg extension needs a cmake 3.20+, but Debian bullseye has 3.18.
      # Install newer version (3.25) from backports.
+      # libstdc++-10-dev is required for plv8
      bullseye) \
        echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/bullseye-backports.list; \
-        VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports"; \
+        VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports libstdc++-10-dev"; \
      ;; \
      # Version-specific installs for Bookworm (PG17):
      bookworm) \
-        VERSION_INSTALLS="cmake"; \
+        VERSION_INSTALLS="cmake libstdc++-12-dev"; \
      ;; \
      *) \
        echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
@@ -227,18 +228,33 @@ FROM build-deps AS plv8-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    apt update && \
+RUN apt update && \
    apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+# plv8 3.2.3 supports v17
+# last release v3.2.3 - Sep 7, 2024
+#
+# clone the repo instead of downloading the release tarball because plv8 has submodule dependencies
+# and the release tarball doesn't include them
+#
+# Use new version only for v17
+# because since v3.2, plv8 doesn't include plcoffee and plls extensions
+ENV PLV8_TAG=v3.2.3
+
+RUN case "${PG_VERSION}" in \
+    "v17") \
+        export PLV8_TAG=v3.2.3 \
+    ;; \
+    "v14" | "v15" | "v16") \
+        export PLV8_TAG=v3.1.10 \
+    ;; \
+    *) \
+        echo "unexpected PostgreSQL version" && exit 1 \
+    ;; \
    esac && \
-    wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
-    echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
-    mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
+    git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \
+    tar -czf plv8.tar.gz --exclude .git plv8-src && \
+    cd plv8-src && \
    # generate and copy upgrade scripts
    mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
    cp upgrade/* /usr/local/pgsql/share/extension/ && \
@@ -248,8 +264,17 @@ RUN case "${PG_VERSION}" in "v17") \
    find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
    # don't break computes with installed old version of plv8
    cd /usr/local/pgsql/lib/ && \
-    ln -s plv8-3.1.10.so plv8-3.1.5.so && \
-    ln -s plv8-3.1.10.so plv8-3.1.8.so && \
+    case "${PG_VERSION}" in \
+    "v17") \
+        ln -s plv8-3.2.3.so plv8-3.1.8.so && \
+        ln -s plv8-3.2.3.so plv8-3.1.5.so && \
+        ln -s plv8-3.2.3.so plv8-3.1.10.so \
+    ;; \
+    "v14" | "v15" | "v16") \
+        ln -s plv8-3.1.10.so plv8-3.1.5.so && \
+        ln -s plv8-3.1.10.so plv8-3.1.8.so \
+    ;; \
+    esac && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
@@ -327,6 +352,9 @@ COPY compute/patches/pgvector.patch /pgvector.patch
 # By default, pgvector Makefile uses `-march=native`. We don't want that,
 # because we build the images on different machines than where we run them.
 # Pass OPTFLAGS="" to remove it.
+#
+# v17 is not supported yet because of upstream issue
+# https://github.com/pgvector/pgvector/issues/669
 RUN case "${PG_VERSION}" in "v17") \
    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
    esac && \
@@ -349,7 +377,7 @@ ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 # not version-specific
-# doesn't use releases, last commit f3d82fd - Mar 2, 2023 
+# doesn't use releases, last commit f3d82fd - Mar 2, 2023
 RUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71c20689ce3d.tar.gz -O pgjwt.tar.gz && \
    echo "dae8ed99eebb7593b43013f6532d772b12dfecd55548d2673f2dfd0163f6d2b9 pgjwt.tar.gz" | sha256sum --check && \
    mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
@@ -366,11 +394,10 @@ FROM build-deps AS hypopg-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
-    echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
+# HypoPG 1.4.1 supports v17
+# last release 1.4.1 - Apr 28, 2024
+RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypopg.tar.gz && \
+    echo "9afe6357fd389d8d33fad81703038ce520b09275ec00153c6c89282bcdedd6bc hypopg.tar.gz" | sha256sum --check && \
    mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -407,6 +434,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 COPY compute/patches/rum.patch /rum.patch

+# maybe version-specific
+# support for v17 is unknown
+# last release 1.3.13 - Sep 19, 2022
 RUN case "${PG_VERSION}" in "v17") \
    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
    esac && \
@@ -428,11 +458,10 @@ FROM build-deps AS pgtap-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
-    echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
+# pgtap 1.3.3 supports v17
+# last release v1.3.3 - Apr 8, 2024
+RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgtap.tar.gz && \
+    echo "325ea79d0d2515bce96bce43f6823dcd3effbd6c54cb2a4d6c2384fffa3a14c7 pgtap.tar.gz" | sha256sum --check && \
    mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -505,11 +534,10 @@ FROM build-deps AS plpgsql-check-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
-    echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
+# plpgsql_check v2.7.11 supports v17
+# last release v2.7.11 - Sep 16, 2024
+RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz -O plpgsql_check.tar.gz && \
+    echo "208933f8dbe8e0d2628eb3851e9f52e6892b8e280c63700c0f1ce7883625d172 plpgsql_check.tar.gz" | sha256sum --check && \
    mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -527,18 +555,19 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    case "${PG_VERSION}" in \
+RUN case "${PG_VERSION}" in \
      "v14" | "v15") \
        export TIMESCALEDB_VERSION=2.10.1 \
        export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
        ;; \
-      *) \
+      "v16") \
        export TIMESCALEDB_VERSION=2.13.0 \
        export TIMESCALEDB_CHECKSUM=584a351c7775f0e067eaa0e7277ea88cab9077cc4c455cbbf09a5d9723dce95d \
        ;; \
+      "v17") \
+        export TIMESCALEDB_VERSION=2.17.0 \
+        export TIMESCALEDB_CHECKSUM=155bf64391d3558c42f31ca0e523cfc6252921974f75298c9039ccad1c89811a \
+        ;; \
    esac && \
    wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
    echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
@@ -561,10 +590,8 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    case "${PG_VERSION}" in \
+# version-specific, has separate releases for each version
+RUN case "${PG_VERSION}" in \
      "v14") \
        export PG_HINT_PLAN_VERSION=14_1_4_1 \
        export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -578,7 +605,8 @@ RUN case "${PG_VERSION}" in "v17") \
        export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
        ;; \
      "v17") \
-        echo "TODO: PG17 pg_hint_plan support" && exit 0 \
+        export PG_HINT_PLAN_VERSION=17_1_7_0 \
+        export PG_HINT_PLAN_CHECKSUM=06dd306328c67a4248f48403c50444f30959fb61ebe963248dbc2afb396fe600 \
        ;; \
      *) \
        echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
@@ -602,6 +630,10 @@ FROM build-deps AS pg-cron-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# 1.6.4 available, supports v17
+# This is an experimental extension that we do not support on prod yet.
+# !Do not remove!
+# We set it in shared_preload_libraries and computes will fail to start if library is not found.
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN case "${PG_VERSION}" in "v17") \
    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
@@ -623,23 +655,37 @@ FROM build-deps AS rdkit-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    apt-get update && \
+RUN apt-get update && \
    apt-get install --no-install-recommends -y \
        libboost-iostreams1.74-dev \
        libboost-regex1.74-dev \
        libboost-serialization1.74-dev \
        libboost-system1.74-dev \
-        libeigen3-dev
+        libeigen3-dev \
+        libboost-all-dev

+# rdkit Release_2024_09_1 supports v17
+# last release Release_2024_09_1 - Sep 27, 2024
+#
+# Use new version only for v17
+# because Release_2024_09_1 has some backward incompatible changes
+# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1
 ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+RUN case "${PG_VERSION}" in \
+    "v17") \
+        export RDKIT_VERSION=Release_2024_09_1 \
+        export RDKIT_CHECKSUM=034c00d6e9de323506834da03400761ed8c3721095114369d06805409747a60f \
+    ;; \
+    "v14" | "v15" | "v16") \
+        export RDKIT_VERSION=Release_2023_03_3 \
+        export RDKIT_CHECKSUM=bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d \
+    ;; \
+    *) \
+        echo "unexpected PostgreSQL version" && exit 1 \
+    ;; \
    esac && \
-    wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
-    echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
+    wget https://github.com/rdkit/rdkit/archive/refs/tags/${RDKIT_VERSION}.tar.gz -O rdkit.tar.gz && \
+    echo "${RDKIT_CHECKSUM} rdkit.tar.gz" | sha256sum --check && \
    mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
    cmake \
        -D RDK_BUILD_CAIRO_SUPPORT=OFF \
@@ -678,12 +724,11 @@ FROM build-deps AS pg-uuidv7-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# not version-specific
+# last release v1.6.0 - Oct 9, 2024
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
-    echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.6.0.tar.gz -O pg_uuidv7.tar.gz && \
+    echo "0fa6c710929d003f6ce276a7de7a864e9d1667b2d78be3dc2c07f2409eb55867 pg_uuidv7.tar.gz" | sha256sum --check && \
    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -754,6 +799,8 @@ RUN case "${PG_VERSION}" in \
 FROM build-deps AS pg-embedding-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# This is our extension, support stopped in favor of pgvector
+# TODO: deprecate it
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN case "${PG_VERSION}" in \
@@ -780,6 +827,8 @@ FROM build-deps AS pg-anon-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# This is an experimental extension, never got to real production.
+# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN case "${PG_VERSION}" in "v17") \
    echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
@@ -929,8 +978,8 @@ ARG PG_VERSION
 RUN case "${PG_VERSION}" in "v17") \
    echo "pg_session_jwt does not yet have a release that supports pg17" && exit 0;; \
    esac && \
-    wget https://github.com/neondatabase/pg_session_jwt/archive/5aee2625af38213650e1a07ae038fdc427250ee4.tar.gz -O pg_session_jwt.tar.gz && \
-    echo "5d91b10bc1347d36cffc456cb87bec25047935d6503dc652ca046f04760828e7 pg_session_jwt.tar.gz" | sha256sum --check && \
+    wget https://github.com/neondatabase/pg_session_jwt/archive/1c79c014c4c225c8684dc24a88369e79b4dbe762.tar.gz -O pg_session_jwt.tar.gz && \
+    echo "bc04b25626a88580b6fed1b87f45ba0a7ca66dbac003a3ec378a1a21b1456d8b pg_session_jwt.tar.gz" | sha256sum --check && \
    mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
    sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    cargo pgrx install --release
@@ -946,13 +995,12 @@ FROM build-deps AS wal2json-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# wal2json wal2json_2_6 supports v17
+# last release wal2json_2_6 - Apr 25, 2024
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
-    esac && \
-    wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
-    echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
-    mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
+RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.gz -O wal2json.tar.gz && \
+    echo "18b4bdec28c74a8fc98a11c72de38378a760327ef8e5e42e975b0029eb96ba0d wal2json.tar.gz" | sha256sum --check && \
+    mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install

@@ -966,12 +1014,11 @@ FROM build-deps AS pg-ivm-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# pg_ivm v1.9 supports v17
+# last release v1.9 - Jul 31, 2024
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
-    esac && \
-    wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
-    echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_ivm.tar.gz && \
+    echo "59e15722939f274650abf637f315dd723c87073496ca77236b044cb205270d8b pg_ivm.tar.gz" | sha256sum --check && \
    mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -987,12 +1034,11 @@ FROM build-deps AS pg-partman-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# should support v17 https://github.com/pgpartman/pg_partman/discussions/693
+# last release v5.1.0 - Apr 2, 2024
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "pg_partman doesn't support PG17 yet" && exit 0;; \
-    esac && \
-    wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
-    echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz -O pg_partman.tar.gz && \
+    echo "3e3a27d7ff827295d5c55ef72f07a49062d6204b3cb0b9a048645d6db9f3cb9f pg_partman.tar.gz" | sha256sum --check && \
    mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -1169,6 +1215,19 @@ RUN rm -r /usr/local/pgsql/include
 # if they were to be used by other libraries.
 RUN rm /usr/local/pgsql/lib/lib*.a

+#########################################################################################
+#
+# Preprocess the sql_exporter configuration files
+#
+#########################################################################################
+# Runs the default target of compute/Makefile to generate the sql_exporter and
+# collector YAML files. A later stage copies the generated files out of this
+# stage from /home/nonroot/compute/etc/.
+# NOTE(review): this relies on $REPOSITORY/$IMAGE:$TAG providing make and
+# jsonnet, and on its working directory being /home/nonroot - confirm.
+FROM $REPOSITORY/$IMAGE:$TAG AS sql_exporter_preprocessor
+ARG PG_VERSION
+
+USER nonroot
+
+COPY --chown=nonroot compute compute
+
+# PG_VERSION is handed to make so the collector templates can read it.
+RUN make PG_VERSION="${PG_VERSION}" -C compute

 #########################################################################################
 #
@@ -1287,10 +1346,10 @@ RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
 COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
 COPY --from=sql-exporter      /bin/sql_exporter      /bin/sql_exporter

-COPY --chmod=0644 compute/etc/sql_exporter.yml               /etc/sql_exporter.yml
-COPY --chmod=0644 compute/etc/neon_collector.yml             /etc/neon_collector.yml
-COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml   /etc/sql_exporter_autoscaling.yml
-COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
+COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml               /etc/sql_exporter.yml
+COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml             /etc/neon_collector.yml
+COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml   /etc/sql_exporter_autoscaling.yml
+COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml

 # Create remote extension download directory
 RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
--- a/compute/Makefile
+++ b/compute/Makefile
@@ -0,0 +1,47 @@
+# Generates the sql_exporter configuration files (etc/*.yml) from the Jsonnet
+# sources in etc/, and provides formatting helpers for those sources.
+#
+# PG_VERSION is forwarded to the collector templates as the external variable
+# pg_version, e.g. `make PG_VERSION=v17`.
+
+# Jsonnet sources: prerequisites of the rules below and the input of the
+# jsonnetfmt targets. SQL files loaded with importstr are not part of this list.
+jsonnet_files = $(wildcard \
+	etc/*.jsonnet \
+	etc/sql_exporter/*.libsonnet)
+
+# Default target: build all four configuration files.
+.PHONY: all
+all: neon_collector.yml neon_collector_autoscaling.yml sql_exporter.yml sql_exporter_autoscaling.yml
+
+# NOTE(review): the target names below have no etc/ prefix while the recipes
+# write to etc/$@. Unless a file with the bare name exists in this directory,
+# make never finds the target and re-runs every recipe on each invocation.
+# That keeps the output in sync with PG_VERSION, but it also means the
+# $(jsonnet_files) prerequisites do not save any work.
+#
+# NOTE(review): the collector rules search `jsonnet:etc` while the sql_exporter
+# rules search only `etc` - confirm whether the extra `jsonnet` entry is needed.
+neon_collector.yml: $(jsonnet_files)
+	JSONNET_PATH=jsonnet:etc jsonnet \
+		--output-file etc/$@ \
+		--ext-str pg_version=$(PG_VERSION) \
+		etc/neon_collector.jsonnet
+
+neon_collector_autoscaling.yml: $(jsonnet_files)
+	JSONNET_PATH=jsonnet:etc jsonnet \
+		--output-file etc/$@ \
+		--ext-str pg_version=$(PG_VERSION) \
+		etc/neon_collector_autoscaling.jsonnet
+
+# Both sql_exporter configs are rendered from the same template,
+# etc/sql_exporter.jsonnet; they differ only in the top-level arguments passed.
+sql_exporter.yml: $(jsonnet_files)
+	JSONNET_PATH=etc jsonnet \
+		--output-file etc/$@ \
+		--tla-str collector_file=neon_collector.yml \
+		etc/sql_exporter.jsonnet
+
+sql_exporter_autoscaling.yml: $(jsonnet_files)
+	JSONNET_PATH=etc jsonnet \
+		--output-file etc/$@ \
+		--tla-str collector_file=neon_collector_autoscaling.yml \
+		--tla-str application_name=sql_exporter_autoscaling \
+		etc/sql_exporter.jsonnet
+
+# Remove the generated configuration files.
+.PHONY: clean
+clean:
+	rm --force \
+		etc/neon_collector.yml \
+		etc/neon_collector_autoscaling.yml \
+		etc/sql_exporter.yml \
+		etc/sql_exporter_autoscaling.yml
+
+# Check the formatting of the Jsonnet sources without modifying them.
+.PHONY: jsonnetfmt-test
+jsonnetfmt-test:
+	jsonnetfmt --test $(jsonnet_files)
+
+# Reformat the Jsonnet sources in place.
+.PHONY: jsonnetfmt-format
+jsonnetfmt-format:
+	jsonnetfmt --in-place $(jsonnet_files)
--- a/compute/etc/README.md
+++ b/compute/etc/README.md
@@ -0,0 +1,17 @@
+# Compute Configuration
+
+These files are the configuration files for various other pieces of software
+that will be running in the compute alongside Postgres.
+
+## `sql_exporter`
+
+### Adding a `sql_exporter` Metric
+
+We use `sql_exporter` to export various metrics from Postgres. In order to add
+a metric, you will need to create two files: a `libsonnet` and a `sql` file. You
+will then import the `libsonnet` file in one of the collector files, and the
+`sql` file will be imported in the `libsonnet` file.
+
+If your statistic is an LSN, you may want to cast it to a `float8`, because
+Prometheus only supports floats. This is fine in practice: `float8` represents
+every integer from `-2^53` to `+2^53` exactly (LSNs up to 8 PiB of WAL).
--- a/compute/etc/neon_collector.jsonnet
+++ b/compute/etc/neon_collector.jsonnet
@@ -0,0 +1,51 @@
+// Definition of the `neon_collector` collector for sql_exporter.
+// compute/Makefile renders this file to etc/neon_collector.yml.
+{
+  collector_name: 'neon_collector',
+  // One entry per metric; each metric is defined in its own libsonnet file
+  // under sql_exporter/.
+  metrics: [
+    import 'sql_exporter/checkpoints_req.libsonnet',
+    import 'sql_exporter/checkpoints_timed.libsonnet',
+    import 'sql_exporter/compute_current_lsn.libsonnet',
+    import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
+    import 'sql_exporter/compute_receive_lsn.libsonnet',
+    import 'sql_exporter/compute_subscriptions_count.libsonnet',
+    import 'sql_exporter/connection_counts.libsonnet',
+    import 'sql_exporter/db_total_size.libsonnet',
+    import 'sql_exporter/file_cache_read_wait_seconds_bucket.libsonnet',
+    import 'sql_exporter/file_cache_read_wait_seconds_count.libsonnet',
+    import 'sql_exporter/file_cache_read_wait_seconds_sum.libsonnet',
+    import 'sql_exporter/file_cache_write_wait_seconds_bucket.libsonnet',
+    import 'sql_exporter/file_cache_write_wait_seconds_count.libsonnet',
+    import 'sql_exporter/file_cache_write_wait_seconds_sum.libsonnet',
+    import 'sql_exporter/getpage_prefetch_discards_total.libsonnet',
+    import 'sql_exporter/getpage_prefetch_misses_total.libsonnet',
+    import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',
+    import 'sql_exporter/getpage_prefetches_buffered.libsonnet',
+    import 'sql_exporter/getpage_sync_requests_total.libsonnet',
+    import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',
+    import 'sql_exporter/getpage_wait_seconds_count.libsonnet',
+    import 'sql_exporter/getpage_wait_seconds_sum.libsonnet',
+    import 'sql_exporter/lfc_approximate_working_set_size.libsonnet',
+    import 'sql_exporter/lfc_approximate_working_set_size_windows.libsonnet',
+    import 'sql_exporter/lfc_cache_size_limit.libsonnet',
+    import 'sql_exporter/lfc_hits.libsonnet',
+    import 'sql_exporter/lfc_misses.libsonnet',
+    import 'sql_exporter/lfc_used.libsonnet',
+    import 'sql_exporter/lfc_writes.libsonnet',
+    import 'sql_exporter/logical_slot_restart_lsn.libsonnet',
+    import 'sql_exporter/max_cluster_size.libsonnet',
+    import 'sql_exporter/pageserver_disconnects_total.libsonnet',
+    import 'sql_exporter/pageserver_requests_sent_total.libsonnet',
+    import 'sql_exporter/pageserver_send_flushes_total.libsonnet',
+    import 'sql_exporter/pageserver_open_requests.libsonnet',
+    import 'sql_exporter/pg_stats_userdb.libsonnet',
+    import 'sql_exporter/replication_delay_bytes.libsonnet',
+    import 'sql_exporter/replication_delay_seconds.libsonnet',
+    import 'sql_exporter/retained_wal.libsonnet',
+    import 'sql_exporter/wal_is_lost.libsonnet',
+  ],
+  // Named queries whose SQL text is loaded verbatim from a separate file.
+  // NOTE(review): presumably referenced by name from the metric definitions
+  // above - confirm.
+  queries: [
+    {
+      query_name: 'neon_perf_counters',
+      query: importstr 'sql_exporter/neon_perf_counters.sql',
+    },
+  ],
+}
--- a/compute/etc/neon_collector.yml
+++ b/compute/etc/neon_collector.yml
@@ -1,331 +0,0 @@
-collector_name: neon_collector
-metrics:
- metric_name: lfc_misses
-  type: gauge
-  help: 'lfc_misses'
-  key_labels:
-  values: [lfc_misses]
-  query: |
-    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
-
- metric_name: lfc_used
-  type: gauge
-  help: 'LFC chunks used (chunk = 1MB)'
-  key_labels:
-  values: [lfc_used]
-  query: |
-    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
-
- metric_name: lfc_hits
-  type: gauge
-  help: 'lfc_hits'
-  key_labels:
-  values: [lfc_hits]
-  query: |
-    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
-
- metric_name: lfc_writes
-  type: gauge
-  help: 'lfc_writes'
-  key_labels:
-  values: [lfc_writes]
-  query: |
-    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
-
- metric_name: lfc_cache_size_limit
-  type: gauge
-  help: 'LFC cache size limit in bytes'
-  key_labels:
-  values: [lfc_cache_size_limit]
-  query: |
-    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
-
- metric_name: connection_counts
-  type: gauge
-  help: 'Connection counts'
-  key_labels:
-    - datname
-    - state
-  values: [count]
-  query: |
-    select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
-
- metric_name: pg_stats_userdb
-  type: gauge
-  help: 'Stats for several oldest non-system dbs'
-  key_labels:
-    - datname
-  value_label: kind
-  values:
-    - db_size
-    - deadlocks
-    # Rows
-    - inserted
-    - updated
-    - deleted
-  # We export stats for 10 non-system database. Without this limit
-  # it is too easy to abuse the system by creating lots of databases.
-  query: |
-    select pg_database_size(datname) as db_size, deadlocks,
-       tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
-       datname
-     from pg_stat_database
-     where datname IN (
-       select datname
-       from pg_database
-       where datname <> 'postgres' and not datistemplate
-       order by oid
-       limit 10
-     );
-
- metric_name: max_cluster_size
-  type: gauge
-  help: 'neon.max_cluster_size setting'
-  key_labels:
-  values: [max_cluster_size]
-  query: |
-    select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
-
- metric_name: db_total_size
-  type: gauge
-  help: 'Size of all databases'
-  key_labels:
-  values: [total]
-  query: |
-    select sum(pg_database_size(datname)) as total from pg_database;
-
- metric_name: getpage_wait_seconds_count
-  type: counter
-  help: 'Number of getpage requests'
-  values: [getpage_wait_seconds_count]
-  query_ref: neon_perf_counters
-
- metric_name: getpage_wait_seconds_sum
-  type: counter
-  help: 'Time spent in getpage requests'
-  values: [getpage_wait_seconds_sum]
-  query_ref: neon_perf_counters
-
- metric_name: getpage_prefetch_requests_total
-  type: counter
-  help: 'Number of getpage issued for prefetching'
-  values: [getpage_prefetch_requests_total]
-  query_ref: neon_perf_counters
-
- metric_name: getpage_sync_requests_total
-  type: counter
-  help: 'Number of synchronous getpage issued'
-  values: [getpage_sync_requests_total]
-  query_ref: neon_perf_counters
-
- metric_name: getpage_prefetch_misses_total
-  type: counter
-  help: 'Total number of readahead misses; consisting of either prefetches that don''t satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read'
-  values: [getpage_prefetch_misses_total]
-  query_ref: neon_perf_counters
-
- metric_name: getpage_prefetch_discards_total
-  type: counter
-  help: 'Number of prefetch responses issued but not used'
-  values: [getpage_prefetch_discards_total]
-  query_ref: neon_perf_counters
-
- metric_name: pageserver_requests_sent_total
-  type: counter
-  help: 'Number of all requests sent to the pageserver (not just GetPage requests)'
-  values: [pageserver_requests_sent_total]
-  query_ref: neon_perf_counters
-
- metric_name: pageserver_disconnects_total
-  type: counter
-  help: 'Number of times that the connection to the pageserver was lost'
-  values: [pageserver_disconnects_total]
-  query_ref: neon_perf_counters
-
- metric_name: pageserver_send_flushes_total
-  type: counter
-  help: 'Number of flushes to the pageserver connection'
-  values: [pageserver_send_flushes_total]
-  query_ref: neon_perf_counters
-
- metric_name: getpage_wait_seconds_bucket
-  type: counter
-  help: 'Histogram buckets of getpage request latency'
-  key_labels:
-      - bucket_le
-  values: [value]
-  query_ref: getpage_wait_seconds_buckets
-
-# DEPRECATED
- metric_name: lfc_approximate_working_set_size
-  type: gauge
-  help: 'Approximate working set size in pages of 8192 bytes'
-  key_labels:
-  values: [approximate_working_set_size]
-  query: |
-    select neon.approximate_working_set_size(false) as approximate_working_set_size;
-
- metric_name: lfc_approximate_working_set_size_windows
-  type: gauge
-  help: 'Approximate working set size in pages of 8192 bytes'
-  key_labels: [duration]
-  values: [size]
-  # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
-  # of durations in a pretty-printed form.
-  query: |
-    select
-      x as duration,
-      neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
-    from
-      (values ('5m'),('15m'),('1h')) as t (x);
-
- metric_name: compute_current_lsn
-  type: gauge
-  help: 'Current LSN of the database'
-  key_labels:
-  values: [lsn]
-  query: |
-    select
-      case
-        when pg_catalog.pg_is_in_recovery()
-        then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
-        else (pg_current_wal_lsn() - '0/0')::FLOAT8
-      end as lsn;
-
- metric_name: compute_receive_lsn
-  type: gauge
-  help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
-  key_labels:
-  values: [lsn]
-  query: |
-    SELECT
-      CASE
-        WHEN pg_catalog.pg_is_in_recovery()
-        THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
-        ELSE 0
-      END AS lsn;
-
- metric_name: replication_delay_bytes
-  type: gauge
-  help: 'Bytes between received and replayed LSN'
-  key_labels:
-  values: [replication_delay_bytes]
-  # We use a GREATEST call here because this calculation can be negative.
-  # The calculation is not atomic, meaning after we've gotten the receive
-  # LSN, the replay LSN may have advanced past the receive LSN we
-  # are using for the calculation.
-  query: |
-    SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
-
- metric_name: replication_delay_seconds
-  type: gauge
-  help: 'Time since last LSN was replayed'
-  key_labels:
-  values: [replication_delay_seconds]
-  query: |
-    SELECT
-      CASE
-        WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
-        ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
-     END AS replication_delay_seconds;
-
- metric_name: checkpoints_req
-  type: gauge
-  help: 'Number of requested checkpoints'
-  key_labels:
-  values: [checkpoints_req]
-  query: |
-    SELECT checkpoints_req FROM pg_stat_bgwriter;
-
- metric_name: checkpoints_timed
-  type: gauge
-  help: 'Number of scheduled checkpoints'
-  key_labels:
-  values: [checkpoints_timed]
-  query: |
-    SELECT checkpoints_timed FROM pg_stat_bgwriter;
-
- metric_name: compute_logical_snapshot_files
-  type: gauge
-  help: 'Number of snapshot files in pg_logical/snapshot'
-  key_labels:
-    - timeline_id
-  values: [num_logical_snapshot_files]
-  query: |
-    SELECT
-      (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
-      -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
-      -- temporary snapshot files are renamed to the actual snapshot files after they are
-      -- completely built. We only WAL-log the completely built snapshot files.
-      (SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
-
-# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
-# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
-
-# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
- metric_name: logical_slot_restart_lsn
-  type: gauge
-  help: 'restart_lsn of logical slots'
-  key_labels:
-    - slot_name
-  values: [restart_lsn]
-  query: |
-    select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
-    from pg_replication_slots
-    where slot_type = 'logical';
-
- metric_name: compute_subscriptions_count
-  type: gauge
-  help: 'Number of logical replication subscriptions grouped by enabled/disabled'
-  key_labels:
-    - enabled
-  values: [subscriptions_count]
-  query: |
-    select subenabled::text as enabled, count(*) as subscriptions_count
-    from pg_subscription
-    group by subenabled;
-
- metric_name: retained_wal
-  type: gauge
-  help: 'Retained WAL in inactive replication slots'
-  key_labels:
-    - slot_name
-  values: [retained_wal]
-  query: |
-    SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
-    FROM pg_replication_slots
-    WHERE active = false;
-
- metric_name: wal_is_lost
-  type: gauge
-  help: 'Whether or not the replication slot wal_status is lost'
-  key_labels:
-    - slot_name
-  values: [wal_is_lost]
-  query: |
-    SELECT slot_name,
-           CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
-    FROM pg_replication_slots;
-
-queries:
-  - query_name: neon_perf_counters
-    query: |
-      WITH c AS (
-        SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters
-      )
-      SELECT d.*
-      FROM pg_catalog.jsonb_to_record((select jb from c)) as d(
-          getpage_wait_seconds_count numeric,
-          getpage_wait_seconds_sum numeric,
-          getpage_prefetch_requests_total numeric,
-          getpage_sync_requests_total numeric,
-          getpage_prefetch_misses_total numeric,
-          getpage_prefetch_discards_total numeric,
-          pageserver_requests_sent_total numeric,
-          pageserver_disconnects_total numeric,
-          pageserver_send_flushes_total numeric
-      );
-
-  - query_name: getpage_wait_seconds_buckets
-    query: |
-      SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';
--- a/compute/etc/neon_collector_autoscaling.jsonnet
+++ b/compute/etc/neon_collector_autoscaling.jsonnet
@@ -0,0 +1,11 @@
+{
+  collector_name: 'neon_collector_autoscaling',
+  metrics: [
+    import 'sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.libsonnet',
+    import 'sql_exporter/lfc_cache_size_limit.libsonnet',
+    import 'sql_exporter/lfc_hits.libsonnet',
+    import 'sql_exporter/lfc_misses.libsonnet',
+    import 'sql_exporter/lfc_used.libsonnet',
+    import 'sql_exporter/lfc_writes.libsonnet',
+  ],
+}
--- a/compute/etc/neon_collector_autoscaling.yml
+++ b/compute/etc/neon_collector_autoscaling.yml
@@ -1,55 +0,0 @@
-collector_name: neon_collector_autoscaling
-metrics:
- metric_name: lfc_misses
-  type: gauge
-  help: 'lfc_misses'
-  key_labels:
-  values: [lfc_misses]
-  query: |
-    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
-
- metric_name: lfc_used
-  type: gauge
-  help: 'LFC chunks used (chunk = 1MB)'
-  key_labels:
-  values: [lfc_used]
-  query: |
-    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
-
- metric_name: lfc_hits
-  type: gauge
-  help: 'lfc_hits'
-  key_labels:
-  values: [lfc_hits]
-  query: |
-    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
-
- metric_name: lfc_writes
-  type: gauge
-  help: 'lfc_writes'
-  key_labels:
-  values: [lfc_writes]
-  query: |
-    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
-
- metric_name: lfc_cache_size_limit
-  type: gauge
-  help: 'LFC cache size limit in bytes'
-  key_labels:
-  values: [lfc_cache_size_limit]
-  query: |
-    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
-
- metric_name: lfc_approximate_working_set_size_windows
-  type: gauge
-  help: 'Approximate working set size in pages of 8192 bytes'
-  key_labels: [duration_seconds]
-  values: [size]
-  # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
-  # size looking back 1..60 minutes, labeled with the number of minutes.
-  query: |
-    select
-      x::text as duration_seconds,
-      neon.approximate_working_set_size_seconds(x) as size
-    from
-      (select generate_series * 60 as x from generate_series(1, 60)) as t (x);
--- a/compute/etc/sql_exporter.jsonnet
+++ b/compute/etc/sql_exporter.jsonnet
@@ -0,0 +1,40 @@
+function(collector_file, application_name='sql_exporter', collector_name='neon_collector') {
+  // Configuration for sql_exporter. collector_file, application_name and collector_name are top-level arguments.
+  // Global defaults.
+  global: {
+    // If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
+    scrape_timeout: '10s',
+    // Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
+    scrape_timeout_offset: '500ms',
+    // Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
+    min_interval: '0s',
+    // Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
+    // as will concurrent scrapes.
+    max_connections: 1,
+    // Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
+    // always be the same as max_connections.
+    max_idle_connections: 1,
+    // Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
+    // If 0, connections are not closed due to a connection's age.
+    max_connection_lifetime: '5m',
+  },
+
+  // The target to monitor and the collectors to execute on it.
+  target: {
+    // Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
+    // the schema gets dropped or replaced to match the driver expected DSN format.
+    data_source_name: std.format('postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=%s', [application_name]),
+
+    // Collectors (referenced by name) to execute on the target.
+    // Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
+    collectors: [
+      collector_name,  // defaults to 'neon_collector'; pass the collector_name defined in collector_file
+    ],
+  },
+
+  // Collector files specifies a list of globs. One collector definition is read from each matching file.
+  // Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
+  collector_files: [
+    collector_file,
+  ],
+}
--- a/compute/etc/sql_exporter.yml
+++ b/compute/etc/sql_exporter.yml
@@ -1,33 +0,0 @@
-# Configuration for sql_exporter
-# Global defaults.
-global:
-  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
-  scrape_timeout: 10s
-  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
-  scrape_timeout_offset: 500ms
-  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
-  min_interval: 0s
-  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
-  # as will concurrent scrapes.
-  max_connections: 1
-  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
-  # always be the same as max_connections.
-  max_idle_connections: 1
-  # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
-  # If 0, connections are not closed due to a connection's age.
-  max_connection_lifetime: 5m
-
-# The target to monitor and the collectors to execute on it.
-target:
-  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
-  # the schema gets dropped or replaced to match the driver expected DSN format.
-  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
-
-  # Collectors (referenced by name) to execute on the target.
-  # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-  collectors: [neon_collector]
-
-# Collector files specifies a list of globs. One collector definition is read from each matching file.
-# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-collector_files:
-  - "neon_collector.yml"
--- a/compute/etc/sql_exporter/checkpoints_req.17.sql
+++ b/compute/etc/sql_exporter/checkpoints_req.17.sql
@@ -0,0 +1 @@
+SELECT num_requested AS checkpoints_req FROM pg_stat_checkpointer;
--- a/compute/etc/sql_exporter/checkpoints_req.libsonnet
+++ b/compute/etc/sql_exporter/checkpoints_req.libsonnet
@@ -0,0 +1,15 @@
+local neon = import 'neon.libsonnet';
+
+local pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_req.sql';  // reads checkpoints_req from pg_stat_bgwriter
+local pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_req.17.sql';  // reads num_requested from pg_stat_checkpointer
+
+{
+  metric_name: 'checkpoints_req',
+  type: 'gauge',
+  help: 'Number of requested checkpoints',
+  key_labels: null,
+  values: [
+    'checkpoints_req',  // both queries alias their result column to this name
+  ],
+  query: if neon.PG_MAJORVERSION_NUM < 17 then pg_stat_bgwriter else pg_stat_checkpointer,
+}
--- a/compute/etc/sql_exporter/checkpoints_req.sql
+++ b/compute/etc/sql_exporter/checkpoints_req.sql
@@ -0,0 +1 @@
+SELECT checkpoints_req FROM pg_stat_bgwriter;
--- a/compute/etc/sql_exporter/checkpoints_timed.17.sql
+++ b/compute/etc/sql_exporter/checkpoints_timed.17.sql
@@ -0,0 +1 @@
+SELECT num_timed AS checkpoints_timed FROM pg_stat_checkpointer;
--- a/compute/etc/sql_exporter/checkpoints_timed.libsonnet
+++ b/compute/etc/sql_exporter/checkpoints_timed.libsonnet
@@ -0,0 +1,15 @@
+local neon = import 'neon.libsonnet';
+
+local pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_timed.sql';  // reads checkpoints_timed from pg_stat_bgwriter
+local pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_timed.17.sql';  // reads num_timed from pg_stat_checkpointer
+
+{
+  metric_name: 'checkpoints_timed',
+  type: 'gauge',
+  help: 'Number of scheduled checkpoints',
+  key_labels: null,
+  values: [
+    'checkpoints_timed',  // both queries alias their result column to this name
+  ],
+  query: if neon.PG_MAJORVERSION_NUM < 17 then pg_stat_bgwriter else pg_stat_checkpointer,
+}
--- a/compute/etc/sql_exporter/checkpoints_timed.sql
+++ b/compute/etc/sql_exporter/checkpoints_timed.sql
@@ -0,0 +1 @@
+SELECT checkpoints_timed FROM pg_stat_bgwriter;
--- a/compute/etc/sql_exporter/compute_current_lsn.libsonnet
+++ b/compute/etc/sql_exporter/compute_current_lsn.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'compute_current_lsn',
+  type: 'gauge',
+  help: 'Current LSN of the database',
+  key_labels: null,
+  values: [
+    'lsn',
+  ],
+  query: importstr 'sql_exporter/compute_current_lsn.sql',
+}
--- a/compute/etc/sql_exporter/compute_current_lsn.sql
+++ b/compute/etc/sql_exporter/compute_current_lsn.sql
@@ -0,0 +1,4 @@
+SELECT CASE
+  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
+  ELSE (pg_current_wal_lsn() - '0/0')::FLOAT8
+END AS lsn;
--- a/compute/etc/sql_exporter/compute_logical_snapshot_files.libsonnet
+++ b/compute/etc/sql_exporter/compute_logical_snapshot_files.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'compute_logical_snapshot_files',
+  type: 'gauge',
+  help: 'Number of snapshot files in pg_logical/snapshots',
+  key_labels: [
+    'timeline_id',  // value of the neon.timeline_id setting
+  ],
+  values: [
+    'num_logical_snapshot_files',  // counts entries ending in .snap; temporary .tmp files are excluded
+  ],
+  query: importstr 'sql_exporter/compute_logical_snapshot_files.sql',
+}
--- a/compute/etc/sql_exporter/compute_logical_snapshot_files.sql
+++ b/compute/etc/sql_exporter/compute_logical_snapshot_files.sql
@@ -0,0 +1,7 @@
+SELECT
+  (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
+  -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
+  -- These temporary snapshot files are renamed to the actual snapshot files
+  -- after they are completely built. We only WAL-log the completely built
+  -- snapshot files
+  (SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
--- a/compute/etc/sql_exporter/compute_receive_lsn.libsonnet
+++ b/compute/etc/sql_exporter/compute_receive_lsn.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'compute_receive_lsn',
+  type: 'gauge',
+  help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication',
+  key_labels: null,
+  values: [
+    'lsn',
+  ],
+  query: importstr 'sql_exporter/compute_receive_lsn.sql',
+}
--- a/compute/etc/sql_exporter/compute_receive_lsn.sql
+++ b/compute/etc/sql_exporter/compute_receive_lsn.sql
@@ -0,0 +1,4 @@
+SELECT CASE
+  WHEN pg_catalog.pg_is_in_recovery() THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
+  ELSE 0
+END AS lsn;
--- a/compute/etc/sql_exporter/compute_subscriptions_count.libsonnet
+++ b/compute/etc/sql_exporter/compute_subscriptions_count.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'compute_subscriptions_count',
+  type: 'gauge',
+  help: 'Number of logical replication subscriptions grouped by enabled/disabled',
+  key_labels: [
+    'enabled',
+  ],
+  values: [
+    'subscriptions_count',
+  ],
+  query: importstr 'sql_exporter/compute_subscriptions_count.sql',
+}
--- a/compute/etc/sql_exporter/compute_subscriptions_count.sql
+++ b/compute/etc/sql_exporter/compute_subscriptions_count.sql
@@ -0,0 +1 @@
+SELECT subenabled::text AS enabled, count(*) AS subscriptions_count FROM pg_subscription GROUP BY subenabled;
--- a/compute/etc/sql_exporter/connection_counts.libsonnet
+++ b/compute/etc/sql_exporter/connection_counts.libsonnet
@@ -0,0 +1,13 @@
+{
+  metric_name: 'connection_counts',
+  type: 'gauge',
+  help: 'Connection counts',
+  key_labels: [
+    'datname',
+    'state',
+  ],
+  values: [
+    'count',
+  ],
+  query: importstr 'sql_exporter/connection_counts.sql',
+}
--- a/compute/etc/sql_exporter/connection_counts.sql
+++ b/compute/etc/sql_exporter/connection_counts.sql
@@ -0,0 +1 @@
+SELECT datname, state, count(*) AS count FROM pg_stat_activity WHERE state <> '' GROUP BY datname, state;
--- a/compute/etc/sql_exporter/db_total_size.libsonnet
+++ b/compute/etc/sql_exporter/db_total_size.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'db_total_size',
+  type: 'gauge',
+  help: 'Size of all databases',
+  key_labels: null,
+  values: [
+    'total',
+  ],
+  query: importstr 'sql_exporter/db_total_size.sql',
+}
--- a/compute/etc/sql_exporter/db_total_size.sql
+++ b/compute/etc/sql_exporter/db_total_size.sql
@@ -0,0 +1 @@
+SELECT sum(pg_database_size(datname)) AS total FROM pg_database;
--- a/compute/etc/sql_exporter/file_cache_read_wait_seconds_bucket.libsonnet
+++ b/compute/etc/sql_exporter/file_cache_read_wait_seconds_bucket.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'file_cache_read_wait_seconds_bucket',
+  type: 'counter',
+  help: 'Histogram buckets of LFC read operation latencies',
+  key_labels: [
+    'bucket_le',
+  ],
+  values: [
+    'value',
+  ],
+  query: importstr 'sql_exporter/file_cache_read_wait_seconds_bucket.sql',
+}
--- a/compute/etc/sql_exporter/file_cache_read_wait_seconds_bucket.sql
+++ b/compute/etc/sql_exporter/file_cache_read_wait_seconds_bucket.sql
@@ -0,0 +1 @@
+SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_read_wait_seconds_bucket';
--- a/compute/etc/sql_exporter/file_cache_read_wait_seconds_count.libsonnet
+++ b/compute/etc/sql_exporter/file_cache_read_wait_seconds_count.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'file_cache_read_wait_seconds_count',
+  type: 'counter',
+  help: 'Number of read operations in LFC',
+  values: [
+    'file_cache_read_wait_seconds_count',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/file_cache_read_wait_seconds_sum.libsonnet
+++ b/compute/etc/sql_exporter/file_cache_read_wait_seconds_sum.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'file_cache_read_wait_seconds_sum',
+  type: 'counter',
+  help: 'Time spent in LFC read operations',
+  values: [
+    'file_cache_read_wait_seconds_sum',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/file_cache_write_wait_seconds_bucket.libsonnet
+++ b/compute/etc/sql_exporter/file_cache_write_wait_seconds_bucket.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'file_cache_write_wait_seconds_bucket',
+  type: 'counter',
+  help: 'Histogram buckets of LFC write operation latencies',
+  key_labels: [
+    'bucket_le',
+  ],
+  values: [
+    'value',
+  ],
+  query: importstr 'sql_exporter/file_cache_write_wait_seconds_bucket.sql',
+}
--- a/compute/etc/sql_exporter/file_cache_write_wait_seconds_bucket.sql
+++ b/compute/etc/sql_exporter/file_cache_write_wait_seconds_bucket.sql
@@ -0,0 +1 @@
+SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_write_wait_seconds_bucket';
--- a/compute/etc/sql_exporter/file_cache_write_wait_seconds_count.libsonnet
+++ b/compute/etc/sql_exporter/file_cache_write_wait_seconds_count.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'file_cache_write_wait_seconds_count',
+  type: 'counter',
+  help: 'Number of write operations in LFC',
+  values: [
+    'file_cache_write_wait_seconds_count',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/file_cache_write_wait_seconds_sum.libsonnet
+++ b/compute/etc/sql_exporter/file_cache_write_wait_seconds_sum.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'file_cache_write_wait_seconds_sum',
+  type: 'counter',
+  help: 'Time spent in LFC write operations',
+  values: [
+    'file_cache_write_wait_seconds_sum',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_prefetch_discards_total.libsonnet
+++ b/compute/etc/sql_exporter/getpage_prefetch_discards_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_prefetch_discards_total',
+  type: 'counter',
+  help: 'Number of prefetch responses issued but not used',
+  values: [
+    'getpage_prefetch_discards_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_prefetch_misses_total.libsonnet
+++ b/compute/etc/sql_exporter/getpage_prefetch_misses_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_prefetch_misses_total',
+  type: 'counter',
+  help: "Total number of readahead misses; consisting of either prefetches that don't satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read",
+  values: [
+    'getpage_prefetch_misses_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_prefetch_requests_total.libsonnet
+++ b/compute/etc/sql_exporter/getpage_prefetch_requests_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_prefetch_requests_total',
+  type: 'counter',
+  help: 'Number of getpage issued for prefetching',
+  values: [
+    'getpage_prefetch_requests_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_prefetches_buffered.libsonnet
+++ b/compute/etc/sql_exporter/getpage_prefetches_buffered.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_prefetches_buffered',
+  type: 'gauge',
+  help: 'Number of prefetched pages buffered in neon',
+  values: [
+    'getpage_prefetches_buffered',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_sync_requests_total.libsonnet
+++ b/compute/etc/sql_exporter/getpage_sync_requests_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_sync_requests_total',
+  type: 'counter',
+  help: 'Number of synchronous getpage issued',
+  values: [
+    'getpage_sync_requests_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_wait_seconds_bucket.libsonnet
+++ b/compute/etc/sql_exporter/getpage_wait_seconds_bucket.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'getpage_wait_seconds_bucket',
+  type: 'counter',
+  help: 'Histogram buckets of getpage request latency',
+  key_labels: [
+    'bucket_le',
+  ],
+  values: [
+    'value',
+  ],
+  query: importstr 'sql_exporter/getpage_wait_seconds_bucket.sql',
+}
--- a/compute/etc/sql_exporter/getpage_wait_seconds_bucket.sql
+++ b/compute/etc/sql_exporter/getpage_wait_seconds_bucket.sql
@@ -0,0 +1 @@
+SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';
--- a/compute/etc/sql_exporter/getpage_wait_seconds_count.libsonnet
+++ b/compute/etc/sql_exporter/getpage_wait_seconds_count.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_wait_seconds_count',
+  type: 'counter',
+  help: 'Number of getpage requests',
+  values: [
+    'getpage_wait_seconds_count',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/getpage_wait_seconds_sum.libsonnet
+++ b/compute/etc/sql_exporter/getpage_wait_seconds_sum.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'getpage_wait_seconds_sum',
+  type: 'counter',
+  help: 'Time spent in getpage requests',
+  values: [
+    'getpage_wait_seconds_sum',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size.libsonnet
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size.libsonnet
@@ -0,0 +1,12 @@
+// DEPRECATED
+
+{
+  metric_name: 'lfc_approximate_working_set_size',
+  type: 'gauge',
+  help: 'Approximate working set size in pages of 8192 bytes',
+  key_labels: null,
+  values: [
+    'approximate_working_set_size',
+  ],
+  query: importstr 'sql_exporter/lfc_approximate_working_set_size.sql',
+}
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size.sql
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size.sql
@@ -0,0 +1 @@
+SELECT neon.approximate_working_set_size(false) AS approximate_working_set_size;
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.libsonnet
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'lfc_approximate_working_set_size_windows',
+  type: 'gauge',
+  help: 'Approximate working set size in pages of 8192 bytes',
+  key_labels: [
+    'duration_seconds',
+  ],
+  values: [
+    'size',
+  ],
+  query: importstr 'sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql',
+}
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql
@@ -0,0 +1,8 @@
+-- NOTE: This is the "internal" / "machine-readable" version. This outputs the
+-- working set size looking back 1..60 minutes, one row per minute, labeled
+-- with the window length in seconds (60, 120, ..., 3600).
+
+SELECT
+  x::text as duration_seconds,
+  neon.approximate_working_set_size_seconds(x) AS size
+FROM (SELECT generate_series * 60 AS x FROM generate_series(1, 60)) AS t (x);
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.libsonnet
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'lfc_approximate_working_set_size_windows',
+  type: 'gauge',
+  help: 'Approximate working set size in pages of 8192 bytes',
+  key_labels: [
+    'duration',
+  ],
+  values: [
+    'size',
+  ],
+  query: importstr 'sql_exporter/lfc_approximate_working_set_size_windows.sql',
+}
--- a/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.sql
+++ b/compute/etc/sql_exporter/lfc_approximate_working_set_size_windows.sql
@@ -0,0 +1,8 @@
+-- NOTE: This is the "public" / "human-readable" version. Here, we supply a
+-- small selection of durations in a pretty-printed form.
+
+SELECT
+  x AS duration,
+  neon.approximate_working_set_size_seconds(extract('epoch' FROM x::interval)::int) AS size FROM (
+    VALUES ('5m'), ('15m'), ('1h')
+  ) AS t (x);
--- a/compute/etc/sql_exporter/lfc_cache_size_limit.libsonnet
+++ b/compute/etc/sql_exporter/lfc_cache_size_limit.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'lfc_cache_size_limit',
+  type: 'gauge',
+  help: 'LFC cache size limit in bytes',
+  key_labels: null,
+  values: [
+    'lfc_cache_size_limit',
+  ],
+  query: importstr 'sql_exporter/lfc_cache_size_limit.sql',
+}
--- a/compute/etc/sql_exporter/lfc_cache_size_limit.sql
+++ b/compute/etc/sql_exporter/lfc_cache_size_limit.sql
@@ -0,0 +1 @@
+SELECT pg_size_bytes(current_setting('neon.file_cache_size_limit')) AS lfc_cache_size_limit;
--- a/compute/etc/sql_exporter/lfc_hits.libsonnet
+++ b/compute/etc/sql_exporter/lfc_hits.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'lfc_hits',
+  type: 'gauge',
+  help: 'lfc_hits',
+  key_labels: null,
+  values: [
+    'lfc_hits',
+  ],
+  query: importstr 'sql_exporter/lfc_hits.sql',
+}
--- a/compute/etc/sql_exporter/lfc_hits.sql
+++ b/compute/etc/sql_exporter/lfc_hits.sql
@@ -0,0 +1 @@
+SELECT lfc_value AS lfc_hits FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_hits';
--- a/compute/etc/sql_exporter/lfc_misses.libsonnet
+++ b/compute/etc/sql_exporter/lfc_misses.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'lfc_misses',
+  type: 'gauge',
+  help: 'lfc_misses',
+  key_labels: null,
+  values: [
+    'lfc_misses',
+  ],
+  query: importstr 'sql_exporter/lfc_misses.sql',
+}
--- a/compute/etc/sql_exporter/lfc_misses.sql
+++ b/compute/etc/sql_exporter/lfc_misses.sql
@@ -0,0 +1 @@
+SELECT lfc_value AS lfc_misses FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_misses';
--- a/compute/etc/sql_exporter/lfc_used.libsonnet
+++ b/compute/etc/sql_exporter/lfc_used.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'lfc_used',
+  type: 'gauge',
+  help: 'LFC chunks used (chunk = 1MB)',
+  key_labels: null,
+  values: [
+    'lfc_used',
+  ],
+  query: importstr 'sql_exporter/lfc_used.sql',
+}
--- a/compute/etc/sql_exporter/lfc_used.sql
+++ b/compute/etc/sql_exporter/lfc_used.sql
@@ -0,0 +1 @@
+SELECT lfc_value AS lfc_used FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_used';
--- a/compute/etc/sql_exporter/lfc_writes.libsonnet
+++ b/compute/etc/sql_exporter/lfc_writes.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'lfc_writes',
+  type: 'gauge',
+  help: 'lfc_writes',
+  key_labels: null,
+  values: [
+    'lfc_writes',
+  ],
+  query: importstr 'sql_exporter/lfc_writes.sql',
+}
--- a/compute/etc/sql_exporter/lfc_writes.sql
+++ b/compute/etc/sql_exporter/lfc_writes.sql
@@ -0,0 +1 @@
+SELECT lfc_value AS lfc_writes FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_writes';
--- a/compute/etc/sql_exporter/logical_slot_restart_lsn.libsonnet
+++ b/compute/etc/sql_exporter/logical_slot_restart_lsn.libsonnet
@@ -0,0 +1,15 @@
+// Number of slots is limited by max_replication_slots, so collecting position
+// for all of them shouldn't be bad.
+
+{
+  metric_name: 'logical_slot_restart_lsn',
+  type: 'gauge',
+  help: 'restart_lsn of logical slots',
+  key_labels: [
+    'slot_name',
+  ],
+  values: [
+    'restart_lsn',
+  ],
+  query: importstr 'sql_exporter/logical_slot_restart_lsn.sql',
+}
--- a/compute/etc/sql_exporter/logical_slot_restart_lsn.sql
+++ b/compute/etc/sql_exporter/logical_slot_restart_lsn.sql
@@ -0,0 +1,3 @@
+SELECT slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
+FROM pg_replication_slots
+WHERE slot_type = 'logical';
--- a/compute/etc/sql_exporter/max_cluster_size.libsonnet
+++ b/compute/etc/sql_exporter/max_cluster_size.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'max_cluster_size',
+  type: 'gauge',
+  help: 'neon.max_cluster_size setting',
+  key_labels: null,
+  values: [
+    'max_cluster_size',
+  ],
+  query: importstr 'sql_exporter/max_cluster_size.sql',
+}
--- a/compute/etc/sql_exporter/max_cluster_size.sql
+++ b/compute/etc/sql_exporter/max_cluster_size.sql
@@ -0,0 +1 @@
+SELECT setting::int AS max_cluster_size FROM pg_settings WHERE name = 'neon.max_cluster_size';
--- a/compute/etc/sql_exporter/neon_perf_counters.sql
+++ b/compute/etc/sql_exporter/neon_perf_counters.sql
@@ -0,0 +1,19 @@
+WITH c AS (SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters)
+
+SELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(
+  file_cache_read_wait_seconds_count numeric,
+  file_cache_read_wait_seconds_sum numeric,
+  file_cache_write_wait_seconds_count numeric,
+  file_cache_write_wait_seconds_sum numeric,
+  getpage_wait_seconds_count numeric,
+  getpage_wait_seconds_sum numeric,
+  getpage_prefetch_requests_total numeric,
+  getpage_sync_requests_total numeric,
+  getpage_prefetch_misses_total numeric,
+  getpage_prefetch_discards_total numeric,
+  getpage_prefetches_buffered numeric,
+  pageserver_requests_sent_total numeric,
+  pageserver_disconnects_total numeric,
+  pageserver_send_flushes_total numeric,
+  pageserver_open_requests numeric
+);
--- a/compute/etc/sql_exporter/pageserver_disconnects_total.libsonnet
+++ b/compute/etc/sql_exporter/pageserver_disconnects_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'pageserver_disconnects_total',
+  type: 'counter',
+  help: 'Number of times that the connection to the pageserver was lost',
+  values: [
+    'pageserver_disconnects_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/pageserver_open_requests.libsonnet
+++ b/compute/etc/sql_exporter/pageserver_open_requests.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'pageserver_open_requests',
+  type: 'gauge',
+  help: 'Number of open requests to PageServer',
+  values: [
+    'pageserver_open_requests',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/pageserver_requests_sent_total.libsonnet
+++ b/compute/etc/sql_exporter/pageserver_requests_sent_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'pageserver_requests_sent_total',
+  type: 'counter',
+  help: 'Number of all requests sent to the pageserver (not just GetPage requests)',
+  values: [
+    'pageserver_requests_sent_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/pageserver_send_flushes_total.libsonnet
+++ b/compute/etc/sql_exporter/pageserver_send_flushes_total.libsonnet
@@ -0,0 +1,9 @@
+{
+  metric_name: 'pageserver_send_flushes_total',
+  type: 'counter',
+  help: 'Number of flushes to the pageserver connection',
+  values: [
+    'pageserver_send_flushes_total',
+  ],
+  query_ref: 'neon_perf_counters',
+}
--- a/compute/etc/sql_exporter/pg_stats_userdb.libsonnet
+++ b/compute/etc/sql_exporter/pg_stats_userdb.libsonnet
@@ -0,0 +1,18 @@
+{
+  metric_name: 'pg_stats_userdb',
+  type: 'gauge',
+  help: 'Stats for several oldest non-system dbs',
+  key_labels: [
+    'datname',
+  ],
+  value_label: 'kind',
+  values: [
+    'db_size',
+    'deadlocks',
+    // Rows
+    'inserted',
+    'updated',
+    'deleted',
+  ],
+  query: importstr 'sql_exporter/pg_stats_userdb.sql',
+}
--- a/compute/etc/sql_exporter/pg_stats_userdb.sql
+++ b/compute/etc/sql_exporter/pg_stats_userdb.sql
@@ -0,0 +1,10 @@
+-- We export stats for 10 non-system databases. Without this limit it is too
+-- easy to abuse the system by creating lots of databases.
+
+SELECT pg_database_size(datname) AS db_size, deadlocks, tup_inserted AS inserted,
+  tup_updated AS updated, tup_deleted AS deleted, datname
+FROM pg_stat_database
+WHERE datname IN (
+  SELECT datname FROM pg_database
+  WHERE datname <> 'postgres' AND NOT datistemplate ORDER BY oid LIMIT 10
+);
--- a/compute/etc/sql_exporter/replication_delay_bytes.libsonnet
+++ b/compute/etc/sql_exporter/replication_delay_bytes.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'replication_delay_bytes',
+  type: 'gauge',
+  help: 'Bytes between received and replayed LSN',
+  key_labels: null,
+  values: [
+    'replication_delay_bytes',
+  ],
+  query: importstr 'sql_exporter/replication_delay_bytes.sql',
+}
--- a/compute/etc/sql_exporter/replication_delay_bytes.sql
+++ b/compute/etc/sql_exporter/replication_delay_bytes.sql
@@ -0,0 +1,6 @@
+-- We use a GREATEST call here because this calculation can be negative. The
+-- calculation is not atomic, meaning after we've gotten the receive LSN, the
+-- replay LSN may have advanced past the receive LSN we are using for the
+-- calculation.
+
+SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
--- a/compute/etc/sql_exporter/replication_delay_seconds.libsonnet
+++ b/compute/etc/sql_exporter/replication_delay_seconds.libsonnet
@@ -0,0 +1,10 @@
+{
+  metric_name: 'replication_delay_seconds',
+  type: 'gauge',
+  help: 'Time since last LSN was replayed',
+  key_labels: null,
+  values: [
+    'replication_delay_seconds',
+  ],
+  query: importstr 'sql_exporter/replication_delay_seconds.sql',
+}
--- a/compute/etc/sql_exporter/replication_delay_seconds.sql
+++ b/compute/etc/sql_exporter/replication_delay_seconds.sql
@@ -0,0 +1,5 @@
+SELECT
+  CASE
+    WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
+    ELSE GREATEST(0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
+  END AS replication_delay_seconds;
--- a/compute/etc/sql_exporter/retained_wal.libsonnet
+++ b/compute/etc/sql_exporter/retained_wal.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'retained_wal',
+  type: 'gauge',
+  help: 'Retained WAL in inactive replication slots',
+  key_labels: [
+    'slot_name',
+  ],
+  values: [
+    'retained_wal',
+  ],
+  query: importstr 'sql_exporter/retained_wal.sql',
+}
--- a/compute/etc/sql_exporter/retained_wal.sql
+++ b/compute/etc/sql_exporter/retained_wal.sql
@@ -0,0 +1,5 @@
+SELECT
+  slot_name,
+  pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
+FROM pg_replication_slots
+WHERE active = false;
--- a/compute/etc/sql_exporter/wal_is_lost.libsonnet
+++ b/compute/etc/sql_exporter/wal_is_lost.libsonnet
@@ -0,0 +1,12 @@
+{
+  metric_name: 'wal_is_lost',
+  type: 'gauge',
+  help: 'Whether or not the replication slot wal_status is lost',
+  key_labels: [
+    'slot_name',
+  ],
+  values: [
+    'wal_is_lost',
+  ],
+  query: importstr 'sql_exporter/wal_is_lost.sql',
+}
--- a/compute/etc/sql_exporter/wal_is_lost.sql
+++ b/compute/etc/sql_exporter/wal_is_lost.sql
@@ -0,0 +1,7 @@
+SELECT
+  slot_name,
+  CASE
+    WHEN wal_status = 'lost' THEN 1
+    ELSE 0
+  END AS wal_is_lost
+FROM pg_replication_slots;
--- a/compute/etc/sql_exporter_autoscaling.yml
+++ b/compute/etc/sql_exporter_autoscaling.yml
@@ -1,33 +0,0 @@
-# Configuration for sql_exporter for autoscaling-agent
-# Global defaults.
-global:
-  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
-  scrape_timeout: 10s
-  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
-  scrape_timeout_offset: 500ms
-  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
-  min_interval: 0s
-  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
-  # as will concurrent scrapes.
-  max_connections: 1
-  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
-  # always be the same as max_connections.
-  max_idle_connections: 1
-  # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
-  # If 0, connections are not closed due to a connection's age.
-  max_connection_lifetime: 5m
-
-# The target to monitor and the collectors to execute on it.
-target:
-  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
-  # the schema gets dropped or replaced to match the driver expected DSN format.
-  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
-
-  # Collectors (referenced by name) to execute on the target.
-  # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-  collectors: [neon_collector_autoscaling]
-
-# Collector files specifies a list of globs. One collector definition is read from each matching file.
-# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-collector_files:
-  - "neon_collector_autoscaling.yml"
--- a/compute/jsonnet/neon.libsonnet
+++ b/compute/jsonnet/neon.libsonnet
@@ -0,0 +1,16 @@
+local MIN_SUPPORTED_VERSION = 14;
+local MAX_SUPPORTED_VERSION = 17;
+local SUPPORTED_VERSIONS = std.range(MIN_SUPPORTED_VERSION, MAX_SUPPORTED_VERSION);
+
+// Strip any "v" from pg_version so that both "v16" and "16" are accepted.
+local pg_version = std.strReplace(std.extVar('pg_version'), 'v', '');
+local pg_version_num = std.parseInt(pg_version);
+
+assert std.setMember(pg_version_num, SUPPORTED_VERSIONS) :
+       std.format('%s is an unsupported Postgres version: %s',
+                  [pg_version, std.toString(SUPPORTED_VERSIONS)]);
+
+{
+  PG_MAJORVERSION: pg_version,
+  PG_MAJORVERSION_NUM: pg_version_num,
+}
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -25,6 +25,7 @@ use tracing::{debug, error, info, instrument, warn};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

+use compute_api::privilege::Privilege;
 use compute_api::responses::{ComputeMetrics, ComputeStatus};
 use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec};
 use utils::measured_stream::MeasuredReader;
@@ -1367,6 +1368,96 @@ LIMIT 100",
        download_size
    }

+    /// Install `ext_name` at `ext_version` in database `db_name`, or update an
+    /// already installed copy to that version. Returns the requested version
+    /// (unquoted); connection and query failures are returned as errors.
+    pub async fn install_extension(
+        &self,
+        ext_name: &str,
+        db_name: &str,
+        ext_version: &str,
+    ) -> Result<String> {
+        use tokio_postgres::{config::Config, NoTls};
+
+        let mut conf = Config::from_str(self.connstr.as_str())
+            .context("Failed to parse the connection string")?;
+        conf.dbname(db_name);
+
+        let (db_client, conn) = conf
+            .connect(NoTls)
+            .await
+            .context("Failed to connect to the database")?;
+        tokio::spawn(conn);
+
+        let version_query = "SELECT extversion FROM pg_extension WHERE extname = $1";
+        let version: Option<String> = db_client
+            .query_opt(version_query, &[&ext_name])
+            .await
+            .with_context(|| format!("Failed to execute query: {}", version_query))?
+            .map(|row| row.get(0));
+
+        // Quote name and version for interpolation into DDL. The raw
+        // `ext_version` is kept for comparing with `pg_extension.extversion`
+        // (stored unquoted) and for the return value.
+        let quoted_name: String = ext_name.to_string().pg_quote();
+        let quoted_version: String = ext_version.to_string().pg_quote();
+
+        let query = match version {
+            // Already at the requested version: nothing to do.
+            Some(installed) if installed == ext_version => return Ok(installed),
+            Some(_) => format!("ALTER EXTENSION {quoted_name} UPDATE TO {quoted_version}"),
+            None => format!(
+                "CREATE EXTENSION IF NOT EXISTS {quoted_name} WITH VERSION {quoted_version}"
+            ),
+        };
+        db_client
+            .simple_query(&query)
+            .await
+            .with_context(|| format!("Failed to execute query: {}", query))?;
+
+        Ok(ext_version.to_string())
+    }
+
+    /// Run `GRANT <privileges> ON SCHEMA <schema_name> TO <role_name>` while
+    /// connected to database `db_name`.
+    pub async fn set_role_grants(
+        &self,
+        db_name: &str,
+        schema_name: &str,
+        privileges: &[Privilege],
+        role_name: &str,
+    ) -> Result<()> {
+        use tokio_postgres::{config::Config, NoTls};
+
+        let mut pg_config = Config::from_str(self.connstr.as_str()).unwrap();
+        pg_config.dbname(db_name);
+
+        let (client, connection) = pg_config
+            .connect(NoTls)
+            .await
+            .context("Failed to connect to the database")?;
+        tokio::spawn(connection);
+
+        // Privilege keywords come from a fixed enum, so they are spliced in
+        // verbatim; quoting them would turn them into identifiers.
+        let privilege_list = privileges
+            .iter()
+            .map(Privilege::as_str)
+            .collect::<Vec<_>>()
+            .join(", ");
+        // Schema and role are caller-supplied, so quote them as identifiers.
+        let schema = schema_name.to_string().pg_quote();
+        let role = role_name.to_string().pg_quote();
+
+        let query = format!("GRANT {privilege_list} ON SCHEMA {schema} TO {role}");
+        client
+            .simple_query(&query)
+            .await
+            .context(format!("Failed to execute query: {}", query))?;
+
+        Ok(())
+    }
+
    #[tokio::main]
    pub async fn prepare_preload_libraries(
        &self,
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -9,8 +9,13 @@ use crate::catalog::SchemaDumpError;
 use crate::catalog::{get_database_schema, get_dbs_and_roles};
 use crate::compute::forward_termination_signal;
 use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
-use compute_api::requests::ConfigurationRequest;
-use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
+use compute_api::requests::{
+    ExtensionInstallRequest, {ConfigurationRequest, SetRoleGrantsRequest},
+};
+use compute_api::responses::{
+    ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
+    SetRoleGrantsResponse,
+};

 use anyhow::Result;
 use hyper::header::CONTENT_TYPE;
@@ -98,6 +103,38 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

+        // Install or update an extension; only served while compute is Running.
+        (&Method::POST, "/extensions") => {
+            info!("serving /extensions POST request");
+            let status = compute.get_status();
+            if status != ComputeStatus::Running {
+                let msg = format!(
+                    "invalid compute status for extensions request: {:?}",
+                    status
+                );
+                error!(msg);
+                return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
+            }
+            let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
+            let request = serde_json::from_slice::<ExtensionInstallRequest>(&request).unwrap();
+            let res = compute
+                .install_extension(&request.extension, &request.database, &request.version)
+                .await;
+            match res {
+                Ok(version) => render_json(Body::from(
+                    serde_json::to_string(&ExtensionInstallResult {
+                        extension: request.extension,
+                        version,
+                    })
+                    .unwrap(),
+                )),
+                Err(e) => {
+                    error!("install_extension failed: {}", e);
+                    render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR)
+                }
+            }
+        }
+
        (&Method::GET, "/info") => {
            let num_cpus = num_cpus::get_physical();
            info!("serving /info GET request. num_cpus: {}", num_cpus);
@@ -165,6 +202,46 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

+        // Grant schema privileges to a role; only served while compute is Running.
+        (&Method::POST, "/grants") => {
+            info!("serving /grants POST request");
+            let status = compute.get_status();
+            if status != ComputeStatus::Running {
+                let msg = format!(
+                    "invalid compute status for set_role_grants request: {:?}",
+                    status
+                );
+                error!(msg);
+                return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
+            }
+
+            let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
+            let request = serde_json::from_slice::<SetRoleGrantsRequest>(&request).unwrap();
+            let res = compute
+                .set_role_grants(
+                    &request.database,
+                    &request.schema,
+                    &request.privileges,
+                    &request.role,
+                )
+                .await;
+            match res {
+                Ok(()) => render_json(Body::from(
+                    serde_json::to_string(&SetRoleGrantsResponse {
+                        database: request.database,
+                        schema: request.schema,
+                        role: request.role,
+                        privileges: request.privileges,
+                    })
+                    .unwrap(),
+                )),
+                Err(e) => {
+                    error!("set_role_grants failed: {}", e);
+                    render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR)
+                }
+            }
+        }
+
        // get the list of installed extensions
        // currently only used in python tests
        // TODO: call it from cplane
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -10,7 +10,7 @@ paths:
  /status:
    get:
      tags:
-      - Info
+        - Info
      summary: Get compute node internal status.
      description: ""
      operationId: getComputeStatus
@@ -25,7 +25,7 @@ paths:
  /metrics.json:
    get:
      tags:
-      - Info
+        - Info
      summary: Get compute node startup metrics in JSON format.
      description: ""
      operationId: getComputeMetricsJSON
@@ -40,7 +40,7 @@ paths:
  /insights:
    get:
      tags:
-      - Info
+        - Info
      summary: Get current compute insights in JSON format.
      description: |
        Note, that this doesn't include any historical data.
@@ -56,7 +56,7 @@ paths:
  /installed_extensions:
    get:
      tags:
-      - Info
+        - Info
      summary: Get installed extensions.
      description: ""
      operationId: getInstalledExtensions
@@ -70,7 +70,7 @@ paths:
  /info:
    get:
      tags:
-      - Info
+        - Info
      summary: Get info about the compute pod / VM.
      description: ""
      operationId: getInfo
@@ -127,10 +127,38 @@ paths:
              schema:
                $ref: "#/components/schemas/GenericError"

+  /grants:
+    post:
+      tags:
+        - Grants
+      summary: Apply grants to the database.
+      description: Grants the listed privileges on a schema to a role in the given database.
+      operationId: setRoleGrants
+      requestBody:
+        description: Grants request.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/SetRoleGrantsRequest"
+      responses:
+        200:
+          description: Grants applied.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/SetRoleGrantsResponse"
+        500:
+          description: Error occurred during grants application.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"
+
  /check_writability:
    post:
      tags:
-      - Check
+        - Check
      summary: Check that we can write new data on this compute.
      description: ""
      operationId: checkComputeWritability
@@ -144,10 +172,38 @@ paths:
                description: Error text or 'true' if check passed.
                example: "true"

+  /extensions:
+    post:
+      tags:
+        - Extensions
+      summary: Install extension if possible.
+      description: ""
+      operationId: installExtension
+      requestBody:
+        description: Extension name and database to install it to.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/ExtensionInstallRequest"
+      responses:
+        200:
+          description: Result from extension installation
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ExtensionInstallResult"
+        500:
+          description: Error during extension installation.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"
+
  /configure:
    post:
      tags:
-      - Configure
+        - Configure
      summary: Perform compute node configuration.
      description: |
        This is a blocking API endpoint, i.e. it blocks waiting until
@@ -201,7 +257,7 @@ paths:
  /extension_server:
    post:
      tags:
-      - Extension
+        - Extension
      summary: Download extension from S3 to local folder.
      description: ""
      operationId: downloadExtension
@@ -230,7 +286,7 @@ paths:
  /terminate:
    post:
      tags:
-      - Terminate
+        - Terminate
      summary: Terminate Postgres and wait for it to exit
      description: ""
      operationId: terminate
@@ -369,7 +425,7 @@ components:
            moment, when spec was received.
          example: "2022-10-12T07:20:50.52Z"
        status:
-          $ref: '#/components/schemas/ComputeStatus'
+          $ref: "#/components/schemas/ComputeStatus"
        last_active:
          type: string
          description: |
@@ -409,6 +465,38 @@ components:
        - configuration
      example: running

+    ExtensionInstallRequest:
+      type: object
+      required:
+        - extension
+        - database
+        - version
+      properties:
+        extension:
+          type: string
+          description: Extension name.
+          example: "pg_session_jwt"
+        version:
+          type: string
+          description: Version of the extension.
+          example: "1.0.0"
+        database:
+          type: string
+          description: Database name.
+          example: "neondb"
+
+    ExtensionInstallResult:
+      type: object
+      properties:
+        extension:
+          description: Name of the extension.
+          type: string
+          example: "pg_session_jwt"
+        version:
+          description: Version of the extension.
+          type: string
+          example: "1.0.0"
+
    InstalledExtensions:
      type: object
      properties:
@@ -427,6 +515,60 @@ components:
              n_databases:
                type: integer

+    SetRoleGrantsRequest:
+      type: object
+      required:
+        - database
+        - schema
+        - privileges
+        - role
+      properties:
+        database:
+          type: string
+          description: Database name.
+          example: "neondb"
+        schema:
+          type: string
+          description: Schema name.
+          example: "public"
+        privileges:
+          type: array
+          items:
+            type: string
+          description: List of privileges to set.
+          example: ["SELECT", "INSERT"]
+        role:
+          type: string
+          description: Role name.
+          example: "neon"
+
+    SetRoleGrantsResponse:
+      type: object
+      required:
+        - database
+        - schema
+        - privileges
+        - role
+      properties:
+        database:
+          type: string
+          description: Database name.
+          example: "neondb"
+        schema:
+          type: string
+          description: Schema name.
+          example: "public"
+        privileges:
+          type: array
+          items:
+            type: string
+          description: List of privileges set.
+          example: ["SELECT", "INSERT"]
+        role:
+          type: string
+          description: Role name.
+          example: "neon"
+
    #
    # Errors
    #
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -33,6 +33,7 @@ fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
 }

 /// Connect to every database (see list_dbs above) and get the list of installed extensions.
+///
 /// Same extension can be installed in multiple databases with different versions,
 /// we only keep the highest and lowest version across all databases.
 pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtensions> {
--- a/libs/compute_api/src/lib.rs
+++ b/libs/compute_api/src/lib.rs
@@ -1,5 +1,6 @@
 #![deny(unsafe_code)]
 #![deny(clippy::undocumented_unsafe_blocks)]
+pub mod privilege;
 pub mod requests;
 pub mod responses;
 pub mod spec;
--- a/libs/compute_api/src/privilege.rs
+++ b/libs/compute_api/src/privilege.rs
@@ -0,0 +1,35 @@
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+#[serde(rename_all = "UPPERCASE")]
+pub enum Privilege {
+    Select,
+    Insert,
+    Update,
+    Delete,
+    Truncate,
+    References,
+    Trigger,
+    Usage,
+    Create,
+    Connect,
+    Temporary,
+    Execute,
+}
+
+impl Privilege {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Privilege::Select => "SELECT",
+            Privilege::Insert => "INSERT",
+            Privilege::Update => "UPDATE",
+            Privilege::Delete => "DELETE",
+            Privilege::Truncate => "TRUNCATE",
+            Privilege::References => "REFERENCES",
+            Privilege::Trigger => "TRIGGER",
+            Privilege::Usage => "USAGE",
+            Privilege::Create => "CREATE",
+            Privilege::Connect => "CONNECT",
+            Privilege::Temporary => "TEMPORARY",
+            Privilege::Execute => "EXECUTE",
+        }
+    }
+}
--- a/libs/compute_api/src/requests.rs
+++ b/libs/compute_api/src/requests.rs
@@ -1,6 +1,6 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.

-use crate::spec::ComputeSpec;
+use crate::{privilege::Privilege, spec::ComputeSpec};
 use serde::Deserialize;

 /// Request of the /configure API
@@ -12,3 +12,18 @@ use serde::Deserialize;
 pub struct ConfigurationRequest {
    pub spec: ComputeSpec,
 }
+
+#[derive(Deserialize, Debug)]
+pub struct ExtensionInstallRequest {
+    pub extension: String,
+    pub database: String,
+    pub version: String,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct SetRoleGrantsRequest {
+    pub database: String,
+    pub schema: String,
+    pub privileges: Vec<Privilege>,
+    pub role: String,
+}
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -6,7 +6,10 @@ use std::fmt::Display;
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize, Serializer};

-use crate::spec::{ComputeSpec, Database, Role};
+use crate::{
+    privilege::Privilege,
+    spec::{ComputeSpec, Database, Role},
+};

 #[derive(Serialize, Debug, Deserialize)]
 pub struct GenericAPIError {
@@ -168,3 +171,17 @@ pub struct InstalledExtension {
 pub struct InstalledExtensions {
    pub extensions: Vec<InstalledExtension>,
 }
+
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct ExtensionInstallResult {
+    pub extension: String,
+    pub version: String,
+}
+
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct SetRoleGrantsResponse {
+    pub database: String,
+    pub schema: String,
+    pub privileges: Vec<Privilege>,
+    pub role: String,
+}
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -16,7 +16,7 @@ aws-sdk-s3.workspace = true
 bytes.workspace = true
 camino = { workspace = true, features = ["serde1"] }
 humantime-serde.workspace = true
-hyper0 = { workspace = true, features = ["stream"] }
+hyper = { workspace = true, features = ["client"] }
 futures.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -36,6 +36,7 @@ azure_storage.workspace = true
 azure_storage_blobs.workspace = true
 futures-util.workspace = true
 http-types.workspace = true
+http-body-util.workspace = true
 itertools.workspace = true
 sync_wrapper = { workspace = true, features = ["futures"] }

--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -28,13 +28,15 @@ use aws_sdk_s3::{
    Client,
 };
 use aws_smithy_async::rt::sleep::TokioSleep;
+use http_body_util::StreamBody;
 use http_types::StatusCode;

 use aws_smithy_types::{body::SdkBody, DateTime};
 use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
 use bytes::Bytes;
 use futures::stream::Stream;
-use hyper0::Body;
+use futures_util::StreamExt;
+use hyper::body::Frame;
 use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;
 use utils::backoff;
@@ -710,8 +712,8 @@ impl RemoteStorage for S3Bucket {

        let started_at = start_measuring_requests(kind);

-        let body = Body::wrap_stream(from);
-        let bytes_stream = ByteStream::new(SdkBody::from_body_0_4(body));
+        let body = StreamBody::new(from.map(|x| x.map(Frame::data)));
+        let bytes_stream = ByteStream::new(SdkBody::from_body_1_x(body));

        let upload = self
            .client
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -16,7 +16,7 @@ use fail::fail_point;
 use pageserver_api::key::Key;
 use postgres_ffi::pg_constants;
 use std::fmt::Write as FmtWrite;
-use std::time::SystemTime;
+use std::time::{Instant, SystemTime};
 use tokio::io;
 use tokio::io::AsyncWrite;
 use tracing::*;
@@ -352,12 +352,25 @@ where
            }
        }

-        for (path, content) in self
+        let start_time = Instant::now();
+        let aux_files = self
            .timeline
            .list_aux_files(self.lsn, self.ctx)
            .await
-            .map_err(|e| BasebackupError::Server(e.into()))?
-        {
+            .map_err(|e| BasebackupError::Server(e.into()))?;
+        let aux_scan_time = start_time.elapsed();
+        let aux_estimated_size = aux_files
+            .values()
+            .map(|content| content.len())
+            .sum::<usize>();
+        info!(
+            "Scanned {} aux files in {}ms, aux file content size = {}",
+            aux_files.len(),
+            aux_scan_time.as_millis(),
+            aux_estimated_size
+        );
+
+        for (path, content) in aux_files {
            if path.starts_with("pg_replslot") {
                let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
                let restart_lsn = Lsn(u64::from_le_bytes(
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -720,7 +720,12 @@ async fn timeline_archival_config_handler(
        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;

        tenant
-            .apply_timeline_archival_config(timeline_id, request_data.state, ctx)
+            .apply_timeline_archival_config(
+                timeline_id,
+                request_data.state,
+                state.broker_client.clone(),
+                ctx,
+            )
            .await?;
        Ok::<_, ApiError>(())
    }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -26,8 +26,8 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::time::SystemTime;
 use std::time::{Duration, Instant};
-use tokio::io::AsyncWriteExt;
 use tokio::io::{AsyncRead, AsyncWrite};
+use tokio::io::{AsyncWriteExt, BufWriter};
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
@@ -1137,10 +1137,10 @@ impl PageServerHandler {
            .await
            .map_err(map_basebackup_error)?;
        } else {
-            let mut writer = pgb.copyout_writer();
+            let mut writer = BufWriter::new(pgb.copyout_writer());
            if gzip {
                let mut encoder = GzipEncoder::with_quality(
-                    writer,
+                    &mut writer,
                    // NOTE using fast compression because it's on the critical path
                    //      for compute startup. For an empty database, we get
                    //      <100KB with this method. The Level::Best compression method
@@ -1175,6 +1175,10 @@ impl PageServerHandler {
                .await
                .map_err(map_basebackup_error)?;
            }
+            writer
+                .flush()
+                .await
+                .map_err(|e| map_basebackup_error(BasebackupError::Client(e)))?;
        }

        pgb.write_message_noflush(&BeMessage::CopyDone)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Conrad Ludgate	45232ca632	Merge remote-tracking branch 'origin/local-proxy-lazy-ext-install' into test-local-proxy-jwt-ext-install	2024-10-17 12:39:22 +01:00
Conrad Ludgate	e3385a6436	Merge branch 'main' into local-proxy-lazy-ext-install	2024-10-17 12:38:51 +01:00
Conrad Ludgate	7e7976b5cf	Merge remote-tracking branch 'origin/grants-endpoint' into test-local-proxy-jwt-ext-install	2024-10-17 11:52:56 +01:00
Conrad Ludgate	eda964f7c4	do not quote privileges	2024-10-17 11:50:12 +01:00
Ivan Efremov	22d8834474	proxy: move the connection pools to separate file (#9398 ) First PR for #9284 Start unification of the client and connection pool interfaces: - Exclude the 'global_connections_count' out from the get_conn_entry() - Move remote connection pools to the conn_pool_lib as a reference - Unify clients among all the conn pools	2024-10-17 13:38:24 +03:00
Conrad Ludgate	815d499b14	update version	2024-10-17 10:16:45 +01:00
Conrad Ludgate	8a7ed8ce72	Merge remote-tracking branch 'origin/grants-endpoint' into test-local-proxy-jwt-ext-install	2024-10-17 10:15:37 +01:00
Conrad Ludgate	3f3cb744f0	Merge remote-tracking branch 'origin/install-extensions-endpoint' into test-local-proxy-jwt-ext-install	2024-10-17 10:13:28 +01:00
Conrad Ludgate	d0166ee736	Merge remote-tracking branch 'origin/local-proxy-lazy-ext-install' into test-local-proxy-jwt-ext-install	2024-10-17 10:13:19 +01:00
Conrad Ludgate	284c84da5c	refactor	2024-10-17 10:11:58 +01:00
Conrad Ludgate	94204dbbae	[local_proxy]: install pg_session_jwt extension on demand	2024-10-17 10:10:24 +01:00
John Spray	db68e82235	storage_scrubber: fixes to garbage commands (#9409 ) ## Problem While running `find-garbage` and `purge-garbage`, I encountered two things that needed updating: - Console API may omit `user_id` since org accounts were added - When we cut over to using GenericRemoteStorage, the object listings we do during purge did not get proper retry handling, so could easily fail on usual S3 errors, and make the whole process drop out. ...and one bug: - We had a `.unwrap` which expects that after finding an object in a tenant path, a listing in that path will always return objects. This is not true, because a pageserver might be deleting the path at the same time as we scan it. ## Summary of changes - When listing objects during purge, use backoff::retry - Make `user_id` an `Option` - Handle the case where a tenant's objects go away during find-garbage.	2024-10-17 10:06:02 +01:00
Konstantin Knizhnik	934dbb61f5	Check access_count in lfc_evict (#9407 ) ## Problem See https://neondb.slack.com/archives/C033A2WE6BZ/p1729007738526309?thread_ts=1722942856.987979&cid=C033A2WE6BZ When a replica receives a WAL record whose target page is not present in shared buffers, we evict this page from LFC. If all pages from the LFC chunk are evicted, then the chunk is moved to the beginning of the LRU list to force its reuse. Unfortunately access_count is not checked, and if the entry is being accessed at this moment then this operation can cause LRU list corruption. ## Summary of changes Check `access_count` in `lfc_evict` ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>	2024-10-17 08:04:57 +03:00
Christian Schwarz	67d5d98b19	readme: fix build instructions for debian 12 (#9371 ) We need libprotobuf-dev for some of the `/usr/include/google/protobuf/...*.proto` referenced by our protobuf decls.	2024-10-16 21:47:53 +02:00
Tristan Partin	e0fa6bcf1a	Fix some sql_exporter metrics for PG 17 Checkpointer related statistics moved from pg_stat_bgwriter to pg_stat_checkpointer, so we need to adjust our queries accordingly. Signed-off-by: Tristan Partin <tristan@neon.tech>	2024-10-16 14:46:33 -05:00
Tristan Partin	409a286eaa	Fix typo in sql_exporter generator Bad copy-paste seemingly. This manifested itself as a failure to start for the sql_exporter, and was just dying on loop in staging. A future PR will have E2E testing of sql_exporter. Signed-off-by: Tristan Partin <tristan@neon.tech>	2024-10-16 13:08:40 -05:00
Arpad Müller	0551cfb6a7	Fix beta clippy warnings (#9419 ) ``` warning: first doc comment paragraph is too long --> compute_tools/src/installed_extensions.rs:35:1 \| 35 \| / /// Connect to every database (see list_dbs above) and get the list of installed extensions. 36 \| \| /// Same extension can be installed in multiple databases with different versions, 37 \| \| /// we only keep the highest and lowest version across all databases. \| \|_ \| = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#too_long_first_doc_paragraph = note: `#[warn(clippy::too_long_first_doc_paragraph)]` on by default help: add an empty line \| 35 ~ /// Connect to every database (see list_dbs above) and get the list of installed extensions. 36 + /// \| ```	2024-10-16 19:04:56 +01:00
Folke Behrens	ed694732e7	proxy: merge AuthError and AuthErrorImpl (#9418 ) Since GetAuthInfoError now boxes the ControlPlaneError message the variant is not big anymore and AuthError is 32 bytes.	2024-10-16 19:10:49 +02:00
Alex Chi Z.	8a114e3aed	refactor(pageserver): upgrade remote_storage to use hyper1 (#9405 ) part of https://github.com/neondatabase/neon/issues/9255 ## Summary of changes Upgrade remote_storage crate to use hyper1. Hyper0 is used when providing the streaming HTTP body to the s3 SDK, and it is refactored to use hyper1. Signed-off-by: Alex Chi Z <chi@neon.tech>	2024-10-16 16:19:45 +01:00
Arpad Müller	55b246085e	Activate timelines during unoffload (#9399 ) The current code has forgotten to activate timelines during unoffload, leading to inability to receive the basebackup, due to the timeline still being in loading state. ``` stderr: command failed: compute startup failed: failed to get basebackup@0/0 from pageserver postgresql://no_user@localhost:15014 Caused by: 0: db error: ERROR: Not found: Timeline 508546c79b2b16a84ab609fdf966e0d3/bfc18c24c4b837ecae5dbb5216c80fce is not active, state: Loading 1: ERROR: Not found: Timeline 508546c79b2b16a84ab609fdf966e0d3/bfc18c24c4b837ecae5dbb5216c80fce is not active, state: Loading ``` Therefore, also activate the timeline during unoffloading. Part of #8088	2024-10-16 16:47:17 +02:00
Anastasia Lubennikova	9668601f46	Add support of extensions for v17 (part 2) (#9389 ) - plv8 3.2.3 - HypoPG 1.4.1 - pgtap 1.3.3 - timescaledb 2.17.0 - pg_hint_plan 17_1_7_0 - rdkit Release_2024_09_1 - pg_uuidv7 1.6.0 - wal2json 2.6 - pg_ivm 1.9 - pg_partman 5.1.0 update support of extensions for v14-v16: - HypoPG 1.4.0 -> 1.4.1 - pgtap 1.2.0 -> 1.3.3 - plpgsql_check 2.5.3 -> 2.7.11 - pg_uuidv7 1.0.1 -> 1.6.0 - wal2json 2.5 -> 2.6 - pg_ivm 1.7 -> 1.9 - pg_partman 5.0.1 -> 5.1.0	2024-10-16 15:29:23 +01:00
Arpad Müller	3140c14d60	Remove allow(clippy::unknown_lints) (#9416 ) the lint stabilized in 1.80.	2024-10-16 16:28:55 +02:00
John Spray	d6281cbe65	tests: stabilize test_timelines_parallel_endpoints (#9413 ) ## Problem This test would get failures like `command failed: Found no timeline id for branch name 'branch_8'` It's because neon_local is being invoked concurrently for branch creation, which is unsafe (they'll step on each others' JSON writes) Example failure: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-9410/11363051979/index.html#testresult/5ddc56c640f5422b/retries ## Summary of changes - Don't do branch creation concurrently with endpoint creation via neon_local	2024-10-16 15:27:46 +01:00
Vlad Lazar	d490ad23e0	storcon: use the same trace fields for reconciler and results (#9410 ) ## Problem The reconciler uses `seq`, but processing of results uses `sequence`. Order is different too. It makes it annoying to read logs. ## Summary of Changes Use the same tracing fields in both	2024-10-16 14:04:17 +01:00
Folke Behrens	f14e45f0ce	proxy: format imports with nightly rustfmt (#9414 ) ```shell cargo +nightly fmt -p proxy -- -l --config imports_granularity=Module,group_imports=StdExternalCrate,reorder_imports=true ``` These rust-analyzer settings for VSCode should help retain this style: ```json "rust-analyzer.imports.group.enable": true, "rust-analyzer.imports.prefix": "crate", "rust-analyzer.imports.merge.glob": false, "rust-analyzer.imports.granularity.group": "module", "rust-analyzer.imports.granularity.enforce": true, ```	2024-10-16 15:01:56 +02:00
John Spray	89a65a9e5a	pageserver: improve handling of archival_config calls during Timeline shutdown (#9415 ) ## Problem In test `test_timeline_offloading`, we see failures like: ``` PageserverApiException: queue is in state Stopped ``` Example failure: https://neon-github-public-dev.s3.amazonaws.com/reports/main/11356917668/index.html#testresult/ff0e348a78a974ee/retries ## Summary of changes - Amend code paths that handle errors from RemoteTimelineClient to check for cancellation and emit the Cancelled error variant in these cases (will give clients a 503 to retry) - Remove the implicit `#[from]` for the Other error case, to make it harder to add code that accidentally squashes errors into this (500-equivalent) error variant. This would be neater if we made RemoteTimelineClient return a structured error instead of anyhow::Error, but that's a bigger refactor. I'm not sure if the test really intends to hit this path, but the error handling fix makes sense either way.	2024-10-16 13:39:58 +01:00
Conrad Ludgate	6de90c7ce4	quote identifiers	2024-10-16 12:09:25 +01:00
Conrad Ludgate	59d197f6f6	use pg_quote	2024-10-16 12:05:27 +01:00
Conrad Ludgate	028f5291f8	call out to quote_ident	2024-10-16 11:55:37 +01:00
Jere Vaara	f105dc9397	Make install extensions fn async	2024-10-16 13:32:11 +03:00
Jere Vaara	603ca192a7	Change path as per Alexey's feedback	2024-10-16 13:32:11 +03:00
Jere Vaara	36bffe5ff3	Install required version or update	2024-10-16 13:32:11 +03:00
Jere Vaara	133ea2f31f	Require extension version parameter	2024-10-16 13:32:11 +03:00
Jere Vaara	43400dfdea	change path	2024-10-16 13:32:11 +03:00
Jere Vaara	2b5d79f625	Respond with extension name and version	2024-10-16 13:32:11 +03:00
Jere Vaara	a766ac3dc7	Add error response status code	2024-10-16 13:32:11 +03:00
Jere Vaara	04f5807b99	Add endpoint that allows extensions to be installed	2024-10-16 13:32:11 +03:00
Jere Vaara	b57f2c60f2	Make set role grants fn async	2024-10-16 13:25:19 +03:00
Cihan Demirci	bc6b8cee01	don't trigger workflows in two repos (#9340 ) https://github.com/neondatabase/cloud/issues/16723	2024-10-16 10:43:48 +01:00
Tristan Partin	061ea0de7a	Add jsonnetfmt targets This should make it a little bit easier for people wanting to check if their files are formatted correctly. Has the added bonus of making the CI check simpler as well. Signed-off-by: Tristan Partin <tristan@neon.tech>	2024-10-15 20:01:13 -05:00
Tristan Partin	be5d6a69dc	Fix jsonnet_files wildcard Just a typo in a path. Signed-off-by: Tristan Partin <tristan@neon.tech>	2024-10-15 16:30:31 -05:00
Matthias van de Meent	18f4e5f10c	Add newly added metrics from neondatabase/neon#9116 to exports (#9402 ) They weren't added in that PR, but should be available immediately on rollout as the neon extension already defaults to 1.5.	2024-10-15 23:13:31 +02:00
Alex Chi Z.	f1eb703256	fix(pageserver): use a buffer for basebackup; add aux basebackup metrics log (#9401 ) Our replication bench project is stuck because it is too slow to generate basebackup and it caused compute to disconnect. https://neondb.slack.com/archives/C03438W3FLZ/p1728330685012419 The compute timeout for waiting for basebackup is 10m (is it true?). Generating basebackup directly on pageserver takes ~3min. Therefore, I suspect it's because there is too much wasted round-trip time for writing the 10000+ snapshot aux files. Also, it is possible that the basebackup process takes so long retrieving all aux files that it does not write anything over the wire protocol, causing a read timeout. Basebackup size is 800KB gzipped for that project and was 55MB tar before compression. ## Summary of changes * Potentially fix the issue by placing a write buffer for basebackup. * Log how many aux files we read + the time spent on it. Signed-off-by: Alex Chi Z <chi@neon.tech>	2024-10-15 16:35:21 -04:00
Tristan Partin	cf7a596a15	Generate sql_exporter config files with Jsonnet There are quite a few benefits to this approach: - Reduce config duplication - The two sql_exporter configs were super similar with just a few differences - Pull SQL queries into standalone files - That means we could run a SQL formatter on the file in the future - It also means access to syntax highlighting - In the future, run different queries for different PG versions - This is relevant because right now, we have queries that are failing on PG 17 due to catalog updates Signed-off-by: Tristan Partin <tristan@neon.tech>	2024-10-15 11:18:38 -05:00
Jere Vaara	d0ca79aeb3	Use serde to serialize/deserialize Privilege instead of manual	2024-10-15 17:18:59 +03:00
Konstantin Knizhnik	614c3aef72	Remove redundant code (#9373 ) ## Problem There is a double update of resize cache in `put_rel_truncation`. Also, `page_server_request` contains a check that the fork is MAIN_FORKNUM, which 1. is incorrect (because Vm/FSM pages are sharded in the same way as MAIN fork pages) and 2. is redundant because `page_server_request` is never called for a `get page` request, so the first part of the OR condition is always true. ## Summary of changes Remove redundant code ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>	2024-10-15 17:18:52 +03:00
Jere Vaara	39e0e31605	Move privilege to compute api	2024-10-15 16:21:52 +03:00
Jere Vaara	e495b99abe	Add to_string for privilege	2024-10-15 16:18:06 +03:00
Jere Vaara	3f92477af0	Use string formatted query instead	2024-10-15 16:18:06 +03:00
Jere Vaara	c6c29f86da	Handle privileges more strictly	2024-10-15 16:18:06 +03:00
Jere Vaara	4e15a68ffd	Add endpoint to set role grants	2024-10-15 16:18:06 +03:00
				`@@ -0,0 +1 @@`
				`SELECT num_requested AS checkpoints_req FROM pg_stat_checkpointer;`
				`@@ -0,0 +1 @@`
				`SELECT checkpoints_req FROM pg_stat_bgwriter;`
				`@@ -0,0 +1 @@`
				`SELECT num_timed AS checkpoints_timed FROM pg_stat_checkpointer;`
				`@@ -0,0 +1 @@`
				`SELECT checkpoints_timed FROM pg_stat_bgwriter;`
				`@@ -0,0 +1 @@`
				`SELECT subenabled::text AS enabled, count(*) AS subscriptions_count FROM pg_subscription GROUP BY subenabled;`
				`@@ -0,0 +1 @@`
				`SELECT datname, state, count(*) AS count FROM pg_stat_activity WHERE state <> '' GROUP BY datname, state;`
				`@@ -0,0 +1 @@`
				`SELECT sum(pg_database_size(datname)) AS total FROM pg_database;`
				`@@ -0,0 +1 @@`
				`SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_read_wait_seconds_bucket';`
				`@@ -0,0 +1 @@`
				`SELECT neon.approximate_working_set_size(false) AS approximate_working_set_size;`