Compare commits

..

6 Commits

Author SHA1 Message Date
Conrad Ludgate
fe8b93ab9d fix Payload deser 2024-10-14 14:02:46 +01:00
Conrad Ludgate
7e3e7f1cca turns out we don't actually need to deser everything 2024-10-14 11:58:53 +01:00
Conrad Ludgate
0b0ed662d9 proxy: use RawValue to lazily process inputs 2024-10-14 11:48:58 +01:00
Conrad Ludgate
50bd65769f a seemingly random change... 2024-10-14 11:44:18 +01:00
Conrad Ludgate
90534b1745 remove iterator join 2024-10-14 11:42:20 +01:00
Conrad Ludgate
99d52df475 proxy: slight refactor to json parsing 2024-10-14 11:38:18 +01:00
141 changed files with 1332 additions and 2523 deletions

View File

@@ -183,7 +183,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Store Allure test stat in the DB (new)
if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}

View File

@@ -88,7 +88,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}

View File

@@ -124,28 +124,28 @@ jobs:
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v17 build
id: cache_pg_17
uses: actions/cache@v4
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'

View File

@@ -19,16 +19,9 @@ defaults:
run:
shell: bash -euo pipefail {0}
# The initial idea was to prevent the waste of resources by not re-building the `build-tools` image
# for the same tag in parallel workflow runs, and queue them to be skipped once we have
# the first image pushed to Docker registry, but GitHub's concurrency mechanism is not working as expected.
# GitHub can't have more than 1 job in a queue and removes the previous one, it causes failures if the dependent jobs.
#
# Ref https://github.com/orgs/community/discussions/41518
#
# concurrency:
# group: build-build-tools-image-${{ inputs.image-tag }}
# cancel-in-progress: false
concurrency:
group: build-build-tools-image-${{ inputs.image-tag }}
cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
@@ -43,7 +36,6 @@ jobs:
strategy:
matrix:
debian-version: [ bullseye, bookworm ]
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
@@ -82,22 +74,22 @@ jobs:
- uses: docker/build-push-action@v6
with:
file: Dockerfile.build-tools
context: .
provenance: false
push: true
pull: true
build-args: |
DEBIAN_VERSION=${{ matrix.debian-version }}
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian-version }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian-version, matrix.arch) || '' }}
tags: |
neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.debian-version }}-${{ matrix.arch }}
file: Dockerfile.build-tools
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}
merge-images:
needs: [ build-image ]
runs-on: ubuntu-22.04
env:
IMAGE_TAG: ${{ inputs.image-tag }}
steps:
- uses: docker/login-action@v3
with:
@@ -105,17 +97,7 @@ jobs:
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Create multi-arch image
env:
DEFAULT_DEBIAN_VERSION: bullseye
IMAGE_TAG: ${{ inputs.image-tag }}
run: |
for debian_version in bullseye bookworm; do
tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian_version}")
if [ "${debian_version}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
tags+=("-t" "neondatabase/build-tools:${IMAGE_TAG}")
fi
docker buildx imagetools create "${tags[@]}" \
neondatabase/build-tools:${IMAGE_TAG}-${debian_version}-x64 \
neondatabase/build-tools:${IMAGE_TAG}-${debian_version}-arm64
done
docker buildx imagetools create -t neondatabase/build-tools:${IMAGE_TAG} \
neondatabase/build-tools:${IMAGE_TAG}-x64 \
neondatabase/build-tools:${IMAGE_TAG}-arm64

View File

@@ -92,7 +92,7 @@ jobs:
needs: [ check-permissions, build-build-tools-image ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -106,7 +106,7 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
@@ -120,24 +120,6 @@ jobs:
- name: Run mypy to check types
run: poetry run mypy .
check-codestyle-jsonnet:
needs: [ check-permissions, build-build-tools-image ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Check Jsonnet code formatting
run: |
make -C compute jsonnetfmt-test
# Check that the vendor/postgres-* submodules point to the
# corresponding REL_*_STABLE_neon branches.
check-submodules:
@@ -199,7 +181,7 @@ jobs:
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -279,7 +261,7 @@ jobs:
uses: ./.github/workflows/_build-and-test-locally.yml
with:
arch: ${{ matrix.arch }}
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
@@ -294,7 +276,7 @@ jobs:
needs: [ check-permissions, build-build-tools-image ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -307,7 +289,7 @@ jobs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
@@ -327,7 +309,7 @@ jobs:
needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -385,7 +367,7 @@ jobs:
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -433,7 +415,7 @@ jobs:
needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -577,16 +559,15 @@ jobs:
ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }}
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm
DEBIAN_VERSION=bookworm
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
provenance: false
push: true
pull: true
file: Dockerfile
cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }}
cache-from: type=registry,ref=cache.neon.build/neon:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0},mode=max', matrix.arch) || '' }}
tags: |
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-${{ matrix.arch }}
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
neon-image:
needs: [ neon-image-arch, tag ]
@@ -601,9 +582,8 @@ jobs:
- name: Create multi-arch image
run: |
docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \
-t neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-arm64
- uses: docker/login-action@v3
with:
@@ -624,16 +604,17 @@ jobs:
version:
# Much data was already generated on old PG versions with bullseye's
# libraries, the locales of which can cause data incompatibilities.
# However, new PG versions should be build on newer images,
# as that reduces the support burden of old and ancient distros.
# However, new PG versions should check if they can be built on newer
# images, as that reduces the support burden of old and ancient
# distros.
- pg: v14
debian: bullseye
debian: bullseye-slim
- pg: v15
debian: bullseye
debian: bullseye-slim
- pg: v16
debian: bullseye
debian: bullseye-slim
- pg: v17
debian: bookworm
debian: bookworm-slim
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
@@ -678,16 +659,16 @@ jobs:
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
DEBIAN_FLAVOR=${{ matrix.version.debian }}
provenance: false
push: true
pull: true
file: compute/Dockerfile.compute-node
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
tags: |
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
- name: Build neon extensions test image
if: matrix.version.pg == 'v16'
@@ -698,17 +679,17 @@ jobs:
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
DEBIAN_FLAVOR=${{ matrix.version.debian }}
provenance: false
push: true
pull: true
file: compute/Dockerfile.compute-node
target: neon-pg-ext-test
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
tags: |
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
- name: Build compute-tools image
# compute-tools are Postgres independent, so build it only once
@@ -723,16 +704,14 @@ jobs:
build-args: |
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
DEBIAN_FLAVOR=${{ matrix.version.debian }}
provenance: false
push: true
pull: true
file: compute/Dockerfile.compute-node
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
tags: |
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
compute-node-image:
needs: [ compute-node-image-arch, tag ]
@@ -740,16 +719,7 @@ jobs:
strategy:
matrix:
version:
# see the comment for `compute-node-image-arch` job
- pg: v14
debian: bullseye
- pg: v15
debian: bullseye
- pg: v16
debian: bullseye
- pg: v17
debian: bookworm
version: [ v14, v15, v16, v17 ]
steps:
- uses: docker/login-action@v3
@@ -759,26 +729,23 @@ jobs:
- name: Create multi-arch compute-node image
run: |
docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
- name: Create multi-arch neon-test-extensions image
if: matrix.version.pg == 'v16'
if: matrix.version == 'v16'
run: |
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
- name: Create multi-arch compute-tools image
if: matrix.version.pg == 'v16'
if: matrix.version == 'v17'
run: |
docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
-t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-arm64
- uses: docker/login-action@v3
with:
@@ -786,13 +753,13 @@ jobs:
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
- name: Push multi-arch compute-node-${{ matrix.version.pg }} image to ECR
- name: Push multi-arch compute-node-${{ matrix.version }} image to ECR
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
- name: Push multi-arch compute-tools image to ECR
if: matrix.version.pg == 'v16'
if: matrix.version == 'v17'
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
@@ -803,16 +770,7 @@ jobs:
strategy:
fail-fast: false
matrix:
version:
# see the comment for `compute-node-image-arch` job
- pg: v14
debian: bullseye
- pg: v15
debian: bullseye
- pg: v16
debian: bullseye
- pg: v17
debian: bookworm
version: [ v14, v15, v16, v17 ]
env:
VM_BUILDER_VERSION: v0.35.0
@@ -834,18 +792,18 @@ jobs:
# it won't have the proper authentication (written at v0.6.0)
- name: Pulling compute-node image
run: |
docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
docker pull neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
- name: Build vm image
run: |
./vm-builder \
-spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
-src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
-spec=compute/vm-image-spec.yaml \
-src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
-dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
- name: Pushing vm-compute-node image
run: |
docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
docker push neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
test-images:
needs: [ check-permissions, tag, neon-image, compute-node-image ]

View File

@@ -155,7 +155,7 @@ jobs:
github.ref_name == 'main'
runs-on: [ self-hosted, large ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

View File

@@ -55,7 +55,7 @@ jobs:
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -150,7 +150,7 @@ jobs:
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

View File

@@ -71,6 +71,7 @@ jobs:
steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -93,22 +94,8 @@ jobs:
az acr login --name=neoneastus2
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
env:
DEFAULT_DEBIAN_VERSION: bullseye
run: |
for debian_version in bullseye bookworm; do
tags=()
tags+=("-t" "neondatabase/build-tools:${TO_TAG}-${debian_version}")
tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}-${debian_version}")
tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}-${debian_version}")
if [ "${debian_version}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
tags+=("-t" "neondatabase/build-tools:${TO_TAG}")
tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}")
tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}")
fi
docker buildx imagetools create "${tags[@]}" \
neondatabase/build-tools:${FROM_TAG}-${debian_version}
done
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
-t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
-t neondatabase/build-tools:${TO_TAG} \
neondatabase/build-tools:${FROM_TAG}

7
Cargo.lock generated
View File

@@ -2695,7 +2695,6 @@ checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e"
dependencies = [
"equivalent",
"hashbrown 0.14.5",
"serde",
]
[[package]]
@@ -2795,9 +2794,9 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.11"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "jobserver"
@@ -4297,7 +4296,6 @@ dependencies = [
"indexmap 2.0.1",
"ipnet",
"itertools 0.10.5",
"itoa",
"jose-jwa",
"jose-jwk",
"lasso",
@@ -7309,7 +7307,6 @@ dependencies = [
"hyper 1.4.1",
"hyper-util",
"indexmap 1.9.3",
"indexmap 2.0.1",
"itertools 0.12.1",
"lazy_static",
"libc",

View File

@@ -107,7 +107,6 @@ indexmap = "2"
indoc = "2"
ipnet = "2.9.0"
itertools = "0.10"
itoa = "1.0.11"
jsonwebtoken = "9"
lasso = "0.7"
libc = "0.2"

View File

@@ -7,8 +7,6 @@ ARG IMAGE=build-tools
ARG TAG=pinned
ARG DEFAULT_PG_VERSION=17
ARG STABLE_PG_VERSION=16
ARG DEBIAN_VERSION=bullseye
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
@@ -59,7 +57,7 @@ RUN set -e \
# Build final image
#
FROM debian:${DEBIAN_FLAVOR}
FROM debian:bullseye-slim
ARG DEFAULT_PG_VERSION
WORKDIR /data

View File

@@ -1,7 +1,12 @@
ARG DEBIAN_VERSION=bullseye
FROM debian:bullseye-slim
FROM debian:${DEBIAN_VERSION}-slim
ARG DEBIAN_VERSION
# Use ARG as a build-time environment variable here to allow.
# It's not supposed to be set outside.
# Alternatively it can be obtained using the following command
# ```
# . /etc/os-release && echo "${VERSION_CODENAME}"
# ```
ARG DEBIAN_VERSION_CODENAME=bullseye
# Add nonroot user
RUN useradd -ms /bin/bash nonroot -b /home
@@ -27,7 +32,6 @@ RUN set -e \
gnupg \
gzip \
jq \
jsonnet \
libcurl4-openssl-dev \
libbz2-dev \
libffi-dev \
@@ -38,14 +42,14 @@ RUN set -e \
libseccomp-dev \
libsqlite3-dev \
libssl-dev \
$([[ "${DEBIAN_VERSION}" = "bullseye" ]] && libstdc++-10-dev || libstdc++-11-dev) \
libstdc++-10-dev \
libtool \
libxml2-dev \
libxmlsec1-dev \
libxxhash-dev \
lsof \
make \
netcat-openbsd \
netcat \
net-tools \
openssh-client \
parallel \
@@ -74,7 +78,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
# LLVM
ENV LLVM_VERSION=18
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION_CODENAME}/ llvm-toolchain-${DEBIAN_VERSION_CODENAME}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& apt update \
&& apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
@@ -82,7 +86,7 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION_CODENAME} stable" > /etc/apt/sources.list.d/docker.list \
&& apt update \
&& apt install -y docker-ce docker-ce-cli \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

View File

@@ -291,7 +291,6 @@ postgres-check: \
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean: postgres-clean neon-pg-clean-ext
$(MAKE) -C compute clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything

5
compute/.gitignore vendored
View File

@@ -1,5 +0,0 @@
# sql_exporter config files generated from Jsonnet
etc/neon_collector.yml
etc/neon_collector_autoscaling.yml
etc/sql_exporter.yml
etc/sql_exporter_autoscaling.yml

View File

@@ -3,8 +3,7 @@ ARG REPOSITORY=neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
ARG DEBIAN_VERSION=bullseye
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
ARG DEBIAN_FLAVOR=bullseye-slim
#########################################################################################
#
@@ -12,23 +11,20 @@ ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
#
#########################################################################################
FROM debian:$DEBIAN_FLAVOR AS build-deps
ARG DEBIAN_VERSION
ARG DEBIAN_FLAVOR
RUN case $DEBIAN_VERSION in \
RUN case $DEBIAN_FLAVOR in \
# Version-specific installs for Bullseye (PG14-PG16):
# The h3_pg extension needs a cmake 3.20+, but Debian bullseye has 3.18.
# Install newer version (3.25) from backports.
bullseye) \
bullseye*) \
echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/bullseye-backports.list; \
VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports"; \
;; \
# Version-specific installs for Bookworm (PG17):
bookworm) \
bookworm*) \
VERSION_INSTALLS="cmake"; \
;; \
*) \
echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
;; \
esac && \
apt update && \
apt install --no-install-recommends -y git autoconf automake libtool build-essential bison flex libreadline-dev \
@@ -349,7 +345,7 @@ ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# not version-specific
# doesn't use releases, last commit f3d82fd - Mar 2, 2023
# doesn't use releases, last commit f3d82fd - Mar 2, 2023
RUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71c20689ce3d.tar.gz -O pgjwt.tar.gz && \
echo "dae8ed99eebb7593b43013f6532d772b12dfecd55548d2673f2dfd0163f6d2b9 pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
@@ -929,8 +925,8 @@ ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "pg_session_jwt does not yet have a release that supports pg17" && exit 0;; \
esac && \
wget https://github.com/neondatabase/pg_session_jwt/archive/5aee2625af38213650e1a07ae038fdc427250ee4.tar.gz -O pg_session_jwt.tar.gz && \
echo "5d91b10bc1347d36cffc456cb87bec25047935d6503dc652ca046f04760828e7 pg_session_jwt.tar.gz" | sha256sum --check && \
wget https://github.com/neondatabase/pg_session_jwt/archive/ff0a72440e8ff584dab24b3f9b7c00c56c660b8e.tar.gz -O pg_session_jwt.tar.gz && \
echo "1fbb2b5a339263bcf6daa847fad8bccbc0b451cea6a62e6d3bf232b0087f05cb pg_session_jwt.tar.gz" | sha256sum --check && \
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release
@@ -1095,6 +1091,7 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
#########################################################################################
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image
ARG DEBIAN_FLAVOR
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
@@ -1105,6 +1102,7 @@ COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compu
#########################################################################################
FROM debian:$DEBIAN_FLAVOR AS pgbouncer
ARG DEBIAN_FLAVOR
RUN set -e \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
@@ -1169,18 +1167,6 @@ RUN rm -r /usr/local/pgsql/include
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Preprocess the sql_exporter configuration files
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS sql_exporter_preprocessor
USER nonroot
COPY --chown=nonroot compute compute
RUN make -C compute
#########################################################################################
#
@@ -1271,7 +1257,7 @@ ENV PGDATABASE=postgres
#
#########################################################################################
FROM debian:$DEBIAN_FLAVOR
ARG DEBIAN_VERSION
ARG DEBIAN_FLAVOR
# Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
@@ -1299,10 +1285,10 @@ RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml /etc/sql_exporter.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml /etc/neon_collector.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
COPY --chmod=0644 compute/etc/sql_exporter.yml /etc/sql_exporter.yml
COPY --chmod=0644 compute/etc/neon_collector.yml /etc/neon_collector.yml
COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
# Create remote extension download directory
RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
@@ -1319,22 +1305,19 @@ RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/loca
RUN apt update && \
case $DEBIAN_VERSION in \
case $DEBIAN_FLAVOR in \
# Version-specific installs for Bullseye (PG14-PG16):
# libicu67, locales for collations (including ICU and plpgsql_check)
# libgdal28, libproj19 for PostGIS
bullseye) \
bullseye*) \
VERSION_INSTALLS="libicu67 libgdal28 libproj19"; \
;; \
# Version-specific installs for Bookworm (PG17):
# libicu72, locales for collations (including ICU and plpgsql_check)
# libgdal32, libproj25 for PostGIS
bookworm) \
bookworm*) \
VERSION_INSTALLS="libicu72 libgdal32 libproj25"; \
;; \
*) \
echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
;; \
esac && \
apt install --no-install-recommends -y \
gdb \

View File

@@ -1,45 +0,0 @@
jsonnet_files = $(wildcard \
etc/*.jsonnet \
etc/sql_exporter/*.libsonnet)
.PHONY: all
all: neon_collector.yml neon_collector_autoscaling.yml sql_exporter.yml sql_exporter_autoscaling.yml
neon_collector.yml: $(jsonnet_files)
JSONNET_PATH=etc jsonnet \
--output-file etc/$@ \
etc/neon_collector.jsonnet
neon_collector_autoscaling.yml: $(jsonnet_files)
JSONNET_PATH=etc jsonnet \
--output-file etc/$@ \
etc/neon_collector_autoscaling.jsonnet
sql_exporter.yml: $(jsonnet_files)
JSONNET_PATH=etc jsonnet \
--output-file etc/$@ \
--tla-str collector_file=neon_collector.yml \
etc/sql_exporter.jsonnet
sql_exporter_autoscaling.yml: $(jsonnet_files)
JSONNET_PATH=etc jsonnet \
--output-file etc/$@ \
--tla-str collector_file=neon_collector_autoscaling.yml \
--tla-str application_name=sql_exporter_autoscaling \
etc/sql_exporter.jsonnet
.PHONY: clean
clean:
rm --force \
etc/neon_collector.yml \
etc/neon_collector_autoscaling.yml \
etc/sql_exporter.yml \
etc/sql_exporter_autoscaling.yml
.PHONY: jsonnetfmt-test
jsonnetfmt-test:
jsonnetfmt --test $(jsonnet_files)
.PHONY: jsonnetfmt-format
jsonnetfmt-format:
jsonnetfmt --in-place $(jsonnet_files)

View File

@@ -1,17 +0,0 @@
# Compute Configuration
These files are the configuration files for various other pieces of software
that will be running in the compute alongside Postgres.
## `sql_exporter`
### Adding a `sql_exporter` Metric
We use `sql_exporter` to export various metrics from Postgres. In order to add
a metric, you will need to create two files: a `libsonnet` and a `sql` file. You
will then import the `libsonnet` file in one of the collector files, and the
`sql` file will be imported in the `libsonnet` file.
In the event your statistic is an LSN, you may want to cast it to a `float8`
because Prometheus only supports floats. It's probably fine because `float8` can
store integers from `-2^53` to `+2^53` exactly.

View File

@@ -1,51 +0,0 @@
{
collector_name: 'neon_collector',
metrics: [
import 'sql_exporter/checkpoints_req.libsonnet',
import 'sql_exporter/checkpoints_timed.libsonnet',
import 'sql_exporter/compute_current_lsn.libsonnet',
import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
import 'sql_exporter/compute_receive_lsn.libsonnet',
import 'sql_exporter/compute_subscriptions_count.libsonnet',
import 'sql_exporter/connection_counts.libsonnet',
import 'sql_exporter/db_total_size.libsonnet',
import 'sql_exporter/file_cache_read_wait_seconds_bucket.libsonnet',
import 'sql_exporter/file_cache_read_wait_seconds_count.libsonnet',
import 'sql_exporter/file_cache_read_wait_seconds_sum.libsonnet',
import 'sql_exporter/file_cache_write_wait_seconds_bucket.libsonnet',
import 'sql_exporter/file_cache_write_wait_seconds_count.libsonnet',
import 'sql_exporter/file_cache_write_wait_seconds_sum.libsonnet',
import 'sql_exporter/getpage_prefetch_discards_total.libsonnet',
import 'sql_exporter/getpage_prefetch_misses_total.libsonnet',
import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',
import 'sql_exporter/getpage_prefetches_buffered.libsonnet',
import 'sql_exporter/getpage_sync_requests_total.libsonnet',
import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',
import 'sql_exporter/getpage_wait_seconds_count.libsonnet',
import 'sql_exporter/getpage_wait_seconds_sum.libsonnet',
import 'sql_exporter/lfc_approximate_working_set_size.libsonnet',
import 'sql_exporter/lfc_approximate_working_set_size_windows.libsonnet',
import 'sql_exporter/lfc_cache_size_limit.libsonnet',
import 'sql_exporter/lfc_hits.libsonnet',
import 'sql_exporter/lfc_misses.libsonnet',
import 'sql_exporter/lfc_used.libsonnet',
import 'sql_exporter/lfc_writes.libsonnet',
import 'sql_exporter/logical_slot_restart_lsn.libsonnet',
import 'sql_exporter/max_cluster_size.libsonnet',
import 'sql_exporter/pageserver_disconnects_total.libsonnet',
import 'sql_exporter/pageserver_requests_sent_total.libsonnet',
import 'sql_exporter/pageserver_send_flushes_total.libsonnet',
import 'sql_exporter/pageserver_open_requests.libsonnet',
import 'sql_exporter/pg_stats_userdb.libsonnet',
import 'sql_exporter/replication_delay_bytes.libsonnet',
import 'sql_exporter/replication_delay_seconds.libsonnet',
import 'sql_exporter/retained_wal.libsonnet',
import 'sql_exporter/wal_is_lost.libsonnet',
],
queries: [
{
query_name: 'neon_perf_counters',
query: importstr 'sql_exporter/neon_perf_counters.sql',
},
],
}

View File

@@ -0,0 +1,331 @@
collector_name: neon_collector
metrics:
- metric_name: lfc_misses
type: gauge
help: 'lfc_misses'
key_labels:
values: [lfc_misses]
query: |
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
- metric_name: lfc_used
type: gauge
help: 'LFC chunks used (chunk = 1MB)'
key_labels:
values: [lfc_used]
query: |
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
- metric_name: lfc_hits
type: gauge
help: 'lfc_hits'
key_labels:
values: [lfc_hits]
query: |
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
- metric_name: lfc_writes
type: gauge
help: 'lfc_writes'
key_labels:
values: [lfc_writes]
query: |
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
- metric_name: lfc_cache_size_limit
type: gauge
help: 'LFC cache size limit in bytes'
key_labels:
values: [lfc_cache_size_limit]
query: |
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
- metric_name: connection_counts
type: gauge
help: 'Connection counts'
key_labels:
- datname
- state
values: [count]
query: |
select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
- metric_name: pg_stats_userdb
type: gauge
help: 'Stats for several oldest non-system dbs'
key_labels:
- datname
value_label: kind
values:
- db_size
- deadlocks
# Rows
- inserted
- updated
- deleted
# We export stats for 10 non-system database. Without this limit
# it is too easy to abuse the system by creating lots of databases.
query: |
select pg_database_size(datname) as db_size, deadlocks,
tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
datname
from pg_stat_database
where datname IN (
select datname
from pg_database
where datname <> 'postgres' and not datistemplate
order by oid
limit 10
);
- metric_name: max_cluster_size
type: gauge
help: 'neon.max_cluster_size setting'
key_labels:
values: [max_cluster_size]
query: |
select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
- metric_name: db_total_size
type: gauge
help: 'Size of all databases'
key_labels:
values: [total]
query: |
select sum(pg_database_size(datname)) as total from pg_database;
- metric_name: getpage_wait_seconds_count
type: counter
help: 'Number of getpage requests'
values: [getpage_wait_seconds_count]
query_ref: neon_perf_counters
- metric_name: getpage_wait_seconds_sum
type: counter
help: 'Time spent in getpage requests'
values: [getpage_wait_seconds_sum]
query_ref: neon_perf_counters
- metric_name: getpage_prefetch_requests_total
type: counter
help: 'Number of getpage issued for prefetching'
values: [getpage_prefetch_requests_total]
query_ref: neon_perf_counters
- metric_name: getpage_sync_requests_total
type: counter
help: 'Number of synchronous getpage issued'
values: [getpage_sync_requests_total]
query_ref: neon_perf_counters
- metric_name: getpage_prefetch_misses_total
type: counter
help: 'Total number of readahead misses; consisting of either prefetches that don''t satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read'
values: [getpage_prefetch_misses_total]
query_ref: neon_perf_counters
- metric_name: getpage_prefetch_discards_total
type: counter
help: 'Number of prefetch responses issued but not used'
values: [getpage_prefetch_discards_total]
query_ref: neon_perf_counters
- metric_name: pageserver_requests_sent_total
type: counter
help: 'Number of all requests sent to the pageserver (not just GetPage requests)'
values: [pageserver_requests_sent_total]
query_ref: neon_perf_counters
- metric_name: pageserver_disconnects_total
type: counter
help: 'Number of times that the connection to the pageserver was lost'
values: [pageserver_disconnects_total]
query_ref: neon_perf_counters
- metric_name: pageserver_send_flushes_total
type: counter
help: 'Number of flushes to the pageserver connection'
values: [pageserver_send_flushes_total]
query_ref: neon_perf_counters
- metric_name: getpage_wait_seconds_bucket
type: counter
help: 'Histogram buckets of getpage request latency'
key_labels:
- bucket_le
values: [value]
query_ref: getpage_wait_seconds_buckets
# DEPRECATED
- metric_name: lfc_approximate_working_set_size
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels:
values: [approximate_working_set_size]
query: |
select neon.approximate_working_set_size(false) as approximate_working_set_size;
- metric_name: lfc_approximate_working_set_size_windows
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels: [duration]
values: [size]
# NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
# of durations in a pretty-printed form.
query: |
select
x as duration,
neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
from
(values ('5m'),('15m'),('1h')) as t (x);
- metric_name: compute_current_lsn
type: gauge
help: 'Current LSN of the database'
key_labels:
values: [lsn]
query: |
select
case
when pg_catalog.pg_is_in_recovery()
then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
else (pg_current_wal_lsn() - '0/0')::FLOAT8
end as lsn;
- metric_name: compute_receive_lsn
type: gauge
help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
key_labels:
values: [lsn]
query: |
SELECT
CASE
WHEN pg_catalog.pg_is_in_recovery()
THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
ELSE 0
END AS lsn;
- metric_name: replication_delay_bytes
type: gauge
help: 'Bytes between received and replayed LSN'
key_labels:
values: [replication_delay_bytes]
# We use a GREATEST call here because this calculation can be negative.
# The calculation is not atomic, meaning after we've gotten the receive
# LSN, the replay LSN may have advanced past the receive LSN we
# are using for the calculation.
query: |
SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
- metric_name: replication_delay_seconds
type: gauge
help: 'Time since last LSN was replayed'
key_labels:
values: [replication_delay_seconds]
query: |
SELECT
CASE
WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
END AS replication_delay_seconds;
- metric_name: checkpoints_req
type: gauge
help: 'Number of requested checkpoints'
key_labels:
values: [checkpoints_req]
query: |
SELECT checkpoints_req FROM pg_stat_bgwriter;
- metric_name: checkpoints_timed
type: gauge
help: 'Number of scheduled checkpoints'
key_labels:
values: [checkpoints_timed]
query: |
SELECT checkpoints_timed FROM pg_stat_bgwriter;
- metric_name: compute_logical_snapshot_files
type: gauge
help: 'Number of snapshot files in pg_logical/snapshot'
key_labels:
- timeline_id
values: [num_logical_snapshot_files]
query: |
SELECT
(SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
-- temporary snapshot files are renamed to the actual snapshot files after they are
-- completely built. We only WAL-log the completely built snapshot files.
(SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
- metric_name: logical_slot_restart_lsn
type: gauge
help: 'restart_lsn of logical slots'
key_labels:
- slot_name
values: [restart_lsn]
query: |
select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
from pg_replication_slots
where slot_type = 'logical';
- metric_name: compute_subscriptions_count
type: gauge
help: 'Number of logical replication subscriptions grouped by enabled/disabled'
key_labels:
- enabled
values: [subscriptions_count]
query: |
select subenabled::text as enabled, count(*) as subscriptions_count
from pg_subscription
group by subenabled;
- metric_name: retained_wal
type: gauge
help: 'Retained WAL in inactive replication slots'
key_labels:
- slot_name
values: [retained_wal]
query: |
SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
FROM pg_replication_slots
WHERE active = false;
- metric_name: wal_is_lost
type: gauge
help: 'Whether or not the replication slot wal_status is lost'
key_labels:
- slot_name
values: [wal_is_lost]
query: |
SELECT slot_name,
CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
FROM pg_replication_slots;
queries:
- query_name: neon_perf_counters
query: |
WITH c AS (
SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters
)
SELECT d.*
FROM pg_catalog.jsonb_to_record((select jb from c)) as d(
getpage_wait_seconds_count numeric,
getpage_wait_seconds_sum numeric,
getpage_prefetch_requests_total numeric,
getpage_sync_requests_total numeric,
getpage_prefetch_misses_total numeric,
getpage_prefetch_discards_total numeric,
pageserver_requests_sent_total numeric,
pageserver_disconnects_total numeric,
pageserver_send_flushes_total numeric
);
- query_name: getpage_wait_seconds_buckets
query: |
SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';

View File

@@ -1,11 +0,0 @@
{
collector_name: 'neon_collector_autoscaling',
metrics: [
import 'sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.libsonnet',
import 'sql_exporter/lfc_cache_size_limit.libsonnet',
import 'sql_exporter/lfc_hits.libsonnet',
import 'sql_exporter/lfc_misses.libsonnet',
import 'sql_exporter/lfc_used.libsonnet',
import 'sql_exporter/lfc_writes.libsonnet',
],
}

View File

@@ -0,0 +1,55 @@
collector_name: neon_collector_autoscaling
metrics:
- metric_name: lfc_misses
type: gauge
help: 'lfc_misses'
key_labels:
values: [lfc_misses]
query: |
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
- metric_name: lfc_used
type: gauge
help: 'LFC chunks used (chunk = 1MB)'
key_labels:
values: [lfc_used]
query: |
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
- metric_name: lfc_hits
type: gauge
help: 'lfc_hits'
key_labels:
values: [lfc_hits]
query: |
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
- metric_name: lfc_writes
type: gauge
help: 'lfc_writes'
key_labels:
values: [lfc_writes]
query: |
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
- metric_name: lfc_cache_size_limit
type: gauge
help: 'LFC cache size limit in bytes'
key_labels:
values: [lfc_cache_size_limit]
query: |
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
- metric_name: lfc_approximate_working_set_size_windows
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels: [duration_seconds]
values: [size]
# NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
# size looking back 1..60 minutes, labeled with the number of minutes.
query: |
select
x::text as duration_seconds,
neon.approximate_working_set_size_seconds(x) as size
from
(select generate_series * 60 as x from generate_series(1, 60)) as t (x);

View File

@@ -1,40 +0,0 @@
function(collector_file, application_name='sql_exporter') {
// Configuration for sql_exporter for autoscaling-agent
// Global defaults.
global: {
// If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: '10s',
// Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: '500ms',
// Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: '0s',
// Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
// as will concurrent scrapes.
max_connections: 1,
// Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
// always be the same as max_connections.
max_idle_connections: 1,
// Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
// If 0, connections are not closed due to a connection's age.
max_connection_lifetime: '5m',
},
// The target to monitor and the collectors to execute on it.
target: {
// Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
// the schema gets dropped or replaced to match the driver expected DSN format.
data_source_name: std.format('postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=%s', [application_name]),
// Collectors (referenced by name) to execute on the target.
// Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [
'neon_collector_autoscaling',
],
},
// Collector files specifies a list of globs. One collector definition is read from each matching file.
// Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files: [
collector_file,
],
}

View File

@@ -0,0 +1,33 @@
# Configuration for sql_exporter
# Global defaults.
global:
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: 10s
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: 500ms
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: 0s
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
# as will concurrent scrapes.
max_connections: 1
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
# always be the same as max_connections.
max_idle_connections: 1
# Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
# If 0, connections are not closed due to a connection's age.
max_connection_lifetime: 5m
# The target to monitor and the collectors to execute on it.
target:
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
# the schema gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
# Collectors (referenced by name) to execute on the target.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [neon_collector]
# Collector files specifies a list of globs. One collector definition is read from each matching file.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files:
- "neon_collector.yml"

View File

@@ -1,10 +0,0 @@
{
metric_name: 'checkpoints_req',
type: 'gauge',
help: 'Number of requested checkpoints',
key_labels: null,
values: [
'checkpoints_req',
],
query: importstr 'sql_exporter/checkpoints_req.sql',
}

View File

@@ -1 +0,0 @@
SELECT checkpoints_req FROM pg_stat_bgwriter;

View File

@@ -1,10 +0,0 @@
{
metric_name: 'checkpoints_timed',
type: 'gauge',
help: 'Number of scheduled checkpoints',
key_labels: null,
values: [
'checkpoints_timed',
],
query: importstr 'sql_exporter/checkpoints_timed.sql',
}

View File

@@ -1 +0,0 @@
SELECT checkpoints_timed FROM pg_stat_bgwriter;

View File

@@ -1,10 +0,0 @@
{
metric_name: 'compute_current_lsn',
type: 'gauge',
help: 'Current LSN of the database',
key_labels: null,
values: [
'lsn',
],
query: importstr 'sql_exporter/compute_current_lsn.sql',
}

View File

@@ -1,4 +0,0 @@
SELECT CASE
WHEN pg_catalog.pg_is_in_recovery() THEN (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
ELSE (pg_current_wal_lsn() - '0/0')::FLOAT8
END AS lsn;

View File

@@ -1,12 +0,0 @@
{
metric_name: 'compute_logical_snapshot_files',
type: 'gauge',
help: 'Number of snapshot files in pg_logical/snapshot',
key_labels: [
'timeline_id',
],
values: [
'num_logical_snapshot_files',
],
query: importstr 'sql_exporter/compute_logical_snapshot_files.sql',
}

View File

@@ -1,7 +0,0 @@
SELECT
(SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
-- These temporary snapshot files are renamed to the actual snapshot files
-- after they are completely built. We only WAL-log the completely built
-- snapshot files
(SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;

View File

@@ -1,10 +0,0 @@
{
metric_name: 'compute_receive_lsn',
type: 'gauge',
help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication',
key_labels: null,
values: [
'lsn',
],
query: importstr 'sql_exporter/compute_receive_lsn.sql',
}

View File

@@ -1,4 +0,0 @@
SELECT CASE
WHEN pg_catalog.pg_is_in_recovery() THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
ELSE 0
END AS lsn;

View File

@@ -1,12 +0,0 @@
{
metric_name: 'compute_subscriptions_count',
type: 'gauge',
help: 'Number of logical replication subscriptions grouped by enabled/disabled',
key_labels: [
'enabled',
],
values: [
'subscriptions_count',
],
query: importstr 'sql_exporter/compute_subscriptions_count.sql',
}

View File

@@ -1 +0,0 @@
SELECT subenabled::text AS enabled, count(*) AS subscriptions_count FROM pg_subscription GROUP BY subenabled;

View File

@@ -1,13 +0,0 @@
{
metric_name: 'connection_counts',
type: 'gauge',
help: 'Connection counts',
key_labels: [
'datname',
'state',
],
values: [
'count',
],
query: importstr 'sql_exporter/connection_counts.sql',
}

View File

@@ -1 +0,0 @@
SELECT datname, state, count(*) AS count FROM pg_stat_activity WHERE state <> '' GROUP BY datname, state;

View File

@@ -1,10 +0,0 @@
{
metric_name: 'db_total_size',
type: 'gauge',
help: 'Size of all databases',
key_labels: null,
values: [
'total',
],
query: importstr 'sql_exporter/db_total_size.sql',
}

View File

@@ -1 +0,0 @@
SELECT sum(pg_database_size(datname)) AS total FROM pg_database;

View File

@@ -1,12 +0,0 @@
{
metric_name: 'file_cache_read_wait_seconds_bucket',
type: 'counter',
help: 'Histogram buckets of LFC read operation latencies',
key_labels: [
'bucket_le',
],
values: [
'value',
],
query: importstr 'sql_exporter/file_cache_read_wait_seconds_bucket.sql',
}

View File

@@ -1 +0,0 @@
SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_read_wait_seconds_bucket';

View File

@@ -1,9 +0,0 @@
{
metric_name: 'file_cache_read_wait_seconds_count',
type: 'counter',
help: 'Number of read operations in LFC',
values: [
'file_cache_read_wait_seconds_count',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'file_cache_read_wait_seconds_sum',
type: 'counter',
help: 'Time spent in LFC read operations',
values: [
'file_cache_read_wait_seconds_sum',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,12 +0,0 @@
{
metric_name: 'file_cache_write_wait_seconds_bucket',
type: 'counter',
help: 'Histogram buckets of LFC write operation latencies',
key_labels: [
'bucket_le',
],
values: [
'value',
],
query: importstr 'sql_exporter/file_cache_write_wait_seconds_bucket.sql',
}

View File

@@ -1 +0,0 @@
SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'file_cache_write_wait_seconds_bucket';

View File

@@ -1,9 +0,0 @@
{
metric_name: 'file_cache_write_wait_seconds_count',
type: 'counter',
help: 'Number of write operations in LFC',
values: [
'file_cache_write_wait_seconds_count',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'file_cache_write_wait_seconds_sum',
type: 'counter',
help: 'Time spent in LFC write operations',
values: [
'file_cache_write_wait_seconds_sum',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_prefetch_discards_total',
type: 'counter',
help: 'Number of prefetch responses issued but not used',
values: [
'getpage_prefetch_discards_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_prefetch_misses_total',
type: 'counter',
help: "Total number of readahead misses; consisting of either prefetches that don't satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read",
values: [
'getpage_prefetch_misses_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_prefetch_requests_total',
type: 'counter',
help: 'Number of getpage issued for prefetching',
values: [
'getpage_prefetch_requests_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_prefetches_buffered',
type: 'gauge',
help: 'Number of prefetched pages buffered in neon',
values: [
'getpage_prefetches_buffered',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_sync_requests_total',
type: 'counter',
help: 'Number of synchronous getpage issued',
values: [
'getpage_sync_requests_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,12 +0,0 @@
{
metric_name: 'getpage_wait_seconds_bucket',
type: 'counter',
help: 'Histogram buckets of getpage request latency',
key_labels: [
'bucket_le',
],
values: [
'value',
],
query: importstr 'sql_exporter/getpage_wait_seconds_bucket.sql',
}

View File

@@ -1 +0,0 @@
SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_wait_seconds_count',
type: 'counter',
help: 'Number of getpage requests',
values: [
'getpage_wait_seconds_count',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'getpage_wait_seconds_sum',
type: 'counter',
help: 'Time spent in getpage requests',
values: [
'getpage_wait_seconds_sum',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,12 +0,0 @@
// DEPRECATED
{
metric_name: 'lfc_approximate_working_set_size',
type: 'gauge',
help: 'Approximate working set size in pages of 8192 bytes',
key_labels: null,
values: [
'approximate_working_set_size',
],
query: importstr 'sql_exporter/lfc_approximate_working_set_size.sql',
}

View File

@@ -1 +0,0 @@
SELECT neon.approximate_working_set_size(false) AS approximate_working_set_size;

View File

@@ -1,12 +0,0 @@
{
metric_name: 'lfc_approximate_working_set_size_windows',
type: 'gauge',
help: 'Approximate working set size in pages of 8192 bytes',
key_labels: [
'duration_seconds',
],
values: [
'size',
],
query: importstr 'sql_exporter/lfc_approximate_working_set_size_windows.autoscaling.sql',
}

View File

@@ -1,8 +0,0 @@
-- NOTE: This is the "internal" / "machine-readable" version. This outputs the
-- working set size looking back 1..60 minutes, labeled with the number of
-- minutes.
SELECT
x::text as duration_seconds,
neon.approximate_working_set_size_seconds(x) AS size
FROM (SELECT generate_series * 60 AS x FROM generate_series(1, 60)) AS t (x);

View File

@@ -1,12 +0,0 @@
{
metric_name: 'lfc_approximate_working_set_size_windows',
type: 'gauge',
help: 'Approximate working set size in pages of 8192 bytes',
key_labels: [
'duration',
],
values: [
'size',
],
query: importstr 'sql_exporter/lfc_approximate_working_set_size_windows.sql',
}

View File

@@ -1,8 +0,0 @@
-- NOTE: This is the "public" / "human-readable" version. Here, we supply a
-- small selection of durations in a pretty-printed form.
SELECT
x AS duration,
neon.approximate_working_set_size_seconds(extract('epoch' FROM x::interval)::int) AS size FROM (
VALUES ('5m'), ('15m'), ('1h')
) AS t (x);

View File

@@ -1,10 +0,0 @@
{
metric_name: 'lfc_cache_size_limit',
type: 'gauge',
help: 'LFC cache size limit in bytes',
key_labels: null,
values: [
'lfc_cache_size_limit',
],
query: importstr 'sql_exporter/lfc_cache_size_limit.sql',
}

View File

@@ -1 +0,0 @@
SELECT pg_size_bytes(current_setting('neon.file_cache_size_limit')) AS lfc_cache_size_limit;

View File

@@ -1,10 +0,0 @@
{
metric_name: 'lfc_hits',
type: 'gauge',
help: 'lfc_hits',
key_labels: null,
values: [
'lfc_hits',
],
query: importstr 'sql_exporter/lfc_hits.sql',
}

View File

@@ -1 +0,0 @@
SELECT lfc_value AS lfc_hits FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_hits';

View File

@@ -1,10 +0,0 @@
{
metric_name: 'lfc_misses',
type: 'gauge',
help: 'lfc_misses',
key_labels: null,
values: [
'lfc_misses',
],
query: importstr 'sql_exporter/lfc_misses.sql',
}

View File

@@ -1 +0,0 @@
SELECT lfc_value AS lfc_misses FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_misses';

View File

@@ -1,10 +0,0 @@
{
metric_name: 'lfc_used',
type: 'gauge',
help: 'LFC chunks used (chunk = 1MB)',
key_labels: null,
values: [
'lfc_used',
],
query: importstr 'sql_exporter/lfc_used.sql',
}

View File

@@ -1 +0,0 @@
SELECT lfc_value AS lfc_used FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_used';

View File

@@ -1,10 +0,0 @@
{
metric_name: 'lfc_writes',
type: 'gauge',
help: 'lfc_writes',
key_labels: null,
values: [
'lfc_writes',
],
query: importstr 'sql_exporter/lfc_writes.sql',
}

View File

@@ -1 +0,0 @@
SELECT lfc_value AS lfc_writes FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_writes';

View File

@@ -1,15 +0,0 @@
// Number of slots is limited by max_replication_slots, so collecting position
// for all of them shouldn't be bad.
{
metric_name: 'logical_slot_restart_lsn',
type: 'gauge',
help: 'restart_lsn of logical slots',
key_labels: [
'slot_name',
],
values: [
'restart_lsn',
],
query: importstr 'sql_exporter/logical_slot_restart_lsn.sql',
}

View File

@@ -1,3 +0,0 @@
SELECT slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
FROM pg_replication_slots
WHERE slot_type = 'logical';

View File

@@ -1,10 +0,0 @@
{
metric_name: 'max_cluster_size',
type: 'gauge',
help: 'neon.max_cluster_size setting',
key_labels: null,
values: [
'max_cluster_size',
],
query: importstr 'sql_exporter/max_cluster_size.sql',
}

View File

@@ -1 +0,0 @@
SELECT setting::int AS max_cluster_size FROM pg_settings WHERE name = 'neon.max_cluster_size';

View File

@@ -1,19 +0,0 @@
WITH c AS (SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters)
SELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(
file_cache_read_wait_seconds_count numeric,
file_cache_read_wait_seconds_sum numeric,
file_cache_write_wait_seconds_count numeric,
file_cache_write_wait_seconds_sum numeric,
getpage_wait_seconds_count numeric,
getpage_wait_seconds_sum numeric,
getpage_prefetch_requests_total numeric,
getpage_sync_requests_total numeric,
getpage_prefetch_misses_total numeric,
getpage_prefetch_discards_total numeric,
getpage_prefetches_buffered numeric,
pageserver_requests_sent_total numeric,
pageserver_disconnects_total numeric,
pageserver_send_flushes_total numeric,
pageserver_open_requests numeric
);

View File

@@ -1,9 +0,0 @@
{
metric_name: 'pageserver_disconnects_total',
type: 'counter',
help: 'Number of times that the connection to the pageserver was lost',
values: [
'pageserver_disconnects_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'pageserver_open_requests',
type: 'gauge',
help: 'Number of open requests to PageServer',
values: [
'pageserver_open_requests',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'pageserver_requests_sent_total',
type: 'counter',
help: 'Number of all requests sent to the pageserver (not just GetPage requests)',
values: [
'pageserver_requests_sent_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,9 +0,0 @@
{
metric_name: 'pageserver_send_flushes_total',
type: 'counter',
help: 'Number of flushes to the pageserver connection',
values: [
'pageserver_send_flushes_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -1,18 +0,0 @@
{
metric_name: 'pg_stats_userdb',
type: 'gauge',
help: 'Stats for several oldest non-system dbs',
key_labels: [
'datname',
],
value_label: 'kind',
values: [
'db_size',
'deadlocks',
// Rows
'inserted',
'updated',
'deleted',
],
query: importstr 'sql_exporter/pg_stats_userdb.sql',
}

View File

@@ -1,10 +0,0 @@
-- We export stats for 10 non-system databases. Without this limit it is too
-- easy to abuse the system by creating lots of databases.
SELECT pg_database_size(datname) AS db_size, deadlocks, tup_inserted AS inserted,
tup_updated AS updated, tup_deleted AS deleted, datname
FROM pg_stat_database
WHERE datname IN (
SELECT datname FROM pg_database
WHERE datname <> 'postgres' AND NOT datistemplate ORDER BY oid LIMIT 10
);

View File

@@ -1,10 +0,0 @@
{
metric_name: 'replication_delay_bytes',
type: 'gauge',
help: 'Bytes between received and replayed LSN',
key_labels: null,
values: [
'replication_delay_bytes',
],
query: importstr 'sql_exporter/replication_delay_bytes.sql',
}

View File

@@ -1,6 +0,0 @@
-- We use a GREATEST call here because this calculation can be negative. The
-- calculation is not atomic, meaning after we've gotten the receive LSN, the
-- replay LSN may have advanced past the receive LSN we are using for the
-- calculation.
SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;

View File

@@ -1,10 +0,0 @@
{
metric_name: 'replication_delay_seconds',
type: 'gauge',
help: 'Time since last LSN was replayed',
key_labels: null,
values: [
'replication_delay_seconds',
],
query: importstr 'sql_exporter/replication_delay_seconds.sql',
}

View File

@@ -1,5 +0,0 @@
SELECT
CASE
WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
ELSE GREATEST(0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
END AS replication_delay_seconds;

View File

@@ -1,12 +0,0 @@
{
metric_name: 'retained_wal',
type: 'gauge',
help: 'Retained WAL in inactive replication slots',
key_labels: [
'slot_name',
],
values: [
'retained_wal',
],
query: importstr 'sql_exporter/retained_wal.sql',
}

View File

@@ -1,5 +0,0 @@
SELECT
slot_name,
pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
FROM pg_replication_slots
WHERE active = false;

View File

@@ -1,12 +0,0 @@
{
metric_name: 'wal_is_lost',
type: 'gauge',
help: 'Whether or not the replication slot wal_status is lost',
key_labels: [
'slot_name',
],
values: [
'wal_is_lost',
],
query: importstr 'sql_exporter/wal_is_lost.sql',
}

View File

@@ -1,7 +0,0 @@
SELECT
slot_name,
CASE
WHEN wal_status = 'lost' THEN 1
ELSE 0
END AS wal_is_lost
FROM pg_replication_slots;

View File

@@ -0,0 +1,33 @@
# Configuration for sql_exporter for autoscaling-agent
# Global defaults.
global:
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: 10s
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: 500ms
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: 0s
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
# as will concurrent scrapes.
max_connections: 1
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
# always be the same as max_connections.
max_idle_connections: 1
# Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
# If 0, connections are not closed due to a connection's age.
max_connection_lifetime: 5m
# The target to monitor and the collectors to execute on it.
target:
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
# the schema gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
# Collectors (referenced by name) to execute on the target.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [neon_collector_autoscaling]
# Collector files specifies a list of globs. One collector definition is read from each matching file.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files:
- "neon_collector_autoscaling.yml"

View File

@@ -1,126 +0,0 @@
# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
---
commands:
- name: cgconfigparser
user: root
sysvInitAction: sysinit
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
# running it as root.
- name: chmod-resize-swap
user: root
sysvInitAction: sysinit
shell: 'chmod 711 /neonvm/bin/resize-swap'
- name: chmod-set-disk-quota
user: root
sysvInitAction: sysinit
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
- name: pgbouncer
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
- name: sql-exporter
user: nobody
sysvInitAction: respawn
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
- name: sql-exporter-autoscaling
user: nobody
sysvInitAction: respawn
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
shutdownHook: |
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
files:
- filename: compute_ctl-sudoers
content: |
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
- filename: cgconfig.conf
content: |
# Configuration for cgroups in VM compute nodes
group neon-postgres {
perm {
admin {
uid = postgres;
}
task {
gid = users;
}
}
memory {}
}
build: |
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
# requires cgroup v2, so we'll build cgroup-tools ourselves.
#
# At time of migration to bookworm (2024-10-09), debian has a version of libcgroup/cgroup-tools 2.0.2,
# and it _probably_ can be used as-is. However, we'll build it ourselves to minimise the changeset
# for debian version migration.
#
FROM debian:bookworm-slim as libcgroup-builder
ENV LIBCGROUP_VERSION=v2.0.3
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
merge: |
# tweak nofile limits
RUN set -e \
&& echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
&& test ! -e /etc/security || ( \
echo '* - nofile 1048576' >>/etc/security/limits.conf \
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
)
# Allow postgres user (compute_ctl) to run swap resizer.
# Need to install sudo in order to allow this.
#
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
RUN set -e \
&& apt update \
&& apt install --no-install-recommends -y \
sudo \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers
COPY cgconfig.conf /etc/cgconfig.conf
RUN set -e \
&& chmod 0644 /etc/cgconfig.conf
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/

View File

@@ -97,21 +97,7 @@ impl ComputeControlPlane {
for endpoint_dir in std::fs::read_dir(env.endpoints_path())
.with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
{
let ep_res = Endpoint::from_dir_entry(endpoint_dir?, &env);
let ep = match ep_res {
Ok(ep) => ep,
Err(e) => match e.downcast::<std::io::Error>() {
Ok(e) => {
// A parallel task could delete an endpoint while we have just scanned the directory
if e.kind() == std::io::ErrorKind::NotFound {
continue;
} else {
Err(e)?
}
}
Err(e) => Err(e)?,
},
};
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env)?;
endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
}

View File

@@ -28,9 +28,6 @@ pub enum ApiError {
#[error("Resource temporarily unavailable: {0}")]
ResourceUnavailable(Cow<'static, str>),
#[error("Too many requests: {0}")]
TooManyRequests(Cow<'static, str>),
#[error("Shutting down")]
ShuttingDown,
@@ -76,10 +73,6 @@ impl ApiError {
err.to_string(),
StatusCode::SERVICE_UNAVAILABLE,
),
ApiError::TooManyRequests(err) => HttpErrorBody::response_from_msg_and_status(
err.to_string(),
StatusCode::TOO_MANY_REQUESTS,
),
ApiError::Timeout(err) => HttpErrorBody::response_from_msg_and_status(
err.to_string(),
StatusCode::REQUEST_TIMEOUT,

View File

@@ -16,7 +16,7 @@ use fail::fail_point;
use pageserver_api::key::Key;
use postgres_ffi::pg_constants;
use std::fmt::Write as FmtWrite;
use std::time::{Instant, SystemTime};
use std::time::SystemTime;
use tokio::io;
use tokio::io::AsyncWrite;
use tracing::*;
@@ -352,25 +352,12 @@ where
}
}
let start_time = Instant::now();
let aux_files = self
for (path, content) in self
.timeline
.list_aux_files(self.lsn, self.ctx)
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
let aux_scan_time = start_time.elapsed();
let aux_estimated_size = aux_files
.values()
.map(|content| content.len())
.sum::<usize>();
info!(
"Scanned {} aux files in {}ms, aux file content size = {}",
aux_files.len(),
aux_scan_time.as_millis(),
aux_estimated_size
);
for (path, content) in aux_files {
.map_err(|e| BasebackupError::Server(e.into()))?
{
if path.starts_with("pg_replslot") {
let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
let restart_lsn = Lsn(u64::from_le_bytes(

View File

@@ -77,7 +77,6 @@ use crate::tenant::secondary::SecondaryController;
use crate::tenant::size::ModelInputs;
use crate::tenant::storage_layer::LayerAccessStatsReset;
use crate::tenant::storage_layer::LayerName;
use crate::tenant::timeline::offload::offload_timeline;
use crate::tenant::timeline::CompactFlags;
use crate::tenant::timeline::CompactionError;
use crate::tenant::timeline::Timeline;
@@ -326,7 +325,6 @@ impl From<crate::tenant::TimelineArchivalError> for ApiError {
match value {
NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()),
Timeout => ApiError::Timeout("hit pageserver internal timeout".into()),
Cancelled => ApiError::ShuttingDown,
e @ HasArchivedParent(_) => {
ApiError::PreconditionFailed(e.to_string().into_boxed_str())
}
@@ -717,8 +715,6 @@ async fn timeline_archival_config_handler(
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
tenant
.apply_timeline_archival_config(timeline_id, request_data.state, ctx)
.await?;
@@ -1787,49 +1783,6 @@ async fn timeline_compact_handler(
.await
}
// Run offload immediately on given timeline.
async fn timeline_offload_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
if tenant.get_offloaded_timeline(timeline_id).is_ok() {
return json_response(StatusCode::OK, ());
}
let timeline =
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
.await?;
if !tenant.timeline_has_no_attached_children(timeline_id) {
return Err(ApiError::PreconditionFailed(
"timeline has attached children".into(),
));
}
if !timeline.can_offload() {
return Err(ApiError::PreconditionFailed(
"Timeline::can_offload() returned false".into(),
));
}
offload_timeline(&tenant, &timeline)
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, ())
}
.instrument(info_span!("manual_timeline_offload", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.await
}
// Run checkpoint immediately on given timeline.
async fn timeline_checkpoint_handler(
request: Request<Body>,
@@ -3053,10 +3006,6 @@ pub fn make_router(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|r| api_handler(r, timeline_compact_handler),
)
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/offload",
|r| testing_api_handler("attempt timeline offload", r, timeline_offload_handler),
)
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
|r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),

View File

@@ -26,8 +26,8 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::SystemTime;
use std::time::{Duration, Instant};
use tokio::io::AsyncWriteExt;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -1137,10 +1137,10 @@ impl PageServerHandler {
.await
.map_err(map_basebackup_error)?;
} else {
let mut writer = BufWriter::new(pgb.copyout_writer());
let mut writer = pgb.copyout_writer();
if gzip {
let mut encoder = GzipEncoder::with_quality(
&mut writer,
writer,
// NOTE using fast compression because it's on the critical path
// for compute startup. For an empty database, we get
// <100KB with this method. The Level::Best compression method
@@ -1175,10 +1175,6 @@ impl PageServerHandler {
.await
.map_err(map_basebackup_error)?;
}
writer
.flush()
.await
.map_err(|e| map_basebackup_error(BasebackupError::Client(e)))?;
}
pgb.write_message_noflush(&BeMessage::CopyDone)

View File

@@ -1545,6 +1545,9 @@ impl<'a> DatadirModification<'a> {
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Update relation size cache
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
// Update logical database size.
self.pending_nblocks -= old_size as i64 - nblocks as i64;
}

Some files were not shown because too many files have changed in this diff Show More