From cb67f9a6517652d21917a77f61b3e466f992d901 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Mon, 5 May 2025 16:30:13 +0200 Subject: [PATCH] delete orphan left over projects (#11826) ## Problem sometimes our benchmarking GitHub workflow is terminated by side-effects beyond our control (e.g. GitHub runner looses connection to server) and then we have left-over Neon projects created during the workflow [Example where GitHub runner lost connection and project was not deleted](https://github.com/neondatabase/neon/actions/runs/14017400543/job/39244816485) Fixes https://github.com/neondatabase/cloud/issues/28546 ## Summary of changes - Add a cleanup step that cleans up left-over projects - also give each project created during workflows a name that references the testcase and GitHub runid ## Example run (test of new job steps) https://github.com/neondatabase/neon/actions/runs/14837092399/job/41650741922#step:6:63 --------- Co-authored-by: a-masterov <72613290+a-masterov@users.noreply.github.com> --- .github/workflows/benchmarking.yml | 71 +++++++++++++++++++ .../test_cumulative_statistics_persistence.py | 6 +- .../performance/test_physical_replication.py | 8 ++- 3 files changed, 82 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 5107f457e2..220d7905b1 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -53,6 +53,77 @@ concurrency: cancel-in-progress: true jobs: + cleanup: + runs-on: [ self-hosted, us-east-2, x64 ] + container: + image: ghcr.io/neondatabase/build-tools:pinned-bookworm + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --init + env: + ORG_ID: org-solitary-dew-09443886 + LIMIT: 100 + SEARCH: "Created by actions/neon-project-create; GITHUB_RUN_ID" + BASE_URL: https://console-stage.neon.build/api/v2 + DRY_RUN: "false" # Set to "true" to just test out the workflow + + steps: + - name: Harden the runner (Audit all outbound calls) + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 + with: + egress-policy: audit + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Cleanup inactive Neon projects left over from prior runs + env: + API_KEY: ${{ secrets.NEON_STAGING_API_KEY }} + run: | + set -euo pipefail + + NOW=$(date -u +%s) + DAYS_AGO=$((NOW - 5 * 86400)) + + REQUEST_URL="$BASE_URL/projects?limit=$LIMIT&search=$(printf '%s' "$SEARCH" | jq -sRr @uri)&org_id=$ORG_ID" + + echo "Requesting project list from:" + echo "$REQUEST_URL" + + response=$(curl -s -X GET "$REQUEST_URL" \ + --header "Accept: application/json" \ + --header "Content-Type: application/json" \ + --header "Authorization: Bearer ${API_KEY}" ) + + echo "Response:" + echo "$response" | jq . + + projects_to_delete=$(echo "$response" | jq --argjson cutoff "$DAYS_AGO" ' + .projects[] + | select(.compute_last_active_at != null) + | select((.compute_last_active_at | fromdateiso8601) < $cutoff) + | {id, name, compute_last_active_at} + ') + + if [ -z "$projects_to_delete" ]; then + echo "No projects eligible for deletion." + exit 0 + fi + + echo "Projects that will be deleted:" + echo "$projects_to_delete" | jq -r '.id' + + if [ "$DRY_RUN" = "false" ]; then + echo "$projects_to_delete" | jq -r '.id' | while read -r project_id; do + echo "Deleting project: $project_id" + curl -s -X DELETE "$BASE_URL/projects/$project_id" \ + --header "Accept: application/json" \ + --header "Content-Type: application/json" \ + --header "Authorization: Bearer ${API_KEY}" + done + else + echo "Dry run enabled — no projects were deleted." + fi bench: if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} permissions: diff --git a/test_runner/performance/test_cumulative_statistics_persistence.py b/test_runner/performance/test_cumulative_statistics_persistence.py index 061467bbad..5e9e55cb0f 100644 --- a/test_runner/performance/test_cumulative_statistics_persistence.py +++ b/test_runner/performance/test_cumulative_statistics_persistence.py @@ -1,4 +1,5 @@ import math # Add this import +import os import time import traceback from pathlib import Path @@ -87,7 +88,10 @@ def test_cumulative_statistics_persistence( - insert additional tuples that by itself are not enough to trigger auto-vacuum but in combination with the previous tuples are - verify that autovacuum is triggered by the combination of tuples inserted before and after endpoint suspension """ - project = neon_api.create_project(pg_version) + project = neon_api.create_project( + pg_version, + f"Test cumulative statistics persistence, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}", + ) project_id = project["project"]["id"] neon_api.wait_for_operation_to_finish(project_id) endpoint_id = project["endpoints"][0]["id"] diff --git a/test_runner/performance/test_physical_replication.py b/test_runner/performance/test_physical_replication.py index bdafa2d657..c580bfcc14 100644 --- a/test_runner/performance/test_physical_replication.py +++ b/test_runner/performance/test_physical_replication.py @@ -62,7 +62,9 @@ def test_ro_replica_lag( pgbench_duration = f"-T{test_duration_min * 60 * 2}" - project = neon_api.create_project(pg_version) + project = neon_api.create_project( + pg_version, f"Test readonly replica lag, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}" + ) project_id = project["project"]["id"] log.info("Project ID: %s", project_id) log.info("Primary endpoint ID: %s", project["endpoints"][0]["id"]) @@ -195,7 +197,9 @@ def test_replication_start_stop( pgbench_duration = f"-T{2**num_replicas * configuration_test_time_sec}" error_occurred = False - project = neon_api.create_project(pg_version) + project = neon_api.create_project( + pg_version, f"Test replication start stop, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}" + ) project_id = project["project"]["id"] log.info("Project ID: %s", project_id) log.info("Primary endpoint ID: %s", project["endpoints"][0]["id"])