From 295be03a33dba9778244234792e4d176a6906b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Fri, 4 Apr 2025 16:56:41 +0200 Subject: [PATCH] impr(ci): send clearer notifications to slack when retrying container image pushes (#11447) ## Problem We've started sending Slack notifications for failed container image pushes that are being retried. There are more messages coming in than expected, so clicking through the link to see which image failed is happening more often than we hoped. ## Summary of changes - Make Slack notifications clearer, including whether the job succeeded and what retries have happened. - Log failures/retries in the step more clearly, so that you can easily see when something fails. --- .github/scripts/push_with_image_map.py | 12 ++++++++++-- .github/workflows/_push-to-container-registry.yml | 13 ++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.github/scripts/push_with_image_map.py b/.github/scripts/push_with_image_map.py index 53f83379ae..85e2eb1937 100644 --- a/.github/scripts/push_with_image_map.py +++ b/.github/scripts/push_with_image_map.py @@ -2,6 +2,9 @@ import json import os import subprocess +RED = "\033[91m" +RESET = "\033[0m" + image_map = os.getenv("IMAGE_MAP") if not image_map: raise ValueError("IMAGE_MAP environment variable is not set") @@ -29,9 +32,14 @@ while len(pending) > 0: result = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) if result.returncode != 0: - failures.append((" ".join(cmd), result.stdout)) + failures.append((" ".join(cmd), result.stdout, target)) pending.append((source, target)) + print( + f"{RED}[RETRY]{RESET} Push failed for {target}. Retrying... 
(failure count: {len(failures)})" + ) + print(result.stdout) if len(failures) > 0 and (github_output := os.getenv("GITHUB_OUTPUT")): + failed_targets = [target for _, _, target in failures] with open(github_output, "a") as f: - f.write("slack_notify=true\n") + f.write(f"push_failures={json.dumps(failed_targets)}\n") diff --git a/.github/workflows/_push-to-container-registry.yml b/.github/workflows/_push-to-container-registry.yml index 9b3ad0fdbb..7d3a11409b 100644 --- a/.github/workflows/_push-to-container-registry.yml +++ b/.github/workflows/_push-to-container-registry.yml @@ -110,12 +110,19 @@ jobs: IMAGE_MAP: ${{ inputs.image-map }} - name: Notify Slack if container image pushing fails - if: steps.push.outputs.slack_notify == 'true' || failure() + if: steps.push.outputs.push_failures || failure() uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0 with: method: chat.postMessage token: ${{ secrets.SLACK_BOT_TOKEN }} payload: | channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }} - text: | - Pushing container images failed in <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> + text: > + *Container image pushing ${{ + steps.push.outcome == 'failure' && 'failed completely' || 'succeeded with some retries' + }}* in + <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> + + ${{ steps.push.outputs.push_failures && format( + '*Failed targets:*\n• {0}', join(fromJson(steps.push.outputs.push_failures), '\n• ') + ) || '' }}