From 118e13438df173b98c83bea853e346ebbe00eab3 Mon Sep 17 00:00:00 2001 From: Alexander Lakhin Date: Mon, 16 Jun 2025 16:29:39 +0300 Subject: [PATCH] Add "Build and Test Fully" workflow (#11931) ## Problem We don't test debug builds for v14..v16 in the regular "Build and Test" runs to perform the testing faster, but it means we can't detect assertion failures in those versions. (See https://github.com/neondatabase/neon/issues/11891, https://github.com/neondatabase/neon/issues/11997) ## Summary of changes Add a new workflow to test all build types and all versions on all architectures. --- .github/workflows/_build-and-test-locally.yml | 11 +- .../workflows/build_and_run_selected_test.yml | 1 + .github/workflows/build_and_test_fully.yml | 151 ++++++++++++++++++ test_runner/regress/test_compatibility.py | 9 ++ 4 files changed, 169 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/build_and_test_fully.yml diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 663afa2c8b..4729aea4f6 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -38,6 +38,11 @@ on: required: false default: 1 type: number + rerun_failed: + description: 'rerun failed tests to ignore flaky tests' + required: false + default: true + type: boolean defaults: run: @@ -379,7 +384,7 @@ jobs: - name: Pytest regression tests continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }} uses: ./.github/actions/run-python-test-set - timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }} + timeout-minutes: ${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 75 || 180 }} with: build_type: ${{ inputs.build-type }} test_selection: regress @@ -387,14 +392,14 @@ jobs: run_with_real_s3: true real_s3_bucket: neon-github-ci-tests real_s3_region: eu-central-1 - rerun_failed: ${{ inputs.test-run-count == 1 }} + rerun_failed: 
${{ inputs.rerun_failed }} pg_version: ${{ matrix.pg_version }} sanitizers: ${{ inputs.sanitizers }} aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds. # Attempt to stop tests gracefully to generate test reports # until they are forcibly stopped by the stricter `timeout-minutes` limit. - extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }} + extra_params: --session-timeout=${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 3000 || 10200 }} --count=${{ inputs.test-run-count }} ${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }} env: TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} diff --git a/.github/workflows/build_and_run_selected_test.yml b/.github/workflows/build_and_run_selected_test.yml index 7f1eb991c4..0f53d44a13 100644 --- a/.github/workflows/build_and_run_selected_test.yml +++ b/.github/workflows/build_and_run_selected_test.yml @@ -58,6 +58,7 @@ jobs: test-cfg: ${{ inputs.pg-versions }} test-selection: ${{ inputs.test-selection }} test-run-count: ${{ fromJson(inputs.run-count) }} + rerun_failed: false secrets: inherit create-test-report: diff --git a/.github/workflows/build_and_test_fully.yml b/.github/workflows/build_and_test_fully.yml new file mode 100644 index 0000000000..7d6543ee26 --- /dev/null +++ b/.github/workflows/build_and_test_fully.yml @@ -0,0 +1,151 @@ +name: Build and Test Fully + +on: + schedule: + # * is a special character in YAML so you have to quote this string + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + - cron: '0 3 * * *' # run once a day, timezone is utc + workflow_dispatch: + +defaults: + run: + shell: bash -euxo pipefail 
{0} + +concurrency: + # Allow only one workflow per any non-`main` branch. + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} + cancel-in-progress: true + +env: + RUST_BACKTRACE: 1 + COPT: '-Werror' + +jobs: + tag: + runs-on: [ self-hosted, small ] + container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned + outputs: + build-tag: ${{steps.build-tag.outputs.tag}} + + steps: + # Need `fetch-depth: 0` to count the number of commits in the branch + - name: Harden the runner (Audit all outbound calls) + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 + with: + egress-policy: audit + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: Get build tag + run: | + echo run:$GITHUB_RUN_ID + echo ref:$GITHUB_REF_NAME + echo rev:$(git rev-list --count HEAD) + if [[ "$GITHUB_REF_NAME" == "main" ]]; then + echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release" ]]; then + echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then + echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then + echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + else + echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'" + echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT + fi + shell: bash + id: build-tag + + build-build-tools-image: + uses: ./.github/workflows/build-build-tools-image.yml + secrets: inherit + + build-and-test-locally: + needs: [ tag, build-build-tools-image ] + strategy: + fail-fast: false + matrix: + arch: [ x64, arm64 ] + build-type: [ debug, release ] + uses: 
./.github/workflows/_build-and-test-locally.yml + with: + arch: ${{ matrix.arch }} + build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm + build-tag: ${{ needs.tag.outputs.build-tag }} + build-type: ${{ matrix.build-type }} + rerun_failed: false + test-cfg: '[{"pg_version":"v14", "lfc_state": "with-lfc"}, + {"pg_version":"v15", "lfc_state": "with-lfc"}, + {"pg_version":"v16", "lfc_state": "with-lfc"}, + {"pg_version":"v17", "lfc_state": "with-lfc"}, + {"pg_version":"v14", "lfc_state": "without-lfc"}, + {"pg_version":"v15", "lfc_state": "without-lfc"}, + {"pg_version":"v16", "lfc_state": "without-lfc"}, + {"pg_version":"v17", "lfc_state": "without-lfc"}]' + secrets: inherit + + + create-test-report: + needs: [ build-and-test-locally, build-build-tools-image ] + if: ${{ !cancelled() }} + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write + outputs: + report-url: ${{ steps.create-allure-report.outputs.report-url }} + + runs-on: [ self-hosted, small ] + container: + image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + options: --init + + steps: + - name: Harden the runner (Audit all outbound calls) + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 + with: + egress-policy: audit + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Create Allure report + if: ${{ !cancelled() }} + id: create-allure-report + uses: ./.github/actions/allure-report-generate + with: + store-test-results-into-db: true + aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} + + - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ !cancelled() }} + with: + # Retry script for 5XX server errors: 
https://github.com/actions/github-script#retries + retries: 5 + script: | + const report = { + reportUrl: "${{ steps.create-allure-report.outputs.report-url }}", + reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}", + } + + const coverage = {} + + const script = require("./scripts/comment-test-report.js") + await script({ + github, + context, + fetch, + report, + coverage, + }) diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 31e38144e3..bc9b534095 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -127,6 +127,12 @@ check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif( reason="CHECK_ONDISK_DATA_COMPATIBILITY env is not set", ) +skip_old_debug_versions = pytest.mark.skipif( + os.getenv("BUILD_TYPE", "debug") == "debug" + and os.getenv("DEFAULT_PG_VERSION") in [PgVersion.V14, PgVersion.V15, PgVersion.V16], + reason="compatibility snapshots not available for old versions of debug builds", +) + @pytest.mark.xdist_group("compatibility") @pytest.mark.order(before="test_forward_compatibility") @@ -197,6 +203,7 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_ @check_ondisk_data_compatibility_if_enabled +@skip_old_debug_versions @pytest.mark.xdist_group("compatibility") @pytest.mark.order(after="test_create_snapshot") def test_backward_compatibility( @@ -224,6 +231,7 @@ def test_backward_compatibility( @check_ondisk_data_compatibility_if_enabled +@skip_old_debug_versions @pytest.mark.xdist_group("compatibility") @pytest.mark.order(after="test_create_snapshot") def test_forward_compatibility( @@ -593,6 +601,7 @@ def test_historic_storage_formats( @check_ondisk_data_compatibility_if_enabled +@skip_old_debug_versions @pytest.mark.xdist_group("compatibility") @pytest.mark.parametrize( **fixtures.utils.allpairs_versions(),