mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
## Problem In large oltp test run https://github.com/neondatabase/neon/actions/runs/15905488707/job/44859116742 we see that the `Benchmark database maintenance` step is skipped in all 3 strategy variants, however it should be executed in two. This is due to treating the `test_maintenance` boolean type in the strategy in the condition of the `Benchmark database maintenance` step ## Summary of changes Use a boolean condition instead of a string comparison ## Test run from this pull request branch https://github.com/neondatabase/neon/actions/runs/15923605412
204 lines
8.7 KiB
YAML
204 lines
8.7 KiB
YAML
name: large oltp benchmark
|
|
|
|
on:
|
|
# uncomment to run on push for debugging your PR
|
|
#push:
|
|
# branches: [ bodobolero/synthetic_oltp_workload ]
|
|
|
|
schedule:
|
|
# * is a special character in YAML so you have to quote this string
|
|
# ┌───────────── minute (0 - 59)
|
|
# │ ┌───────────── hour (0 - 23)
|
|
# │ │ ┌───────────── day of the month (1 - 31)
|
|
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
|
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
|
- cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC
|
|
workflow_dispatch: # adds ability to run this manually
|
|
|
|
defaults:
|
|
run:
|
|
shell: bash -euxo pipefail {0}
|
|
|
|
concurrency:
|
|
# Allow only one workflow globally because we need dedicated resources which only exist once
|
|
group: large-oltp-bench-workflow
|
|
cancel-in-progress: false
|
|
|
|
permissions:
|
|
contents: read
|
|
|
|
jobs:
|
|
oltp:
|
|
strategy:
|
|
fail-fast: false # allow other variants to continue even if one fails
|
|
matrix:
|
|
include:
|
|
# test only read-only custom scripts in new branch without database maintenance
|
|
- target: new_branch
|
|
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
|
|
test_maintenance: false
|
|
# test all custom scripts in new branch with database maintenance
|
|
- target: new_branch
|
|
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
|
test_maintenance: true
|
|
# test all custom scripts in reuse branch with database maintenance
|
|
- target: reuse_branch
|
|
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
|
test_maintenance: true
|
|
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
|
|
permissions:
|
|
contents: write
|
|
statuses: write
|
|
id-token: write # aws-actions/configure-aws-credentials
|
|
env:
|
|
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
|
|
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
|
|
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
|
PG_VERSION: 16 # pre-determined by pre-determined project
|
|
TEST_OUTPUT: /tmp/test_output
|
|
BUILD_TYPE: remote
|
|
PLATFORM: ${{ matrix.target }}
|
|
|
|
runs-on: [ self-hosted, us-east-2, x64 ]
|
|
container:
|
|
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
|
credentials:
|
|
username: ${{ github.actor }}
|
|
password: ${{ secrets.GITHUB_TOKEN }}
|
|
options: --init
|
|
|
|
# Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time
|
|
# (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions
|
|
# in one run vacuum didn't finish within 12 hours
|
|
timeout-minutes: 2880
|
|
|
|
steps:
|
|
- name: Harden the runner (Audit all outbound calls)
|
|
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
|
with:
|
|
egress-policy: audit
|
|
|
|
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
|
|
- name: Configure AWS credentials # necessary to download artefacts
|
|
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
|
|
with:
|
|
aws-region: eu-central-1
|
|
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
|
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
|
|
|
- name: Download Neon artifact
|
|
uses: ./.github/actions/download
|
|
with:
|
|
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
|
path: /tmp/neon/
|
|
prefix: latest
|
|
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
|
|
|
- name: Create Neon Branch for large tenant
|
|
if: ${{ matrix.target == 'new_branch' }}
|
|
id: create-neon-branch-oltp-target
|
|
uses: ./.github/actions/neon-branch-create
|
|
with:
|
|
project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
|
|
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
|
|
|
- name: Set up Connection String
|
|
id: set-up-connstr
|
|
run: |
|
|
case "${{ matrix.target }}" in
|
|
new_branch)
|
|
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
|
|
;;
|
|
reuse_branch)
|
|
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
|
;;
|
|
*)
|
|
echo >&2 "Unknown target=${{ matrix.target }}"
|
|
exit 1
|
|
;;
|
|
esac
|
|
|
|
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
|
|
|
|
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
|
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
|
|
|
|
- name: Delete rows from prior runs in reuse branch
|
|
if: ${{ matrix.target == 'reuse_branch' }}
|
|
env:
|
|
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
|
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
|
|
PSQL: /tmp/neon/pg_install/v16/bin/psql
|
|
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
|
|
run: |
|
|
echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs"
|
|
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
|
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
|
|
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
|
|
|
|
- name: Benchmark pgbench with custom-scripts
|
|
uses: ./.github/actions/run-python-test-set
|
|
with:
|
|
build_type: ${{ env.BUILD_TYPE }}
|
|
test_selection: performance
|
|
run_in_parallel: false
|
|
save_perf_report: true
|
|
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
|
|
pg_version: ${{ env.PG_VERSION }}
|
|
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
|
env:
|
|
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
|
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
|
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
|
|
|
- name: Benchmark database maintenance
|
|
if: ${{ matrix.test_maintenance }}
|
|
uses: ./.github/actions/run-python-test-set
|
|
with:
|
|
build_type: ${{ env.BUILD_TYPE }}
|
|
test_selection: performance
|
|
run_in_parallel: false
|
|
save_perf_report: true
|
|
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
|
|
pg_version: ${{ env.PG_VERSION }}
|
|
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
|
env:
|
|
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
|
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
|
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
|
|
|
- name: Delete Neon Branch for large tenant
|
|
if: ${{ always() && matrix.target == 'new_branch' }}
|
|
uses: ./.github/actions/neon-branch-delete
|
|
with:
|
|
project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
|
|
branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
|
|
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
|
|
|
- name: Configure AWS credentials # again because prior steps could have exceeded 5 hours
|
|
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
|
|
with:
|
|
aws-region: eu-central-1
|
|
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
|
role-duration-seconds: 18000 # 5 hours
|
|
|
|
- name: Create Allure report
|
|
id: create-allure-report
|
|
if: ${{ !cancelled() }}
|
|
uses: ./.github/actions/allure-report-generate
|
|
with:
|
|
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
|
|
|
- name: Post to a Slack channel
|
|
if: ${{ github.event.schedule && failure() }}
|
|
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
|
|
with:
|
|
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
|
|
slack-message: |
|
|
Periodic large oltp perf testing: ${{ job.status }}
|
|
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
|
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
|
env:
|
|
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|