From 2ca5ff26d787a89ff9d3176a6e63ea2a2e4716b2 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Fri, 9 Aug 2024 09:36:29 +0200 Subject: [PATCH] Run a subset of benchmarking job steps on GitHub action runners in Azure - closer to the system under test (#8651) ## Problem Latency from one cloud provider to another one is higher than within the same cloud provider. Some of our benchmarks are latency sensitive - we run a pgbench or psql in the github action runner and the system under test is running in Neon (database project). For realistic perf tps and latency results we need to compare apples to apples and run the database client in the same "latency distance" for all tests. ## Summary of changes Move job steps that test Neon databases deployed on Azure into Azure action runners. - bench strategy variant using azure database - pgvector strategy variant using azure database - pgbench-compare strategy variants using azure database ## Test run https://github.com/neondatabase/neon/actions/runs/10314848502 --- .github/actionlint.yml | 1 + .github/workflows/benchmarking.yml | 88 +++++++++++++++++++++++------- 2 files changed, 69 insertions(+), 20 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index d27fa01efa..a5282876d0 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -13,3 +13,4 @@ config-variables: - REMOTE_STORAGE_AZURE_CONTAINER - REMOTE_STORAGE_AZURE_REGION - SLACK_UPCOMING_RELEASE_CHANNEL_ID + - DEV_AWS_OIDC_ROLE_ARN diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 0f4dac841e..6f80d6e431 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -56,6 +56,10 @@ concurrency: jobs: bench: if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} + permissions: + contents: write + statuses: write + id-token: write # Required for OIDC authentication in azure runners strategy: fail-fast: false matrix: @@ -63,9 +67,13 @@ jobs: - DEFAULT_PG_VERSION: 16 PLATFORM: "neon-staging" region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }} + RUNNER: [ self-hosted, us-east-2, x64 ] + IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned - DEFAULT_PG_VERSION: 16 PLATFORM: "azure-staging" region_id: 'azure-eastus2' + RUNNER: [ self-hosted, eastus2, x64 ] + IMAGE: neondatabase/build-tools:pinned env: TEST_PG_BENCH_DURATIONS_MATRIX: "300" TEST_PG_BENCH_SCALES_MATRIX: "10,100" @@ -76,14 +84,21 @@ jobs: SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.PLATFORM }} - runs-on: [ self-hosted, us-east-2, x64 ] + runs-on: ${{ matrix.RUNNER }} container: - image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned + image: ${{ matrix.IMAGE }} options: --init steps: - uses: actions/checkout@v4 + - name: Configure AWS credentials # necessary on Azure runners + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + - name: Download Neon artifact uses: ./.github/actions/download with: @@ -161,6 +176,7 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Download Neon artifact uses: ./.github/actions/download with: @@ -237,6 +253,9 @@ jobs: id: pgbench-compare-matrix run: | region_id_default=${{ env.DEFAULT_REGION_ID }} + runner_default='["self-hosted", "us-east-2", "x64"]' + runner_azure='["self-hosted", "eastus2", "x64"]' + image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned" matrix='{ "pg_version" : [ 16 @@ -250,16 +269,19 @@ jobs: "neonvm-captest-new" ], "db_size": [ "10gb" ], - "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" }, - { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb" }, - { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" }, - { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb" }, - { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb" }, - { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb" }] + "runner": ['"$runner_default"'], + "image": [ "'"$image_default"'" ], + "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" }, + { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, + { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, + { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" }, + { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" }, + { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" }, + { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }] }' if [ "$(date +%A)" = "Saturday" ]; then - matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb"}]') + matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]') fi echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT @@ -302,6 +324,10 @@ jobs: pgbench-compare: if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} needs: [ generate-matrices ] + permissions: + contents: write + statuses: write + id-token: write # Required for OIDC authentication in azure runners strategy: fail-fast: false @@ -317,9 +343,9 @@ jobs: SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} - runs-on: [ self-hosted, us-east-2, x64 ] + runs-on: ${{ matrix.runner }} container: - image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned + image: ${{ matrix.image }} options: --init # Increase timeout to 8h, default timeout is 6h @@ -328,6 +354,13 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Configure AWS credentials # necessary on Azure runners + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + - name: Download Neon artifact uses: ./.github/actions/download with: @@ -435,12 +468,20 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} pgbench-pgvector: + permissions: + contents: write + statuses: write + id-token: write # Required for OIDC authentication in azure runners strategy: fail-fast: false matrix: include: - PLATFORM: "neonvm-captest-pgvector" + RUNNER: [ self-hosted, us-east-2, x64 ] + IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned - PLATFORM: "azure-captest-pgvector" + RUNNER: [ self-hosted, eastus2, x64 ] + IMAGE: neondatabase/build-tools:pinned env: TEST_PG_BENCH_DURATIONS_MATRIX: "15m" @@ -453,9 +494,9 @@ jobs: SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.PLATFORM }} - runs-on: [ self-hosted, us-east-2, x64 ] + runs-on: ${{ matrix.RUNNER }} container: - image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned + image: ${{ matrix.IMAGE }} options: --init steps: @@ -466,12 +507,12 @@ jobs: - name: Install postgresql-16 where pytest expects it run: | cd /home/nonroot - wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.3-1.pgdg110%2B1_amd64.deb - wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.3-1.pgdg110%2B1_amd64.deb - wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.3-1.pgdg110%2B1_amd64.deb - dpkg -x libpq5_16.3-1.pgdg110+1_amd64.deb pg - dpkg -x postgresql-client-16_16.3-1.pgdg110+1_amd64.deb pg - dpkg -x postgresql-16_16.3-1.pgdg110+1_amd64.deb pg + wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb + wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb + wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb + dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg + dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg + dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg mkdir -p /tmp/neon/pg_install/v16/bin ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql @@ -496,6 +537,13 @@ jobs: esac echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + + - name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3 + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours - name: Benchmark pgvector hnsw indexing uses: ./.github/actions/run-python-test-set @@ -524,7 +572,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" - + - name: Create Allure report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate