neon/.github/workflows/benchmarking.yml

name: Benchmarking

on:
  # uncomment to run on push for debugging your PR
  # push:
  #   branches: [ your branch ]
  schedule:
    # * is a special character in YAML so you have to quote this string
    #          ┌───────────── minute (0 - 59)
    #          │ ┌───────────── hour (0 - 23)
    #          │ │ ┌───────────── day of the month (1 - 31)
    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron:  '36 4 * * *' # run once a day, timezone is utc

  workflow_dispatch: # adds ability to run this manually

defaults:
  run:
    shell: bash -euxo pipefail {0}

concurrency:
  # Allow only one workflow per any non-`main` branch.
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
  cancel-in-progress: true

jobs:
  bench:
    # this workflow runs on self hosteed runner
    # it's environment is quite different from usual guthub runner
    # probably the most important difference is that it doesn't start from clean workspace each time
    # e g if you install system packages they are not cleaned up since you install them directly in host machine
    # not a container or something
    # See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
    runs-on: [self-hosted, zenith-benchmarker]

    env:
      POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"

    steps:
    - name: Checkout zenith repo
      uses: actions/checkout@v3

    # actions/setup-python@v2 is not working correctly on self-hosted runners
    # see https://github.com/actions/setup-python/issues/162
    # and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
    # so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
    # there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtuealenvs
    - name: Install poetry & deps
      run: |
        python3 -m pip install --upgrade poetry wheel
        # since pip/poetry caches are reused there shouldn't be any troubles with install every time
        ./scripts/pysync

    - name: Show versions
      run: |
        echo Python
        python3 --version
        poetry run python3 --version
        echo Poetry
        poetry --version
        echo Pgbench
        $POSTGRES_DISTRIB_DIR/bin/pgbench --version

    # FIXME cluster setup is skipped due to various changes in console API
    # for now pre created cluster is used. When API gain some stability
    # after massive changes dynamic cluster setup will be revived.
    # So use pre created cluster. It needs to be started manually, but stop is automatic after 5 minutes of inactivity
    - name: Setup cluster
      env:
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
      run: |
        set -e

        echo "Starting cluster"
        # wake up the cluster
        $POSTGRES_DISTRIB_DIR/bin/psql $BENCHMARK_CONNSTR -c "SELECT 1"

    - name: Run benchmark
      # pgbench is installed system wide from official repo
      # https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
      # via
      # sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
      # [pgdg13]
      # name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
      # baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
      # enabled=1
      # gpgcheck=0
      # EOF
      # sudo yum makecache
      # sudo yum install postgresql13-contrib
      # actual binaries are located in /usr/pgsql-13/bin/
      env:
        # The pgbench test runs two tests of given duration against each scale.
        # So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
        # Plus time needed to initialize the test databases.
        TEST_PG_BENCH_DURATIONS_MATRIX: "300"
        TEST_PG_BENCH_SCALES_MATRIX: "10,100"
        PLATFORM: "neon-staging"
        BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
        REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
      run: |
        # just to be sure that no data was cached on self hosted runner
        # since it might generate duplicates when calling ingest_perf_test_result.py
        rm -rf perf-report-staging
        mkdir -p perf-report-staging
        # Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
        # it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
        ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400

    - name: Submit result
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
      run: |
        REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
        slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

  pgbench-compare:
    env:
      TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
      TEST_PG_BENCH_SCALES_MATRIX: "10gb"
      REMOTE_ENV: "1"
      POSTGRES_DISTRIB_DIR: /usr
      TEST_OUTPUT: /tmp/test_output

    strategy:
      fail-fast: false
      matrix:
        connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ]

    runs-on: dev
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2817580636

    timeout-minutes: 360 # 6h

    steps:
    - uses: actions/checkout@v3

    - name: Cache poetry deps
      id: cache_poetry
      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry/virtualenvs
        key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}

    - name: Install Python deps
      run: ./scripts/pysync

    - name: Calculate platform
      id: calculate-platform
      env:
        CONNSTR: ${{ matrix.connstr }}
      run: |
        if [ "${CONNSTR}" = "BENCHMARK_CAPTEST_CONNSTR" ]; then
          PLATFORM=neon-captest
        elif [ "${CONNSTR}" = "BENCHMARK_RDS_CONNSTR" ]; then
          PLATFORM=rds-aurora
        else
          echo 2>&1 "Unknown CONNSTR=${CONNSTR}. Allowed are BENCHMARK_CAPTEST_CONNSTR, and BENCHMARK_RDS_CONNSTR only"
          exit 1
        fi

        echo "::set-output name=PLATFORM::${PLATFORM}"

    - name: Install Deps
      run: |
        echo "deb http://apt.postgresql.org/pub/repos/apt focal-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
        wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
        sudo apt -y update
        sudo apt install -y postgresql-14 postgresql-client-14

    - name: Benchmark init
      env:
        PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
        BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
      run: |
        mkdir -p perf-report-captest

        psql $BENCHMARK_CONNSTR -c "SELECT 1;"
        ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600

    - name: Benchmark simple-update
      env:
        PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
        BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
      run: |
        psql $BENCHMARK_CONNSTR -c "SELECT 1;"
        ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600

    - name: Benchmark select-only
      env:
        PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
        BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
      run: |
        psql $BENCHMARK_CONNSTR -c "SELECT 1;"
        ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600

    - name: Submit result
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
      run: |
        REPORT_FROM=$(realpath perf-report-captest) REPORT_TO=staging scripts/generate_and_push_perf_report.sh

    - name: Upload logs
      if: always()
      uses: ./.github/actions/upload
      with:
        name: bench-captest-${{ steps.calculate-platform.outputs.PLATFORM }}
        path: /tmp/test_output/

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
        slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}