name: benchmarking ingest

on:
  # uncomment to run on push for debugging your PR
  # push:
  #   branches: [ your branch ]
  schedule:
    # * is a special character in YAML so you have to quote this string
    # ┌───────────── minute (0 - 59)
    # │ ┌───────────── hour (0 - 23)
    # │ │ ┌───────────── day of the month (1 - 31)
    # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron: '0 9 * * *' # run once a day, timezone is UTC
  workflow_dispatch: # adds ability to run this manually

defaults:
  run:
    shell: bash -euxo pipefail {0}

concurrency:
  # Allow only one workflow globally because we need dedicated resources which only exist once
  group: ingest-bench-workflow
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  ingest:
    strategy:
      fail-fast: false # allow other variants to continue even if one fails
      matrix:
        include:
          - target_project: new_empty_project_stripe_size_2048
            stripe_size: 2048 # 16 MiB
            postgres_version: 16
            disable_sharding: false
          - target_project: new_empty_project_stripe_size_32768
            stripe_size: 32768 # 256 MiB
            # note that this is different from null because using null will shard_split the project only if it reaches the threshold
            # while here it is sharded from the beginning with a stripe size of 256 MiB
            disable_sharding: false
            postgres_version: 16
          - target_project: new_empty_project
            stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
            disable_sharding: false
            postgres_version: 16
          - target_project: new_empty_project
            stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
            disable_sharding: false
            postgres_version: 17
          - target_project: large_existing_project
            stripe_size: null # cannot re-shard or choose a different stripe size for an existing, already sharded project
            disable_sharding: false
            postgres_version: 16
          - target_project: new_empty_project_unsharded
            stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
            disable_sharding: true
            postgres_version: 16
      max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
    permissions:
      contents: write
      statuses: write
      id-token: write # aws-actions/configure-aws-credentials
    env:
      PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
      PSQL: /tmp/neon/pg_install/v16/bin/psql
      PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
      PGCOPYDB: /pgcopydb/bin/pgcopydb
      PGCOPYDB_LIB_PATH: /pgcopydb/lib
    runs-on: [ self-hosted, us-east-2, x64 ]
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --init
    timeout-minutes: 1440

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Configure AWS credentials # necessary to download artifacts
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          role-duration-seconds: 18000 # 5 hours is currently the max associated with the IAM role

      - name: Download Neon artifact
        uses: ./.github/actions/download
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
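      # The two setup paths below are mutually exclusive per matrix entry:
      # new_empty_project* targets get a freshly created project, while
      # large_existing_project reuses vars.BENCHMARK_INGEST_TARGET_PROJECTID and
      # only creates a throwaway branch on it.
      #
      # For debugging a single run, the workflow can also be started by hand via
      # workflow_dispatch, e.g. (assuming this file is named ingest_benchmark.yml):
      #   gh workflow run ingest_benchmark.yml --ref <your-branch>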
      - name: Create Neon Project
        if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
        id: create-neon-project-ingest-target
        uses: ./.github/actions/neon-project-create
        with:
          region_id: aws-us-east-2
          postgres_version: ${{ matrix.postgres_version }}
          compute_units: '[7, 7]' # we want to test a large compute here to avoid a compute-side bottleneck
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
          shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
          admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
          shard_count: 8
          stripe_size: ${{ matrix.stripe_size }}
          disable_sharding: ${{ matrix.disable_sharding }}

      - name: Initialize Neon project
        if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
        env:
          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
          NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
        run: |
          echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
          export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
          ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
          echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV

      - name: Create Neon Branch for large tenant
        if: ${{ matrix.target_project == 'large_existing_project' }}
        id: create-neon-branch-ingest-target
        uses: ./.github/actions/neon-branch-create
        with:
          project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}

      - name: Initialize Neon project
        if: ${{ matrix.target_project == 'large_existing_project' }}
        env:
          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
          NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
        run: |
          echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
          export LD_LIBRARY_PATH=${PG_16_LIB_PATH}

          # Extract the part before the database name
          base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}"

          # Extract the query parameters (if any) after the database name
          query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}"

          # Reconstruct the new connection string
          if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
            new_connstr="${base_connstr}/neondb?${query_params}"
          else
            new_connstr="${base_connstr}/neondb"
          fi

          ${PSQL} "${new_connstr}" -c "drop database ludicrous;"
          ${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"

          if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
            BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}"
          else
            BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous"
          fi

          ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
          echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV

      - name: Invoke pgcopydb
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: remote
          test_selection: performance/test_perf_ingest_using_pgcopydb.py
          run_in_parallel: false
          extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
          pg_version: v${{ matrix.postgres_version }}
          save_perf_report: true
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
          TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
          # we report PLATFORM to the NeonBenchmarker perf database (via zenbenchmark) and want to distinguish between new project and large tenant
          PLATFORM: "${{ matrix.target_project }}-us-east-2-staging"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
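      # Post-ingest diagnostics and cleanup. For the first matrix entry, PLATFORM above
      # resolves to "new_empty_project_stripe_size_2048-us-east-2-staging". The \dt+
      # listing below is only a sanity check of ingested relation sizes; the delete
      # steps use always() so the project/branch is removed even if the benchmark fails.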
      - name: show tables sizes after ingest
        run: |
          export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
          ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"

      - name: Delete Neon Project
        if: ${{ always() && startsWith(matrix.target_project, 'new_empty_project') }}
        uses: ./.github/actions/neon-project-delete
        with:
          project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}

      - name: Delete Neon Branch for large tenant
        if: ${{ always() && matrix.target_project == 'large_existing_project' }}
        uses: ./.github/actions/neon-branch-delete
        with:
          project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
          branch_id: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}