name: TPC-C like benchmark using benchbase on: schedule: # * is a special character in YAML so you have to quote this string # ┌───────────── minute (0 - 59) # │ ┌───────────── hour (0 - 23) # │ │ ┌───────────── day of the month (1 - 31) # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - cron: '0 6 * * *' # run once a day at 6 AM UTC workflow_dispatch: # adds ability to run this manually defaults: run: shell: bash -euxo pipefail {0} concurrency: # Allow only one workflow globally because we do not want to be too noisy in production environment group: benchbase-tpcc-workflow cancel-in-progress: false permissions: contents: read jobs: benchbase-tpcc: strategy: fail-fast: false # allow other variants to continue even if one fails matrix: include: - warehouses: 50 # defines number of warehouses and is used to compute number of terminals max_rate: 800 # measured max TPS at scale factor based on experiments. Adjust if performance is better/worse min_cu: 0.25 # simulate free tier plan (0.25 -2 CU) max_cu: 2 - warehouses: 500 # serverless plan (2-8 CU) max_rate: 2000 min_cu: 2 max_cu: 8 - warehouses: 1000 # business plan (2-16 CU) max_rate: 2900 min_cu: 2 max_cu: 16 max-parallel: 1 # we want to run each workload size sequentially to avoid noisy neighbors permissions: contents: write statuses: write id-token: write # aws-actions/configure-aws-credentials env: PG_CONFIG: /tmp/neon/pg_install/v17/bin/pg_config PSQL: /tmp/neon/pg_install/v17/bin/psql PG_17_LIB_PATH: /tmp/neon/pg_install/v17/lib POSTGRES_VERSION: 17 runs-on: [ self-hosted, us-east-2, x64 ] timeout-minutes: 1440 steps: - name: Harden the runner (Audit all outbound calls) uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: egress-policy: audit - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Configure AWS credentials # necessary to download artefacts uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 with: aws-region: eu-central-1 role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role - name: Download Neon artifact uses: ./.github/actions/download with: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project-tpcc uses: ./.github/actions/neon-project-create with: region_id: aws-us-east-2 postgres_version: ${{ env.POSTGRES_VERSION }} compute_units: '[${{ matrix.min_cu }}, ${{ matrix.max_cu }}]' api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }} api_host: console.neon.tech # production (!) - name: Initialize Neon project env: BENCHMARK_TPCC_CONNSTR: ${{ steps.create-neon-project-tpcc.outputs.dsn }} PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }} run: | echo "Initializing Neon project with project_id: ${PROJECT_ID}" export LD_LIBRARY_PATH=${PG_17_LIB_PATH} # Retry logic for psql connection with 1 minute sleep between attempts for attempt in {1..3}; do echo "Attempt ${attempt}/3: Creating extensions in Neon project" if ${PSQL} "${BENCHMARK_TPCC_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"; then echo "Successfully created extensions" break else echo "Failed to create extensions on attempt ${attempt}" if [ ${attempt} -lt 3 ]; then echo "Waiting 60 seconds before retry..." sleep 60 else echo "All attempts failed, exiting" exit 1 fi fi done echo "BENCHMARK_TPCC_CONNSTR=${BENCHMARK_TPCC_CONNSTR}" >> $GITHUB_ENV - name: Generate BenchBase workload configuration env: WAREHOUSES: ${{ matrix.warehouses }} MAX_RATE: ${{ matrix.max_rate }} run: | echo "Generating BenchBase configs for warehouses: ${WAREHOUSES}, max_rate: ${MAX_RATE}" # Extract hostname and password from connection string # Format: postgresql://username:password@hostname/database?params (no port for Neon) HOSTNAME=$(echo "${BENCHMARK_TPCC_CONNSTR}" | sed -n 's|.*://[^:]*:[^@]*@\([^/]*\)/.*|\1|p') PASSWORD=$(echo "${BENCHMARK_TPCC_CONNSTR}" | sed -n 's|.*://[^:]*:\([^@]*\)@.*|\1|p') echo "Extracted hostname: ${HOSTNAME}" # Use runner temp (NVMe) as working directory cd "${RUNNER_TEMP}" # Copy the generator script cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_workload_size.py" . # Generate configs and scripts python3 generate_workload_size.py \ --warehouses ${WAREHOUSES} \ --max-rate ${MAX_RATE} \ --hostname ${HOSTNAME} \ --password ${PASSWORD} \ --runner-arch ${{ runner.arch }} # Fix path mismatch: move generated configs and scripts to expected locations mv ../configs ./configs mv ../scripts ./scripts - name: Prepare database (load data) env: WAREHOUSES: ${{ matrix.warehouses }} run: | cd "${RUNNER_TEMP}" echo "Loading ${WAREHOUSES} warehouses into database..." # Run the loader script and capture output to log file while preserving stdout/stderr ./scripts/load_${WAREHOUSES}_warehouses.sh 2>&1 | tee "load_${WAREHOUSES}_warehouses.log" echo "Database loading completed" - name: Run TPC-C benchmark (warmup phase, then benchmark at 70% of configuredmax TPS) env: WAREHOUSES: ${{ matrix.warehouses }} run: | cd "${RUNNER_TEMP}" echo "Running TPC-C benchmark with ${WAREHOUSES} warehouses..." # Run the optimal rate benchmark ./scripts/execute_${WAREHOUSES}_warehouses_opt_rate.sh echo "Benchmark execution completed" - name: Run TPC-C benchmark (warmup phase, then ramp down TPS and up again in 5 minute intervals) env: WAREHOUSES: ${{ matrix.warehouses }} run: | cd "${RUNNER_TEMP}" echo "Running TPC-C ramp-down-up with ${WAREHOUSES} warehouses..." # Run the optimal rate benchmark ./scripts/execute_${WAREHOUSES}_warehouses_ramp_up.sh echo "Benchmark execution completed" - name: Process results (upload to test results database and generate diagrams) env: WAREHOUSES: ${{ matrix.warehouses }} MIN_CU: ${{ matrix.min_cu }} MAX_CU: ${{ matrix.max_cu }} PROJECT_ID: ${{ steps.create-neon-project-tpcc.outputs.project_id }} REVISION: ${{ github.sha }} PERF_DB_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }} run: | cd "${RUNNER_TEMP}" echo "Creating temporary Python environment for results processing..." # Create temporary virtual environment python3 -m venv temp_results_env source temp_results_env/bin/activate # Install required packages in virtual environment pip install matplotlib pandas psycopg2-binary echo "Copying results processing scripts..." # Copy both processing scripts cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/generate_diagrams.py" . cp "${GITHUB_WORKSPACE}/test_runner/performance/benchbase_tpc_c_helpers/upload_results_to_perf_test_results.py" . echo "Processing load phase metrics..." # Find and process load log LOAD_LOG=$(find . -name "load_${WAREHOUSES}_warehouses.log" -type f | head -1) if [ -n "$LOAD_LOG" ]; then echo "Processing load metrics from: $LOAD_LOG" python upload_results_to_perf_test_results.py \ --load-log "$LOAD_LOG" \ --run-type "load" \ --warehouses "${WAREHOUSES}" \ --min-cu "${MIN_CU}" \ --max-cu "${MAX_CU}" \ --project-id "${PROJECT_ID}" \ --revision "${REVISION}" \ --connection-string "${PERF_DB_CONNSTR}" else echo "Warning: Load log file not found: load_${WAREHOUSES}_warehouses.log" fi echo "Processing warmup results for optimal rate..." # Find and process warmup results WARMUP_CSV=$(find results_warmup -name "*.results.csv" -type f | head -1) WARMUP_JSON=$(find results_warmup -name "*.summary.json" -type f | head -1) if [ -n "$WARMUP_CSV" ] && [ -n "$WARMUP_JSON" ]; then echo "Generating warmup diagram from: $WARMUP_CSV" python generate_diagrams.py \ --input-csv "$WARMUP_CSV" \ --output-svg "warmup_${WAREHOUSES}_warehouses_performance.svg" \ --title-suffix "Warmup at max TPS" echo "Uploading warmup metrics from: $WARMUP_JSON" python upload_results_to_perf_test_results.py \ --summary-json "$WARMUP_JSON" \ --results-csv "$WARMUP_CSV" \ --run-type "warmup" \ --min-cu "${MIN_CU}" \ --max-cu "${MAX_CU}" \ --project-id "${PROJECT_ID}" \ --revision "${REVISION}" \ --connection-string "${PERF_DB_CONNSTR}" else echo "Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)" fi echo "Processing optimal rate results..." # Find and process optimal rate results OPTRATE_CSV=$(find results_opt_rate -name "*.results.csv" -type f | head -1) OPTRATE_JSON=$(find results_opt_rate -name "*.summary.json" -type f | head -1) if [ -n "$OPTRATE_CSV" ] && [ -n "$OPTRATE_JSON" ]; then echo "Generating optimal rate diagram from: $OPTRATE_CSV" python generate_diagrams.py \ --input-csv "$OPTRATE_CSV" \ --output-svg "benchmark_${WAREHOUSES}_warehouses_performance.svg" \ --title-suffix "70% of max TPS" echo "Uploading optimal rate metrics from: $OPTRATE_JSON" python upload_results_to_perf_test_results.py \ --summary-json "$OPTRATE_JSON" \ --results-csv "$OPTRATE_CSV" \ --run-type "opt-rate" \ --min-cu "${MIN_CU}" \ --max-cu "${MAX_CU}" \ --project-id "${PROJECT_ID}" \ --revision "${REVISION}" \ --connection-string "${PERF_DB_CONNSTR}" else echo "Warning: Missing optimal rate results files (CSV: $OPTRATE_CSV, JSON: $OPTRATE_JSON)" fi echo "Processing warmup 2 results for ramp down/up phase..." # Find and process warmup results WARMUP_CSV=$(find results_warmup -name "*.results.csv" -type f | tail -1) WARMUP_JSON=$(find results_warmup -name "*.summary.json" -type f | tail -1) if [ -n "$WARMUP_CSV" ] && [ -n "$WARMUP_JSON" ]; then echo "Generating warmup diagram from: $WARMUP_CSV" python generate_diagrams.py \ --input-csv "$WARMUP_CSV" \ --output-svg "warmup_2_${WAREHOUSES}_warehouses_performance.svg" \ --title-suffix "Warmup at max TPS" echo "Uploading warmup metrics from: $WARMUP_JSON" python upload_results_to_perf_test_results.py \ --summary-json "$WARMUP_JSON" \ --results-csv "$WARMUP_CSV" \ --run-type "warmup" \ --min-cu "${MIN_CU}" \ --max-cu "${MAX_CU}" \ --project-id "${PROJECT_ID}" \ --revision "${REVISION}" \ --connection-string "${PERF_DB_CONNSTR}" else echo "Warning: Missing warmup results files (CSV: $WARMUP_CSV, JSON: $WARMUP_JSON)" fi echo "Processing ramp results..." # Find and process ramp results RAMPUP_CSV=$(find results_ramp_up -name "*.results.csv" -type f | head -1) RAMPUP_JSON=$(find results_ramp_up -name "*.summary.json" -type f | head -1) if [ -n "$RAMPUP_CSV" ] && [ -n "$RAMPUP_JSON" ]; then echo "Generating ramp diagram from: $RAMPUP_CSV" python generate_diagrams.py \ --input-csv "$RAMPUP_CSV" \ --output-svg "ramp_${WAREHOUSES}_warehouses_performance.svg" \ --title-suffix "ramp TPS down and up in 5 minute intervals" echo "Uploading ramp metrics from: $RAMPUP_JSON" python upload_results_to_perf_test_results.py \ --summary-json "$RAMPUP_JSON" \ --results-csv "$RAMPUP_CSV" \ --run-type "ramp-up" \ --min-cu "${MIN_CU}" \ --max-cu "${MAX_CU}" \ --project-id "${PROJECT_ID}" \ --revision "${REVISION}" \ --connection-string "${PERF_DB_CONNSTR}" else echo "Warning: Missing ramp results files (CSV: $RAMPUP_CSV, JSON: $RAMPUP_JSON)" fi # Deactivate and clean up virtual environment deactivate rm -rf temp_results_env rm upload_results_to_perf_test_results.py echo "Results processing completed and environment cleaned up" - name: Set date for upload id: set-date run: echo "date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT - name: Configure AWS credentials # necessary to upload results uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 with: aws-region: us-east-2 role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} role-duration-seconds: 900 # 900 is minimum value - name: Upload benchmark results to S3 env: S3_BUCKET: neon-public-benchmark-results S3_PREFIX: benchbase-tpc-c/${{ steps.set-date.outputs.date }}/${{ github.run_id }}/${{ matrix.warehouses }}-warehouses run: | echo "Redacting passwords from configuration files before upload..." # Mask all passwords in XML config files find "${RUNNER_TEMP}/configs" -name "*.xml" -type f -exec sed -i 's|[^<]*|redacted|g' {} \; echo "Uploading benchmark results to s3://${S3_BUCKET}/${S3_PREFIX}/" # Upload the entire benchmark directory recursively aws s3 cp --only-show-errors --recursive "${RUNNER_TEMP}" s3://${S3_BUCKET}/${S3_PREFIX}/ echo "Upload completed" - name: Delete Neon Project if: ${{ always() }} uses: ./.github/actions/neon-project-delete with: project_id: ${{ steps.create-neon-project-tpcc.outputs.project_id }} api_key: ${{ secrets.NEON_PRODUCTION_API_KEY_4_BENCHMARKS }} api_host: console.neon.tech # production (!)