Move logic for ingest benchmark from GitHub workflow into python testcase (#9762)

## Problem

The first version of the ingest benchmark had some parsing and reporting
logic in shell script inside the GitHub workflow.
It is better to move that logic into a Python testcase so that we can
also run it locally.

## Summary of changes

- Create a new Python testcase
- Invoke pgcopydb inside the Python testcase
- Move the following logic into the Python testcase (see the sketch after this list)
  - determine backpressure
  - invoke pgcopydb and report its progress
  - parse the pgcopydb log and extract metrics
  - insert the metrics into the perf test database
- Add an additional column to the perf test database that receives the endpoint
  ID used for the pgcopydb run, so that it is available in the Grafana dashboard
  when retrieving other metrics for that endpoint
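
As a minimal sketch (assuming the `zenbenchmark` fixture from `fixtures.benchmark_fixture`; the log line and endpoint ID are example values), a parsed pgcopydb duration now ends up in the perf test database like this:

```python
import re

from fixtures.benchmark_fixture import MetricReport
from fixtures.utils import humantime_to_ms

# example line from the pgcopydb summary table
line = "Total Wall Clock Duration    both    16h49m    20"
duration = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line).group(0)  # "16h49m"
# rewrite "16h49m" as "16h 49m", the rust-like humantime format the fixture parses
seconds = humantime_to_ms(" ".join(re.findall(r"\d+[a-zA-Z]+", duration))) / 1000.0
zenbenchmark.record(
    "TOTAL_DURATION",
    seconds,
    "s",
    MetricReport.LOWER_IS_BETTER,
    {"endpoint_id": "ep-icy-union-w25qd5pj"},  # stored in the new labels column
)
```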

## Example run


https://github.com/neondatabase/neon/actions/runs/11860622170/job/33056264386
Peter Bendel authored on 2024-11-19 10:46:46 +01:00, committed by GitHub
parent 9b6af2bcad, commit 982cb1c15d
6 changed files with 324 additions and 248 deletions

View File

@@ -48,6 +48,10 @@ inputs:
description: 'benchmark durations JSON'
required: false
default: '{}'
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''
runs:
using: "composite"
@@ -222,6 +226,13 @@ runs:
# (for example if we didn't run the test for non build-and-test workflow)
skip-if-does-not-exist: true
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
- name: Upload test results
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-store

View File

@@ -133,6 +133,7 @@ jobs:
--ignore test_runner/performance/test_perf_pgvector_queries.py
--ignore test_runner/performance/test_logical_replication.py
--ignore test_runner/performance/test_physical_replication.py
--ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

View File

@@ -1,4 +1,4 @@
name: Benchmarking
name: benchmarking ingest
on:
# uncomment to run on push for debugging your PR
@@ -74,18 +74,16 @@ jobs:
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Initialize Neon project and retrieve current backpressure seconds
- name: Initialize Neon project
if: ${{ matrix.target_project == 'new_empty_project' }}
env:
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
run: |
echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
- name: Create Neon Branch for large tenant
if: ${{ matrix.target_project == 'large_existing_project' }}
@@ -95,266 +93,55 @@ jobs:
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Initialize Neon project and retrieve current backpressure seconds
- name: Initialize Neon project
if: ${{ matrix.target_project == 'large_existing_project' }}
env:
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
run: |
echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Extract the part before the database name
base_connstr="${NEW_PROJECT_CONNSTR%/*}"
base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}"
# Extract the query parameters (if any) after the database name
query_params="${NEW_PROJECT_CONNSTR#*\?}"
query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}"
# Reconstruct the new connection string
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
new_connstr="${base_connstr}/neondb?${query_params}"
else
new_connstr="${base_connstr}/neondb"
fi
${PSQL} "${new_connstr}" -c "drop database ludicrous;"
${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}"
if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}"
else
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous"
BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous"
fi
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
- name: Create pgcopydb filter file
run: |
cat << EOF > /tmp/pgcopydb_filter.txt
[include-only-table]
public.events
public.emails
public.email_transmissions
public.payments
public.editions
public.edition_modules
public.sp_content
public.email_broadcasts
public.user_collections
public.devices
public.user_accounts
public.lessons
public.lesson_users
public.payment_methods
public.orders
public.course_emails
public.modules
public.users
public.module_users
public.courses
public.payment_gateway_keys
public.accounts
public.roles
public.payment_gateways
public.management
public.event_names
EOF
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
- name: Invoke pgcopydb
- name: Invoke pgcopydb
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: performance/test_perf_ingest_using_pgcopydb.py
run_in_parallel: false
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
pg_version: v16
save_perf_report: true
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH}
export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}"
export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}"
export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
${PG_CONFIG} --bindir
${PGCOPYDB} --version
${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \
--index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \
--use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
# we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant
PLATFORM: "${{ matrix.target_project }}-us-east-2-staging"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
# create dummy pgcopydb log to test parsing
# - name: create dummy log for parser test
# run: |
# cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log
# 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb"
# 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database
# 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID
# 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid.
# 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk
# 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints)
# 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences
# 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions
# 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database
# 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data)
# 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
# 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database
# 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes
# 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes
# 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found.
# 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values
# 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database
# 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database
# 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed
# 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes
# OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration
# ------+--------+----------------------+-------+---------------+-------------------+---------+----------------------
# 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m
# 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m
# 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s
# 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s
# 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s
# 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s
# 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s
# 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s
# 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s
# 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s
# 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms
# 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s
# 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s
# 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749
# 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms
# 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696
# 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032
# 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms
# 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms
# 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms
# 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms
# 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms
# 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms
# 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms
# 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms
# 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms
# Step Connection Duration Transfer Concurrency
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Catalog Queries (table ordering, filtering, etc) source 12s 1
# Dump Schema source 765ms 1
# Prepare Schema target 466ms 1
# COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12
# COPY (cumulative) both 7h46m 1225 GB 4
# CREATE INDEX (cumulative) target 4h36m 4
# CONSTRAINTS (cumulative) target 8s493 4
# VACUUM (cumulative) target 0ms 4
# Reset Sequences both 60ms 1
# Large Objects (cumulative) (null) 0ms 0
# Finalize Schema both 14h01m 4
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Total Wall Clock Duration both 16h49m 20
# EOF
- name: show tables sizes and retrieve current backpressure seconds
- name: show tables sizes after ingest
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+"
BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV
- name: Parse pgcopydb log and report performance metrics
env:
PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Define the log file path
LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log"
# Get the current git commit hash
git config --global --add safe.directory /__w/neon/neon
COMMIT_HASH=$(git rev-parse --short HEAD)
# Define the platform and test suite
PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging"
SUIT="pgcopydb_ingest_bench"
# Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds
convert_to_seconds() {
local duration=$1
local total_seconds=0
# Check for hours (h)
if [[ "$duration" =~ ([0-9]+)h ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600))
fi
# Check for seconds (s)
if [[ "$duration" =~ ([0-9]+)s ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0}))
fi
# Check for milliseconds (ms) (if applicable)
if [[ "$duration" =~ ([0-9]+)ms ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000))
duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m
fi
# Check for minutes (m) - must be checked after ms because m is contained in ms
if [[ "$duration" =~ ([0-9]+)m ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60))
fi
echo $total_seconds
}
# Calculate the backpressure difference in seconds
BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}")
# Insert the backpressure time difference into the performance database
if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds"
eval $PSQL_CMD
fi
# Extract and process log lines
while IFS= read -r line; do
METRIC_NAME=""
# Match each desired line and extract the relevant information
if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then
METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)"
elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then
METRIC_NAME="COPY (cumulative)"
elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then
METRIC_NAME="CREATE INDEX (cumulative)"
elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then
METRIC_NAME="CONSTRAINTS (cumulative)"
elif [[ "$line" =~ Finalize\ Schema.* ]]; then
METRIC_NAME="Finalize Schema"
elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then
METRIC_NAME="Total Wall Clock Duration"
fi
# If a metric was matched, insert it into the performance database
if [ -n "$METRIC_NAME" ]; then
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1)
METRIC_VALUE=$(convert_to_seconds "$DURATION")
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds"
eval $PSQL_CMD
fi
done < "$LOG_FILE"
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
- name: Delete Neon Project
if: ${{ always() && matrix.target_project == 'new_empty_project' }}

View File

@@ -25,7 +25,8 @@ CREATE TABLE IF NOT EXISTS perf_test_results (
metric_value NUMERIC,
metric_unit VARCHAR(10),
metric_report_type TEXT,
recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
labels JSONB DEFAULT '{}'
)
"""
@@ -91,6 +92,7 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int)
"metric_unit": metric["unit"],
"metric_report_type": metric["report"],
"recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp),
"labels": json.dumps(metric.get("labels")),
}
args_list.append(values)
@@ -105,7 +107,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int)
metric_value,
metric_unit,
metric_report_type,
recorded_at_timestamp
recorded_at_timestamp,
labels
) VALUES %s
""",
args_list,
@@ -117,7 +120,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int)
%(metric_value)s,
%(metric_unit)s,
%(metric_report_type)s,
%(recorded_at_timestamp)s
%(recorded_at_timestamp)s,
%(labels)s
)""",
)
return len(args_list)
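
With the new `labels` column in place, a Grafana panel can retrieve the endpoint ID alongside the other metrics. A hypothetical lookup (the psycopg2 usage and the exact query are illustrative, not part of this commit):

```python
import os

import psycopg2

# read ingest metrics together with the endpoint ID stored in the JSONB labels column
with psycopg2.connect(os.environ["PERF_TEST_RESULT_CONNSTR"]) as conn:
    with conn.cursor() as cur:
        cur.execute(
            """
            SELECT metric_name, metric_value, labels->>'endpoint_id'
            FROM public.perf_test_results
            WHERE suit = %s
            ORDER BY recorded_at_timestamp DESC
            """,
            ("pgcopydb_ingest_bench",),
        )
        for name, value, endpoint_id in cur.fetchall():
            print(name, value, endpoint_id)
```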

View File

@@ -256,12 +256,17 @@ class NeonBenchmarker:
metric_value: float,
unit: str,
report: MetricReport,
labels: Optional[
dict[str, str]
] = None, # use this to associate additional key/value pairs in json format for associated Neon object IDs like project ID with the metric
):
"""
Record a benchmark result.
"""
# just to namespace the value
name = f"{self.PROPERTY_PREFIX}_{metric_name}"
if labels is None:
labels = {}
self.property_recorder(
name,
{
@@ -269,6 +274,7 @@ class NeonBenchmarker:
"value": metric_value,
"unit": unit,
"report": report,
"labels": labels,
},
)
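
Usage stays backward compatible; a short sketch (the metric name and value are invented for illustration):

```python
# existing callers are unchanged: labels defaults to {} when omitted
zenbenchmark.record("copy_duration", 1234.5, "s", MetricReport.LOWER_IS_BETTER)

# new: attach Neon object IDs to a metric, e.g. the compute endpoint ID
zenbenchmark.record(
    "copy_duration",
    1234.5,
    "s",
    MetricReport.LOWER_IS_BETTER,
    labels={"endpoint_id": "ep-icy-union-w25qd5pj"},
)
```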

View File

@@ -0,0 +1,267 @@
import os
import re
import subprocess
import sys
import textwrap
from pathlib import Path
from typing import cast
from urllib.parse import urlparse

import pytest
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.utils import humantime_to_ms


def setup_environment():
"""Set up necessary environment variables for pgcopydb execution.
Expects the following variables to be set in the environment:
- PG_CONFIG: e.g. /tmp/neon/pg_install/v16/bin/pg_config
- PSQL: e.g. /tmp/neon/pg_install/v16/bin/psql
- PG_16_LIB_PATH: e.g. /tmp/neon/pg_install/v16/lib
- PGCOPYDB: e.g. /pgcopydb/bin/pgcopydb
- PGCOPYDB_LIB_PATH: e.g. /pgcopydb/lib
- BENCHMARK_INGEST_SOURCE_CONNSTR
- BENCHMARK_INGEST_TARGET_CONNSTR
- PERF_TEST_RESULT_CONNSTR
- TARGET_PROJECT_TYPE
"""
# Ensure required environment variables are set
required_env_vars = [
"PGCOPYDB",
"PGCOPYDB_LIB_PATH",
"PG_CONFIG",
"PSQL",
"PG_16_LIB_PATH",
"BENCHMARK_INGEST_SOURCE_CONNSTR",
"BENCHMARK_INGEST_TARGET_CONNSTR",
"PERF_TEST_RESULT_CONNSTR",
"TARGET_PROJECT_TYPE",
]
for var in required_env_vars:
if not os.getenv(var):
raise OSError(f"Required environment variable '{var}' is not set.")


def build_pgcopydb_command(pgcopydb_filter_file: Path, test_output_dir: Path):
"""Builds the pgcopydb command to execute using existing environment variables."""
pgcopydb_executable = os.getenv("PGCOPYDB")
if not pgcopydb_executable:
raise OSError("PGCOPYDB environment variable is not set.")
return [
pgcopydb_executable,
"clone",
"--dir",
str(test_output_dir),
"--skip-vacuum",
"--no-owner",
"--no-acl",
"--skip-db-properties",
"--table-jobs",
"4",
"--index-jobs",
"4",
"--restore-jobs",
"4",
"--split-tables-larger-than",
"10GB",
"--skip-extensions",
"--use-copy-binary",
"--filters",
str(pgcopydb_filter_file),
]


@pytest.fixture()  # must be function scoped because test_output_dir is function scoped
def pgcopydb_filter_file(test_output_dir: Path) -> Path:
"""Creates the pgcopydb_filter.txt file required by pgcopydb."""
filter_content = textwrap.dedent("""\
[include-only-table]
public.events
public.emails
public.email_transmissions
public.payments
public.editions
public.edition_modules
public.sp_content
public.email_broadcasts
public.user_collections
public.devices
public.user_accounts
public.lessons
public.lesson_users
public.payment_methods
public.orders
public.course_emails
public.modules
public.users
public.module_users
public.courses
public.payment_gateway_keys
public.accounts
public.roles
public.payment_gateways
public.management
public.event_names
""")
filter_path = test_output_dir / "pgcopydb_filter.txt"
filter_path.write_text(filter_content)
return filter_path


def get_backpressure_time(connstr):
"""Executes a query to get the backpressure throttling time in seconds."""
query = "select backpressure_throttling_time()/1000000;"
psql_path = os.getenv("PSQL")
if psql_path is None:
raise OSError("The PSQL environment variable is not set.")
result = subprocess.run(
[psql_path, connstr, "-t", "-c", query], capture_output=True, text=True, check=True
)
return float(result.stdout.strip())


def run_command_and_log_output(command, log_file_path: Path):
"""
Runs a command and logs output to both a file and GitHub Actions console.
Args:
command (list): The command to execute.
log_file_path (Path): Path object for the log file where output is written.
"""
# Define a list of necessary environment variables for pgcopydb
custom_env_vars = {
"LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}",
"PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")),
"PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")),
"PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7",
}
# Combine the current environment with custom variables
env = os.environ.copy()
env.update(custom_env_vars)
with log_file_path.open("w") as log_file:
process = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env
)
assert process.stdout is not None, "process.stdout should not be None"
# Stream output to both log file and console
for line in process.stdout:
print(line, end="") # Stream to GitHub Actions log
sys.stdout.flush()
log_file.write(line) # Write to log file
process.wait() # Wait for the process to finish
if process.returncode != 0:
raise subprocess.CalledProcessError(process.returncode, command)


def parse_log_and_report_metrics(
zenbenchmark: NeonBenchmarker, log_file_path: Path, backpressure_time_diff: float
):
"""Parses the pgcopydb log file for performance metrics and reports them to the database."""
metrics = {"backpressure_time": backpressure_time_diff}
# Define regex patterns to capture metrics
metric_patterns = {
"COPY_INDEX_CONSTRAINTS_VACUUM": re.compile(
r"COPY, INDEX, CONSTRAINTS, VACUUM \(wall clock\).*"
),
"COPY_CUMULATIVE": re.compile(r"COPY \(cumulative\).*"),
"CREATE_INDEX_CUMULATIVE": re.compile(r"CREATE INDEX \(cumulative\).*"),
"CONSTRAINTS_CUMULATIVE": re.compile(r"CONSTRAINTS \(cumulative\).*"),
"FINALIZE_SCHEMA": re.compile(r"Finalize Schema.*"),
"TOTAL_DURATION": re.compile(r"Total Wall Clock Duration.*"),
}
# Parse log file
with log_file_path.open("r") as log_file:
for line in log_file:
for metric_name, pattern in metric_patterns.items():
if pattern.search(line):
# Extract duration and convert it to seconds
duration_match = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line)
if duration_match:
duration_str = duration_match.group(0)
parts = re.findall(r"\d+[a-zA-Z]+", duration_str)
rust_like_humantime = " ".join(parts)
duration_seconds = humantime_to_ms(rust_like_humantime) / 1000.0
metrics[metric_name] = duration_seconds
endpoint_id = {"endpoint_id": get_endpoint_id()}
for metric_name, duration_seconds in metrics.items():
zenbenchmark.record(
metric_name, duration_seconds, "s", MetricReport.LOWER_IS_BETTER, endpoint_id
)


def get_endpoint_id():
"""Extracts and returns the first segment of the hostname from the PostgreSQL URI stored in BENCHMARK_INGEST_TARGET_CONNSTR."""
connstr = os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")
if connstr is None:
raise OSError("BENCHMARK_INGEST_TARGET_CONNSTR environment variable is not set.")
# Parse the URI
parsed_url = urlparse(connstr)
# Extract the hostname and split to get the first segment
hostname = parsed_url.hostname
if hostname is None:
raise ValueError("Unable to parse hostname from BENCHMARK_INGEST_TARGET_CONNSTR")
# Split the hostname by dots and take the first segment
endpoint_id = hostname.split(".")[0]
return endpoint_id


@pytest.fixture()  # must be function scoped because test_output_dir is function scoped
def log_file_path(test_output_dir):
"""Fixture to provide a temporary log file path."""
if not os.getenv("TARGET_PROJECT_TYPE"):
raise OSError("Required environment variable 'TARGET_PROJECT_TYPE' is not set.")
return (test_output_dir / os.getenv("TARGET_PROJECT_TYPE")).with_suffix(".log")


@pytest.mark.remote_cluster
def test_ingest_performance_using_pgcopydb(
zenbenchmark: NeonBenchmarker,
log_file_path: Path,
pgcopydb_filter_file: Path,
test_output_dir: Path,
):
"""
Simulate project migration from another PostgreSQL provider to Neon.
Measure performance for Neon ingest steps
- COPY
- CREATE INDEX
- CREATE CONSTRAINT
- VACUUM ANALYZE
- create foreign keys
Use pgcopydb to copy data from the source database to the destination database.
"""
# Set up environment and create filter file
setup_environment()
# Get backpressure time before ingest
backpressure_time_before = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR"))
# Build and run the pgcopydb command
command = build_pgcopydb_command(pgcopydb_filter_file, test_output_dir)
try:
run_command_and_log_output(command, log_file_path)
except subprocess.CalledProcessError as e:
pytest.fail(f"pgcopydb command failed with error: {e}")
# Get backpressure time after ingest and calculate the difference
backpressure_time_after = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR"))
backpressure_time_diff = backpressure_time_after - backpressure_time_before
# Parse log file and report metrics, including backpressure time difference
parse_log_and_report_metrics(zenbenchmark, log_file_path, backpressure_time_diff)
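
Since the point of the move is to be able to run the benchmark locally, a hypothetical local invocation could look like this (paths follow the examples in the docstring above; the connection strings are placeholders, and `--timeout` assumes the pytest-timeout plugin used by the test suite):

```python
import os

import pytest

os.environ.update(
    {
        "PG_CONFIG": "/tmp/neon/pg_install/v16/bin/pg_config",
        "PSQL": "/tmp/neon/pg_install/v16/bin/psql",
        "PG_16_LIB_PATH": "/tmp/neon/pg_install/v16/lib",
        "PGCOPYDB": "/pgcopydb/bin/pgcopydb",
        "PGCOPYDB_LIB_PATH": "/pgcopydb/lib",
        "BENCHMARK_INGEST_SOURCE_CONNSTR": "postgres://...",  # placeholder
        "BENCHMARK_INGEST_TARGET_CONNSTR": "postgres://...",  # placeholder
        "PERF_TEST_RESULT_CONNSTR": "postgres://...",  # placeholder
        "TARGET_PROJECT_TYPE": "new_empty_project",
    }
)
pytest.main(
    [
        "-s",
        "-m",
        "remote_cluster",
        "--timeout=86400",
        "-k",
        "test_ingest_performance_using_pgcopydb",
        "test_runner/performance/test_perf_ingest_using_pgcopydb.py",
    ]
)
```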