diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml
index 037b9aeb1e..275f161019 100644
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -48,6 +48,10 @@ inputs:
     description: 'benchmark durations JSON'
     required: false
     default: '{}'
+  aws_oicd_role_arn:
+    description: 'the OIDC role ARN to (re-)acquire for the allure report upload - if not set, the calling workflow must have acquired the OIDC role already'
+    required: false
+    default: ''

 runs:
   using: "composite"
@@ -222,6 +226,13 @@ runs:
         # (for example if we didn't run the test for non build-and-test workflow)
         skip-if-does-not-exist: true

+    - name: (Re-)configure AWS credentials  # necessary to upload reports to S3 after a long-running test
+      if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ inputs.aws_oicd_role_arn }}
+        role-duration-seconds: 3600  # 1 hour should be more than enough to upload the report
     - name: Upload test results
       if: ${{ !cancelled() }}
       uses: ./.github/actions/allure-report-store
diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index 68bc555982..0e3c31ec57 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -133,6 +133,7 @@ jobs:
             --ignore test_runner/performance/test_perf_pgvector_queries.py
             --ignore test_runner/performance/test_logical_replication.py
             --ignore test_runner/performance/test_physical_replication.py
+            --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
         env:
           BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
           VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml
index d770bb2bb5..1033dc6489 100644
--- a/.github/workflows/ingest_benchmark.yml
+++ b/.github/workflows/ingest_benchmark.yml
@@ -1,4 +1,4 @@
-name: Benchmarking
+name: benchmarking ingest

 on:
   # uncomment to run on push for debugging your PR
@@ -74,18 +74,16 @@ jobs:
           compute_units: '[7, 7]'  # we want to test large compute here to avoid compute-side bottleneck
           api_key: ${{ secrets.NEON_STAGING_API_KEY }}

-      - name: Initialize Neon project and retrieve current backpressure seconds
+      - name: Initialize Neon project
         if: ${{ matrix.target_project == 'new_empty_project' }}
         env:
-          NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
+          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
           NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
         run: |
           echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
           export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
-          ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
-          BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
-          echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
-          echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
+          ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
+          echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV

       - name: Create Neon Branch for large tenant
         if: ${{ matrix.target_project == 'large_existing_project' }}
@@ -95,266 +93,55 @@ jobs:
           project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
           api_key: ${{ secrets.NEON_STAGING_API_KEY }}

-      - name: Initialize Neon project and retrieve current backpressure seconds
+      - name: Initialize Neon project
         if: ${{ matrix.target_project == 'large_existing_project' }}
         env:
-          NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
+          BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
           NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
         run: |
           echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
           export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
           # Extract the part before the database name
-          base_connstr="${NEW_PROJECT_CONNSTR%/*}"
+          base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}"
           # Extract the query parameters (if any) after the database name
-          query_params="${NEW_PROJECT_CONNSTR#*\?}"
+          query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}"
           # Reconstruct the new connection string
-          if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
+          if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
             new_connstr="${base_connstr}/neondb?${query_params}"
           else
             new_connstr="${base_connstr}/neondb"
           fi
           ${PSQL} "${new_connstr}" -c "drop database ludicrous;"
           ${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"
-          if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
-            NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}"
+          if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
+            BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}"
           else
-            NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous"
+            BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous"
           fi
-          ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
-          BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
-          echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
-          echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
-
-
-      - name: Create pgcopydb filter file
-        run: |
-          cat << EOF > /tmp/pgcopydb_filter.txt
-          [include-only-table]
-          public.events
-          public.emails
-          public.email_transmissions
-          public.payments
-          public.editions
-          public.edition_modules
-          public.sp_content
-          public.email_broadcasts
-          public.user_collections
-          public.devices
-          public.user_accounts
-          public.lessons
-          public.lesson_users
-          public.payment_methods
-          public.orders
-          public.course_emails
-          public.modules
-          public.users
-          public.module_users
-          public.courses
-          public.payment_gateway_keys
-          public.accounts
-          public.roles
-          public.payment_gateways
-          public.management
-          public.event_names
-          EOF
+          ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
+          echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV

-      - name: Invoke pgcopydb
+      - name: Invoke pgcopydb
+        uses: ./.github/actions/run-python-test-set
+        with:
+          build_type: remote
+          test_selection: performance/test_perf_ingest_using_pgcopydb.py
+          run_in_parallel: false
+          extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
+          pg_version: v16
+          save_perf_report: true
+          aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
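+          # aws_oicd_role_arn lets the composite action re-assume the role right before the report
+          # upload: the ingest can run for many hours, so credentials obtained at job start may
+          # have expired by the time the Allure/perf results are uploaded to S3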
         env:
-          BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
-        run: |
-          export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH}
-          export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}"
-          export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}"
-          export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
-          ${PG_CONFIG} --bindir
-          ${PGCOPYDB} --version
-          ${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \
-            --index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \
-            --use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log
+          BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
+          TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
+          # we report PLATFORM in the zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant
+          PLATFORM: "${{ matrix.target_project }}-us-east-2-staging"
+          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

-      # create dummy pgcopydb log to test parsing
-      # - name: create dummy log for parser test
-      #   run: |
-      #     cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log
-      #     2024-11-04 18:00:53.433 500861 INFO  main.c:136             Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb"
-      #     2024-11-04 18:00:53.434 500861 INFO  cli_common.c:1225      [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
-      #     2024-11-04 18:00:53.434 500861 INFO  cli_common.c:1226      [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
-      #     2024-11-04 18:00:53.442 500861 INFO  copydb.c:105           Using work dir "/tmp/pgcopydb"
-      #     2024-11-04 18:00:53.541 500861 INFO  snapshot.c:107         Exported snapshot "00000008-00000033-1" from the source database
-      #     2024-11-04 18:00:53.556 500865 INFO  cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences
-      #     2024-11-04 18:00:54.570 500865 INFO  copydb_schema.c:716    Splitting source candidate tables larger than 10 GB
-      #     2024-11-04 18:00:54.570 500865 INFO  copydb_schema.c:829    Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID
-      #     2024-11-04 18:01:05.538 500865 INFO  copydb_schema.c:905    Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid.
-      #     2024-11-04 18:01:05.564 500865 INFO  copydb_schema.c:905    Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id.
-      #     2024-11-04 18:01:05.584 500865 INFO  copydb_schema.c:905    Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id.
-      #     2024-11-04 18:01:05.605 500865 INFO  copydb_schema.c:905    Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id.
-      #     2024-11-04 18:01:05.605 500865 INFO  copydb_schema.c:761    Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk
-      #     2024-11-04 18:01:05.687 500865 INFO  copydb_schema.c:968    Fetched information for 57 indexes (supporting 25 constraints)
-      #     2024-11-04 18:01:05.753 500865 INFO  sequences.c:78         Fetching information for 24 sequences
-      #     2024-11-04 18:01:05.903 500865 INFO  copydb_schema.c:1122   Fetched information for 4 extensions
-      #     2024-11-04 18:01:06.178 500865 INFO  copydb_schema.c:1538   Found 0 indexes (supporting 0 constraints) in the target database
-      #     2024-11-04 18:01:06.184 500865 INFO  cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data)
-      #     2024-11-04 18:01:06.186 500865 INFO  pgcmd.c:468            /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
-      #     2024-11-04 18:01:06.952 500865 INFO  cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database
-      #     2024-11-04 18:01:07.004 500865 INFO  pgcmd.c:1001           /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
-      #     2024-11-04 18:01:07.438 500874 INFO  table-data.c:656       STEP 4: starting 4 table-data COPY processes
-      #     2024-11-04 18:01:07.451 500877 INFO  vacuum.c:139           STEP 8: skipping VACUUM jobs per --skip-vacuum
-      #     2024-11-04 18:01:07.457 500875 INFO  indexes.c:182          STEP 6: starting 4 CREATE INDEX processes
-      #     2024-11-04 18:01:07.457 500875 INFO  indexes.c:183          STEP 7: constraints are built by the CREATE INDEX processes
-      #     2024-11-04 18:01:07.507 500865 INFO  blobs.c:74             Skipping large objects: none found.
-      #     2024-11-04 18:01:07.509 500865 INFO  sequences.c:194        STEP 9: reset sequences values
-      #     2024-11-04 18:01:07.510 500886 INFO  sequences.c:290        Set sequences values on the target database
-      #     2024-11-04 20:49:00.587 500865 INFO  cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database
-      #     2024-11-04 20:49:00.600 500865 INFO  pgcmd.c:1001           /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
-      #     2024-11-05 10:50:58.508 500865 INFO  cli_clone_follow.c:639 All step are now done, 16h49m elapsed
-      #     2024-11-05 10:50:58.508 500865 INFO  summary.c:3155         Printing summary for 26 tables and 57 indexes
-      #
-      #       OID | Schema |         Name         | Parts | copy duration | transmitted bytes | indexes | create index duration
-      #     ------+--------+----------------------+-------+---------------+-------------------+---------+----------------------
-      #     24654 | public | events               |    10 |         1d11h |            878 GB |       1 |                 1h41m
-      #     24623 | public | email_transmissions  |     4 |         4h46m |             99 GB |       3 |                 2h04m
-      #     24665 | public | lessons              |     4 |         4h42m |            161 GB |       4 |                 1m11s
-      #     24661 | public | lesson_users         |     3 |         2h46m |             49 GB |       3 |                39m35s
-      #     24631 | public | emails               |     1 |        34m07s |             10 GB |       2 |                   17s
-      #     24739 | public | payments             |     1 |         5m47s |           1848 MB |       4 |                 4m40s
-      #     24681 | public | module_users         |     1 |         4m57s |           1610 MB |       3 |                 1m50s
-      #     24694 | public | orders               |     1 |         2m50s |            835 MB |       3 |                 1m05s
-      #     24597 | public | devices              |     1 |         1m45s |            498 MB |       2 |                   40s
-      #     24723 | public | payment_methods      |     1 |         1m24s |            548 MB |       2 |                   31s
-      #     24765 | public | user_collections     |     1 |         2m17s |           1005 MB |       2 |                 968ms
-      #     24774 | public | users                |     1 |           52s |            291 MB |       4 |                   27s
-      #     24760 | public | user_accounts        |     1 |           16s |            172 MB |       3 |                   16s
-      #     24606 | public | edition_modules      |     1 |         8s983 |             46 MB |       3 |                 4s749
-      #     24583 | public | course_emails        |     1 |         8s526 |             26 MB |       2 |                 996ms
-      #     24685 | public | modules              |     1 |         1s592 |             21 MB |       3 |                 1s696
-      #     24610 | public | editions             |     1 |         2s199 |           7483 kB |       2 |                 1s032
-      #     24755 | public | sp_content           |     1 |         1s555 |           4177 kB |       0 |                   0ms
-      #     24619 | public | email_broadcasts     |     1 |         744ms |           2645 kB |       2 |                 677ms
-      #     24590 | public | courses              |     1 |         387ms |           1540 kB |       2 |                 367ms
-      #     24704 | public | payment_gateway_keys |     1 |         1s972 |            164 kB |       2 |                  27ms
-      #     24576 | public | accounts             |     1 |          58ms |             24 kB |       1 |                  14ms
-      #     24647 | public | event_names          |     1 |          32ms |             397 B |       1 |                   8ms
-      #     24716 | public | payment_gateways     |     1 |         1s675 |             117 B |       1 |                  11ms
-      #     24748 | public | roles                |     1 |          71ms |             173 B |       1 |                   8ms
-      #     24676 | public | management           |     1 |          33ms |              40 B |       1 |                  19ms
-      #
-      #
-      #        Step                                                Connection    Duration   Transfer   Concurrency
-      #     --------------------------------------------------   ----------    --------   --------   -----------
-      #      Catalog Queries (table ordering, filtering, etc)     source        12s                    1
-      #      Dump Schema                                          source        765ms                  1
-      #      Prepare Schema                                       target        466ms                  1
-      #      COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)        both          2h47m                  12
-      #      COPY (cumulative)                                    both          7h46m      1225 GB     4
-      #      CREATE INDEX (cumulative)                            target        4h36m                  4
-      #      CONSTRAINTS (cumulative)                             target        8s493                  4
-      #      VACUUM (cumulative)                                  target        0ms                    4
-      #      Reset Sequences                                      both          60ms                   1
-      #      Large Objects (cumulative)                           (null)        0ms                    0
-      #      Finalize Schema                                      both          14h01m                 4
-      #     --------------------------------------------------   ----------    --------   --------   -----------
-      #      Total Wall Clock Duration                            both          16h49m                 20
-      #
-      #
-      #     EOF

-      - name: show tables sizes and retrieve current backpressure seconds
+      - name: show table sizes after ingest
         run: |
           export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
-          ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+"
-          BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
-          echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV
-
-      - name: Parse pgcopydb log and report performance metrics
-        env:
-          PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
-        run: |
-          export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
-
-          # Define the log file path
-          LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log"
-
-          # Get the current git commit hash
-          git config --global --add safe.directory /__w/neon/neon
-          COMMIT_HASH=$(git rev-parse --short HEAD)
-
-          # Define the platform and test suite
-          PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging"
-          SUIT="pgcopydb_ingest_bench"
-
-          # Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds
-          convert_to_seconds() {
-            local duration=$1
-            local total_seconds=0
-
-            # Check for hours (h)
-            if [[ "$duration" =~ ([0-9]+)h ]]; then
-              total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600))
-            fi
-
-            # Check for seconds (s)
-            if [[ "$duration" =~ ([0-9]+)s ]]; then
-              total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0}))
-            fi
-
-            # Check for milliseconds (ms) (if applicable)
-            if [[ "$duration" =~ ([0-9]+)ms ]]; then
-              total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000))
-              duration=${duration/${BASH_REMATCH[0]}/}  # need to remove it to avoid double counting with m
-            fi
-
-            # Check for minutes (m) - must be checked after ms because m is contained in ms
-            if [[ "$duration" =~ ([0-9]+)m ]]; then
-              total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60))
-            fi
-
-            echo $total_seconds
-          }
-
-          # Calculate the backpressure difference in seconds
-          BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}")
-
-          # Insert the backpressure time difference into the performance database
-          if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then
-            PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
-            INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
-            VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now());
-            \""
-            echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds"
-            eval $PSQL_CMD
-          fi
-
-          # Extract and process log lines
-          while IFS= read -r line; do
-            METRIC_NAME=""
-            # Match each desired line and extract the relevant information
-            if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then
-              METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)"
-            elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then
-              METRIC_NAME="COPY (cumulative)"
-            elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then
-              METRIC_NAME="CREATE INDEX (cumulative)"
-            elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then
-              METRIC_NAME="CONSTRAINTS (cumulative)"
-            elif [[ "$line" =~ Finalize\ Schema.* ]]; then
-              METRIC_NAME="Finalize Schema"
-            elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then
-              METRIC_NAME="Total Wall Clock Duration"
-            fi
-
-            # If a metric was matched, insert it into the performance database
-            if [ -n "$METRIC_NAME" ]; then
-              DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1)
-              METRIC_VALUE=$(convert_to_seconds "$DURATION")
-              PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
-              INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
-              VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now());
-              \""
-              echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds"
-              eval $PSQL_CMD
-            fi
-          done < "$LOG_FILE"
+          ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"

       - name: Delete Neon Project
         if: ${{ always() && matrix.target_project == 'new_empty_project' }}
diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py
index 40071c01b0..804f8a3cde 100644
--- a/scripts/ingest_perf_test_result.py
+++ b/scripts/ingest_perf_test_result.py
@@ -25,7 +25,8 @@ CREATE TABLE IF NOT EXISTS perf_test_results (
     metric_value NUMERIC,
     metric_unit VARCHAR(10),
     metric_report_type TEXT,
-    recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+    recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    labels JSONB DEFAULT '{}'
 )
 """

@@ -91,6 +92,7 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int)
             "metric_unit": metric["unit"],
             "metric_report_type": metric["report"],
             "recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp),
+            "labels": json.dumps(metric.get("labels", {})),
         }
         args_list.append(values)

@@ -105,7 +107,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int)
                     metric_value,
                     metric_unit,
                     metric_report_type,
-                    recorded_at_timestamp
+                    recorded_at_timestamp,
+                    labels
                 ) VALUES %s
             """,
             args_list,
@@ -117,7 +120,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int)
                 %(metric_value)s,
                 %(metric_unit)s,
                 %(metric_report_type)s,
-                %(recorded_at_timestamp)s
+                %(recorded_at_timestamp)s,
+                %(labels)s
             )""",
         )
     return len(args_list)
diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py
index d3419bd8b1..8e68775471 100644
--- a/test_runner/fixtures/benchmark_fixture.py
+++ b/test_runner/fixtures/benchmark_fixture.py
@@ -256,12 +256,17 @@ class NeonBenchmarker:
         metric_value: float,
         unit: str,
         report: MetricReport,
+        labels: Optional[
+            dict[str, str]
+        ] = None,  # optional key/value pairs to associate Neon object IDs (e.g. project or endpoint ID) with the metric; stored as JSONB
     ):
         """
         Record a benchmark result.
         """
         # just to namespace the value
         name = f"{self.PROPERTY_PREFIX}_{metric_name}"
+        if labels is None:
+            labels = {}
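+        # For example, a test can attach Neon object IDs to a metric:
+        #   zenbenchmark.record("COPY_CUMULATIVE", 27960.0, "s",
+        #                       MetricReport.LOWER_IS_BETTER, labels={"endpoint_id": "ep-icy-union-w25qd5pj"})
+        # The labels dict is stored in the JSONB "labels" column of perf_test_results.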
         self.property_recorder(
             name,
             {
@@ -269,6 +274,7 @@ class NeonBenchmarker:
                 "value": metric_value,
                 "unit": unit,
                 "report": report,
+                "labels": labels,
             },
         )
diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py
new file mode 100644
index 0000000000..2f4574ba88
--- /dev/null
+++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py
@@ -0,0 +1,267 @@
+import os
+import re
+import subprocess
+import sys
+import textwrap
+from pathlib import Path
+from typing import cast
+from urllib.parse import urlparse
+
+import pytest
+from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.utils import humantime_to_ms
+
+
+def setup_environment():
+    """Set up necessary environment variables for pgcopydb execution.
+
+    Expects the following variables to be set in the environment:
+    - PG_CONFIG: e.g. /tmp/neon/pg_install/v16/bin/pg_config
+    - PSQL: e.g. /tmp/neon/pg_install/v16/bin/psql
+    - PG_16_LIB_PATH: e.g. /tmp/neon/pg_install/v16/lib
+    - PGCOPYDB: e.g. /pgcopydb/bin/pgcopydb
+    - PGCOPYDB_LIB_PATH: e.g. /pgcopydb/lib
+    - BENCHMARK_INGEST_SOURCE_CONNSTR
+    - BENCHMARK_INGEST_TARGET_CONNSTR
+    - PERF_TEST_RESULT_CONNSTR
+    - TARGET_PROJECT_TYPE
+    """
+    # Ensure required environment variables are set
+    required_env_vars = [
+        "PGCOPYDB",
+        "PGCOPYDB_LIB_PATH",
+        "PG_CONFIG",
+        "PSQL",
+        "PG_16_LIB_PATH",
+        "BENCHMARK_INGEST_SOURCE_CONNSTR",
+        "BENCHMARK_INGEST_TARGET_CONNSTR",
+        "PERF_TEST_RESULT_CONNSTR",
+        "TARGET_PROJECT_TYPE",
+    ]
+    for var in required_env_vars:
+        if not os.getenv(var):
+            raise OSError(f"Required environment variable '{var}' is not set.")
+
+
+def build_pgcopydb_command(pgcopydb_filter_file: Path, test_output_dir: Path):
+    """Builds the pgcopydb command to execute using existing environment variables."""
+    pgcopydb_executable = os.getenv("PGCOPYDB")
+    if not pgcopydb_executable:
+        raise OSError("PGCOPYDB environment variable is not set.")
+
+    return [
+        pgcopydb_executable,
+        "clone",
+        "--dir",
+        str(test_output_dir),
+        "--skip-vacuum",
+        "--no-owner",
+        "--no-acl",
+        "--skip-db-properties",
+        "--table-jobs",
+        "4",
+        "--index-jobs",
+        "4",
+        "--restore-jobs",
+        "4",
+        "--split-tables-larger-than",
+        "10GB",
+        "--skip-extensions",
+        "--use-copy-binary",
+        "--filters",
+        str(pgcopydb_filter_file),
+    ]
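+
+
+# The argument list above corresponds to a CLI invocation along the lines of
+#   pgcopydb clone --dir <test_output_dir> --skip-vacuum --no-owner --no-acl \
+#     --skip-db-properties --table-jobs 4 --index-jobs 4 --restore-jobs 4 \
+#     --split-tables-larger-than 10GB --skip-extensions --use-copy-binary \
+#     --filters <pgcopydb_filter.txt>
+# (the same flags the workflow previously passed to pgcopydb directly)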
+
+
+@pytest.fixture()  # must be function scoped because test_output_dir is function scoped
+def pgcopydb_filter_file(test_output_dir: Path) -> Path:
+    """Creates the pgcopydb_filter.txt file required by pgcopydb."""
+    filter_content = textwrap.dedent("""\
+        [include-only-table]
+        public.events
+        public.emails
+        public.email_transmissions
+        public.payments
+        public.editions
+        public.edition_modules
+        public.sp_content
+        public.email_broadcasts
+        public.user_collections
+        public.devices
+        public.user_accounts
+        public.lessons
+        public.lesson_users
+        public.payment_methods
+        public.orders
+        public.course_emails
+        public.modules
+        public.users
+        public.module_users
+        public.courses
+        public.payment_gateway_keys
+        public.accounts
+        public.roles
+        public.payment_gateways
+        public.management
+        public.event_names
+        """)
+    filter_path = test_output_dir / "pgcopydb_filter.txt"
+    filter_path.write_text(filter_content)
+    return filter_path
+
+
+def get_backpressure_time(connstr):
+    """Executes a query to get the backpressure throttling time in seconds."""
+    query = "select backpressure_throttling_time()/1000000;"
+    psql_path = os.getenv("PSQL")
+    if psql_path is None:
+        raise OSError("The PSQL environment variable is not set.")
+    result = subprocess.run(
+        [psql_path, connstr, "-t", "-c", query], capture_output=True, text=True, check=True
+    )
+    return float(result.stdout.strip())
+
+
+def run_command_and_log_output(command, log_file_path: Path):
+    """
+    Runs a command and logs output to both a file and the GitHub Actions console.
+
+    Args:
+        command (list): The command to execute.
+        log_file_path (Path): Path object for the log file where output is written.
+    """
+    # Define a list of necessary environment variables for pgcopydb
+    custom_env_vars = {
+        "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}",
+        "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")),
+        "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")),
+        "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7",
+    }
+    # Combine the current environment with custom variables
+    env = os.environ.copy()
+    env.update(custom_env_vars)
+
+    with log_file_path.open("w") as log_file:
+        process = subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env
+        )
+
+        assert process.stdout is not None, "process.stdout should not be None"
+
+        # Stream output to both log file and console
+        for line in process.stdout:
+            print(line, end="")  # Stream to GitHub Actions log
+            sys.stdout.flush()
+            log_file.write(line)  # Write to log file
+
+        process.wait()  # Wait for the process to finish
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(process.returncode, command)
+
+
+def parse_log_and_report_metrics(
+    zenbenchmark: NeonBenchmarker, log_file_path: Path, backpressure_time_diff: float
+):
+    """Parses the pgcopydb log file for performance metrics and reports them to the database."""
+    metrics = {"backpressure_time": backpressure_time_diff}
+
+    # Define regex patterns to capture metrics
+    metric_patterns = {
+        "COPY_INDEX_CONSTRAINTS_VACUUM": re.compile(
+            r"COPY, INDEX, CONSTRAINTS, VACUUM \(wall clock\).*"
+        ),
+        "COPY_CUMULATIVE": re.compile(r"COPY \(cumulative\).*"),
+        "CREATE_INDEX_CUMULATIVE": re.compile(r"CREATE INDEX \(cumulative\).*"),
+        "CONSTRAINTS_CUMULATIVE": re.compile(r"CONSTRAINTS \(cumulative\).*"),
+        "FINALIZE_SCHEMA": re.compile(r"Finalize Schema.*"),
+        "TOTAL_DURATION": re.compile(r"Total Wall Clock Duration.*"),
+    }
+
+    # Parse log file
+    with log_file_path.open("r") as log_file:
+        for line in log_file:
+            for metric_name, pattern in metric_patterns.items():
+                if pattern.search(line):
+                    # Extract duration and convert it to seconds
+                    duration_match = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line)
+                    if duration_match:
+                        duration_str = duration_match.group(0)
+                        parts = re.findall(r"\d+[a-zA-Z]+", duration_str)
+                        rust_like_humantime = " ".join(parts)
+                        duration_seconds = humantime_to_ms(rust_like_humantime) / 1000.0
+                        metrics[metric_name] = duration_seconds
+
+    endpoint_id = {"endpoint_id": get_endpoint_id()}
+    for metric_name, duration_seconds in metrics.items():
+        zenbenchmark.record(
+            metric_name, duration_seconds, "s", MetricReport.LOWER_IS_BETTER, endpoint_id
+        )
+
+
+def get_endpoint_id():
+    """Extracts and returns the first segment of the hostname from the PostgreSQL URI stored in BENCHMARK_INGEST_TARGET_CONNSTR."""
+    connstr = os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")
+    if connstr is None:
+        raise OSError("BENCHMARK_INGEST_TARGET_CONNSTR environment variable is not set.")
+
+    # Parse the URI
+    parsed_url = urlparse(connstr)
+
+    # Extract the hostname and split to get the first segment
+    hostname = parsed_url.hostname
+    if hostname is None:
+        raise ValueError("Unable to parse hostname from BENCHMARK_INGEST_TARGET_CONNSTR")
+
+    # Split the hostname by dots and take the first segment
+    endpoint_id = hostname.split(".")[0]
+
+    return endpoint_id
+
+
+@pytest.fixture()  # must be function scoped because test_output_dir is function scoped
+def log_file_path(test_output_dir):
+    """Fixture to provide a temporary log file path."""
+    if not os.getenv("TARGET_PROJECT_TYPE"):
+        raise OSError("Required environment variable 'TARGET_PROJECT_TYPE' is not set.")
+    return (test_output_dir / os.getenv("TARGET_PROJECT_TYPE")).with_suffix(".log")
+
+
+@pytest.mark.remote_cluster
+def test_ingest_performance_using_pgcopydb(
+    zenbenchmark: NeonBenchmarker,
+    log_file_path: Path,
+    pgcopydb_filter_file: Path,
+    test_output_dir: Path,
+):
+    """
+    Simulate project migration from another PostgreSQL provider to Neon.
+
+    Measure performance for Neon ingest steps
+    - COPY
+    - CREATE INDEX
+    - CREATE CONSTRAINT
+    - VACUUM ANALYZE
+    - create foreign keys
+
+    Use pgcopydb to copy data from the source database to the destination database.
+    """
+    # Ensure the required environment variables are set (the filter file itself is created by the
+    # pgcopydb_filter_file fixture)
+    setup_environment()
+
+    # Get backpressure time before ingest
+    backpressure_time_before = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR"))
+
+    # Build and run the pgcopydb command
+    command = build_pgcopydb_command(pgcopydb_filter_file, test_output_dir)
+    try:
+        run_command_and_log_output(command, log_file_path)
+    except subprocess.CalledProcessError as e:
+        pytest.fail(f"pgcopydb command failed with error: {e}")
+
+    # Get backpressure time after ingest and calculate the difference
+    backpressure_time_after = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR"))
+    backpressure_time_diff = backpressure_time_after - backpressure_time_before
+
+    # Parse log file and report metrics, including backpressure time difference
+    parse_log_and_report_metrics(zenbenchmark, log_file_path, backpressure_time_diff)
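+
+
+# Note on duration parsing in parse_log_and_report_metrics(): a pgcopydb summary line such as
+#   "COPY (cumulative)    both    7h46m    1225 GB    4"
+# yields duration_str "7h46m", which is rewritten to "7h 46m" and converted by
+# humantime_to_ms() to 27960000.0 ms, i.e. reported as 27960.0 seconds.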