From 228bb753546149135ddb7f4c198d8c8d28fa1c6e Mon Sep 17 00:00:00 2001
From: Peter Bendel
Date: Sun, 16 Mar 2025 15:04:48 +0100
Subject: [PATCH] Extend large tenant OLTP workload ... (#11166)

... to better match the workload characteristics of real Neon customers

## Problem

We analyzed the workloads of large Neon users and want to extend the OLTP
workload to include characteristics seen in those workloads.

## Summary of changes

- for the reuse branch, delete the rows inserted by the last run
- adjust the expected run time (timeouts) in the GitHub workflow
- add queries that exercise the prefetch getpages path
- add insert/update/delete (I/U/D) transactions for another table (so far the workload was insert/append-only)
- add an explicit vacuum analyze step and measure its time
- add a reindex concurrently step and measure its time (and make sure this step succeeds even if prior reindex runs failed or were canceled)
- create a second connection string that removes the `-pooler` suffix from the hostname, because the long-running maintenance statements must bypass the pooler, which doesn't support an unlimited statement timeout

## Test run

https://github.com/neondatabase/neon/actions/runs/13851772887/job/38760172415
---
 .github/workflows/large_oltp_benchmark.yml |  89 +++++++---
 .../IUD_one_transaction.sql                | 162 ++++++++++++++++++
 .../select_prefetch_webhook.sql            |  25 +++
 .../test_perf_oltp_large_tenant.py         |  78 ++++++++-
 4 files changed, 327 insertions(+), 27 deletions(-)
 create mode 100644 test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql
 create mode 100644 test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql

diff --git a/.github/workflows/large_oltp_benchmark.yml b/.github/workflows/large_oltp_benchmark.yml
index f33e11cd08..a7c3118e34 100644
--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -2,8 +2,8 @@ name: large oltp benchmark

 on:
   # uncomment to run on push for debugging your PR
-  push:
-    branches: [ bodobolero/synthetic_oltp_workload ]
+  #push:
+  #  branches: [ bodobolero/synthetic_oltp_workload ]

   schedule:
     # * is a special character in YAML so you have to quote this string
@@ -12,7 +12,7 @@ on:
     #         │ │ ┌───────────── day of the month (1 - 31)
     #         │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
     #         │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
+    - cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC

   workflow_dispatch: # adds ability to run this manually

 defaults:
@@ -22,7 +22,7 @@ defaults:
 concurrency:
   # Allow only one workflow globally because we need dedicated resources which only exist once
   group: large-oltp-bench-workflow
-  cancel-in-progress: true
+  cancel-in-progress: false

 jobs:
   oltp:
@@ -31,9 +31,9 @@ jobs:
       matrix:
         include:
           - target: new_branch
-            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
           - target: reuse_branch
-            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
       max-parallel: 1 # we want to run each stripe size sequentially to be able to
compare the results permissions: contents: write @@ -46,7 +46,6 @@ jobs: PG_VERSION: 16 # pre-determined by pre-determined project TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }} PLATFORM: ${{ matrix.target }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -57,8 +56,10 @@ jobs: password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init - # Increase timeout to 8h, default timeout is 6h - timeout-minutes: 480 + # Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time + # (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions + # in one run vacuum didn't finish within 12 hours + timeout-minutes: 2880 steps: - uses: actions/checkout@v4 @@ -89,29 +90,45 @@ jobs: - name: Set up Connection String id: set-up-connstr run: | - case "${{ matrix.target }}" in - new_branch) - CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} - ;; - reuse_branch) - CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} - ;; - *) - echo >&2 "Unknown target=${{ matrix.target }}" - exit 1 - ;; - esac + case "${{ matrix.target }}" in + new_branch) + CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} + ;; + reuse_branch) + CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} + ;; + *) + echo >&2 "Unknown target=${{ matrix.target }}" + exit 1 + ;; + esac - echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}" - - name: Benchmark pgbench with custom-scripts + echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT + + - name: Delete rows from prior runs in reuse branch + if: ${{ matrix.target == 'reuse_branch' }} + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }} + PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config + PSQL: /tmp/neon/pg_install/v16/bin/psql + PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib + run: | + echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs" + export LD_LIBRARY_PATH=${PG_16_LIB_PATH} + ${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';" + echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs" + + - name: Benchmark pgbench with custom-scripts uses: ./.github/actions/run-python-test-set with: build_type: ${{ env.BUILD_TYPE }} test_selection: performance run_in_parallel: false - save_perf_report: ${{ env.SAVE_PERF_REPORT }} - extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant + save_perf_report: true + extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench pg_version: ${{ env.PG_VERSION }} aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: @@ -119,6 +136,21 @@ jobs: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + - name: Benchmark database maintenance + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: true + extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance + pg_version: ${{ env.PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + BENCHMARK_CONNSTR: ${{ 
steps.set-up-connstr.outputs.connstr_without_pooler }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + - name: Delete Neon Branch for large tenant if: ${{ always() && matrix.target == 'new_branch' }} uses: ./.github/actions/neon-branch-delete @@ -127,6 +159,13 @@ jobs: branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} + - name: Configure AWS credentials # again because prior steps could have exceeded 5 hours + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + - name: Create Allure report id: create-allure-report if: ${{ !cancelled() }} diff --git a/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql new file mode 100644 index 0000000000..4c5b3fbd11 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql @@ -0,0 +1,162 @@ +\set min_id 1 +\set max_id 1500000000 +\set range_size 100 + +-- Use uniform random instead of random_zipfian +\set random_id random(:min_id, :max_id) +\set random_mar_id random(1, 65536) +\set random_delete_id random(:min_id, :max_id) + +-- Update exactly one row (if it exists) using the uniformly chosen random_id +UPDATE transaction.transaction + SET state = 'COMPLETED', + settlement_date = CURRENT_DATE, + mar_identifier = (:random_mar_id)::int + WHERE id = (:random_id)::bigint; + +-- Insert exactly one row +INSERT INTO transaction.transaction ( + user_id, + card_id, + business_id, + preceding_transaction_id, + is_last, + is_mocked, + type, + state, + network, + subnetwork, + user_transaction_time, + settlement_date, + request_amount, + amount, + currency_code, + approval_code, + response, + gpa, + gpa_order_unload, + gpa_order, + program_transfer, + fee_transfer, + peer_transfer, + msa_orders, + risk_assessment, + auto_reload, + direct_deposit, + polarity, + real_time_fee_group, + fee, + chargeback, + standin_approved_by, + acquirer_fee_amount, + funded_account_holder, + digital_wallet_token, + network_fees, + card_security_code_verification, + fraud, + cardholder_authentication_data, + currency_conversion, + merchant, + store, + card_acceptor, + acquirer, + pos, + avs, + mar_token, + mar_preceding_related_transaction_token, + mar_business_token, + mar_acting_user_token, + mar_card_token, + mar_duration, + mar_created_time, + issuer_interchange_amount, + offer_orders, + transaction_canonical_id, + mar_identifier, + created_at, + card_acceptor_mid, + card_acceptor_name, + address_verification, + issuing_product, + mar_enhanced_data_token, + standin_reason +) +SELECT + (:random_id % 100000) + 1 AS user_id, + (:random_id % 500000) + 1 AS card_id, + (:random_id % 20000) + 1 AS business_id, + NULL AS preceding_transaction_id, + (:random_id % 2) = 0 AS is_last, + (:random_id % 5) = 0 AS is_mocked, + 'authorization' AS type, + 'PENDING' AS state, + 'VISA' AS network, + 'VISANET' AS subnetwork, + now() - ((:random_id % 100) || ' days')::interval AS user_transaction_time, + now() - ((:random_id % 100) || ' days')::interval AS settlement_date, + random() * 1000 AS request_amount, + random() * 1000 AS amount, + 'USD' AS currency_code, + md5((:random_id)::text) AS approval_code, + '{}'::jsonb AS response, + '{}'::jsonb AS gpa, + '{}'::jsonb AS gpa_order_unload, + '{}'::jsonb AS 
gpa_order, + '{}'::jsonb AS program_transfer, + '{}'::jsonb AS fee_transfer, + '{}'::jsonb AS peer_transfer, + '{}'::jsonb AS msa_orders, + '{}'::jsonb AS risk_assessment, + '{}'::jsonb AS auto_reload, + '{}'::jsonb AS direct_deposit, + '{}'::jsonb AS polarity, + '{}'::jsonb AS real_time_fee_group, + '{}'::jsonb AS fee, + '{}'::jsonb AS chargeback, + NULL AS standin_approved_by, + random() * 100 AS acquirer_fee_amount, + '{}'::jsonb AS funded_account_holder, + '{}'::jsonb AS digital_wallet_token, + '{}'::jsonb AS network_fees, + '{}'::jsonb AS card_security_code_verification, + '{}'::jsonb AS fraud, + '{}'::jsonb AS cardholder_authentication_data, + '{}'::jsonb AS currency_conversion, + '{}'::jsonb AS merchant, + '{}'::jsonb AS store, + '{}'::jsonb AS card_acceptor, + '{}'::jsonb AS acquirer, + '{}'::jsonb AS pos, + '{}'::jsonb AS avs, + md5((:random_id)::text || 'token') AS mar_token, + NULL AS mar_preceding_related_transaction_token, + NULL AS mar_business_token, + NULL AS mar_acting_user_token, + NULL AS mar_card_token, + random() * 1000 AS mar_duration, + now() AS mar_created_time, + random() * 100 AS issuer_interchange_amount, + '{}'::jsonb AS offer_orders, + (:random_id % 500) + 1 AS transaction_canonical_id, + :random_id::integer AS mar_identifier, + now() AS created_at, + NULL AS card_acceptor_mid, + NULL AS card_acceptor_name, + '{}'::jsonb AS address_verification, + 'DEFAULT_PRODUCT' AS issuing_product, + NULL AS mar_enhanced_data_token, + NULL AS standin_reason +FROM (SELECT 1) AS dummy; + +-- Delete exactly one row using the uniformly chosen random_delete_id +WITH to_delete AS ( + SELECT id + FROM transaction.transaction + WHERE id >= (:random_delete_id)::bigint + AND id < ((:random_delete_id)::bigint + :range_size) + ORDER BY id + LIMIT 1 +) +DELETE FROM transaction.transaction +USING to_delete +WHERE transaction.transaction.id = to_delete.id; \ No newline at end of file diff --git a/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql new file mode 100644 index 0000000000..e0b0e52276 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql @@ -0,0 +1,25 @@ +-- enforce a controlled number of getpages prefetch requests from a range of +-- 40 million first pages (320 GB) of a 500 GiB table +-- the table has 55 million pages + + +-- Zipfian distributions model real-world access patterns where: +-- A few values (popular IDs) are accessed frequently. +-- Many values are accessed rarely. 
+-- This is useful for simulating realistic workloads + +\set alpha 1.2 +\set min_page 1 +\set max_page 40000000 + +\set zipf_random_page random_zipfian(:min_page, :max_page, :alpha) + +-- Read 500 consecutive pages from a Zipfian-distributed random start page +-- This enforces PostgreSQL prefetching +WITH random_page AS ( + SELECT :zipf_random_page::int AS start_page +) +SELECT MAX(created_at) +FROM webhook.incoming_webhooks +WHERE ctid >= (SELECT format('(%s,1)', start_page)::tid FROM random_page) +AND ctid < (SELECT format('(%s,1)', start_page + 500)::tid FROM random_page); \ No newline at end of file diff --git a/test_runner/performance/test_perf_oltp_large_tenant.py b/test_runner/performance/test_perf_oltp_large_tenant.py index ae00dbb3b5..842e6a904b 100644 --- a/test_runner/performance/test_perf_oltp_large_tenant.py +++ b/test_runner/performance/test_perf_oltp_large_tenant.py @@ -2,11 +2,13 @@ from __future__ import annotations import os import timeit +from contextlib import closing from pathlib import Path import pytest from fixtures.benchmark_fixture import PgBenchRunResult from fixtures.compare_fixtures import PgCompare +from fixtures.log_helper import log from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp @@ -82,9 +84,81 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None): env.zenbenchmark.record_pg_bench_result(prefix, res) +def run_database_maintenance(env: PgCompare): + with closing(env.pg.connect()) as conn: + with conn.cursor() as cur: + log.info("start vacuum analyze transaction.transaction") + with env.zenbenchmark.record_duration("vacuum_analyze"): + cur.execute("SET statement_timeout = 0;") + cur.execute("SET max_parallel_maintenance_workers = 7;") + cur.execute("SET maintenance_work_mem = '10GB';") + cur.execute("vacuum analyze transaction.transaction;") + log.info("finished vacuum analyze transaction.transaction") + + # recover previously failed or canceled re-indexing + cur.execute( + """ + DO $$ + DECLARE + invalid_index TEXT; + BEGIN + FOR invalid_index IN + SELECT c.relname + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'transaction' + AND i.indisvalid = FALSE + AND c.relname LIKE '%_ccnew%' + LOOP + EXECUTE 'DROP INDEX IF EXISTS transaction.' || invalid_index; + END LOOP; + END $$; + """ + ) + # also recover failed or canceled re-indexing on toast part of table + cur.execute( + """ + DO $$ + DECLARE + invalid_index TEXT; + BEGIN + FOR invalid_index IN + SELECT c.relname + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'pg_toast' + AND i.indisvalid = FALSE + AND c.relname LIKE '%_ccnew%' + AND i.indrelid = ( + SELECT reltoastrelid FROM pg_class + WHERE relname = 'transaction' + AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'transaction') + ) + LOOP + EXECUTE 'DROP INDEX IF EXISTS pg_toast.' 
|| invalid_index; + END LOOP; + END $$; + """ + ) + + log.info("start REINDEX TABLE CONCURRENTLY transaction.transaction") + with env.zenbenchmark.record_duration("reindex concurrently"): + cur.execute("REINDEX TABLE CONCURRENTLY transaction.transaction;") + log.info("finished REINDEX TABLE CONCURRENTLY transaction.transaction") + + @pytest.mark.parametrize("custom_scripts", get_custom_scripts()) @pytest.mark.parametrize("duration", get_durations_matrix()) @pytest.mark.remote_cluster -def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int): +def test_perf_oltp_large_tenant_pgbench( + remote_compare: PgCompare, custom_scripts: str, duration: int +): run_test_pgbench(remote_compare, custom_scripts, duration) - # todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration + + +@pytest.mark.remote_cluster +def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare): + # run analyze, vacuum, re-index after the test and measure and report its duration + run_database_maintenance(remote_compare)
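
The invalid-index cleanup automated in `run_database_maintenance` above can also be checked by hand before a run. Below is a minimal sketch using the same catalog predicate as the test's cleanup loop; the `transaction` schema and the `_ccnew` index-name suffix come from the patch, while the query itself is only illustrative, not part of the change:

```sql
-- List invalid indexes left over from a failed or canceled REINDEX CONCURRENTLY,
-- mirroring the condition the maintenance test uses for its DROP INDEX loop.
SELECT n.nspname AS schema_name,
       c.relname AS index_name
FROM pg_class c
JOIN pg_index i     ON i.indexrelid = c.oid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE i.indisvalid = FALSE
  AND c.relname LIKE '%_ccnew%'
  AND n.nspname IN ('transaction', 'pg_toast');
```

A canceled `REINDEX CONCURRENTLY` leaves such invalid `_ccnew` indexes behind, which is why the maintenance test drops them before re-indexing.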