From 228bb753546149135ddb7f4c198d8c8d28fa1c6e Mon Sep 17 00:00:00 2001
From: Peter Bendel
Date: Sun, 16 Mar 2025 15:04:48 +0100
Subject: [PATCH] Extend large tenant OLTP workload ... (#11166)

... to better match the workload characteristics of real Neon customers

## Problem

We analyzed the workloads of large Neon users and want to extend the OLTP
workload to include characteristics seen in those workloads.

## Summary of changes

- for the reuse branch, delete the rows inserted by the last run
- adjust the expected run time (timeouts) in the GitHub workflow
- add queries that exercise the prefetch getpages path
- add insert/update/delete (I/U/D) transactions for another table (so far the workload was insert/append-only)
- add an explicit vacuum analyze step and measure its time
- add a reindex concurrently step and measure its time (and make sure this step succeeds even if prior reindex runs failed or were canceled)
- create a second connection string that removes the `-pooler` suffix from the hostname, because the long-running maintenance statements must bypass the pooler, which doesn't support an unlimited statement timeout

## Test run

https://github.com/neondatabase/neon/actions/runs/13851772887/job/38760172415
---
 .github/workflows/large_oltp_benchmark.yml |  89 +++++++---
 .../IUD_one_transaction.sql                | 162 ++++++++++++++++++
 .../select_prefetch_webhook.sql            |  25 +++
 .../test_perf_oltp_large_tenant.py         |  78 ++++++++-
 4 files changed, 327 insertions(+), 27 deletions(-)
 create mode 100644 test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql
 create mode 100644 test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql

diff --git a/.github/workflows/large_oltp_benchmark.yml b/.github/workflows/large_oltp_benchmark.yml
index f33e11cd08..a7c3118e34 100644
--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -2,8 +2,8 @@ name: large oltp benchmark

 on:
   # uncomment to run on push for debugging your PR
-  push:
-    branches: [ bodobolero/synthetic_oltp_workload ]
+  #push:
+  #  branches: [ bodobolero/synthetic_oltp_workload ]

   schedule:
     # * is a special character in YAML so you have to quote this string
@@ -12,7 +12,7 @@ on:
     #         │ │ ┌───────────── day of the month (1 - 31)
     #         │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
     #         │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
+    - cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC

   workflow_dispatch: # adds ability to run this manually

 defaults:
@@ -22,7 +22,7 @@ defaults:
 concurrency:
   # Allow only one workflow globally because we need dedicated resources which only exist once
   group: large-oltp-bench-workflow
-  cancel-in-progress: true
+  cancel-in-progress: false

 jobs:
   oltp:
@@ -31,9 +31,9 @@ jobs:
       matrix:
         include:
           - target: new_branch
-            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
           - target: reuse_branch
-            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
       max-parallel: 1 # we want to run each stripe size sequentially to be able to
compare the results permissions: contents: write @@ -46,7 +46,6 @@ jobs: PG_VERSION: 16 # pre-determined by pre-determined project TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }} PLATFORM: ${{ matrix.target }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -57,8 +56,10 @@ jobs: password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init - # Increase timeout to 8h, default timeout is 6h - timeout-minutes: 480 + # Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time + # (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions + # in one run vacuum didn't finish within 12 hours + timeout-minutes: 2880 steps: - uses: actions/checkout@v4 @@ -89,29 +90,45 @@ jobs: - name: Set up Connection String id: set-up-connstr run: | - case "${{ matrix.target }}" in - new_branch) - CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} - ;; - reuse_branch) - CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} - ;; - *) - echo >&2 "Unknown target=${{ matrix.target }}" - exit 1 - ;; - esac + case "${{ matrix.target }}" in + new_branch) + CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} + ;; + reuse_branch) + CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} + ;; + *) + echo >&2 "Unknown target=${{ matrix.target }}" + exit 1 + ;; + esac - echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}" - - name: Benchmark pgbench with custom-scripts + echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT + + - name: Delete rows from prior runs in reuse branch + if: ${{ matrix.target == 'reuse_branch' }} + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }} + PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config + PSQL: /tmp/neon/pg_install/v16/bin/psql + PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib + run: | + echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs" + export LD_LIBRARY_PATH=${PG_16_LIB_PATH} + ${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';" + echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs" + + - name: Benchmark pgbench with custom-scripts uses: ./.github/actions/run-python-test-set with: build_type: ${{ env.BUILD_TYPE }} test_selection: performance run_in_parallel: false - save_perf_report: ${{ env.SAVE_PERF_REPORT }} - extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant + save_perf_report: true + extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench pg_version: ${{ env.PG_VERSION }} aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: @@ -119,6 +136,21 @@ jobs: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + - name: Benchmark database maintenance + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: true + extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance + pg_version: ${{ env.PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + BENCHMARK_CONNSTR: ${{ 
steps.set-up-connstr.outputs.connstr_without_pooler }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + - name: Delete Neon Branch for large tenant if: ${{ always() && matrix.target == 'new_branch' }} uses: ./.github/actions/neon-branch-delete @@ -127,6 +159,13 @@ jobs: branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} + - name: Configure AWS credentials # again because prior steps could have exceeded 5 hours + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + - name: Create Allure report id: create-allure-report if: ${{ !cancelled() }} diff --git a/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql new file mode 100644 index 0000000000..4c5b3fbd11 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql @@ -0,0 +1,162 @@ +\set min_id 1 +\set max_id 1500000000 +\set range_size 100 + +-- Use uniform random instead of random_zipfian +\set random_id random(:min_id, :max_id) +\set random_mar_id random(1, 65536) +\set random_delete_id random(:min_id, :max_id) + +-- Update exactly one row (if it exists) using the uniformly chosen random_id +UPDATE transaction.transaction + SET state = 'COMPLETED', + settlement_date = CURRENT_DATE, + mar_identifier = (:random_mar_id)::int + WHERE id = (:random_id)::bigint; + +-- Insert exactly one row +INSERT INTO transaction.transaction ( + user_id, + card_id, + business_id, + preceding_transaction_id, + is_last, + is_mocked, + type, + state, + network, + subnetwork, + user_transaction_time, + settlement_date, + request_amount, + amount, + currency_code, + approval_code, + response, + gpa, + gpa_order_unload, + gpa_order, + program_transfer, + fee_transfer, + peer_transfer, + msa_orders, + risk_assessment, + auto_reload, + direct_deposit, + polarity, + real_time_fee_group, + fee, + chargeback, + standin_approved_by, + acquirer_fee_amount, + funded_account_holder, + digital_wallet_token, + network_fees, + card_security_code_verification, + fraud, + cardholder_authentication_data, + currency_conversion, + merchant, + store, + card_acceptor, + acquirer, + pos, + avs, + mar_token, + mar_preceding_related_transaction_token, + mar_business_token, + mar_acting_user_token, + mar_card_token, + mar_duration, + mar_created_time, + issuer_interchange_amount, + offer_orders, + transaction_canonical_id, + mar_identifier, + created_at, + card_acceptor_mid, + card_acceptor_name, + address_verification, + issuing_product, + mar_enhanced_data_token, + standin_reason +) +SELECT + (:random_id % 100000) + 1 AS user_id, + (:random_id % 500000) + 1 AS card_id, + (:random_id % 20000) + 1 AS business_id, + NULL AS preceding_transaction_id, + (:random_id % 2) = 0 AS is_last, + (:random_id % 5) = 0 AS is_mocked, + 'authorization' AS type, + 'PENDING' AS state, + 'VISA' AS network, + 'VISANET' AS subnetwork, + now() - ((:random_id % 100) || ' days')::interval AS user_transaction_time, + now() - ((:random_id % 100) || ' days')::interval AS settlement_date, + random() * 1000 AS request_amount, + random() * 1000 AS amount, + 'USD' AS currency_code, + md5((:random_id)::text) AS approval_code, + '{}'::jsonb AS response, + '{}'::jsonb AS gpa, + '{}'::jsonb AS gpa_order_unload, + '{}'::jsonb AS 
gpa_order, + '{}'::jsonb AS program_transfer, + '{}'::jsonb AS fee_transfer, + '{}'::jsonb AS peer_transfer, + '{}'::jsonb AS msa_orders, + '{}'::jsonb AS risk_assessment, + '{}'::jsonb AS auto_reload, + '{}'::jsonb AS direct_deposit, + '{}'::jsonb AS polarity, + '{}'::jsonb AS real_time_fee_group, + '{}'::jsonb AS fee, + '{}'::jsonb AS chargeback, + NULL AS standin_approved_by, + random() * 100 AS acquirer_fee_amount, + '{}'::jsonb AS funded_account_holder, + '{}'::jsonb AS digital_wallet_token, + '{}'::jsonb AS network_fees, + '{}'::jsonb AS card_security_code_verification, + '{}'::jsonb AS fraud, + '{}'::jsonb AS cardholder_authentication_data, + '{}'::jsonb AS currency_conversion, + '{}'::jsonb AS merchant, + '{}'::jsonb AS store, + '{}'::jsonb AS card_acceptor, + '{}'::jsonb AS acquirer, + '{}'::jsonb AS pos, + '{}'::jsonb AS avs, + md5((:random_id)::text || 'token') AS mar_token, + NULL AS mar_preceding_related_transaction_token, + NULL AS mar_business_token, + NULL AS mar_acting_user_token, + NULL AS mar_card_token, + random() * 1000 AS mar_duration, + now() AS mar_created_time, + random() * 100 AS issuer_interchange_amount, + '{}'::jsonb AS offer_orders, + (:random_id % 500) + 1 AS transaction_canonical_id, + :random_id::integer AS mar_identifier, + now() AS created_at, + NULL AS card_acceptor_mid, + NULL AS card_acceptor_name, + '{}'::jsonb AS address_verification, + 'DEFAULT_PRODUCT' AS issuing_product, + NULL AS mar_enhanced_data_token, + NULL AS standin_reason +FROM (SELECT 1) AS dummy; + +-- Delete exactly one row using the uniformly chosen random_delete_id +WITH to_delete AS ( + SELECT id + FROM transaction.transaction + WHERE id >= (:random_delete_id)::bigint + AND id < ((:random_delete_id)::bigint + :range_size) + ORDER BY id + LIMIT 1 +) +DELETE FROM transaction.transaction +USING to_delete +WHERE transaction.transaction.id = to_delete.id; \ No newline at end of file diff --git a/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql new file mode 100644 index 0000000000..e0b0e52276 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql @@ -0,0 +1,25 @@ +-- enforce a controlled number of getpages prefetch requests from a range of +-- 40 million first pages (320 GB) of a 500 GiB table +-- the table has 55 million pages + + +-- Zipfian distributions model real-world access patterns where: +-- A few values (popular IDs) are accessed frequently. +-- Many values are accessed rarely. 
+-- This is useful for simulating realistic workloads + +\set alpha 1.2 +\set min_page 1 +\set max_page 40000000 + +\set zipf_random_page random_zipfian(:min_page, :max_page, :alpha) + +-- Read 500 consecutive pages from a Zipfian-distributed random start page +-- This enforces PostgreSQL prefetching +WITH random_page AS ( + SELECT :zipf_random_page::int AS start_page +) +SELECT MAX(created_at) +FROM webhook.incoming_webhooks +WHERE ctid >= (SELECT format('(%s,1)', start_page)::tid FROM random_page) +AND ctid < (SELECT format('(%s,1)', start_page + 500)::tid FROM random_page); \ No newline at end of file diff --git a/test_runner/performance/test_perf_oltp_large_tenant.py b/test_runner/performance/test_perf_oltp_large_tenant.py index ae00dbb3b5..842e6a904b 100644 --- a/test_runner/performance/test_perf_oltp_large_tenant.py +++ b/test_runner/performance/test_perf_oltp_large_tenant.py @@ -2,11 +2,13 @@ from __future__ import annotations import os import timeit +from contextlib import closing from pathlib import Path import pytest from fixtures.benchmark_fixture import PgBenchRunResult from fixtures.compare_fixtures import PgCompare +from fixtures.log_helper import log from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp @@ -82,9 +84,81 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None): env.zenbenchmark.record_pg_bench_result(prefix, res) +def run_database_maintenance(env: PgCompare): + with closing(env.pg.connect()) as conn: + with conn.cursor() as cur: + log.info("start vacuum analyze transaction.transaction") + with env.zenbenchmark.record_duration("vacuum_analyze"): + cur.execute("SET statement_timeout = 0;") + cur.execute("SET max_parallel_maintenance_workers = 7;") + cur.execute("SET maintenance_work_mem = '10GB';") + cur.execute("vacuum analyze transaction.transaction;") + log.info("finished vacuum analyze transaction.transaction") + + # recover previously failed or canceled re-indexing + cur.execute( + """ + DO $$ + DECLARE + invalid_index TEXT; + BEGIN + FOR invalid_index IN + SELECT c.relname + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'transaction' + AND i.indisvalid = FALSE + AND c.relname LIKE '%_ccnew%' + LOOP + EXECUTE 'DROP INDEX IF EXISTS transaction.' || invalid_index; + END LOOP; + END $$; + """ + ) + # also recover failed or canceled re-indexing on toast part of table + cur.execute( + """ + DO $$ + DECLARE + invalid_index TEXT; + BEGIN + FOR invalid_index IN + SELECT c.relname + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'pg_toast' + AND i.indisvalid = FALSE + AND c.relname LIKE '%_ccnew%' + AND i.indrelid = ( + SELECT reltoastrelid FROM pg_class + WHERE relname = 'transaction' + AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'transaction') + ) + LOOP + EXECUTE 'DROP INDEX IF EXISTS pg_toast.' 
|| invalid_index; + END LOOP; + END $$; + """ + ) + + log.info("start REINDEX TABLE CONCURRENTLY transaction.transaction") + with env.zenbenchmark.record_duration("reindex concurrently"): + cur.execute("REINDEX TABLE CONCURRENTLY transaction.transaction;") + log.info("finished REINDEX TABLE CONCURRENTLY transaction.transaction") + + @pytest.mark.parametrize("custom_scripts", get_custom_scripts()) @pytest.mark.parametrize("duration", get_durations_matrix()) @pytest.mark.remote_cluster -def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int): +def test_perf_oltp_large_tenant_pgbench( + remote_compare: PgCompare, custom_scripts: str, duration: int +): run_test_pgbench(remote_compare, custom_scripts, duration) - # todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration + + +@pytest.mark.remote_cluster +def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare): + # run analyze, vacuum, re-index after the test and measure and report its duration + run_database_maintenance(remote_compare)
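
The invalid-index cleanup automated in `run_database_maintenance` above can also be checked by hand before a run. Below is a minimal sketch using the same catalog predicate as the test's cleanup loop; the `transaction` schema and the `_ccnew` index-name suffix come from the patch, while the query itself is only illustrative, not part of the change:

```sql
-- List invalid indexes left over from a failed or canceled REINDEX CONCURRENTLY,
-- mirroring the condition the maintenance test uses for its DROP INDEX loop.
SELECT n.nspname AS schema_name,
       c.relname AS index_name
FROM pg_class c
JOIN pg_index i     ON i.indexrelid = c.oid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE i.indisvalid = FALSE
  AND c.relname LIKE '%_ccnew%'
  AND n.nspname IN ('transaction', 'pg_toast');
```

A canceled `REINDEX CONCURRENTLY` leaves such invalid `_ccnew` indexes behind, which is why the maintenance test drops them before re-indexing.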