Extend large tenant OLTP workload to better match the workload characteristics of real Neon customers (#11166)

## Problem

We analyzed the workloads of large Neon users and want to extend the OLTP
benchmark workload to include characteristics observed in those real-world workloads.

## Summary of changes

- for the re-use branch, delete the rows inserted by the previous run
- adjust the expected run-time (timeouts) in the GitHub workflow
- add queries that exercise the prefetch getpages path
- add insert/update/delete (I/U/D) transactions for another table (so far
the workload was insert/append-only)
- add an explicit vacuum analyze step and measure its time
- add a reindex concurrently step and measure its time (and take care that
this step succeeds even if prior reindex runs have failed or were
canceled)
- create a second connection string for the pooled connection by removing
the `-pooler` suffix from the hostname, because we want to run
long-running statements (database maintenance) and bypass the pooler,
which doesn't support an unlimited statement timeout (see the sketch
below)
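
As an illustration of the last point, here is a minimal sketch of how the direct (non-pooled) connection string can be derived from the pooled one; the helper name, the example hostname, and the use of `urllib.parse` are illustrative assumptions, not the exact code in this change:

```python
from urllib.parse import urlparse, urlunparse


def to_direct_connstr(pooled_connstr: str) -> str:
    """Hypothetical helper: turn a pooled Neon connection string into a direct
    one by removing the '-pooler' suffix from the hostname, so long-running
    maintenance statements bypass the pooler and its statement timeout limit."""
    parsed = urlparse(pooled_connstr)
    direct_host = (parsed.hostname or "").replace("-pooler", "", 1)
    # rebuild the netloc, keeping credentials and port if present
    netloc = ""
    if parsed.username:
        netloc += parsed.username
        if parsed.password:
            netloc += f":{parsed.password}"
        netloc += "@"
    netloc += direct_host
    if parsed.port:
        netloc += f":{parsed.port}"
    return urlunparse(parsed._replace(netloc=netloc))


# e.g. "postgres://user:pass@ep-example-pooler.us-east-2.aws.neon.tech/neondb"
#   -> "postgres://user:pass@ep-example.us-east-2.aws.neon.tech/neondb"
```

With the direct connection, `SET statement_timeout = 0` can take effect for the vacuum and reindex steps.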

## Test run


https://github.com/neondatabase/neon/actions/runs/13851772887/job/38760172415
Author: Peter Bendel
Date: 2025-03-16 15:04:48 +01:00 (committed by GitHub)
Parent: a5b00b87ba
Commit: 228bb75354
4 changed files with 327 additions and 27 deletions


@@ -0,0 +1,162 @@
\set min_id 1
\set max_id 1500000000
\set range_size 100
-- Use uniform random instead of random_zipfian
\set random_id random(:min_id, :max_id)
\set random_mar_id random(1, 65536)
\set random_delete_id random(:min_id, :max_id)
-- Update exactly one row (if it exists) using the uniformly chosen random_id
UPDATE transaction.transaction
SET state = 'COMPLETED',
settlement_date = CURRENT_DATE,
mar_identifier = (:random_mar_id)::int
WHERE id = (:random_id)::bigint;
-- Insert exactly one row
INSERT INTO transaction.transaction (
user_id,
card_id,
business_id,
preceding_transaction_id,
is_last,
is_mocked,
type,
state,
network,
subnetwork,
user_transaction_time,
settlement_date,
request_amount,
amount,
currency_code,
approval_code,
response,
gpa,
gpa_order_unload,
gpa_order,
program_transfer,
fee_transfer,
peer_transfer,
msa_orders,
risk_assessment,
auto_reload,
direct_deposit,
polarity,
real_time_fee_group,
fee,
chargeback,
standin_approved_by,
acquirer_fee_amount,
funded_account_holder,
digital_wallet_token,
network_fees,
card_security_code_verification,
fraud,
cardholder_authentication_data,
currency_conversion,
merchant,
store,
card_acceptor,
acquirer,
pos,
avs,
mar_token,
mar_preceding_related_transaction_token,
mar_business_token,
mar_acting_user_token,
mar_card_token,
mar_duration,
mar_created_time,
issuer_interchange_amount,
offer_orders,
transaction_canonical_id,
mar_identifier,
created_at,
card_acceptor_mid,
card_acceptor_name,
address_verification,
issuing_product,
mar_enhanced_data_token,
standin_reason
)
SELECT
(:random_id % 100000) + 1 AS user_id,
(:random_id % 500000) + 1 AS card_id,
(:random_id % 20000) + 1 AS business_id,
NULL AS preceding_transaction_id,
(:random_id % 2) = 0 AS is_last,
(:random_id % 5) = 0 AS is_mocked,
'authorization' AS type,
'PENDING' AS state,
'VISA' AS network,
'VISANET' AS subnetwork,
now() - ((:random_id % 100) || ' days')::interval AS user_transaction_time,
now() - ((:random_id % 100) || ' days')::interval AS settlement_date,
random() * 1000 AS request_amount,
random() * 1000 AS amount,
'USD' AS currency_code,
md5((:random_id)::text) AS approval_code,
'{}'::jsonb AS response,
'{}'::jsonb AS gpa,
'{}'::jsonb AS gpa_order_unload,
'{}'::jsonb AS gpa_order,
'{}'::jsonb AS program_transfer,
'{}'::jsonb AS fee_transfer,
'{}'::jsonb AS peer_transfer,
'{}'::jsonb AS msa_orders,
'{}'::jsonb AS risk_assessment,
'{}'::jsonb AS auto_reload,
'{}'::jsonb AS direct_deposit,
'{}'::jsonb AS polarity,
'{}'::jsonb AS real_time_fee_group,
'{}'::jsonb AS fee,
'{}'::jsonb AS chargeback,
NULL AS standin_approved_by,
random() * 100 AS acquirer_fee_amount,
'{}'::jsonb AS funded_account_holder,
'{}'::jsonb AS digital_wallet_token,
'{}'::jsonb AS network_fees,
'{}'::jsonb AS card_security_code_verification,
'{}'::jsonb AS fraud,
'{}'::jsonb AS cardholder_authentication_data,
'{}'::jsonb AS currency_conversion,
'{}'::jsonb AS merchant,
'{}'::jsonb AS store,
'{}'::jsonb AS card_acceptor,
'{}'::jsonb AS acquirer,
'{}'::jsonb AS pos,
'{}'::jsonb AS avs,
md5((:random_id)::text || 'token') AS mar_token,
NULL AS mar_preceding_related_transaction_token,
NULL AS mar_business_token,
NULL AS mar_acting_user_token,
NULL AS mar_card_token,
random() * 1000 AS mar_duration,
now() AS mar_created_time,
random() * 100 AS issuer_interchange_amount,
'{}'::jsonb AS offer_orders,
(:random_id % 500) + 1 AS transaction_canonical_id,
:random_id::integer AS mar_identifier,
now() AS created_at,
NULL AS card_acceptor_mid,
NULL AS card_acceptor_name,
'{}'::jsonb AS address_verification,
'DEFAULT_PRODUCT' AS issuing_product,
NULL AS mar_enhanced_data_token,
NULL AS standin_reason
FROM (SELECT 1) AS dummy;
-- Delete exactly one row using the uniformly chosen random_delete_id
WITH to_delete AS (
SELECT id
FROM transaction.transaction
WHERE id >= (:random_delete_id)::bigint
AND id < ((:random_delete_id)::bigint + :range_size)
ORDER BY id
LIMIT 1
)
DELETE FROM transaction.transaction
USING to_delete
WHERE transaction.transaction.id = to_delete.id;


@@ -0,0 +1,25 @@
-- enforce a controlled number of getpages prefetch requests from a range of
-- 40 million first pages (320 GB) of a 500 GiB table
-- the table has 55 million pages
-- Zipfian distributions model real-world access patterns where:
-- A few values (popular IDs) are accessed frequently.
-- Many values are accessed rarely.
-- This is useful for simulating realistic workloads
\set alpha 1.2
\set min_page 1
\set max_page 40000000
\set zipf_random_page random_zipfian(:min_page, :max_page, :alpha)
-- Read 500 consecutive pages from a Zipfian-distributed random start page
-- This enforces PostgreSQL prefetching
WITH random_page AS (
SELECT :zipf_random_page::int AS start_page
)
SELECT MAX(created_at)
FROM webhook.incoming_webhooks
WHERE ctid >= (SELECT format('(%s,1)', start_page)::tid FROM random_page)
AND ctid < (SELECT format('(%s,1)', start_page + 500)::tid FROM random_page);
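
For context, the page-count figures quoted in the comments above can be checked against the live table; the snippet below is only an illustrative verification (the connection string is a placeholder), not part of this change:

```python
import psycopg2  # assumed to be available in the test environment

# placeholder connection string; the real tests obtain it from the benchmark fixtures
with psycopg2.connect("postgres://user:password@host/neondb") as conn:
    with conn.cursor() as cur:
        # exact number of heap pages = relation size / block size (typically 8 KB)
        cur.execute(
            """
            SELECT pg_relation_size('webhook.incoming_webhooks') /
                   current_setting('block_size')::bigint AS pages
            """
        )
        print(cur.fetchone()[0])  # expected to be on the order of 55 million pages
```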


@@ -2,11 +2,13 @@ from __future__ import annotations
import os
import timeit
from contextlib import closing
from pathlib import Path
import pytest
from fixtures.benchmark_fixture import PgBenchRunResult
from fixtures.compare_fixtures import PgCompare
from fixtures.log_helper import log
from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp
@@ -82,9 +84,81 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None):
    env.zenbenchmark.record_pg_bench_result(prefix, res)


def run_database_maintenance(env: PgCompare):
    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
            log.info("start vacuum analyze transaction.transaction")
            with env.zenbenchmark.record_duration("vacuum_analyze"):
                cur.execute("SET statement_timeout = 0;")
                cur.execute("SET max_parallel_maintenance_workers = 7;")
                cur.execute("SET maintenance_work_mem = '10GB';")
                cur.execute("vacuum analyze transaction.transaction;")
            log.info("finished vacuum analyze transaction.transaction")
            # recover previously failed or canceled re-indexing
            cur.execute(
                """
                DO $$
                DECLARE
                    invalid_index TEXT;
                BEGIN
                    FOR invalid_index IN
                        SELECT c.relname
                        FROM pg_class c
                        JOIN pg_index i ON i.indexrelid = c.oid
                        JOIN pg_namespace n ON n.oid = c.relnamespace
                        WHERE n.nspname = 'transaction'
                        AND i.indisvalid = FALSE
                        AND c.relname LIKE '%_ccnew%'
                    LOOP
                        EXECUTE 'DROP INDEX IF EXISTS transaction.' || invalid_index;
                    END LOOP;
                END $$;
                """
            )
            # also recover failed or canceled re-indexing on toast part of table
            cur.execute(
                """
                DO $$
                DECLARE
                    invalid_index TEXT;
                BEGIN
                    FOR invalid_index IN
                        SELECT c.relname
                        FROM pg_class c
                        JOIN pg_index i ON i.indexrelid = c.oid
                        JOIN pg_namespace n ON n.oid = c.relnamespace
                        WHERE n.nspname = 'pg_toast'
                        AND i.indisvalid = FALSE
                        AND c.relname LIKE '%_ccnew%'
                        AND i.indrelid = (
                            SELECT reltoastrelid FROM pg_class
                            WHERE relname = 'transaction'
                            AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'transaction')
                        )
                    LOOP
                        EXECUTE 'DROP INDEX IF EXISTS pg_toast.' || invalid_index;
                    END LOOP;
                END $$;
                """
            )
            log.info("start REINDEX TABLE CONCURRENTLY transaction.transaction")
            with env.zenbenchmark.record_duration("reindex concurrently"):
                cur.execute("REINDEX TABLE CONCURRENTLY transaction.transaction;")
            log.info("finished REINDEX TABLE CONCURRENTLY transaction.transaction")
@pytest.mark.parametrize("custom_scripts", get_custom_scripts())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int):
def test_perf_oltp_large_tenant_pgbench(
    remote_compare: PgCompare, custom_scripts: str, duration: int
):
    run_test_pgbench(remote_compare, custom_scripts, duration)
    # todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration


@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
    # run analyze, vacuum, re-index after the test and measure and report its duration
    run_database_maintenance(remote_compare)