Increase tenant size for large tenant oltp workload (#12260)

## Problem

- We run the large tenant OLTP workload with a fixed size (larger than
existing customers' workloads). Our customers' workloads are continuously
growing, and our testing should stay ahead of their production workloads.
- We want to touch all tables in the tenant's database with updates, so that
we simulate continuous change in layer files, as in a real production
workload.
- Our current OLTP benchmark uses a mixture of read and write transactions;
we also want a separate test run with read-only transactions.

## Summary of changes
- Modify the existing workload to add a separate run that uses read-only
pgbench custom scripts (see the pgbench sketch below).
- Create a new workload that:
  - grows all large tables in each run (for the reuse branch in the large
OLTP tenant's project)
  - updates a percentage of rows in all large tables in each run (to force
table bloat, auto-vacuum runs, and layer rebuilds in the pageservers)

Each run of the new workflow increases the logical database size by about
16 GB. We start with 6 runs per day, which gives us about 96-100 GB of growth
per day.
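
As a minimal sketch (not the repository's actual test harness), this is how the weighted custom scripts from the workflow matrix translate into a pgbench invocation. The script names and weights are taken from the read-only matrix entry below, the 1-hour duration mirrors `TEST_PG_BENCH_DURATIONS_MATRIX`, and the connection string is a placeholder.

```python
# Sketch only: how weighted custom scripts are handed to pgbench.
import subprocess

# read-only variant from the workflow matrix
custom_scripts = [
    "select_any_webhook_with_skew.sql@300",
    "select_recent_webhook.sql@397",
    "select_prefetch_webhook.sql@3",
]

cmd = [
    "pgbench",
    "-n",                    # no explicit vacuum before the test; rely on auto-vacuum
    "-M", "prepared",
    "--client=500",          # the growth workload lowers this to 35 clients / 35 jobs
    "--jobs=100",
    "-T3600",                # 1-hour run
    "-P60",                  # progress report every minute
    "--progress-timestamp",
]
for script in custom_scripts:
    cmd += ["-f", script]    # pgbench picks each script with probability proportional to its @weight

cmd.append("postgresql://user:password@host/dbname")  # placeholder connection string
subprocess.run(cmd, check=True)
```

pgbench selects one of the `-f` scripts per transaction with probability proportional to its `@weight` suffix, which is how the read/write mix (or the pure read-only mix) is controlled.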

---------

Co-authored-by: Alexander Lakhin <alexander.lakhin@neon.tech>
Author: Peter Bendel
Date: 2025-06-18 14:40:25 +02:00 (committed by GitHub)
Parent: e95f2f9a67
Commit: 7e711ede44
38 changed files with 679 additions and 4 deletions


@@ -33,11 +33,19 @@ jobs:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# test only read-only custom scripts in new branch without database maintenance
- target: new_branch
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
test_maintenance: false
# test all custom scripts in new branch with database maintenance
- target: new_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
test_maintenance: true
# test all custom scripts in reuse branch with database maintenance
- target: reuse_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
test_maintenance: true
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
permissions:
contents: write
statuses: write
@@ -145,6 +153,7 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark database maintenance
if: ${{ matrix.test_maintenance == 'true' }}
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}

.github/workflows/large_oltp_growth.yml (new file, 175 lines)

@@ -0,0 +1,175 @@
name: large oltp growth
# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ bodobolero/increase_large_oltp_workload ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 6 * * *' # 06:00 UTC
- cron: '0 8 * * *' # 08:00 UTC
- cron: '0 10 * * *' # 10:00 UTC
- cron: '0 12 * * *' # 12:00 UTC
- cron: '0 14 * * *' # 14:00 UTC
- cron: '0 16 * * *' # 16:00 UTC
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow globally because we need dedicated resources which only exist once
group: large-oltp-growth
cancel-in-progress: true
permissions:
contents: read
jobs:
oltp:
strategy:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# for now only grow the reuse branch, not the other branches.
- target: reuse_branch
custom_scripts:
- grow_action_blocks.sql
- grow_action_kwargs.sql
- grow_device_fingerprint_event.sql
- grow_edges.sql
- grow_hotel_rate_mapping.sql
- grow_ocr_pipeline_results_version.sql
- grow_priceline_raw_response.sql
- grow_relabled_transactions.sql
- grow_state_values.sql
- grow_values.sql
- grow_vertices.sql
- update_accounting_coding_body_tracking_category_selection.sql
- update_action_blocks.sql
- update_action_kwargs.sql
- update_denormalized_approval_workflow.sql
- update_device_fingerprint_event.sql
- update_edges.sql
- update_heron_transaction_enriched_log.sql
- update_heron_transaction_enrichment_requests.sql
- update_hotel_rate_mapping.sql
- update_incoming_webhooks.sql
- update_manual_transaction.sql
- update_ml_receipt_matching_log.sql
- update_ocr_pipeine_results_version.sql
- update_orc_pipeline_step_results.sql
- update_orc_pipeline_step_results_version.sql
- update_priceline_raw_response.sql
- update_quickbooks_transactions.sql
- update_raw_finicity_transaction.sql
- update_relabeled_transactions.sql
- update_state_values.sql
- update_stripe_authorization_event_log.sql
- update_transaction.sql
- update_values.sql
- update_vertices.sql
max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: 16 # pre-determined by pre-determined project
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
PLATFORM: ${{ matrix.target }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Configure AWS credentials # necessary to download artefacts
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${{ matrix.target }}" in
reuse_branch)
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
;;
*)
echo >&2 "Unknown target=${{ matrix.target }}"
exit 1
;;
esac
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
- name: pgbench with custom-scripts
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
pg_version: ${{ env.PG_VERSION }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic large oltp tenant growth increase: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}


@@ -0,0 +1,22 @@
-- add 100000 rows or approximately 11 MB to the action_blocks table
-- takes about 1 second
INSERT INTO workflows.action_blocks (
id,
uuid,
created_at,
status,
function_signature,
reference_id,
blocking,
run_synchronously
)
SELECT
id,
uuid_generate_v4(),
now() - (random() * interval '100 days'), -- Random date within the last 100 days
'CONDITIONS_NOT_MET',
'function_signature_' || id, -- Create a unique function signature using id
CASE WHEN random() > 0.5 THEN 'reference_' || id ELSE NULL END, -- 50% chance of being NULL
true,
CASE WHEN random() > 0.5 THEN true ELSE false END -- Random boolean value
FROM generate_series(1, 100000) AS id;


@@ -0,0 +1,11 @@
-- add 100000 rows or approximately 10 MB to the action_kwargs table
-- takes about 5 minutes
INSERT INTO workflows.action_kwargs (created_at, key, uuid, value_id, state_value_id, action_block_id)
SELECT
now(), -- Using the default value for `created_at`
'key_' || gs.id, -- Generating a unique key based on the id
uuid_generate_v4(), -- Generating a new UUID for each row
CASE WHEN gs.id % 2 = 0 THEN gs.id ELSE NULL END, -- Setting value_id for even ids
CASE WHEN gs.id % 2 <> 0 THEN gs.id ELSE NULL END, -- Setting state_value_id for odd ids
1 -- Setting action_block_id as 1 for simplicity
FROM generate_series(1, 100000) AS gs(id);


@@ -0,0 +1,56 @@
-- add 100000 rows or approx. 30 MB to the device_fingerprint_event table
-- takes about 4 minutes
INSERT INTO authentication.device_fingerprint_event (
uuid,
created_at,
identity_uuid,
fingerprint_request_id,
fingerprint_id,
confidence_score,
ip_address,
url,
client_referrer,
last_seen_at,
raw_fingerprint_response,
session_uuid,
fingerprint_response,
browser_version,
browser_name,
device,
operating_system,
operating_system_version,
user_agent,
ip_address_location_city,
ip_address_location_region,
ip_address_location_country_code,
ip_address_location_latitude,
ip_address_location_longitude,
is_incognito
)
SELECT
gen_random_uuid(), -- Generates a random UUID for primary key
now() - (random() * interval '10 days'), -- Random timestamp within the last 10 days
gen_random_uuid(), -- Random UUID for identity
md5(gs::text), -- Simulates unique fingerprint request ID using `md5` hash of series number
md5((gs + 10000)::text), -- Simulates unique fingerprint ID
round(CAST(random() AS numeric), 2), -- Generates a random score between 0 and 1, cast `random()` to numeric
'192.168.' || (random() * 255)::int || '.' || (random() * 255)::int, -- Random IP address
'https://example.com/' || (gs % 1000), -- Random URL with series number suffix
CASE WHEN random() < 0.5 THEN NULL ELSE 'https://referrer.com/' || (gs % 100)::text END, -- Random referrer, 50% chance of being NULL
now() - (random() * interval '5 days'), -- Last seen timestamp within the last 5 days
NULL, -- Keeping raw_fingerprint_response NULL for simplicity
CASE WHEN random() < 0.3 THEN gen_random_uuid() ELSE NULL END, -- Session UUID, 30% chance of NULL
NULL, -- Keeping fingerprint_response NULL for simplicity
CASE WHEN random() < 0.5 THEN '93.0' ELSE '92.0' END, -- Random browser version
CASE WHEN random() < 0.5 THEN 'Firefox' ELSE 'Chrome' END, -- Random browser name
CASE WHEN random() < 0.5 THEN 'Desktop' ELSE 'Mobile' END, -- Random device type
'Windows', -- Static value for operating system
'10.0', -- Static value for operating system version
'Mozilla/5.0', -- Static value for user agent
'City ' || (gs % 1000)::text, -- Random city name
'Region ' || (gs % 100)::text, -- Random region name
'US', -- Static country code
random() * 180 - 90, -- Random latitude between -90 and 90
random() * 360 - 180, -- Random longitude between -180 and 180
random() < 0.1 -- 10% chance of being incognito
FROM generate_series(1, 100000) AS gs;


@@ -0,0 +1,10 @@
-- add 100000 rows or approximately 11 MB to the edges table
-- takes about 1 minute
INSERT INTO workflows.edges (created_at, workflow_id, uuid, from_vertex_id, to_vertex_id)
SELECT
now() - (random() * interval '365 days'), -- Random `created_at` timestamp in the last year
(random() * 100)::int + 1, -- Random `workflow_id` between 1 and 100
uuid_generate_v4(), -- Generate a new UUID for each row
(random() * 100000)::bigint + 1, -- Random `from_vertex_id` between 1 and 100,000
(random() * 100000)::bigint + 1 -- Random `to_vertex_id` between 1 and 100,000
FROM generate_series(1, 100000) AS gs; -- Generate 100,000 sequential IDs


@@ -0,0 +1,21 @@
-- add 100000 rows or approximately 10 MB to the hotel_rate_mapping table
-- takes about 1 second
INSERT INTO booking_inventory.hotel_rate_mapping (
uuid,
created_at,
updated_at,
hotel_rate_id,
remote_id,
source
)
SELECT
uuid_generate_v4(), -- Unique UUID for each row
now(), -- Created at timestamp
now(), -- Updated at timestamp
'rate_' || gs AS hotel_rate_id, -- Unique hotel_rate_id
'remote_' || gs AS remote_id, -- Unique remote_id
CASE WHEN gs % 3 = 0 THEN 'source_1'
WHEN gs % 3 = 1 THEN 'source_2'
ELSE 'source_3'
END AS source -- Distributing sources among three options
FROM generate_series(1, 100000) AS gs;


@@ -0,0 +1,31 @@
-- add 100000 rows or approximately 20 MB to the ocr_pipeline_results_version table
-- takes about 1 second
INSERT INTO ocr.ocr_pipeline_results_version (
id, transaction_id, operation_type, created_at, updated_at, s3_filename, completed_at, result,
end_transaction_id, pipeline_type, is_async, callback, callback_kwargs, input, error, file_type, s3_bucket_name, pipeline_kwargs
)
SELECT
gs.aid, -- id
gs.aid, -- transaction_id (same as id for simplicity)
(gs.aid % 5)::smallint + 1, -- operation_type (cyclic values from 1 to 5)
now() - interval '1 day' * (random() * 30), -- created_at (random timestamp within the last 30 days)
now() - interval '1 day' * (random() * 30), -- updated_at (random timestamp within the last 30 days)
's3_file_' || gs.aid || '.txt', -- s3_filename (synthetic filename)
now() - interval '1 day' * (random() * 30), -- completed_at (random timestamp within the last 30 days)
'{}'::jsonb, -- result (empty JSON object)
NULL, -- end_transaction_id (NULL)
CASE (gs.aid % 3) -- pipeline_type (cyclic text values)
WHEN 0 THEN 'OCR'
WHEN 1 THEN 'PDF'
ELSE 'Image'
END,
gs.aid % 2 = 0, -- is_async (alternating between true and false)
'http://callback/' || gs.aid, -- callback (synthetic URL)
'{}'::jsonb, -- callback_kwargs (empty JSON object)
'Input text ' || gs.aid, -- input (synthetic input text)
NULL, -- error (NULL)
'pdf', -- file_type (default to 'pdf')
'bucket_' || gs.aid % 10, -- s3_bucket_name (synthetic bucket names)
'{}'::jsonb -- pipeline_kwargs (empty JSON object)
FROM
generate_series(1, 100000) AS gs(aid);


@@ -0,0 +1,18 @@
-- add 100000 rows or approx. 20 MB to the priceline_raw_response table
-- takes about 20 seconds
INSERT INTO booking_inventory.priceline_raw_response (
uuid, created_at, updated_at, url, base_url, path, method, params, request, response
)
SELECT
gen_random_uuid(), -- Generate random UUIDs
now() - (random() * interval '30 days'), -- Random creation time within the past 30 days
now() - (random() * interval '30 days'), -- Random update time within the past 30 days
'https://example.com/resource/' || gs, -- Construct a unique URL for each row
'https://example.com', -- Base URL for all rows
'/resource/' || gs, -- Path for each row
CASE WHEN gs % 2 = 0 THEN 'GET' ELSE 'POST' END, -- Alternate between GET and POST methods
'id=' || gs, -- Simple parameter pattern for each row
'{}'::jsonb, -- Empty JSON object for request
jsonb_build_object('status', 'success', 'data', gs) -- Construct a valid JSON response
FROM
generate_series(1, 100000) AS gs;


@@ -0,0 +1,26 @@
-- add 100000 rows or approx. 1 MB to the relabeled_transactions table
-- takes about 1 second
INSERT INTO heron.relabeled_transactions (
id,
created_at,
universal_transaction_id,
raw_result,
category,
category_confidence,
merchant,
batch_id
)
SELECT
gs.aid AS id,
now() - (gs.aid % 1000) * interval '1 second' AS created_at,
'txn_' || gs.aid AS universal_transaction_id,
'{}'::jsonb AS raw_result,
CASE WHEN gs.aid % 5 = 0 THEN 'grocery'
WHEN gs.aid % 5 = 1 THEN 'electronics'
WHEN gs.aid % 5 = 2 THEN 'clothing'
WHEN gs.aid % 5 = 3 THEN 'utilities'
ELSE NULL END AS category,
ROUND(RANDOM()::numeric, 2) AS category_confidence,
CASE WHEN gs.aid % 2 = 0 THEN 'Merchant_' || gs.aid % 20 ELSE NULL END AS merchant,
gs.aid % 100 + 1 AS batch_id
FROM generate_series(1, 100000) AS gs(aid);


@@ -0,0 +1,9 @@
-- add 100000 rows or approx. 10 MB to the state_values table
-- takes about 14 seconds
INSERT INTO workflows.state_values (key, workflow_id, state_type, value_id)
SELECT
'key_' || gs::text, -- Key: Generate as 'key_1', 'key_2', etc.
(gs - 1) / 1000 + 1, -- workflow_id: Distribute over a range (1000 workflows)
'STATIC', -- state_type: Use constant 'STATIC' as defined in schema
gs::bigint -- value_id: Use the same as the series value
FROM generate_series(1, 100000) AS gs; -- Generate 100,000 rows


@@ -0,0 +1,30 @@
-- add 100000 rows or approx. 24 MB to the values table
-- takes about 126 seconds
INSERT INTO workflows.values (
id,
type,
int_value,
string_value,
child_type,
bool_value,
uuid,
numeric_value,
workflow_id,
jsonb_value,
parent_value_id
)
SELECT
gs AS id,
'TYPE_A' AS type,
CASE WHEN selector = 1 THEN gs ELSE NULL END AS int_value,
CASE WHEN selector = 2 THEN 'string_value_' || gs::text ELSE NULL END AS string_value,
'CHILD_TYPE_A' AS child_type, -- Always non-null
CASE WHEN selector = 3 THEN (gs % 2 = 0) ELSE NULL END AS bool_value,
uuid_generate_v4() AS uuid, -- Always non-null
CASE WHEN selector = 4 THEN gs * 1.0 ELSE NULL END AS numeric_value,
(array[1, 2, 3, 4, 5])[gs % 5 + 1] AS workflow_id, -- Use only existing workflow IDs
CASE WHEN selector = 5 THEN ('{"key":' || gs::text || '}')::jsonb ELSE NULL END AS jsonb_value,
(gs % 100) + 1 AS parent_value_id -- Always non-null
FROM
generate_series(1, 100000) AS gs,
(SELECT floor(random() * 5 + 1)::int AS selector) AS s;


@@ -0,0 +1,26 @@
-- add 100000 rows or approx. 18 MB to the vertices table
-- takes about 90 seconds
INSERT INTO workflows.vertices(
uuid,
created_at,
condition_block_id,
operator,
has_been_visited,
reference_id,
workflow_id,
meta_data,
-- id,
action_block_id
)
SELECT
uuid_generate_v4() AS uuid,
now() AS created_at,
CASE WHEN (gs % 2 = 0) THEN gs % 10 ELSE NULL END AS condition_block_id, -- Every alternate row has a condition_block_id
'operator_' || (gs % 10) AS operator, -- Cyclical operator values (e.g., operator_0, operator_1)
false AS has_been_visited,
'ref_' || gs AS reference_id, -- Unique reference_id for each row
(gs % 1000) + 1 AS workflow_id, -- Random workflow_id values between 1 and 1000
'{}'::jsonb AS meta_data, -- Empty JSON metadata
-- gs AS id, -- default from sequence to get unique ID
CASE WHEN (gs % 2 = 1) THEN gs ELSE NULL END AS action_block_id -- Complementary to condition_block_id
FROM generate_series(1, 100000) AS gs;


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 200 kb in the accounting_coding_body_tracking_category_selection table
-- takes about 1 second
UPDATE accounting.accounting_coding_body_tracking_category_selection
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM accounting.accounting_coding_body_tracking_category_selection
TABLESAMPLE SYSTEM (0.0005)
);
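
The update_*.sql scripts in this commit (this one and those that follow) all share the pattern above: `TABLESAMPLE SYSTEM (p)` samples roughly p percent of the table's pages, and the `ctid IN (...)` subquery limits the UPDATE to rows on those pages, so each run rewrites a small random slice of the table and keeps generating bloat, auto-vacuum work, and layer rebuilds. As a rough illustration of how the sample percentage relates to the "approximately N rows" comments (the row count below is an assumption, not a figure from the benchmark database):

```python
# Illustrative arithmetic only; the 400M row count is an assumption.
def estimated_rows_touched(total_rows: int, sample_percent: float) -> float:
    """TABLESAMPLE SYSTEM(p) visits ~p% of the table's pages, hence ~p% of its rows."""
    return total_rows * sample_percent / 100.0

# A table of ~400 million rows sampled at 0.0005 % touches roughly 2000 rows,
# in line with the "approximately 2000 rows" comment in the script above.
print(estimated_rows_touched(400_000_000, 0.0005))  # -> 2000.0
```

Because SYSTEM sampling is page-based, the number of rows actually touched varies between runs, which is acceptable here since the goal is steady churn rather than an exact row count.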


@@ -0,0 +1,9 @@
-- update approximately 9000 rows or 1 MB in the action_blocks table
-- takes about 1 second
UPDATE workflows.action_blocks
SET run_synchronously = NOT run_synchronously
WHERE ctid in (
SELECT ctid
FROM workflows.action_blocks
TABLESAMPLE SYSTEM (0.001)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the action_kwargs table
-- takes about 1 second
UPDATE workflows.action_kwargs
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM workflows.action_kwargs
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,10 @@
-- update approximately 3000 rows or 500 KB in the denormalized_approval_workflow table
-- takes about 1 second
UPDATE approvals_v2.denormalized_approval_workflow
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM approvals_v2.denormalized_approval_workflow
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 1 MB in the device_fingerprint_event table
-- takes about 5 seconds
UPDATE authentication.device_fingerprint_event
SET is_incognito = NOT is_incognito
WHERE ctid in (
SELECT ctid
FROM authentication.device_fingerprint_event
TABLESAMPLE SYSTEM (0.001)
);


@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 600 kb in the edges table
-- takes about 1 second
UPDATE workflows.edges
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM workflows.edges
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 10000 rows or 200 KB in the heron_transaction_enriched_log table
-- takes about 1 minute
UPDATE heron.heron_transaction_enriched_log
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM heron.heron_transaction_enriched_log
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 1 MB in the heron_transaction_enrichment_requests table
-- takes about 2 minutes
UPDATE heron.heron_transaction_enrichment_requests
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM heron.heron_transaction_enrichment_requests
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 6000 rows or 600 kb in the hotel_rate_mapping table
-- takes about 1 second
UPDATE booking_inventory.hotel_rate_mapping
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM booking_inventory.hotel_rate_mapping
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 1 MB in the incoming_webhooks table
-- takes about 5 seconds
UPDATE webhook.incoming_webhooks
SET is_body_encrypted = NOT is_body_encrypted
WHERE ctid in (
SELECT ctid
FROM webhook.incoming_webhooks
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 1000 rows or 200 kb in the manual_transaction table
-- takes about 2 seconds
UPDATE banking.manual_transaction
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM banking.manual_transaction
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 1000 rows or 100 kb in the ml_receipt_matching_log table
-- takes about 1 second
UPDATE receipt.ml_receipt_matching_log
SET is_shadow_mode = NOT is_shadow_mode
WHERE ctid in (
SELECT ctid
FROM receipt.ml_receipt_matching_log
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 400 kb in the ocr_pipeline_results_version table
-- takes about 1 second
UPDATE ocr.ocr_pipeline_results_version
SET is_async = NOT is_async
WHERE ctid in (
SELECT ctid
FROM ocr.ocr_pipeline_results_version
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 3000 rows or 1 MB in the ocr_pipeline_step_results table
-- takes about 11 seconds
UPDATE ocr.ocr_pipeline_step_results
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM ocr.ocr_pipeline_step_results
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the ocr_pipeline_step_results_version table
-- takes about 40 seconds
UPDATE ocr.ocr_pipeline_step_results_version
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM ocr.ocr_pipeline_step_results_version
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the priceline_raw_response table
-- takes about 1 second
UPDATE booking_inventory.priceline_raw_response
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM booking_inventory.priceline_raw_response
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the quickbooks_transactions table
-- takes about 30 seconds
UPDATE accounting.quickbooks_transactions
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM accounting.quickbooks_transactions
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,15 @@
-- update approximately 6000 rows or 600 kb in the raw_finicity_transaction table
-- takes about 1 second
UPDATE banking.raw_finicity_transaction
SET raw_data =
jsonb_set(
raw_data,
'{updated}',
to_jsonb(now()),
true
)
WHERE ctid IN (
SELECT ctid
FROM banking.raw_finicity_transaction
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 8000 rows or 1 MB in the relabeled_transactions table
-- takes about 1 second
UPDATE heron.relabeled_transactions
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM heron.relabeled_transactions
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 8000 rows or 1 MB in the state_values table
-- takes about 2 minutes
UPDATE workflows.state_values
SET state_type = now()::text
WHERE ctid in (
SELECT ctid
FROM workflows.state_values
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 1 MB in the stripe_authorization_event_log table
-- takes about 5 minutes
UPDATE stripe.stripe_authorization_event_log
SET approved = NOT approved
WHERE ctid in (
SELECT ctid
FROM stripe.stripe_authorization_event_log
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 301 MB in the transaction table
-- takes about 90 seconds
UPDATE transaction.transaction
SET is_last = NOT is_last
WHERE ctid in (
SELECT ctid
FROM transaction.transaction
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 2500 rows or 1 MB in the values table
-- takes about 3 minutes
UPDATE workflows.values
SET bool_value = NOT bool_value
WHERE ctid in (
SELECT ctid
FROM workflows.values
TABLESAMPLE SYSTEM (0.0002)
) AND bool_value IS NOT NULL;


@@ -0,0 +1,9 @@
-- update approximately 10000 rows or 2 MB in the vertices table
-- takes about 1 minute
UPDATE workflows.vertices
SET has_been_visited = NOT has_been_visited
WHERE ctid in (
SELECT ctid
FROM workflows.vertices
TABLESAMPLE SYSTEM (0.0002)
);


@@ -31,7 +31,9 @@ def get_custom_scripts(
return rv
def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
def run_test_pgbench(
env: PgCompare, custom_scripts: str, duration: int, clients: int = 500, jobs: int = 100
):
password = env.pg.default_options.get("password", None)
options = env.pg.default_options.get("options", "")
# drop password from the connection string by passing password=None and set password separately
@@ -46,8 +48,8 @@ def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
"-n", # no explicit vacuum before the test - we want to rely on auto-vacuum
"-M",
"prepared",
"--client=500",
"--jobs=100",
f"--client={clients}",
f"--jobs={jobs}",
f"-T{duration}",
"-P60", # progress every minute
"--progress-timestamp",
@@ -164,6 +166,12 @@ def test_perf_oltp_large_tenant_pgbench(
run_test_pgbench(remote_compare, custom_scripts, duration)
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_growth(remote_compare: PgCompare, duration: int):
run_test_pgbench(remote_compare, " ".join(get_custom_scripts()), duration, 35, 35)
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
# run analyze, vacuum, re-index after the test and measure and report its duration