Mirror of https://github.com/neondatabase/neon.git
Increase tenant size for large tenant oltp workload (#12260)
## Problem

- We run the large tenant oltp workload with a fixed size (larger than existing customers' workloads). Our customers' workloads are continuously growing, and our testing should stay ahead of their production workloads.
- We want to touch all tables in the tenant's database (updates) so that we simulate continuous change in layer files, as in a real production workload.
- Our current oltp benchmark uses a mixture of read and write transactions; however, we also want a separate test run with read-only transactions only.

## Summary of changes

- Modify the existing workload to have a separate run with pgbench custom scripts that are read-only.
- Create a new workload that
  - grows all large tables in each run (for the reuse branch in the large oltp tenant's project)
  - updates a percentage of rows in all large tables in each run (to enforce table bloat, auto-vacuum runs, and layer rebuilds in pageservers)

Each run of the new workflow increases the logical database size by about 16 GB. We start with 6 runs per day, which gives us about 96-100 GB of growth per day.

---------

Co-authored-by: Alexander Lakhin <alexander.lakhin@neon.tech>
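For reference, the `name.sql@weight` suffixes used in the workflow matrices below are pgbench script weights: each transaction picks one of the `-f` scripts with probability proportional to its weight, so the full read-write mix issues roughly 200 inserts, 700 selects, and 100 IUD transactions per 1000 transactions (200 + 300 + 397 + 3 + 100 = 1000). A minimal sketch of the kind of command line this boils down to; the connection string and the 1-hour duration here are placeholders, not literal values produced by the harness:

    # hedged sketch: roughly what the read-write matrix entry turns into
    pgbench -n -M prepared --client=500 --jobs=100 -T 3600 -P 60 --progress-timestamp \
      -f insert_webhooks.sql@200 \
      -f select_any_webhook_with_skew.sql@300 \
      -f select_recent_webhook.sql@397 \
      -f select_prefetch_webhook.sql@3 \
      -f IUD_one_transaction.sql@100 \
      "$BENCHMARK_CONNSTR"

The read-only variant simply drops the weighted insert and IUD scripts from that list.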
.github/workflows/large_oltp_benchmark.yml
@@ -33,11 +33,19 @@ jobs:
      fail-fast: false # allow other variants to continue even if one fails
      matrix:
        include:
          # test only read-only custom scripts in new branch without database maintenance
          - target: new_branch
            custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
            test_maintenance: false
          # test all custom scripts in new branch with database maintenance
          - target: new_branch
            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
            test_maintenance: true
          # test all custom scripts in reuse branch with database maintenance
          - target: reuse_branch
            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
      max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
            test_maintenance: true
      max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
    permissions:
      contents: write
      statuses: write

@@ -145,6 +153,7 @@ jobs:
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

      - name: Benchmark database maintenance
        if: ${{ matrix.test_maintenance == 'true' }}
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: ${{ env.BUILD_TYPE }}
.github/workflows/large_oltp_growth.yml (new file)
@@ -0,0 +1,175 @@
name: large oltp growth
# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)

on:
  # uncomment to run on push for debugging your PR
  # push:
  #   branches: [ bodobolero/increase_large_oltp_workload ]

  schedule:
    # * is a special character in YAML so you have to quote this string
    # ┌───────────── minute (0 - 59)
    # │ ┌───────────── hour (0 - 23)
    # │ │ ┌───────────── day of the month (1 - 31)
    # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron: '0 6 * * *'  # 06:00 UTC
    - cron: '0 8 * * *'  # 08:00 UTC
    - cron: '0 10 * * *' # 10:00 UTC
    - cron: '0 12 * * *' # 12:00 UTC
    - cron: '0 14 * * *' # 14:00 UTC
    - cron: '0 16 * * *' # 16:00 UTC
  workflow_dispatch: # adds ability to run this manually

defaults:
  run:
    shell: bash -euxo pipefail {0}

concurrency:
  # Allow only one workflow globally because we need dedicated resources which only exist once
  group: large-oltp-growth
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  oltp:
    strategy:
      fail-fast: false # allow other variants to continue even if one fails
      matrix:
        include:
          # for now only grow the reuse branch, not the other branches.
          - target: reuse_branch
            custom_scripts:
              - grow_action_blocks.sql
              - grow_action_kwargs.sql
              - grow_device_fingerprint_event.sql
              - grow_edges.sql
              - grow_hotel_rate_mapping.sql
              - grow_ocr_pipeline_results_version.sql
              - grow_priceline_raw_response.sql
              - grow_relabled_transactions.sql
              - grow_state_values.sql
              - grow_values.sql
              - grow_vertices.sql
              - update_accounting_coding_body_tracking_category_selection.sql
              - update_action_blocks.sql
              - update_action_kwargs.sql
              - update_denormalized_approval_workflow.sql
              - update_device_fingerprint_event.sql
              - update_edges.sql
              - update_heron_transaction_enriched_log.sql
              - update_heron_transaction_enrichment_requests.sql
              - update_hotel_rate_mapping.sql
              - update_incoming_webhooks.sql
              - update_manual_transaction.sql
              - update_ml_receipt_matching_log.sql
              - update_ocr_pipeine_results_version.sql
              - update_orc_pipeline_step_results.sql
              - update_orc_pipeline_step_results_version.sql
              - update_priceline_raw_response.sql
              - update_quickbooks_transactions.sql
              - update_raw_finicity_transaction.sql
              - update_relabeled_transactions.sql
              - update_state_values.sql
              - update_stripe_authorization_event_log.sql
              - update_transaction.sql
              - update_values.sql
              - update_vertices.sql
      max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
    permissions:
      contents: write
      statuses: write
      id-token: write # aws-actions/configure-aws-credentials
    env:
      TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      PG_VERSION: 16 # pre-determined by pre-determined project
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      PLATFORM: ${{ matrix.target }}

    runs-on: [ self-hosted, us-east-2, x64 ]
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --init

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Configure AWS credentials # necessary to download artefacts
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role

      - name: Download Neon artifact
        uses: ./.github/actions/download
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Set up Connection String
        id: set-up-connstr
        run: |
          case "${{ matrix.target }}" in
            reuse_branch)
              CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
              ;;
            *)
              echo >&2 "Unknown target=${{ matrix.target }}"
              exit 1
              ;;
          esac

          CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"

          echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
          echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT

      - name: pgbench with custom-scripts
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: ${{ env.BUILD_TYPE }}
          test_selection: performance
          run_in_parallel: false
          save_perf_report: true
          extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
          pg_version: ${{ env.PG_VERSION }}
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

      - name: Create Allure report
        id: create-allure-report
        if: ${{ !cancelled() }}
        uses: ./.github/actions/allure-report-generate
        with:
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Post to a Slack channel
        if: ${{ github.event.schedule && failure() }}
        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
        with:
          channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
          slack-message: |
            Periodic large oltp tenant growth increase: ${{ job.status }}
            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
            <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
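The `join(matrix.custom_scripts, ' ')` expression flattens the list above into a single space-separated TEST_PGBENCH_CUSTOM_SCRIPTS value. Since none of the growth scripts carries an `@weight` suffix, pgbench gives each one its default weight of 1, so grow and update scripts are picked with equal probability during the 1-hour run. A hedged sketch of what that invocation amounts to (not the literal harness command; the 35 clients and 35 jobs come from the Python change at the end of this diff):

    # hedged sketch of the growth run, one -f per script in matrix.custom_scripts
    pgbench -n -M prepared --client=35 --jobs=35 -T 3600 -P 60 --progress-timestamp \
      -f grow_action_blocks.sql -f grow_action_kwargs.sql \
      -f update_action_blocks.sql -f update_vertices.sql \
      "$BENCHMARK_CONNSTR"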
@@ -0,0 +1,22 @@
-- add 100000 rows or approximately 11 MB to the action_blocks table
-- takes about 1 second
INSERT INTO workflows.action_blocks (
    id,
    uuid,
    created_at,
    status,
    function_signature,
    reference_id,
    blocking,
    run_synchronously
)
SELECT
    id,
    uuid_generate_v4(),
    now() - (random() * interval '100 days'), -- Random date within the last 100 days
    'CONDITIONS_NOT_MET',
    'function_signature_' || id, -- Create a unique function signature using id
    CASE WHEN random() > 0.5 THEN 'reference_' || id ELSE NULL END, -- 50% chance of being NULL
    true,
    CASE WHEN random() > 0.5 THEN true ELSE false END -- Random boolean value
FROM generate_series(1, 100000) AS id;
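Each grow script states the size it is expected to add per run. A quick way to sanity-check that claim against the live branch, using standard Postgres size functions (a sketch, not something this PR adds):

    # before and after one run of grow_action_blocks.sql, the delta should be roughly the ~11 MB noted above
    psql "$BENCHMARK_CONNSTR" -c \
      "SELECT pg_size_pretty(pg_total_relation_size('workflows.action_blocks'));"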
@@ -0,0 +1,11 @@
-- add 100000 rows or approximately 10 MB to the action_kwargs table
-- takes about 5 minutes
INSERT INTO workflows.action_kwargs (created_at, key, uuid, value_id, state_value_id, action_block_id)
SELECT
    now(), -- Using the default value for `created_at`
    'key_' || gs.id, -- Generating a unique key based on the id
    uuid_generate_v4(), -- Generating a new UUID for each row
    CASE WHEN gs.id % 2 = 0 THEN gs.id ELSE NULL END, -- Setting value_id for even ids
    CASE WHEN gs.id % 2 <> 0 THEN gs.id ELSE NULL END, -- Setting state_value_id for odd ids
    1 -- Setting action_block_id as 1 for simplicity
FROM generate_series(1, 100000) AS gs(id);

@@ -0,0 +1,56 @@
-- add 100000 rows or approx. 30 MB to the device_fingerprint_event table
-- takes about 4 minutes
INSERT INTO authentication.device_fingerprint_event (
    uuid,
    created_at,
    identity_uuid,
    fingerprint_request_id,
    fingerprint_id,
    confidence_score,
    ip_address,
    url,
    client_referrer,
    last_seen_at,
    raw_fingerprint_response,
    session_uuid,
    fingerprint_response,
    browser_version,
    browser_name,
    device,
    operating_system,
    operating_system_version,
    user_agent,
    ip_address_location_city,
    ip_address_location_region,
    ip_address_location_country_code,
    ip_address_location_latitude,
    ip_address_location_longitude,
    is_incognito
)
SELECT
    gen_random_uuid(), -- Generates a random UUID for primary key
    now() - (random() * interval '10 days'), -- Random timestamp within the last 10 days
    gen_random_uuid(), -- Random UUID for identity
    md5(gs::text), -- Simulates unique fingerprint request ID using `md5` hash of series number
    md5((gs + 10000)::text), -- Simulates unique fingerprint ID
    round(CAST(random() AS numeric), 2), -- Generates a random score between 0 and 1, cast `random()` to numeric
    '192.168.' || (random() * 255)::int || '.' || (random() * 255)::int, -- Random IP address
    'https://example.com/' || (gs % 1000), -- Random URL with series number suffix
    CASE WHEN random() < 0.5 THEN NULL ELSE 'https://referrer.com/' || (gs % 100)::text END, -- Random referrer, 50% chance of being NULL
    now() - (random() * interval '5 days'), -- Last seen timestamp within the last 5 days
    NULL, -- Keeping raw_fingerprint_response NULL for simplicity
    CASE WHEN random() < 0.3 THEN gen_random_uuid() ELSE NULL END, -- Session UUID, 30% chance of NULL
    NULL, -- Keeping fingerprint_response NULL for simplicity
    CASE WHEN random() < 0.5 THEN '93.0' ELSE '92.0' END, -- Random browser version
    CASE WHEN random() < 0.5 THEN 'Firefox' ELSE 'Chrome' END, -- Random browser name
    CASE WHEN random() < 0.5 THEN 'Desktop' ELSE 'Mobile' END, -- Random device type
    'Windows', -- Static value for operating system
    '10.0', -- Static value for operating system version
    'Mozilla/5.0', -- Static value for user agent
    'City ' || (gs % 1000)::text, -- Random city name
    'Region ' || (gs % 100)::text, -- Random region name
    'US', -- Static country code
    random() * 180 - 90, -- Random latitude between -90 and 90
    random() * 360 - 180, -- Random longitude between -180 and 180
    random() < 0.1 -- 10% chance of being incognito
FROM generate_series(1, 100000) AS gs;

test_runner/performance/large_synthetic_oltp/grow_edges.sql (new file)
@@ -0,0 +1,10 @@
-- add 100000 rows or approximately 11 MB to the edges table
-- takes about 1 minute
INSERT INTO workflows.edges (created_at, workflow_id, uuid, from_vertex_id, to_vertex_id)
SELECT
    now() - (random() * interval '365 days'), -- Random `created_at` timestamp in the last year
    (random() * 100)::int + 1, -- Random `workflow_id` between 1 and 100
    uuid_generate_v4(), -- Generate a new UUID for each row
    (random() * 100000)::bigint + 1, -- Random `from_vertex_id` between 1 and 100,000
    (random() * 100000)::bigint + 1 -- Random `to_vertex_id` between 1 and 100,000
FROM generate_series(1, 100000) AS gs; -- Generate 100,000 sequential IDs

@@ -0,0 +1,21 @@
-- add 100000 rows or approximately 10 MB to the hotel_rate_mapping table
-- takes about 1 second
INSERT INTO booking_inventory.hotel_rate_mapping (
    uuid,
    created_at,
    updated_at,
    hotel_rate_id,
    remote_id,
    source
)
SELECT
    uuid_generate_v4(), -- Unique UUID for each row
    now(), -- Created at timestamp
    now(), -- Updated at timestamp
    'rate_' || gs AS hotel_rate_id, -- Unique hotel_rate_id
    'remote_' || gs AS remote_id, -- Unique remote_id
    CASE WHEN gs % 3 = 0 THEN 'source_1'
         WHEN gs % 3 = 1 THEN 'source_2'
         ELSE 'source_3'
    END AS source -- Distributing sources among three options
FROM generate_series(1, 100000) AS gs;

@@ -0,0 +1,31 @@
-- add 100000 rows or approximately 20 MB to the ocr_pipeline_results_version table
-- takes about 1 second
INSERT INTO ocr.ocr_pipeline_results_version (
    id, transaction_id, operation_type, created_at, updated_at, s3_filename, completed_at, result,
    end_transaction_id, pipeline_type, is_async, callback, callback_kwargs, input, error, file_type, s3_bucket_name, pipeline_kwargs
)
SELECT
    gs.aid, -- id
    gs.aid, -- transaction_id (same as id for simplicity)
    (gs.aid % 5)::smallint + 1, -- operation_type (cyclic values from 1 to 5)
    now() - interval '1 day' * (random() * 30), -- created_at (random timestamp within the last 30 days)
    now() - interval '1 day' * (random() * 30), -- updated_at (random timestamp within the last 30 days)
    's3_file_' || gs.aid || '.txt', -- s3_filename (synthetic filename)
    now() - interval '1 day' * (random() * 30), -- completed_at (random timestamp within the last 30 days)
    '{}'::jsonb, -- result (empty JSON object)
    NULL, -- end_transaction_id (NULL)
    CASE (gs.aid % 3) -- pipeline_type (cyclic text values)
        WHEN 0 THEN 'OCR'
        WHEN 1 THEN 'PDF'
        ELSE 'Image'
    END,
    gs.aid % 2 = 0, -- is_async (alternating between true and false)
    'http://callback/' || gs.aid, -- callback (synthetic URL)
    '{}'::jsonb, -- callback_kwargs (empty JSON object)
    'Input text ' || gs.aid, -- input (synthetic input text)
    NULL, -- error (NULL)
    'pdf', -- file_type (default to 'pdf')
    'bucket_' || gs.aid % 10, -- s3_bucket_name (synthetic bucket names)
    '{}'::jsonb -- pipeline_kwargs (empty JSON object)
FROM
    generate_series(1, 100000) AS gs(aid);

@@ -0,0 +1,18 @@
-- add 100000 rows or approx. 20 MB to the priceline_raw_response table
-- takes about 20 seconds
INSERT INTO booking_inventory.priceline_raw_response (
    uuid, created_at, updated_at, url, base_url, path, method, params, request, response
)
SELECT
    gen_random_uuid(), -- Generate random UUIDs
    now() - (random() * interval '30 days'), -- Random creation time within the past 30 days
    now() - (random() * interval '30 days'), -- Random update time within the past 30 days
    'https://example.com/resource/' || gs, -- Construct a unique URL for each row
    'https://example.com', -- Base URL for all rows
    '/resource/' || gs, -- Path for each row
    CASE WHEN gs % 2 = 0 THEN 'GET' ELSE 'POST' END, -- Alternate between GET and POST methods
    'id=' || gs, -- Simple parameter pattern for each row
    '{}'::jsonb, -- Empty JSON object for request
    jsonb_build_object('status', 'success', 'data', gs) -- Construct a valid JSON response
FROM
    generate_series(1, 100000) AS gs;

@@ -0,0 +1,26 @@
-- add 100000 rows or approx. 1 MB to the relabeled_transactions table
-- takes about 1 second
INSERT INTO heron.relabeled_transactions (
    id,
    created_at,
    universal_transaction_id,
    raw_result,
    category,
    category_confidence,
    merchant,
    batch_id
)
SELECT
    gs.aid AS id,
    now() - (gs.aid % 1000) * interval '1 second' AS created_at,
    'txn_' || gs.aid AS universal_transaction_id,
    '{}'::jsonb AS raw_result,
    CASE WHEN gs.aid % 5 = 0 THEN 'grocery'
         WHEN gs.aid % 5 = 1 THEN 'electronics'
         WHEN gs.aid % 5 = 2 THEN 'clothing'
         WHEN gs.aid % 5 = 3 THEN 'utilities'
         ELSE NULL END AS category,
    ROUND(RANDOM()::numeric, 2) AS category_confidence,
    CASE WHEN gs.aid % 2 = 0 THEN 'Merchant_' || gs.aid % 20 ELSE NULL END AS merchant,
    gs.aid % 100 + 1 AS batch_id
FROM generate_series(1, 100000) AS gs(aid);

@@ -0,0 +1,9 @@
-- add 100000 rows or approx.10 MB to the state_values table
-- takes about 14 seconds
INSERT INTO workflows.state_values (key, workflow_id, state_type, value_id)
SELECT
    'key_' || gs::text, -- Key: Generate as 'key_1', 'key_2', etc.
    (gs - 1) / 1000 + 1, -- workflow_id: Distribute over a range (1000 workflows)
    'STATIC', -- state_type: Use constant 'STATIC' as defined in schema
    gs::bigint -- value_id: Use the same as the series value
FROM generate_series(1, 100000) AS gs; -- Generate 100,000 rows

test_runner/performance/large_synthetic_oltp/grow_values.sql (new file)
@@ -0,0 +1,30 @@
-- add 100000 rows or approx. 24 MB to the values table
-- takes about 126 seconds
INSERT INTO workflows.values (
    id,
    type,
    int_value,
    string_value,
    child_type,
    bool_value,
    uuid,
    numeric_value,
    workflow_id,
    jsonb_value,
    parent_value_id
)
SELECT
    gs AS id,
    'TYPE_A' AS type,
    CASE WHEN selector = 1 THEN gs ELSE NULL END AS int_value,
    CASE WHEN selector = 2 THEN 'string_value_' || gs::text ELSE NULL END AS string_value,
    'CHILD_TYPE_A' AS child_type, -- Always non-null
    CASE WHEN selector = 3 THEN (gs % 2 = 0) ELSE NULL END AS bool_value,
    uuid_generate_v4() AS uuid, -- Always non-null
    CASE WHEN selector = 4 THEN gs * 1.0 ELSE NULL END AS numeric_value,
    (array[1, 2, 3, 4, 5])[gs % 5 + 1] AS workflow_id, -- Use only existing workflow IDs
    CASE WHEN selector = 5 THEN ('{"key":' || gs::text || '}')::jsonb ELSE NULL END AS jsonb_value,
    (gs % 100) + 1 AS parent_value_id -- Always non-null
FROM
    generate_series(1, 100000) AS gs,
    (SELECT floor(random() * 5 + 1)::int AS selector) AS s;

@@ -0,0 +1,26 @@
-- add 100000 rows or approx. 18 MB to the vertices table
-- takes about 90 seconds
INSERT INTO workflows.vertices(
    uuid,
    created_at,
    condition_block_id,
    operator,
    has_been_visited,
    reference_id,
    workflow_id,
    meta_data,
    -- id,
    action_block_id
)
SELECT
    uuid_generate_v4() AS uuid,
    now() AS created_at,
    CASE WHEN (gs % 2 = 0) THEN gs % 10 ELSE NULL END AS condition_block_id, -- Every alternate row has a condition_block_id
    'operator_' || (gs % 10) AS operator, -- Cyclical operator values (e.g., operator_0, operator_1)
    false AS has_been_visited,
    'ref_' || gs AS reference_id, -- Unique reference_id for each row
    (gs % 1000) + 1 AS workflow_id, -- Random workflow_id values between 1 and 1000
    '{}'::jsonb AS meta_data, -- Empty JSON metadata
    -- gs AS id, -- default from sequence to get unique ID
    CASE WHEN (gs % 2 = 1) THEN gs ELSE NULL END AS action_block_id -- Complementary to condition_block_id
FROM generate_series(1, 100000) AS gs;
@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 200 kb in the accounting_coding_body_tracking_category_selection table
-- takes about 1 second
UPDATE accounting.accounting_coding_body_tracking_category_selection
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM accounting.accounting_coding_body_tracking_category_selection
    TABLESAMPLE SYSTEM (0.0005)
);
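All of the update scripts that follow share this pattern: sample a tiny fraction of the table with TABLESAMPLE SYSTEM and rewrite just those rows, which produces dead tuples, auto-vacuum work, and new layer files without much logical growth. Note that TABLESAMPLE SYSTEM takes a percentage of pages, not rows, so the "approximately N rows" comments are estimates that vary from run to run. A quick way to see what a given fraction actually touches (a sketch, not part of this PR):

    # how many rows does a 0.0005 % page sample of this table contain right now?
    psql "$BENCHMARK_CONNSTR" -c \
      "SELECT count(*) FROM accounting.accounting_coding_body_tracking_category_selection TABLESAMPLE SYSTEM (0.0005);"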
@@ -0,0 +1,9 @@
-- update approximately 9000 rows or 1 MB in the action_blocks table
-- takes about 1 second
UPDATE workflows.action_blocks
SET run_synchronously = NOT run_synchronously
WHERE ctid in (
    SELECT ctid
    FROM workflows.action_blocks
    TABLESAMPLE SYSTEM (0.001)
);

@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the action_kwargs table
-- takes about 1 second
UPDATE workflows.action_kwargs
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM workflows.action_kwargs
    TABLESAMPLE SYSTEM (0.0002)
);

@@ -0,0 +1,10 @@
-- update approximately 3000 rows or 500 KB in the denormalized_approval_workflow table
-- takes about 1 second
UPDATE approvals_v2.denormalized_approval_workflow
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM approvals_v2.denormalized_approval_workflow
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 1 MB in the device_fingerprint_event table
-- takes about 5 seconds
UPDATE authentication.device_fingerprint_event
SET is_incognito = NOT is_incognito
WHERE ctid in (
    SELECT ctid
    FROM authentication.device_fingerprint_event
    TABLESAMPLE SYSTEM (0.001)
);

@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 600 kb in the edges table
-- takes about 1 second
UPDATE workflows.edges
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM workflows.edges
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 10000 rows or 200 KB in the heron_transaction_enriched_log table
-- takes about 1 minutes
UPDATE heron.heron_transaction_enriched_log
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM heron.heron_transaction_enriched_log
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 1 MB in the heron_transaction_enrichment_requests table
-- takes about 2 minutes
UPDATE heron.heron_transaction_enrichment_requests
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM heron.heron_transaction_enrichment_requests
    TABLESAMPLE SYSTEM (0.0002)
);

@@ -0,0 +1,9 @@
-- update approximately 6000 rows or 600 kb in the hotel_rate_mapping table
-- takes about 1 second
UPDATE booking_inventory.hotel_rate_mapping
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM booking_inventory.hotel_rate_mapping
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 1 MB in the incoming_webhooks table
-- takes about 5 seconds
UPDATE webhook.incoming_webhooks
SET is_body_encrypted = NOT is_body_encrypted
WHERE ctid in (
    SELECT ctid
    FROM webhook.incoming_webhooks
    TABLESAMPLE SYSTEM (0.0002)
);

@@ -0,0 +1,9 @@
-- update approximately 1000 rows or 200 kb in the manual_transaction table
-- takes about 2 seconds
UPDATE banking.manual_transaction
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM banking.manual_transaction
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 1000 rows or 100 kb in the ml_receipt_matching_log table
-- takes about 1 second
UPDATE receipt.ml_receipt_matching_log
SET is_shadow_mode = NOT is_shadow_mode
WHERE ctid in (
    SELECT ctid
    FROM receipt.ml_receipt_matching_log
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 400 kb in the ocr_pipeline_results_version table
-- takes about 1 second
UPDATE ocr.ocr_pipeline_results_version
SET is_async = NOT is_async
WHERE ctid in (
    SELECT ctid
    FROM ocr.ocr_pipeline_results_version
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 3000 rows or 1 MB in the ocr_pipeline_step_results table
-- takes about 11 seconds
UPDATE ocr.ocr_pipeline_step_results
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM ocr.ocr_pipeline_step_results
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the ocr_pipeline_step_results_version table
-- takes about 40 seconds
UPDATE ocr.ocr_pipeline_step_results_version
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM ocr.ocr_pipeline_step_results_version
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the priceline_raw_response table
-- takes about 1 second
UPDATE booking_inventory.priceline_raw_response
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM booking_inventory.priceline_raw_response
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the quickbooks_transactions table
-- takes about 30 seconds
UPDATE accounting.quickbooks_transactions
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM accounting.quickbooks_transactions
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,15 @@
-- update approximately 6000 rows or 600 kb in the raw_finicity_transaction table
-- takes about 1 second
UPDATE banking.raw_finicity_transaction
SET raw_data =
    jsonb_set(
        raw_data,
        '{updated}',
        to_jsonb(now()),
        true
    )
WHERE ctid IN (
    SELECT ctid
    FROM banking.raw_finicity_transaction
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 8000 rows or 1 MB in the relabeled_transactions table
-- takes about 1 second
UPDATE heron.relabeled_transactions
SET created_at = now()
WHERE ctid in (
    SELECT ctid
    FROM heron.relabeled_transactions
    TABLESAMPLE SYSTEM (0.0005)
);

@@ -0,0 +1,9 @@
-- update approximately 8000 rows or 1 MB in the state_values table
-- takes about 2 minutes
UPDATE workflows.state_values
SET state_type = now()::text
WHERE ctid in (
    SELECT ctid
    FROM workflows.state_values
    TABLESAMPLE SYSTEM (0.0002)
);

@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 1 MB in the stripe_authorization_event_log table
-- takes about 5 minutes
UPDATE stripe.stripe_authorization_event_log
SET approved = NOT approved
WHERE ctid in (
    SELECT ctid
    FROM stripe.stripe_authorization_event_log
    TABLESAMPLE SYSTEM (0.0002)
);

@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 301 MB in the transaction table
-- takes about 90 seconds
UPDATE transaction.transaction
SET is_last = NOT is_last
WHERE ctid in (
    SELECT ctid
    FROM transaction.transaction
    TABLESAMPLE SYSTEM (0.0002)
);

@@ -0,0 +1,9 @@
-- update approximately 2500 rows or 1 MB in the values table
-- takes about 3 minutes
UPDATE workflows.values
SET bool_value = NOT bool_value
WHERE ctid in (
    SELECT ctid
    FROM workflows.values
    TABLESAMPLE SYSTEM (0.0002)
) AND bool_value IS NOT NULL;

@@ -0,0 +1,9 @@
-- update approximately 10000 rows or 2 MB in the vertices table
-- takes about 1 minute
UPDATE workflows.vertices
SET has_been_visited = NOT has_been_visited
WHERE ctid in (
    SELECT ctid
    FROM workflows.vertices
    TABLESAMPLE SYSTEM (0.0002)
);
@@ -31,7 +31,9 @@ def get_custom_scripts(
    return rv


def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
def run_test_pgbench(
    env: PgCompare, custom_scripts: str, duration: int, clients: int = 500, jobs: int = 100
):
    password = env.pg.default_options.get("password", None)
    options = env.pg.default_options.get("options", "")
    # drop password from the connection string by passing password=None and set password separately

@@ -46,8 +48,8 @@ def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
        "-n", # no explicit vacuum before the test - we want to rely on auto-vacuum
        "-M",
        "prepared",
        "--client=500",
        "--jobs=100",
        f"--client={clients}",
        f"--jobs={jobs}",
        f"-T{duration}",
        "-P60", # progress every minute
        "--progress-timestamp",

@@ -164,6 +166,12 @@ def test_perf_oltp_large_tenant_pgbench(
    run_test_pgbench(remote_compare, custom_scripts, duration)


@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_growth(remote_compare: PgCompare, duration: int):
    run_test_pgbench(remote_compare, " ".join(get_custom_scripts()), duration, 35, 35)


@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
    # run analyze, vacuum, re-index after the test and measure and report its duration
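For local debugging, the CI step above effectively boils down to a pytest invocation like the following. This is a sketch only: the exact entry point and required environment depend on the repo's test harness, and the connection string is a placeholder, not a value from this PR.

    export BENCHMARK_CONNSTR="postgres://user:pass@host/db"   # placeholder, point at a real branch endpoint
    export TEST_PG_BENCH_DURATIONS_MATRIX="1h"
    pytest -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth test_runner/performance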