Increase tenant size for large tenant oltp workload (#12260)

## Problem

- We run the large tenant OLTP workload with a fixed size (larger than
existing customers' workloads). Our customers' workloads are continuously
growing, and our testing should stay ahead of their production workloads.
- We want to touch all tables in the tenant's database with updates, so that
we simulate continuous change in layer files, as in a real production
workload.
- Our current OLTP benchmark uses a mixture of read and write transactions;
we also want a separate test run with read-only transactions.

## Summary of changes
- Modify the existing workload to add a separate run that uses read-only
pgbench custom scripts (see the pgbench sketch below).
- Create a new workload that:
  - grows all large tables in each run (for the reuse branch in the large
OLTP tenant's project)
  - updates a percentage of rows in all large tables in each run (to force
table bloat, auto-vacuum runs, and layer rebuilds in the pageservers)

Each run of the new workflow increases the logical database size by about
16 GB. We start with 6 runs per day, which gives us about 96-100 GB of growth
per day.
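
As a minimal sketch (not the repository's actual test harness), this is how the weighted custom scripts from the workflow matrix translate into a pgbench invocation. The script names and weights are taken from the read-only matrix entry below, the 1-hour duration mirrors `TEST_PG_BENCH_DURATIONS_MATRIX`, and the connection string is a placeholder.

```python
# Sketch only: how weighted custom scripts are handed to pgbench.
import subprocess

# read-only variant from the workflow matrix
custom_scripts = [
    "select_any_webhook_with_skew.sql@300",
    "select_recent_webhook.sql@397",
    "select_prefetch_webhook.sql@3",
]

cmd = [
    "pgbench",
    "-n",                    # no explicit vacuum before the test; rely on auto-vacuum
    "-M", "prepared",
    "--client=500",          # the growth workload lowers this to 35 clients / 35 jobs
    "--jobs=100",
    "-T3600",                # 1-hour run
    "-P60",                  # progress report every minute
    "--progress-timestamp",
]
for script in custom_scripts:
    cmd += ["-f", script]    # pgbench picks each script with probability proportional to its @weight

cmd.append("postgresql://user:password@host/dbname")  # placeholder connection string
subprocess.run(cmd, check=True)
```

pgbench selects one of the `-f` scripts per transaction with probability proportional to its `@weight` suffix, which is how the read/write mix (or the pure read-only mix) is controlled.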

---------

Co-authored-by: Alexander Lakhin <alexander.lakhin@neon.tech>
Author: Peter Bendel
Date: 2025-06-18 14:40:25 +02:00 (committed by GitHub)
Parent: e95f2f9a67
Commit: 7e711ede44
38 changed files with 679 additions and 4 deletions


@@ -33,11 +33,19 @@ jobs:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# test only read-only custom scripts in new branch without database maintenance
- target: new_branch
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
test_maintenance: false
# test all custom scripts in new branch with database maintenance
- target: new_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
test_maintenance: true
# test all custom scripts in reuse branch with database maintenance
- target: reuse_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
test_maintenance: true
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
permissions:
contents: write
statuses: write
@@ -145,6 +153,7 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark database maintenance
if: ${{ matrix.test_maintenance == 'true' }}
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}

.github/workflows/large_oltp_growth.yml (new file, 175 lines)

@@ -0,0 +1,175 @@
name: large oltp growth
# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ bodobolero/increase_large_oltp_workload ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 6 * * *' # 06:00 UTC
- cron: '0 8 * * *' # 08:00 UTC
- cron: '0 10 * * *' # 10:00 UTC
- cron: '0 12 * * *' # 12:00 UTC
- cron: '0 14 * * *' # 14:00 UTC
- cron: '0 16 * * *' # 16:00 UTC
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow globally because we need dedicated resources which only exist once
group: large-oltp-growth
cancel-in-progress: true
permissions:
contents: read
jobs:
oltp:
strategy:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# for now only grow the reuse branch, not the other branches.
- target: reuse_branch
custom_scripts:
- grow_action_blocks.sql
- grow_action_kwargs.sql
- grow_device_fingerprint_event.sql
- grow_edges.sql
- grow_hotel_rate_mapping.sql
- grow_ocr_pipeline_results_version.sql
- grow_priceline_raw_response.sql
- grow_relabled_transactions.sql
- grow_state_values.sql
- grow_values.sql
- grow_vertices.sql
- update_accounting_coding_body_tracking_category_selection.sql
- update_action_blocks.sql
- update_action_kwargs.sql
- update_denormalized_approval_workflow.sql
- update_device_fingerprint_event.sql
- update_edges.sql
- update_heron_transaction_enriched_log.sql
- update_heron_transaction_enrichment_requests.sql
- update_hotel_rate_mapping.sql
- update_incoming_webhooks.sql
- update_manual_transaction.sql
- update_ml_receipt_matching_log.sql
- update_ocr_pipeine_results_version.sql
- update_orc_pipeline_step_results.sql
- update_orc_pipeline_step_results_version.sql
- update_priceline_raw_response.sql
- update_quickbooks_transactions.sql
- update_raw_finicity_transaction.sql
- update_relabeled_transactions.sql
- update_state_values.sql
- update_stripe_authorization_event_log.sql
- update_transaction.sql
- update_values.sql
- update_vertices.sql
max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: 16 # pre-determined by pre-determined project
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
PLATFORM: ${{ matrix.target }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Configure AWS credentials # necessary to download artefacts
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${{ matrix.target }}" in
reuse_branch)
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
;;
*)
echo >&2 "Unknown target=${{ matrix.target }}"
exit 1
;;
esac
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
- name: pgbench with custom-scripts
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
pg_version: ${{ env.PG_VERSION }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic large oltp tenant growth increase: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}


@@ -0,0 +1,22 @@
-- add 100000 rows or approximately 11 MB to the action_blocks table
-- takes about 1 second
INSERT INTO workflows.action_blocks (
id,
uuid,
created_at,
status,
function_signature,
reference_id,
blocking,
run_synchronously
)
SELECT
id,
uuid_generate_v4(),
now() - (random() * interval '100 days'), -- Random date within the last 100 days
'CONDITIONS_NOT_MET',
'function_signature_' || id, -- Create a unique function signature using id
CASE WHEN random() > 0.5 THEN 'reference_' || id ELSE NULL END, -- 50% chance of being NULL
true,
CASE WHEN random() > 0.5 THEN true ELSE false END -- Random boolean value
FROM generate_series(1, 100000) AS id;


@@ -0,0 +1,11 @@
-- add 100000 rows or approximately 10 MB to the action_kwargs table
-- takes about 5 minutes
INSERT INTO workflows.action_kwargs (created_at, key, uuid, value_id, state_value_id, action_block_id)
SELECT
now(), -- Using the default value for `created_at`
'key_' || gs.id, -- Generating a unique key based on the id
uuid_generate_v4(), -- Generating a new UUID for each row
CASE WHEN gs.id % 2 = 0 THEN gs.id ELSE NULL END, -- Setting value_id for even ids
CASE WHEN gs.id % 2 <> 0 THEN gs.id ELSE NULL END, -- Setting state_value_id for odd ids
1 -- Setting action_block_id as 1 for simplicity
FROM generate_series(1, 100000) AS gs(id);


@@ -0,0 +1,56 @@
-- add 100000 rows or approx. 30 MB to the device_fingerprint_event table
-- takes about 4 minutes
INSERT INTO authentication.device_fingerprint_event (
uuid,
created_at,
identity_uuid,
fingerprint_request_id,
fingerprint_id,
confidence_score,
ip_address,
url,
client_referrer,
last_seen_at,
raw_fingerprint_response,
session_uuid,
fingerprint_response,
browser_version,
browser_name,
device,
operating_system,
operating_system_version,
user_agent,
ip_address_location_city,
ip_address_location_region,
ip_address_location_country_code,
ip_address_location_latitude,
ip_address_location_longitude,
is_incognito
)
SELECT
gen_random_uuid(), -- Generates a random UUID for primary key
now() - (random() * interval '10 days'), -- Random timestamp within the last 10 days
gen_random_uuid(), -- Random UUID for identity
md5(gs::text), -- Simulates unique fingerprint request ID using `md5` hash of series number
md5((gs + 10000)::text), -- Simulates unique fingerprint ID
round(CAST(random() AS numeric), 2), -- Generates a random score between 0 and 1, cast `random()` to numeric
'192.168.' || (random() * 255)::int || '.' || (random() * 255)::int, -- Random IP address
'https://example.com/' || (gs % 1000), -- Random URL with series number suffix
CASE WHEN random() < 0.5 THEN NULL ELSE 'https://referrer.com/' || (gs % 100)::text END, -- Random referrer, 50% chance of being NULL
now() - (random() * interval '5 days'), -- Last seen timestamp within the last 5 days
NULL, -- Keeping raw_fingerprint_response NULL for simplicity
CASE WHEN random() < 0.3 THEN gen_random_uuid() ELSE NULL END, -- Session UUID, 30% chance of NULL
NULL, -- Keeping fingerprint_response NULL for simplicity
CASE WHEN random() < 0.5 THEN '93.0' ELSE '92.0' END, -- Random browser version
CASE WHEN random() < 0.5 THEN 'Firefox' ELSE 'Chrome' END, -- Random browser name
CASE WHEN random() < 0.5 THEN 'Desktop' ELSE 'Mobile' END, -- Random device type
'Windows', -- Static value for operating system
'10.0', -- Static value for operating system version
'Mozilla/5.0', -- Static value for user agent
'City ' || (gs % 1000)::text, -- Random city name
'Region ' || (gs % 100)::text, -- Random region name
'US', -- Static country code
random() * 180 - 90, -- Random latitude between -90 and 90
random() * 360 - 180, -- Random longitude between -180 and 180
random() < 0.1 -- 10% chance of being incognito
FROM generate_series(1, 100000) AS gs;


@@ -0,0 +1,10 @@
-- add 100000 rows or approximately 11 MB to the edges table
-- takes about 1 minute
INSERT INTO workflows.edges (created_at, workflow_id, uuid, from_vertex_id, to_vertex_id)
SELECT
now() - (random() * interval '365 days'), -- Random `created_at` timestamp in the last year
(random() * 100)::int + 1, -- Random `workflow_id` between 1 and 100
uuid_generate_v4(), -- Generate a new UUID for each row
(random() * 100000)::bigint + 1, -- Random `from_vertex_id` between 1 and 100,000
(random() * 100000)::bigint + 1 -- Random `to_vertex_id` between 1 and 100,000
FROM generate_series(1, 100000) AS gs; -- Generate 100,000 sequential IDs


@@ -0,0 +1,21 @@
-- add 100000 rows or approximately 10 MB to the hotel_rate_mapping table
-- takes about 1 second
INSERT INTO booking_inventory.hotel_rate_mapping (
uuid,
created_at,
updated_at,
hotel_rate_id,
remote_id,
source
)
SELECT
uuid_generate_v4(), -- Unique UUID for each row
now(), -- Created at timestamp
now(), -- Updated at timestamp
'rate_' || gs AS hotel_rate_id, -- Unique hotel_rate_id
'remote_' || gs AS remote_id, -- Unique remote_id
CASE WHEN gs % 3 = 0 THEN 'source_1'
WHEN gs % 3 = 1 THEN 'source_2'
ELSE 'source_3'
END AS source -- Distributing sources among three options
FROM generate_series(1, 100000) AS gs;


@@ -0,0 +1,31 @@
-- add 100000 rows or approximately 20 MB to the ocr_pipeline_results_version table
-- takes about 1 second
INSERT INTO ocr.ocr_pipeline_results_version (
id, transaction_id, operation_type, created_at, updated_at, s3_filename, completed_at, result,
end_transaction_id, pipeline_type, is_async, callback, callback_kwargs, input, error, file_type, s3_bucket_name, pipeline_kwargs
)
SELECT
gs.aid, -- id
gs.aid, -- transaction_id (same as id for simplicity)
(gs.aid % 5)::smallint + 1, -- operation_type (cyclic values from 1 to 5)
now() - interval '1 day' * (random() * 30), -- created_at (random timestamp within the last 30 days)
now() - interval '1 day' * (random() * 30), -- updated_at (random timestamp within the last 30 days)
's3_file_' || gs.aid || '.txt', -- s3_filename (synthetic filename)
now() - interval '1 day' * (random() * 30), -- completed_at (random timestamp within the last 30 days)
'{}'::jsonb, -- result (empty JSON object)
NULL, -- end_transaction_id (NULL)
CASE (gs.aid % 3) -- pipeline_type (cyclic text values)
WHEN 0 THEN 'OCR'
WHEN 1 THEN 'PDF'
ELSE 'Image'
END,
gs.aid % 2 = 0, -- is_async (alternating between true and false)
'http://callback/' || gs.aid, -- callback (synthetic URL)
'{}'::jsonb, -- callback_kwargs (empty JSON object)
'Input text ' || gs.aid, -- input (synthetic input text)
NULL, -- error (NULL)
'pdf', -- file_type (default to 'pdf')
'bucket_' || gs.aid % 10, -- s3_bucket_name (synthetic bucket names)
'{}'::jsonb -- pipeline_kwargs (empty JSON object)
FROM
generate_series(1, 100000) AS gs(aid);


@@ -0,0 +1,18 @@
-- add 100000 rows or approx. 20 MB to the priceline_raw_response table
-- takes about 20 seconds
INSERT INTO booking_inventory.priceline_raw_response (
uuid, created_at, updated_at, url, base_url, path, method, params, request, response
)
SELECT
gen_random_uuid(), -- Generate random UUIDs
now() - (random() * interval '30 days'), -- Random creation time within the past 30 days
now() - (random() * interval '30 days'), -- Random update time within the past 30 days
'https://example.com/resource/' || gs, -- Construct a unique URL for each row
'https://example.com', -- Base URL for all rows
'/resource/' || gs, -- Path for each row
CASE WHEN gs % 2 = 0 THEN 'GET' ELSE 'POST' END, -- Alternate between GET and POST methods
'id=' || gs, -- Simple parameter pattern for each row
'{}'::jsonb, -- Empty JSON object for request
jsonb_build_object('status', 'success', 'data', gs) -- Construct a valid JSON response
FROM
generate_series(1, 100000) AS gs;


@@ -0,0 +1,26 @@
-- add 100000 rows or approx. 1 MB to the relabeled_transactions table
-- takes about 1 second
INSERT INTO heron.relabeled_transactions (
id,
created_at,
universal_transaction_id,
raw_result,
category,
category_confidence,
merchant,
batch_id
)
SELECT
gs.aid AS id,
now() - (gs.aid % 1000) * interval '1 second' AS created_at,
'txn_' || gs.aid AS universal_transaction_id,
'{}'::jsonb AS raw_result,
CASE WHEN gs.aid % 5 = 0 THEN 'grocery'
WHEN gs.aid % 5 = 1 THEN 'electronics'
WHEN gs.aid % 5 = 2 THEN 'clothing'
WHEN gs.aid % 5 = 3 THEN 'utilities'
ELSE NULL END AS category,
ROUND(RANDOM()::numeric, 2) AS category_confidence,
CASE WHEN gs.aid % 2 = 0 THEN 'Merchant_' || gs.aid % 20 ELSE NULL END AS merchant,
gs.aid % 100 + 1 AS batch_id
FROM generate_series(1, 100000) AS gs(aid);


@@ -0,0 +1,9 @@
-- add 100000 rows or approx. 10 MB to the state_values table
-- takes about 14 seconds
INSERT INTO workflows.state_values (key, workflow_id, state_type, value_id)
SELECT
'key_' || gs::text, -- Key: Generate as 'key_1', 'key_2', etc.
(gs - 1) / 1000 + 1, -- workflow_id: Distribute over a range (1000 workflows)
'STATIC', -- state_type: Use constant 'STATIC' as defined in schema
gs::bigint -- value_id: Use the same as the series value
FROM generate_series(1, 100000) AS gs; -- Generate 100,000 rows


@@ -0,0 +1,30 @@
-- add 100000 rows or approx. 24 MB to the values table
-- takes about 126 seconds
INSERT INTO workflows.values (
id,
type,
int_value,
string_value,
child_type,
bool_value,
uuid,
numeric_value,
workflow_id,
jsonb_value,
parent_value_id
)
SELECT
gs AS id,
'TYPE_A' AS type,
CASE WHEN selector = 1 THEN gs ELSE NULL END AS int_value,
CASE WHEN selector = 2 THEN 'string_value_' || gs::text ELSE NULL END AS string_value,
'CHILD_TYPE_A' AS child_type, -- Always non-null
CASE WHEN selector = 3 THEN (gs % 2 = 0) ELSE NULL END AS bool_value,
uuid_generate_v4() AS uuid, -- Always non-null
CASE WHEN selector = 4 THEN gs * 1.0 ELSE NULL END AS numeric_value,
(array[1, 2, 3, 4, 5])[gs % 5 + 1] AS workflow_id, -- Use only existing workflow IDs
CASE WHEN selector = 5 THEN ('{"key":' || gs::text || '}')::jsonb ELSE NULL END AS jsonb_value,
(gs % 100) + 1 AS parent_value_id -- Always non-null
FROM
generate_series(1, 100000) AS gs,
(SELECT floor(random() * 5 + 1)::int AS selector) AS s;


@@ -0,0 +1,26 @@
-- add 100000 rows or approx. 18 MB to the vertices table
-- takes about 90 seconds
INSERT INTO workflows.vertices(
uuid,
created_at,
condition_block_id,
operator,
has_been_visited,
reference_id,
workflow_id,
meta_data,
-- id,
action_block_id
)
SELECT
uuid_generate_v4() AS uuid,
now() AS created_at,
CASE WHEN (gs % 2 = 0) THEN gs % 10 ELSE NULL END AS condition_block_id, -- Every alternate row has a condition_block_id
'operator_' || (gs % 10) AS operator, -- Cyclical operator values (e.g., operator_0, operator_1)
false AS has_been_visited,
'ref_' || gs AS reference_id, -- Unique reference_id for each row
(gs % 1000) + 1 AS workflow_id, -- Random workflow_id values between 1 and 1000
'{}'::jsonb AS meta_data, -- Empty JSON metadata
-- gs AS id, -- default from sequence to get unique ID
CASE WHEN (gs % 2 = 1) THEN gs ELSE NULL END AS action_block_id -- Complementary to condition_block_id
FROM generate_series(1, 100000) AS gs;


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 200 kb in the accounting_coding_body_tracking_category_selection table
-- takes about 1 second
UPDATE accounting.accounting_coding_body_tracking_category_selection
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM accounting.accounting_coding_body_tracking_category_selection
TABLESAMPLE SYSTEM (0.0005)
);
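
The update_*.sql scripts in this commit (this one and those that follow) all share the pattern above: `TABLESAMPLE SYSTEM (p)` samples roughly p percent of the table's pages, and the `ctid IN (...)` subquery limits the UPDATE to rows on those pages, so each run rewrites a small random slice of the table and keeps generating bloat, auto-vacuum work, and layer rebuilds. As a rough illustration of how the sample percentage relates to the "approximately N rows" comments (the row count below is an assumption, not a figure from the benchmark database):

```python
# Illustrative arithmetic only; the 400M row count is an assumption.
def estimated_rows_touched(total_rows: int, sample_percent: float) -> float:
    """TABLESAMPLE SYSTEM(p) visits ~p% of the table's pages, hence ~p% of its rows."""
    return total_rows * sample_percent / 100.0

# A table of ~400 million rows sampled at 0.0005 % touches roughly 2000 rows,
# in line with the "approximately 2000 rows" comment in the script above.
print(estimated_rows_touched(400_000_000, 0.0005))  # -> 2000.0
```

Because SYSTEM sampling is page-based, the number of rows actually touched varies between runs, which is acceptable here since the goal is steady churn rather than an exact row count.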


@@ -0,0 +1,9 @@
-- update approximately 9000 rows or 1 MB in the action_blocks table
-- takes about 1 second
UPDATE workflows.action_blocks
SET run_synchronously = NOT run_synchronously
WHERE ctid in (
SELECT ctid
FROM workflows.action_blocks
TABLESAMPLE SYSTEM (0.001)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the action_kwargs table
-- takes about 1 second
UPDATE workflows.action_kwargs
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM workflows.action_kwargs
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,10 @@
-- update approximately 3000 rows or 500 KB in the denormalized_approval_workflow table
-- takes about 1 second
UPDATE approvals_v2.denormalized_approval_workflow
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM approvals_v2.denormalized_approval_workflow
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 1 MB in the device_fingerprint_event table
-- takes about 5 seconds
UPDATE authentication.device_fingerprint_event
SET is_incognito = NOT is_incognito
WHERE ctid in (
SELECT ctid
FROM authentication.device_fingerprint_event
TABLESAMPLE SYSTEM (0.001)
);


@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 600 kb in the edges table
-- takes about 1 second
UPDATE workflows.edges
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM workflows.edges
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 10000 rows or 200 KB in the heron_transaction_enriched_log table
-- takes about 1 minute
UPDATE heron.heron_transaction_enriched_log
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM heron.heron_transaction_enriched_log
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 1 MB in the heron_transaction_enrichment_requests table
-- takes about 2 minutes
UPDATE heron.heron_transaction_enrichment_requests
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM heron.heron_transaction_enrichment_requests
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 6000 rows or 600 kb in the hotel_rate_mapping table
-- takes about 1 second
UPDATE booking_inventory.hotel_rate_mapping
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM booking_inventory.hotel_rate_mapping
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 1 MB in the incoming_webhooks table
-- takes about 5 seconds
UPDATE webhook.incoming_webhooks
SET is_body_encrypted = NOT is_body_encrypted
WHERE ctid in (
SELECT ctid
FROM webhook.incoming_webhooks
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 1000 rows or 200 kb in the manual_transaction table
-- takes about 2 seconds
UPDATE banking.manual_transaction
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM banking.manual_transaction
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 1000 rows or 100 kb in the ml_receipt_matching_log table
-- takes about 1 second
UPDATE receipt.ml_receipt_matching_log
SET is_shadow_mode = NOT is_shadow_mode
WHERE ctid in (
SELECT ctid
FROM receipt.ml_receipt_matching_log
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 400 kb in the ocr_pipeline_results_version table
-- takes about 1 second
UPDATE ocr.ocr_pipeline_results_version
SET is_async = NOT is_async
WHERE ctid in (
SELECT ctid
FROM ocr.ocr_pipeline_results_version
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 3000 rows or 1 MB in the ocr_pipeline_step_results table
-- takes about 11 seconds
UPDATE ocr.ocr_pipeline_step_results
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM ocr.ocr_pipeline_step_results
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the ocr_pipeline_step_results_version table
-- takes about 40 seconds
UPDATE ocr.ocr_pipeline_step_results_version
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM ocr.ocr_pipeline_step_results_version
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the priceline_raw_response table
-- takes about 1 second
UPDATE booking_inventory.priceline_raw_response
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM booking_inventory.priceline_raw_response
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 5000 rows or 1 MB in the quickbooks_transactions table
-- takes about 30 seconds
UPDATE accounting.quickbooks_transactions
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM accounting.quickbooks_transactions
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,15 @@
-- update approximately 6000 rows or 600 kb in the raw_finicity_transaction table
-- takes about 1 second
UPDATE banking.raw_finicity_transaction
SET raw_data =
jsonb_set(
raw_data,
'{updated}',
to_jsonb(now()),
true
)
WHERE ctid IN (
SELECT ctid
FROM banking.raw_finicity_transaction
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 8000 rows or 1 MB in the relabeled_transactions table
-- takes about 1 second
UPDATE heron.relabeled_transactions
SET created_at = now()
WHERE ctid in (
SELECT ctid
FROM heron.relabeled_transactions
TABLESAMPLE SYSTEM (0.0005)
);


@@ -0,0 +1,9 @@
-- update approximately 8000 rows or 1 MB in the state_values table
-- takes about 2 minutes
UPDATE workflows.state_values
SET state_type = now()::text
WHERE ctid in (
SELECT ctid
FROM workflows.state_values
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 4000 rows or 1 MB in the stripe_authorization_event_log table
-- takes about 5 minutes
UPDATE stripe.stripe_authorization_event_log
SET approved = NOT approved
WHERE ctid in (
SELECT ctid
FROM stripe.stripe_authorization_event_log
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 2000 rows or 301 MB in the transaction table
-- takes about 90 seconds
UPDATE transaction.transaction
SET is_last = NOT is_last
WHERE ctid in (
SELECT ctid
FROM transaction.transaction
TABLESAMPLE SYSTEM (0.0002)
);


@@ -0,0 +1,9 @@
-- update approximately 2500 rows or 1 MB in the values table
-- takes about 3 minutes
UPDATE workflows.values
SET bool_value = NOT bool_value
WHERE ctid in (
SELECT ctid
FROM workflows.values
TABLESAMPLE SYSTEM (0.0002)
) AND bool_value IS NOT NULL;


@@ -0,0 +1,9 @@
-- update approximately 10000 rows or 2 MB in the vertices table
-- takes about 1 minute
UPDATE workflows.vertices
SET has_been_visited = NOT has_been_visited
WHERE ctid in (
SELECT ctid
FROM workflows.vertices
TABLESAMPLE SYSTEM (0.0002)
);


@@ -31,7 +31,9 @@ def get_custom_scripts(
return rv
def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
def run_test_pgbench(
env: PgCompare, custom_scripts: str, duration: int, clients: int = 500, jobs: int = 100
):
password = env.pg.default_options.get("password", None)
options = env.pg.default_options.get("options", "")
# drop password from the connection string by passing password=None and set password separately
@@ -46,8 +48,8 @@ def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
"-n", # no explicit vacuum before the test - we want to rely on auto-vacuum
"-M",
"prepared",
"--client=500",
"--jobs=100",
f"--client={clients}",
f"--jobs={jobs}",
f"-T{duration}",
"-P60", # progress every minute
"--progress-timestamp",
@@ -164,6 +166,12 @@ def test_perf_oltp_large_tenant_pgbench(
run_test_pgbench(remote_compare, custom_scripts, duration)
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_growth(remote_compare: PgCompare, duration: int):
run_test_pgbench(remote_compare, " ".join(get_custom_scripts()), duration, 35, 35)
@pytest.mark.remote_cluster
def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
# run analyze, vacuum, re-index after the test and measure and report its duration