diff --git a/.github/workflows/large_oltp_benchmark.yml b/.github/workflows/large_oltp_benchmark.yml
index 42dcc8e918..050b9047c7 100644
--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -33,11 +33,19 @@ jobs:
       fail-fast: false # allow other variants to continue even if one fails
       matrix:
         include:
+          # test only read-only custom scripts in new branch without database maintenance
+          - target: new_branch
+            custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
+            test_maintenance: false
+          # test all custom scripts in new branch with database maintenance
           - target: new_branch
             custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
+            test_maintenance: true
+          # test all custom scripts in reuse branch with database maintenance
           - target: reuse_branch
             custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
-      max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
+            test_maintenance: true
+      max-parallel: 1 # we want to run each benchmark sequentially to avoid noisy neighbors on shared storage (PS, SK)
   permissions:
     contents: write
     statuses: write
@@ -145,6 +153,7 @@ jobs:
           PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

       - name: Benchmark database maintenance
+        if: ${{ matrix.test_maintenance == true }}
         uses: ./.github/actions/run-python-test-set
         with:
           build_type: ${{ env.BUILD_TYPE }}
diff --git a/.github/workflows/large_oltp_growth.yml b/.github/workflows/large_oltp_growth.yml
new file mode 100644
index 0000000000..8ca640d6ef
--- /dev/null
+++ b/.github/workflows/large_oltp_growth.yml
@@ -0,0 +1,175 @@
+name: large oltp growth
+# workflow to grow the reuse branch of the large oltp benchmark continuously (about 16 GB per run)
+
+on:
+  # uncomment to run on push for debugging your PR
+  # push:
+  #   branches: [ bodobolero/increase_large_oltp_workload ]
+
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    # ┌───────────── minute (0 - 59)
+    # │ ┌───────────── hour (0 - 23)
+    # │ │ ┌───────────── day of the month (1 - 31)
+    # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+    # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron: '0 6 * * *' # 06:00 UTC
+    - cron: '0 8 * * *' # 08:00 UTC
+    - cron: '0 10 * * *' # 10:00 UTC
+    - cron: '0 12 * * *' # 12:00 UTC
+    - cron: '0 14 * * *' # 14:00 UTC
+    - cron: '0 16 * * *' # 16:00 UTC
+  workflow_dispatch: # adds the ability to run this manually
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0}
+
+concurrency:
+  # Allow only one workflow globally because we need dedicated resources which only exist once
+  group: large-oltp-growth
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  oltp:
+    strategy:
+      fail-fast: false # allow other variants to continue even if one fails
+      matrix:
+        include:
+          # for now only grow the reuse branch, not the other branches.
+          - target: reuse_branch
+            custom_scripts:
+              - grow_action_blocks.sql
+              - grow_action_kwargs.sql
+              - grow_device_fingerprint_event.sql
+              - grow_edges.sql
+              - grow_hotel_rate_mapping.sql
+              - grow_ocr_pipeline_results_version.sql
+              - grow_priceline_raw_response.sql
+              - grow_relabled_transactions.sql
+              - grow_state_values.sql
+              - grow_values.sql
+              - grow_vertices.sql
+              - update_accounting_coding_body_tracking_category_selection.sql
+              - update_action_blocks.sql
+              - update_action_kwargs.sql
+              - update_denormalized_approval_workflow.sql
+              - update_device_fingerprint_event.sql
+              - update_edges.sql
+              - update_heron_transaction_enriched_log.sql
+              - update_heron_transaction_enrichment_requests.sql
+              - update_hotel_rate_mapping.sql
+              - update_incoming_webhooks.sql
+              - update_manual_transaction.sql
+              - update_ml_receipt_matching_log.sql
+              - update_ocr_pipeine_results_version.sql
+              - update_orc_pipeline_step_results.sql
+              - update_orc_pipeline_step_results_version.sql
+              - update_priceline_raw_response.sql
+              - update_quickbooks_transactions.sql
+              - update_raw_finicity_transaction.sql
+              - update_relabeled_transactions.sql
+              - update_state_values.sql
+              - update_stripe_authorization_event_log.sql
+              - update_transaction.sql
+              - update_values.sql
+              - update_vertices.sql
+      max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
+    env:
+      TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
+      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      PG_VERSION: 16 # pre-determined by the pre-created project
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      PLATFORM: ${{ matrix.target }}
+
+    runs-on: [ self-hosted, us-east-2, x64 ]
+    container:
+      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+      options: --init
+
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Configure AWS credentials # necessary to download artefacts
+        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+        with:
+          aws-region: eu-central-1
+          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          role-duration-seconds: 18000 # 5 hours is currently the max associated with the IAM role
+
+      - name: Download Neon artifact
+        uses: ./.github/actions/download
+        with:
+          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+          path: /tmp/neon/
+          prefix: latest
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+      - name: Set up Connection String
+        id: set-up-connstr
+        run: |
+          case "${{ matrix.target }}" in
+            reuse_branch)
+              CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
+              ;;
+            *)
+              echo >&2 "Unknown target=${{ matrix.target }}"
+              exit 1
+              ;;
+          esac
+
+          CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
+
+          echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
+          echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
+
+      - name: pgbench with custom-scripts
+        uses: ./.github/actions/run-python-test-set
+        with:
+          build_type: ${{ env.BUILD_TYPE }}
+          test_selection: performance
+          run_in_parallel: false
+          save_perf_report: true
+          extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
+          pg_version: ${{ env.PG_VERSION }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        env:
+          BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+
+      - name: Create Allure report
+        id: create-allure-report
+        if: ${{ !cancelled() }}
+        uses: ./.github/actions/allure-report-generate
+        with:
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+      - name: Post to a Slack channel
+        if: ${{ github.event.schedule && failure() }}
+        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
+        with:
+          channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
+          slack-message: |
+            Periodic large oltp tenant growth run: ${{ job.status }}
+            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
+            <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
diff --git a/test_runner/performance/large_synthetic_oltp/grow_action_blocks.sql b/test_runner/performance/large_synthetic_oltp/grow_action_blocks.sql
new file mode 100644
index 0000000000..0860b76331
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_action_blocks.sql
@@ -0,0 +1,22 @@
+-- add 100000 rows or approximately 11 MB to the action_blocks table
+-- takes about 1 second
+INSERT INTO workflows.action_blocks (
+    id,
+    uuid,
+    created_at,
+    status,
+    function_signature,
+    reference_id,
+    blocking,
+    run_synchronously
+)
+SELECT
+    id,
+    uuid_generate_v4(),
+    now() - (random() * interval '100 days'), -- Random date within the last 100 days
+    'CONDITIONS_NOT_MET',
+    'function_signature_' || id, -- Create a unique function signature using id
+    CASE WHEN random() > 0.5 THEN 'reference_' || id ELSE NULL END, -- 50% chance of being NULL
+    true,
+    CASE WHEN random() > 0.5 THEN true ELSE false END -- Random boolean value
+FROM generate_series(1, 100000) AS id;
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_action_kwargs.sql b/test_runner/performance/large_synthetic_oltp/grow_action_kwargs.sql
new file mode 100644
index 0000000000..8a2b7c398a
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_action_kwargs.sql
@@ -0,0 +1,11 @@
+-- add 100000 rows or approximately 10 MB to the action_kwargs table
+-- takes about 5 minutes
+INSERT INTO workflows.action_kwargs (created_at, key, uuid, value_id, state_value_id, action_block_id)
+SELECT
+    now(), -- Using the default value for `created_at`
+    'key_' || gs.id, -- Generating a unique key based on the id
+    uuid_generate_v4(), -- Generating a new UUID for each row
+    CASE WHEN gs.id % 2 = 0 THEN gs.id ELSE NULL END, -- Setting value_id for even ids
+    CASE WHEN gs.id % 2 <> 0 THEN gs.id ELSE NULL END, -- Setting state_value_id for odd ids
+    1 -- Setting action_block_id as 1 for simplicity
+FROM generate_series(1, 100000) AS gs(id);
\ No newline at end of file
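The grow scripts mix two UUID generators: grow_action_blocks.sql and grow_action_kwargs.sql above call uuid_generate_v4() from the uuid-ossp extension, while grow_device_fingerprint_event.sql below uses gen_random_uuid(), which is built into PostgreSQL 13 and later. A minimal pre-flight sketch, assuming sufficient privileges on the target branch, to confirm both are available before the schedule kicks in:

-- pre-flight check (sketch); CREATE EXTENSION is a no-op if uuid-ossp is already installed
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
SELECT uuid_generate_v4() AS ossp_uuid,   -- errors if uuid-ossp is missing
       gen_random_uuid() AS builtin_uuid; -- errors on PostgreSQL < 13 without pgcrypto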
diff --git a/test_runner/performance/large_synthetic_oltp/grow_device_fingerprint_event.sql b/test_runner/performance/large_synthetic_oltp/grow_device_fingerprint_event.sql
new file mode 100644
index 0000000000..1ef38451b7
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_device_fingerprint_event.sql
@@ -0,0 +1,56 @@
+-- add 100000 rows or approx. 30 MB to the device_fingerprint_event table
+-- takes about 4 minutes
+INSERT INTO authentication.device_fingerprint_event (
+    uuid,
+    created_at,
+    identity_uuid,
+    fingerprint_request_id,
+    fingerprint_id,
+    confidence_score,
+    ip_address,
+    url,
+    client_referrer,
+    last_seen_at,
+    raw_fingerprint_response,
+    session_uuid,
+    fingerprint_response,
+    browser_version,
+    browser_name,
+    device,
+    operating_system,
+    operating_system_version,
+    user_agent,
+    ip_address_location_city,
+    ip_address_location_region,
+    ip_address_location_country_code,
+    ip_address_location_latitude,
+    ip_address_location_longitude,
+    is_incognito
+)
+SELECT
+    gen_random_uuid(), -- Generates a random UUID for primary key
+    now() - (random() * interval '10 days'), -- Random timestamp within the last 10 days
+    gen_random_uuid(), -- Random UUID for identity
+    md5(gs::text), -- Simulates unique fingerprint request ID using `md5` hash of series number
+    md5((gs + 10000)::text), -- Simulates unique fingerprint ID
+    round(CAST(random() AS numeric), 2), -- Generates a random score between 0 and 1, cast `random()` to numeric
+    '192.168.' || (random() * 255)::int || '.' || (random() * 255)::int, -- Random IP address
+    'https://example.com/' || (gs % 1000), -- Random URL with series number suffix
+    CASE WHEN random() < 0.5 THEN NULL ELSE 'https://referrer.com/' || (gs % 100)::text END, -- Random referrer, 50% chance of being NULL
+    now() - (random() * interval '5 days'), -- Last seen timestamp within the last 5 days
+    NULL, -- Keeping raw_fingerprint_response NULL for simplicity
+    CASE WHEN random() < 0.3 THEN gen_random_uuid() ELSE NULL END, -- Session UUID, 30% chance of NULL
+    NULL, -- Keeping fingerprint_response NULL for simplicity
+    CASE WHEN random() < 0.5 THEN '93.0' ELSE '92.0' END, -- Random browser version
+    CASE WHEN random() < 0.5 THEN 'Firefox' ELSE 'Chrome' END, -- Random browser name
+    CASE WHEN random() < 0.5 THEN 'Desktop' ELSE 'Mobile' END, -- Random device type
+    'Windows', -- Static value for operating system
+    '10.0', -- Static value for operating system version
+    'Mozilla/5.0', -- Static value for user agent
+    'City ' || (gs % 1000)::text, -- Random city name
+    'Region ' || (gs % 100)::text, -- Random region name
+    'US', -- Static country code
+    random() * 180 - 90, -- Random latitude between -90 and 90
+    random() * 360 - 180, -- Random longitude between -180 and 180
+    random() < 0.1 -- 10% chance of being incognito
+FROM generate_series(1, 100000) AS gs;
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_edges.sql b/test_runner/performance/large_synthetic_oltp/grow_edges.sql
new file mode 100644
index 0000000000..17f289fe5b
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_edges.sql
@@ -0,0 +1,10 @@
+-- add 100000 rows or approximately 11 MB to the edges table
+-- takes about 1 minute
+INSERT INTO workflows.edges (created_at, workflow_id, uuid, from_vertex_id, to_vertex_id)
+SELECT
+    now() - (random() * interval '365 days'), -- Random `created_at` timestamp in the last year
+    (random() * 100)::int + 1, -- Random `workflow_id` between 1 and 100
+    uuid_generate_v4(), -- Generate a new UUID for each row
+    (random() * 100000)::bigint + 1, -- Random `from_vertex_id` between 1 and 100,000
+    (random() * 100000)::bigint + 1 -- Random `to_vertex_id` between 1 and 100,000
+FROM generate_series(1, 100000) AS gs; -- Generate 100,000 sequential IDs
\ No newline at end of file
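Each grow script documents an approximate size contribution (about 11 MB per run for edges above). Those estimates can be checked against the live table; a sketch, run once before and once after one execution of the script:

-- sketch: total size (heap + indexes + TOAST) of one grown table;
-- compare the value before and after a run of grow_edges.sql
SELECT pg_size_pretty(pg_total_relation_size('workflows.edges')) AS edges_total_size;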
diff --git a/test_runner/performance/large_synthetic_oltp/grow_hotel_rate_mapping.sql b/test_runner/performance/large_synthetic_oltp/grow_hotel_rate_mapping.sql
new file mode 100644
index 0000000000..1e79f94eab
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_hotel_rate_mapping.sql
@@ -0,0 +1,21 @@
+-- add 100000 rows or approximately 10 MB to the hotel_rate_mapping table
+-- takes about 1 second
+INSERT INTO booking_inventory.hotel_rate_mapping (
+    uuid,
+    created_at,
+    updated_at,
+    hotel_rate_id,
+    remote_id,
+    source
+)
+SELECT
+    uuid_generate_v4(), -- Unique UUID for each row
+    now(), -- Created at timestamp
+    now(), -- Updated at timestamp
+    'rate_' || gs AS hotel_rate_id, -- Unique hotel_rate_id
+    'remote_' || gs AS remote_id, -- Unique remote_id
+    CASE WHEN gs % 3 = 0 THEN 'source_1'
+         WHEN gs % 3 = 1 THEN 'source_2'
+         ELSE 'source_3'
+    END AS source -- Distributing sources among three options
+FROM generate_series(1, 100000) AS gs;
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_ocr_pipeline_results_version.sql b/test_runner/performance/large_synthetic_oltp/grow_ocr_pipeline_results_version.sql
new file mode 100644
index 0000000000..21ebac74d2
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_ocr_pipeline_results_version.sql
@@ -0,0 +1,31 @@
+-- add 100000 rows or approximately 20 MB to the ocr_pipeline_results_version table
+-- takes about 1 second
+INSERT INTO ocr.ocr_pipeline_results_version (
+    id, transaction_id, operation_type, created_at, updated_at, s3_filename, completed_at, result,
+    end_transaction_id, pipeline_type, is_async, callback, callback_kwargs, input, error, file_type, s3_bucket_name, pipeline_kwargs
+)
+SELECT
+    gs.aid, -- id
+    gs.aid, -- transaction_id (same as id for simplicity)
+    (gs.aid % 5)::smallint + 1, -- operation_type (cyclic values from 1 to 5)
+    now() - interval '1 day' * (random() * 30), -- created_at (random timestamp within the last 30 days)
+    now() - interval '1 day' * (random() * 30), -- updated_at (random timestamp within the last 30 days)
+    's3_file_' || gs.aid || '.txt', -- s3_filename (synthetic filename)
+    now() - interval '1 day' * (random() * 30), -- completed_at (random timestamp within the last 30 days)
+    '{}'::jsonb, -- result (empty JSON object)
+    NULL, -- end_transaction_id (NULL)
+    CASE (gs.aid % 3) -- pipeline_type (cyclic text values)
+        WHEN 0 THEN 'OCR'
+        WHEN 1 THEN 'PDF'
+        ELSE 'Image'
+    END,
+    gs.aid % 2 = 0, -- is_async (alternating between true and false)
+    'http://callback/' || gs.aid, -- callback (synthetic URL)
+    '{}'::jsonb, -- callback_kwargs (empty JSON object)
+    'Input text ' || gs.aid, -- input (synthetic input text)
+    NULL, -- error (NULL)
+    'pdf', -- file_type (default to 'pdf')
+    'bucket_' || gs.aid % 10, -- s3_bucket_name (synthetic bucket names)
+    '{}'::jsonb -- pipeline_kwargs (empty JSON object)
+FROM
+    generate_series(1, 100000) AS gs(aid);
\ No newline at end of file
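Expressions such as (gs.aid % 5)::smallint + 1 and CASE (gs.aid % 3) above produce deterministic, evenly distributed values rather than random ones. A quick sanity query, assuming read access to the ocr schema, to confirm the three-way pipeline_type split after a run:

-- sketch: each pipeline_type should hold roughly a third of the rows
SELECT pipeline_type, count(*) AS row_count
FROM ocr.ocr_pipeline_results_version
GROUP BY pipeline_type
ORDER BY row_count DESC;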
diff --git a/test_runner/performance/large_synthetic_oltp/grow_priceline_raw_response.sql b/test_runner/performance/large_synthetic_oltp/grow_priceline_raw_response.sql
new file mode 100644
index 0000000000..28c4f1a7fb
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_priceline_raw_response.sql
@@ -0,0 +1,18 @@
+-- add 100000 rows or approx. 20 MB to the priceline_raw_response table
+-- takes about 20 seconds
+INSERT INTO booking_inventory.priceline_raw_response (
+    uuid, created_at, updated_at, url, base_url, path, method, params, request, response
+)
+SELECT
+    gen_random_uuid(), -- Generate random UUIDs
+    now() - (random() * interval '30 days'), -- Random creation time within the past 30 days
+    now() - (random() * interval '30 days'), -- Random update time within the past 30 days
+    'https://example.com/resource/' || gs, -- Construct a unique URL for each row
+    'https://example.com', -- Base URL for all rows
+    '/resource/' || gs, -- Path for each row
+    CASE WHEN gs % 2 = 0 THEN 'GET' ELSE 'POST' END, -- Alternate between GET and POST methods
+    'id=' || gs, -- Simple parameter pattern for each row
+    '{}'::jsonb, -- Empty JSON object for request
+    jsonb_build_object('status', 'success', 'data', gs) -- Construct a valid JSON response
+FROM
+    generate_series(1, 100000) AS gs;
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_relabled_transactions.sql b/test_runner/performance/large_synthetic_oltp/grow_relabled_transactions.sql
new file mode 100644
index 0000000000..0b1aa2d2bd
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_relabled_transactions.sql
@@ -0,0 +1,26 @@
+-- add 100000 rows or approx. 1 MB to the relabeled_transactions table
+-- takes about 1 second
+INSERT INTO heron.relabeled_transactions (
+    id,
+    created_at,
+    universal_transaction_id,
+    raw_result,
+    category,
+    category_confidence,
+    merchant,
+    batch_id
+)
+SELECT
+    gs.aid AS id,
+    now() - (gs.aid % 1000) * interval '1 second' AS created_at,
+    'txn_' || gs.aid AS universal_transaction_id,
+    '{}'::jsonb AS raw_result,
+    CASE WHEN gs.aid % 5 = 0 THEN 'grocery'
+         WHEN gs.aid % 5 = 1 THEN 'electronics'
+         WHEN gs.aid % 5 = 2 THEN 'clothing'
+         WHEN gs.aid % 5 = 3 THEN 'utilities'
+         ELSE NULL END AS category,
+    ROUND(RANDOM()::numeric, 2) AS category_confidence,
+    CASE WHEN gs.aid % 2 = 0 THEN 'Merchant_' || gs.aid % 20 ELSE NULL END AS merchant,
+    gs.aid % 100 + 1 AS batch_id
+FROM generate_series(1, 100000) AS gs(aid);
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_state_values.sql b/test_runner/performance/large_synthetic_oltp/grow_state_values.sql
new file mode 100644
index 0000000000..8a8ce146be
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_state_values.sql
@@ -0,0 +1,9 @@
+-- add 100000 rows or approx. 10 MB to the state_values table
+-- takes about 14 seconds
+INSERT INTO workflows.state_values (key, workflow_id, state_type, value_id)
+SELECT
+    'key_' || gs::text, -- Key: Generate as 'key_1', 'key_2', etc.
+    (gs - 1) / 1000 + 1, -- workflow_id: 1000 rows per workflow (workflows 1 to 100)
+    'STATIC', -- state_type: Use constant 'STATIC' as defined in schema
+    gs::bigint -- value_id: Use the same as the series value
+FROM generate_series(1, 100000) AS gs; -- Generate 100,000 rows
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_values.sql b/test_runner/performance/large_synthetic_oltp/grow_values.sql
new file mode 100644
index 0000000000..3afdafdf86
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_values.sql
@@ -0,0 +1,33 @@
+-- add 100000 rows or approx. 24 MB to the values table
+-- takes about 126 seconds
+INSERT INTO workflows.values (
+    id,
+    type,
+    int_value,
+    string_value,
+    child_type,
+    bool_value,
+    uuid,
+    numeric_value,
+    workflow_id,
+    jsonb_value,
+    parent_value_id
+)
+SELECT
+    gs AS id,
+    'TYPE_A' AS type,
+    CASE WHEN selector = 1 THEN gs ELSE NULL END AS int_value,
+    CASE WHEN selector = 2 THEN 'string_value_' || gs::text ELSE NULL END AS string_value,
+    'CHILD_TYPE_A' AS child_type, -- Always non-null
+    CASE WHEN selector = 3 THEN (gs % 2 = 0) ELSE NULL END AS bool_value,
+    uuid_generate_v4() AS uuid, -- Always non-null
+    CASE WHEN selector = 4 THEN gs * 1.0 ELSE NULL END AS numeric_value,
+    (array[1, 2, 3, 4, 5])[gs % 5 + 1] AS workflow_id, -- Use only existing workflow IDs
+    CASE WHEN selector = 5 THEN ('{"key":' || gs::text || '}')::jsonb ELSE NULL END AS jsonb_value,
+    (gs % 100) + 1 AS parent_value_id -- Always non-null
+FROM (
+    -- Compute the selector per row; an uncorrelated cross-joined subquery would be
+    -- evaluated only once and give every row the same selector
+    SELECT gs, floor(random() * 5 + 1)::int AS selector
+    FROM generate_series(1, 100000) AS gs
+) AS s;
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/grow_vertices.sql b/test_runner/performance/large_synthetic_oltp/grow_vertices.sql
new file mode 100644
index 0000000000..87a2410e8a
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/grow_vertices.sql
@@ -0,0 +1,26 @@
+-- add 100000 rows or approx. 18 MB to the vertices table
+-- takes about 90 seconds
+INSERT INTO workflows.vertices(
+    uuid,
+    created_at,
+    condition_block_id,
+    operator,
+    has_been_visited,
+    reference_id,
+    workflow_id,
+    meta_data,
+    -- id,
+    action_block_id
+)
+SELECT
+    uuid_generate_v4() AS uuid,
+    now() AS created_at,
+    CASE WHEN (gs % 2 = 0) THEN gs % 10 ELSE NULL END AS condition_block_id, -- Every alternate row has a condition_block_id
+    'operator_' || (gs % 10) AS operator, -- Cyclical operator values (e.g., operator_0, operator_1)
+    false AS has_been_visited,
+    'ref_' || gs AS reference_id, -- Unique reference_id for each row
+    (gs % 1000) + 1 AS workflow_id, -- Cyclic workflow_id values between 1 and 1000
+    '{}'::jsonb AS meta_data, -- Empty JSON metadata
+    -- gs AS id, -- default from sequence to get unique ID
+    CASE WHEN (gs % 2 = 1) THEN gs ELSE NULL END AS action_block_id -- Complementary to condition_block_id
+FROM generate_series(1, 100000) AS gs;
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/update_accounting_coding_body_tracking_category_selection.sql b/test_runner/performance/large_synthetic_oltp/update_accounting_coding_body_tracking_category_selection.sql
new file mode 100644
index 0000000000..78688fc8ba
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_accounting_coding_body_tracking_category_selection.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 200 KB in the accounting_coding_body_tracking_category_selection table
+-- takes about 1 second
+UPDATE accounting.accounting_coding_body_tracking_category_selection
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM accounting.accounting_coding_body_tracking_category_selection
+    TABLESAMPLE SYSTEM (0.0005)
+);
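The update scripts that follow all share one pattern: sample a small set of pages with TABLESAMPLE SYSTEM, then update the rows at the sampled ctids. The argument is a percentage of the table's pages, so SYSTEM (0.0005) visits roughly 0.0005 % of pages, and the row counts quoted in each header depend on how many rows fit on a page. A sketch for calibrating that percentage against a new table:

-- sketch: expected rows touched by TABLESAMPLE SYSTEM (0.0005) on a given table;
-- reltuples is the planner's row estimate, so treat the result as approximate
SELECT relname,
       reltuples::bigint AS estimated_rows,
       (reltuples * 0.0005 / 100.0)::bigint AS approx_rows_in_sample
FROM pg_class
WHERE oid = 'workflows.action_blocks'::regclass;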
diff --git a/test_runner/performance/large_synthetic_oltp/update_action_blocks.sql b/test_runner/performance/large_synthetic_oltp/update_action_blocks.sql
new file mode 100644
index 0000000000..ad1ee6c749
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_action_blocks.sql
@@ -0,0 +1,9 @@
+-- update approximately 9000 rows or 1 MB in the action_blocks table
+-- takes about 1 second
+UPDATE workflows.action_blocks
+SET run_synchronously = NOT run_synchronously
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.action_blocks
+    TABLESAMPLE SYSTEM (0.001)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_action_kwargs.sql b/test_runner/performance/large_synthetic_oltp/update_action_kwargs.sql
new file mode 100644
index 0000000000..b939c0ff2d
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_action_kwargs.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the action_kwargs table
+-- takes about 1 second
+UPDATE workflows.action_kwargs
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.action_kwargs
+    TABLESAMPLE SYSTEM (0.0002)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_denormalized_approval_workflow.sql b/test_runner/performance/large_synthetic_oltp/update_denormalized_approval_workflow.sql
new file mode 100644
index 0000000000..671ddbc2d4
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_denormalized_approval_workflow.sql
@@ -0,0 +1,10 @@
+-- update approximately 3000 rows or 500 KB in the denormalized_approval_workflow table
+-- takes about 1 second
+UPDATE approvals_v2.denormalized_approval_workflow
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM approvals_v2.denormalized_approval_workflow
+    TABLESAMPLE SYSTEM (0.0005)
+);
+
diff --git a/test_runner/performance/large_synthetic_oltp/update_device_fingerprint_event.sql b/test_runner/performance/large_synthetic_oltp/update_device_fingerprint_event.sql
new file mode 100644
index 0000000000..20baf12887
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_device_fingerprint_event.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 1 MB in the device_fingerprint_event table
+-- takes about 5 seconds
+UPDATE authentication.device_fingerprint_event
+SET is_incognito = NOT is_incognito
+WHERE ctid in (
+    SELECT ctid
+    FROM authentication.device_fingerprint_event
+    TABLESAMPLE SYSTEM (0.001)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_edges.sql b/test_runner/performance/large_synthetic_oltp/update_edges.sql
new file mode 100644
index 0000000000..d79da78de3
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_edges.sql
@@ -0,0 +1,9 @@
+-- update approximately 4000 rows or 600 KB in the edges table
+-- takes about 1 second
+UPDATE workflows.edges
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.edges
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enriched_log.sql b/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enriched_log.sql
new file mode 100644
index 0000000000..5bcc885736
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enriched_log.sql
@@ -0,0 +1,9 @@
+-- update approximately 10000 rows or 200 KB in the heron_transaction_enriched_log table
+-- takes about 1 minute
+UPDATE heron.heron_transaction_enriched_log
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM heron.heron_transaction_enriched_log
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enrichment_requests.sql b/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enrichment_requests.sql
new file mode 100644
index 0000000000..02cf0ca420
--- /dev/null
+++
b/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enrichment_requests.sql
@@ -0,0 +1,9 @@
+-- update approximately 4000 rows or 1 MB in the heron_transaction_enrichment_requests table
+-- takes about 2 minutes
+UPDATE heron.heron_transaction_enrichment_requests
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM heron.heron_transaction_enrichment_requests
+    TABLESAMPLE SYSTEM (0.0002)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_hotel_rate_mapping.sql b/test_runner/performance/large_synthetic_oltp/update_hotel_rate_mapping.sql
new file mode 100644
index 0000000000..3210b6dff8
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_hotel_rate_mapping.sql
@@ -0,0 +1,9 @@
+-- update approximately 6000 rows or 600 KB in the hotel_rate_mapping table
+-- takes about 1 second
+UPDATE booking_inventory.hotel_rate_mapping
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM booking_inventory.hotel_rate_mapping
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_incoming_webhooks.sql b/test_runner/performance/large_synthetic_oltp/update_incoming_webhooks.sql
new file mode 100644
index 0000000000..ea284eb47c
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_incoming_webhooks.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 1 MB in the incoming_webhooks table
+-- takes about 5 seconds
+UPDATE webhook.incoming_webhooks
+SET is_body_encrypted = NOT is_body_encrypted
+WHERE ctid in (
+    SELECT ctid
+    FROM webhook.incoming_webhooks
+    TABLESAMPLE SYSTEM (0.0002)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_manual_transaction.sql b/test_runner/performance/large_synthetic_oltp/update_manual_transaction.sql
new file mode 100644
index 0000000000..190bc625e2
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_manual_transaction.sql
@@ -0,0 +1,9 @@
+-- update approximately 1000 rows or 200 KB in the manual_transaction table
+-- takes about 2 seconds
+UPDATE banking.manual_transaction
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM banking.manual_transaction
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_ml_receipt_matching_log.sql b/test_runner/performance/large_synthetic_oltp/update_ml_receipt_matching_log.sql
new file mode 100644
index 0000000000..810021b09d
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_ml_receipt_matching_log.sql
@@ -0,0 +1,9 @@
+-- update approximately 1000 rows or 100 KB in the ml_receipt_matching_log table
+-- takes about 1 second
+UPDATE receipt.ml_receipt_matching_log
+SET is_shadow_mode = NOT is_shadow_mode
+WHERE ctid in (
+    SELECT ctid
+    FROM receipt.ml_receipt_matching_log
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_ocr_pipeine_results_version.sql b/test_runner/performance/large_synthetic_oltp/update_ocr_pipeine_results_version.sql
new file mode 100644
index 0000000000..a1da8fdb07
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_ocr_pipeine_results_version.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 400 KB in the ocr_pipeline_results_version table
+-- takes about 1 second
+UPDATE ocr.ocr_pipeline_results_version
+SET is_async = NOT is_async
+WHERE ctid in (
+    SELECT ctid
+    FROM ocr.ocr_pipeline_results_version
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results.sql b/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results.sql
new file mode 100644
index 0000000000..b7bb4932bd
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results.sql
@@ -0,0 +1,9 @@
+-- update approximately 3000 rows or 1 MB in the ocr_pipeline_step_results table
+-- takes about 11 seconds
+UPDATE ocr.ocr_pipeline_step_results
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM ocr.ocr_pipeline_step_results
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results_version.sql b/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results_version.sql
new file mode 100644
index 0000000000..83e9765d22
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results_version.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the ocr_pipeline_step_results_version table
+-- takes about 40 seconds
+UPDATE ocr.ocr_pipeline_step_results_version
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM ocr.ocr_pipeline_step_results_version
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_priceline_raw_response.sql b/test_runner/performance/large_synthetic_oltp/update_priceline_raw_response.sql
new file mode 100644
index 0000000000..a434c6cb63
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_priceline_raw_response.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the priceline_raw_response table
+-- takes about 1 second
+UPDATE booking_inventory.priceline_raw_response
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM booking_inventory.priceline_raw_response
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_quickbooks_transactions.sql b/test_runner/performance/large_synthetic_oltp/update_quickbooks_transactions.sql
new file mode 100644
index 0000000000..a783246c4c
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_quickbooks_transactions.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the quickbooks_transactions table
+-- takes about 30 seconds
+UPDATE accounting.quickbooks_transactions
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM accounting.quickbooks_transactions
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_raw_finicity_transaction.sql b/test_runner/performance/large_synthetic_oltp/update_raw_finicity_transaction.sql
new file mode 100644
index 0000000000..91fb1bc789
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_raw_finicity_transaction.sql
@@ -0,0 +1,15 @@
+-- update approximately 6000 rows or 600 KB in the raw_finicity_transaction table
+-- takes about 1 second
+UPDATE banking.raw_finicity_transaction
+SET raw_data =
+    jsonb_set(
+        raw_data,
+        '{updated}',
+        to_jsonb(now()),
+        true
+    )
+WHERE ctid IN (
+    SELECT ctid
+    FROM banking.raw_finicity_transaction
+    TABLESAMPLE SYSTEM (0.0005)
+);
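update_raw_finicity_transaction.sql above churns a JSONB document through jsonb_set with create_if_missing = true, so the {updated} key is created on the first touch and overwritten on every later one. A self-contained illustration with literal values:

-- sketch: jsonb_set adds the key when it is absent ...
SELECT jsonb_set('{"a": 1}'::jsonb, '{updated}', to_jsonb(now()), true);
-- ... and overwrites it when it is present
SELECT jsonb_set('{"updated": "old"}'::jsonb, '{updated}', to_jsonb(now()), true);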
diff --git a/test_runner/performance/large_synthetic_oltp/update_relabeled_transactions.sql b/test_runner/performance/large_synthetic_oltp/update_relabeled_transactions.sql
new file mode 100644
index 0000000000..87b402f9e7
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_relabeled_transactions.sql
@@ -0,0 +1,9 @@
+-- update approximately 8000 rows or 1 MB in the relabeled_transactions table
+-- takes about 1 second
+UPDATE heron.relabeled_transactions
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM heron.relabeled_transactions
+    TABLESAMPLE SYSTEM (0.0005)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_state_values.sql b/test_runner/performance/large_synthetic_oltp/update_state_values.sql
new file mode 100644
index 0000000000..2365ea3d6b
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_state_values.sql
@@ -0,0 +1,9 @@
+-- update approximately 8000 rows or 1 MB in the state_values table
+-- takes about 2 minutes
+UPDATE workflows.state_values
+SET state_type = now()::text
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.state_values
+    TABLESAMPLE SYSTEM (0.0002)
+);
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/update_stripe_authorization_event_log.sql b/test_runner/performance/large_synthetic_oltp/update_stripe_authorization_event_log.sql
new file mode 100644
index 0000000000..5328db9fb8
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_stripe_authorization_event_log.sql
@@ -0,0 +1,9 @@
+-- update approximately 4000 rows or 1 MB in the stripe_authorization_event_log table
+-- takes about 5 minutes
+UPDATE stripe.stripe_authorization_event_log
+SET approved = NOT approved
+WHERE ctid in (
+    SELECT ctid
+    FROM stripe.stripe_authorization_event_log
+    TABLESAMPLE SYSTEM (0.0002)
+);
\ No newline at end of file
diff --git a/test_runner/performance/large_synthetic_oltp/update_transaction.sql b/test_runner/performance/large_synthetic_oltp/update_transaction.sql
new file mode 100644
index 0000000000..83bec52065
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_transaction.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 301 MB in the transaction table
+-- takes about 90 seconds
+UPDATE transaction.transaction
+SET is_last = NOT is_last
+WHERE ctid in (
+    SELECT ctid
+    FROM transaction.transaction
+    TABLESAMPLE SYSTEM (0.0002)
+);
diff --git a/test_runner/performance/large_synthetic_oltp/update_values.sql b/test_runner/performance/large_synthetic_oltp/update_values.sql
new file mode 100644
index 0000000000..e5d576dae5
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_values.sql
@@ -0,0 +1,9 @@
+-- update approximately 2500 rows or 1 MB in the values table
+-- takes about 3 minutes
+UPDATE workflows.values
+SET bool_value = NOT bool_value
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.values
+    TABLESAMPLE SYSTEM (0.0002)
+) AND bool_value IS NOT NULL;
diff --git a/test_runner/performance/large_synthetic_oltp/update_vertices.sql b/test_runner/performance/large_synthetic_oltp/update_vertices.sql
new file mode 100644
index 0000000000..714c38965b
--- /dev/null
+++ b/test_runner/performance/large_synthetic_oltp/update_vertices.sql
@@ -0,0 +1,9 @@
+-- update approximately 10000 rows or 2 MB in the vertices table
+-- takes about 1 minute
+UPDATE workflows.vertices
+SET has_been_visited = NOT has_been_visited
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.vertices
+    TABLESAMPLE SYSTEM (0.0002)
+);
\ No newline at end of file
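Because the pgbench runs use -n (no explicit vacuum before the test, see test_perf_oltp_large_tenant.py below), the dead tuples these updates produce are cleaned up by autovacuum only. A hedged monitoring query for the churned schemas:

-- sketch: dead-tuple buildup and most recent autovacuum per updated table
SELECT schemaname, relname, n_live_tup, n_dead_tup, last_autovacuum
FROM pg_stat_user_tables
WHERE schemaname IN ('workflows', 'ocr', 'heron', 'banking', 'accounting')
ORDER BY n_dead_tup DESC
LIMIT 10;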
diff --git a/test_runner/performance/test_perf_oltp_large_tenant.py b/test_runner/performance/test_perf_oltp_large_tenant.py
index b45394d627..bd00f6b65f 100644
--- a/test_runner/performance/test_perf_oltp_large_tenant.py
+++ b/test_runner/performance/test_perf_oltp_large_tenant.py
@@ -31,7 +31,9 @@ def get_custom_scripts(
     return rv


-def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
+def run_test_pgbench(
+    env: PgCompare, custom_scripts: str, duration: int, clients: int = 500, jobs: int = 100
+):
     password = env.pg.default_options.get("password", None)
     options = env.pg.default_options.get("options", "")
     # drop password from the connection string by passing password=None and set password separately
@@ -46,8 +48,8 @@ def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
         "-n",  # no explicit vacuum before the test - we want to rely on auto-vacuum
         "-M",
         "prepared",
-        "--client=500",
-        "--jobs=100",
+        f"--client={clients}",
+        f"--jobs={jobs}",
         f"-T{duration}",
         "-P60",  # progress every minute
         "--progress-timestamp",
@@ -164,6 +166,12 @@ def test_perf_oltp_large_tenant_pgbench(
     run_test_pgbench(remote_compare, custom_scripts, duration)


+@pytest.mark.parametrize("duration", get_durations_matrix())
+@pytest.mark.remote_cluster
+def test_perf_oltp_large_tenant_growth(remote_compare: PgCompare, duration: int):
+    run_test_pgbench(remote_compare, " ".join(get_custom_scripts()), duration, 35, 35)
+
+
 @pytest.mark.remote_cluster
 def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
     # run analyze, vacuum, re-index after the test and measure and report its duration
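The growth workflow's header estimates roughly 16 GB of growth per run, six runs a day. One way to track the cumulative effect is to snapshot the database size after each run, over the same connection string the workflow uses; a closing sketch:

-- sketch: snapshot the total size of the reuse branch's database after a growth run
SELECT now() AS measured_at,
       pg_size_pretty(pg_database_size(current_database())) AS total_size;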