From c8fbbb9b65587d25b9dbd3c8f21266ce07159d02 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Mon, 27 Jan 2025 22:06:05 +0100 Subject: [PATCH] Test ingest_benchmark with different stripe size and also PostgreSQL version 17 (#10510) We want to verify if pageserver stripe size has an impact on ingest performance. We want to verify if ingest performance has improved or regressed with postgres version 17. ## Summary of changes - Allow to create new project with different postgres versions - allow to pre-shard new project with different stripe sizes instead of relying on storage manager to shard_split the project once a threshold is exceeded Replaces https://github.com/neondatabase/neon/pull/10509 Test run https://github.com/neondatabase/neon/actions/runs/12986410381 --- .../actions/neon-project-create/action.yml | 48 +++++++++++++++++++ .github/workflows/ingest_benchmark.yml | 33 ++++++++++--- 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/.github/actions/neon-project-create/action.yml b/.github/actions/neon-project-create/action.yml index f4a194639f..11f46bce8e 100644 --- a/.github/actions/neon-project-create/action.yml +++ b/.github/actions/neon-project-create/action.yml @@ -17,6 +17,31 @@ inputs: compute_units: description: '[Min, Max] compute units' default: '[1, 1]' + # settings below only needed if you want the project to be sharded from the beginning + shard_split_project: + description: 'by default new projects are not shard-split, specify true to shard-split' + required: false + default: 'false' + admin_api_key: + description: 'Admin API Key needed for shard-splitting. Must be specified if shard_split_project is true' + required: false + shard_count: + description: 'Number of shards to split the project into, only applies if shard_split_project is true' + required: false + default: '8' + stripe_size: + description: 'Stripe size, optional, in 8 KiB pages, e.g. set 2048 for 16 MiB stripes. 
Default is 256 MiB, only applies if shard_split_project is true' + required: false + default: '32768' + psql_path: + description: 'Path to psql binary - it is caller responsibility to provision the psql binary' + required: false + default: '/tmp/neon/pg_install/v16/bin/psql' + libpq_lib_path: + description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library' + required: false + default: '/tmp/neon/pg_install/v16/lib' + outputs: dsn: @@ -63,6 +88,23 @@ runs: echo "project_id=${project_id}" >> $GITHUB_OUTPUT echo "Project ${project_id} has been created" + + if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then + # determine tenant ID + TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"` + + echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))" + + echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" + echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}" + + # we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set) + curl -X PUT \ + "https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \ + -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \ + -d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}" + fi + env: API_HOST: ${{ inputs.api_host }} API_KEY: ${{ inputs.api_key }} @@ -70,3 +112,9 @@ runs: POSTGRES_VERSION: ${{ inputs.postgres_version }} MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }} MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }} + SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }} + ADMIN_API_KEY: ${{ inputs.admin_api_key }} + SHARD_COUNT: ${{ inputs.shard_count }} + STRIPE_SIZE: ${{ 
inputs.stripe_size }} + PSQL: ${{ inputs.psql_path }} + LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }} diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index fc33c0a980..7b303fa37a 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -28,7 +28,24 @@ jobs: strategy: fail-fast: false # allow other variants to continue even if one fails matrix: - target_project: [new_empty_project, large_existing_project] + include: + - target_project: new_empty_project_stripe_size_2048 + stripe_size: 2048 # 16 MiB + postgres_version: 16 + - target_project: new_empty_project_stripe_size_32768 + stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold + # while here it is sharded from the beginning with a stripe size of 256 MiB + postgres_version: 16 + - target_project: new_empty_project + stripe_size: null # run with neon defaults which will shard split only when reaching the threshold + postgres_version: 16 + - target_project: new_empty_project + stripe_size: null # run with neon defaults which will shard split only when reaching the threshold + postgres_version: 17 + - target_project: large_existing_project + stripe_size: null # cannot re-shard or choose a different stripe size for an existing, already sharded project + postgres_version: 16 + max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results permissions: contents: write statuses: write @@ -67,17 +84,21 @@ jobs: aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project - if: ${{ matrix.target_project == 'new_empty_project' }} + if: ${{ startsWith(matrix.target_project, 'new_empty_project') }} id: create-neon-project-ingest-target uses: ./.github/actions/neon-project-create with: region_id: aws-us-east-2 - postgres_version: 16 + postgres_version: ${{ matrix.postgres_version }} compute_units: 
'[7, 7]' # we want to test large compute here to avoid compute-side bottleneck api_key: ${{ secrets.NEON_STAGING_API_KEY }} + shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }} + admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }} + shard_count: 8 + stripe_size: ${{ matrix.stripe_size }} - name: Initialize Neon project - if: ${{ matrix.target_project == 'new_empty_project' }} + if: ${{ startsWith(matrix.target_project, 'new_empty_project') }} env: BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }} @@ -130,7 +151,7 @@ jobs: test_selection: performance/test_perf_ingest_using_pgcopydb.py run_in_parallel: false extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb - pg_version: v16 + pg_version: v${{ matrix.postgres_version }} save_perf_report: true aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: @@ -146,7 +167,7 @@ jobs: ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+" - name: Delete Neon Project - if: ${{ always() && matrix.target_project == 'new_empty_project' }} + if: ${{ always() && startsWith(matrix.target_project, 'new_empty_project') }} uses: ./.github/actions/neon-project-delete with: project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}