Mirror of https://github.com/neondatabase/neon.git, synced 2025-12-22 21:59:59 +00:00
Performance test for LFC prewarm (#12524)
https://github.com/neondatabase/cloud/issues/19011: Measure the relative performance of prewarmed and non-prewarmed endpoints. Add a test that runs on every commit, and a performance test that runs against a remote cluster.
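The change adds PREWARM_PGBENCH_SIZE as a CI config variable, a prewarm-test job in .github/workflows/benchmarking.yml, a small tweak to the NeonAPI fixture, and the new test_runner/performance/test_lfc_prewarm.py tests. At its core the comparison runs the same pgbench workload against a non-prewarmed endpoint and a prewarmed one and records both results. A minimal standalone sketch of that pattern (not the test code below; the DSNs are hypothetical placeholders and pgbench must be installed and on PATH):

# Sketch only: run a short pgbench workload against two endpoints and compare the reported TPS.
# The DSNs below are hypothetical placeholders, not real Neon endpoints.
import re
import subprocess

ENDPOINTS = {
    "normal": "postgres://user:password@normal-endpoint.example/neondb",
    "prewarmed": "postgres://user:password@prewarmed-endpoint.example/neondb",
}


def pgbench_tps(connstr: str) -> float:
    """Run a short pgbench workload and parse the TPS figure it reports."""
    out = subprocess.run(
        ["pgbench", "-c10", "-T10", connstr],
        capture_output=True, text=True, check=True,
    ).stdout
    match = re.search(r"tps = ([0-9.]+)", out)
    assert match is not None, "pgbench did not report a tps line"
    return float(match.group(1))


if __name__ == "__main__":
    for name, dsn in ENDPOINTS.items():
        print(f"{name}: {pgbench_tps(dsn)} tps")
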
.github/actionlint.yml (vendored, 1 line changed)

@@ -31,6 +31,7 @@ config-variables:
  - NEON_PROD_AWS_ACCOUNT_ID
  - PGREGRESS_PG16_PROJECT_ID
  - PGREGRESS_PG17_PROJECT_ID
  - PREWARM_PGBENCH_SIZE
  - REMOTE_STORAGE_AZURE_CONTAINER
  - REMOTE_STORAGE_AZURE_REGION
  - SLACK_CICD_CHANNEL_ID

.github/workflows/benchmarking.yml (vendored, 72 lines changed)

@@ -219,6 +219,7 @@ jobs:
            --ignore test_runner/performance/test_cumulative_statistics_persistence.py
            --ignore test_runner/performance/test_perf_many_relations.py
            --ignore test_runner/performance/test_perf_oltp_large_tenant.py
            --ignore test_runner/performance/test_lfc_prewarm.py
        env:
          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

@@ -410,6 +411,77 @@ jobs:
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

  prewarm-test:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    permissions:
      contents: write
      statuses: write
      id-token: write # aws-actions/configure-aws-credentials
    env:
      PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      DEFAULT_PG_VERSION: 17
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
      PLATFORM: "neon-staging"

    runs-on: [ self-hosted, us-east-2, x64 ]
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      options: --init

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          role-duration-seconds: 18000 # 5 hours

      - name: Download Neon artifact
        uses: ./.github/actions/download
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Run prewarm benchmark
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: ${{ env.BUILD_TYPE }}
          test_selection: performance/test_lfc_prewarm.py
          run_in_parallel: false
          save_perf_report: ${{ env.SAVE_PERF_REPORT }}
          extra_params: -m remote_cluster --timeout 5400
          pg_version: ${{ env.DEFAULT_PG_VERSION }}
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
          NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}

      - name: Create Allure report
        id: create-allure-report
        if: ${{ !cancelled() }}
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

  generate-matrices:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)

@@ -314,6 +314,10 @@ class NeonAPI:
        if endpoint_type:
            data["endpoint"]["type"] = endpoint_type
        if settings:
            # otherwise we get 400 "settings must not be nil"
            # TODO(myrrc): fix on cplane side
            if "pg_settings" not in settings:
                settings["pg_settings"] = {}
            data["endpoint"]["settings"] = settings

        resp = self.__request(

test_runner/performance/test_lfc_prewarm.py (new file, 167 lines)
@@ -0,0 +1,167 @@
from __future__ import annotations

import os
import timeit
import traceback
from concurrent.futures import ThreadPoolExecutor as Exec
from pathlib import Path
from time import sleep
from typing import TYPE_CHECKING, Any, cast

import pytest
from fixtures.benchmark_fixture import NeonBenchmarker, PgBenchRunResult
from fixtures.log_helper import log
from fixtures.neon_api import NeonAPI, connection_parameters_to_env

if TYPE_CHECKING:
    from fixtures.compare_fixtures import NeonCompare
    from fixtures.neon_fixtures import Endpoint, PgBin
    from fixtures.pg_version import PgVersion

from performance.test_perf_pgbench import utc_now_timestamp

# These tests compare the performance of write-heavy and read-heavy workloads on an ordinary
# endpoint against an endpoint that saves its LFC and prewarms from it on startup.


def test_compare_prewarmed_pgbench_perf(neon_compare: NeonCompare):
    env = neon_compare.env
    env.create_branch("normal")
    env.create_branch("prewarmed")
    pg_bin = neon_compare.pg_bin
    ep_normal: Endpoint = env.endpoints.create_start("normal")
    ep_prewarmed: Endpoint = env.endpoints.create_start("prewarmed", autoprewarm=True)

    for ep in [ep_normal, ep_prewarmed]:
        connstr: str = ep.connstr()
        pg_bin.run(["pgbench", "-i", "-I", "dtGvp", connstr, "-s100"])
        ep.safe_psql("CREATE EXTENSION neon")
        client = ep.http_client()
        client.offload_lfc()
        ep.stop()
        ep.start()
        client.prewarm_lfc_wait()

        run_start_timestamp = utc_now_timestamp()
        t0 = timeit.default_timer()
        out = pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr])
        run_duration = timeit.default_timer() - t0
        run_end_timestamp = utc_now_timestamp()

        stdout = Path(f"{out}.stdout").read_text()
        res = PgBenchRunResult.parse_from_stdout(
            stdout=stdout,
            run_duration=run_duration,
            run_start_timestamp=run_start_timestamp,
            run_end_timestamp=run_end_timestamp,
        )
        name: str = cast("str", ep.branch_name)
        neon_compare.zenbenchmark.record_pg_bench_result(name, res)


@pytest.mark.remote_cluster
@pytest.mark.timeout(30 * 60)
def test_compare_prewarmed_pgbench_perf_benchmark(
    pg_bin: PgBin,
    neon_api: NeonAPI,
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    name = f"Test prewarmed pgbench performance, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}"
    project = neon_api.create_project(pg_version, name)
    project_id = project["project"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)
    err = False
    try:
        benchmark_impl(pg_bin, neon_api, project, zenbenchmark)
    except Exception as e:
        err = True
        log.error(f"Caught exception: {e}")
        log.error(traceback.format_exc())
    finally:
        assert not err
        neon_api.delete_project(project_id)


def benchmark_impl(
    pg_bin: PgBin, neon_api: NeonAPI, project: dict[str, Any], zenbenchmark: NeonBenchmarker
):
    pgbench_size = int(os.getenv("PGBENCH_SIZE") or "3424")  # 50GB
    offload_secs = 20
    test_duration_min = 5
    pgbench_duration = f"-T{test_duration_min * 60}"
    # prewarm API is not publicly exposed. In order to test performance of a
    # fully prewarmed endpoint, wait after it restarts
    prewarmed_sleep_secs = 30

    branch_id = project["branch"]["id"]
    project_id = project["project"]["id"]
    normal_env = connection_parameters_to_env(
        project["connection_uris"][0]["connection_parameters"]
    )
    normal_id = project["endpoints"][0]["id"]

    prewarmed_branch_id = neon_api.create_branch(
        project_id, "prewarmed", parent_id=branch_id, add_endpoint=False
    )["branch"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)

    ep_prewarmed = neon_api.create_endpoint(
        project_id,
        prewarmed_branch_id,
        endpoint_type="read_write",
        settings={"autoprewarm": True, "offload_lfc_interval_seconds": offload_secs},
    )
    neon_api.wait_for_operation_to_finish(project_id)

    prewarmed_env = normal_env.copy()
    prewarmed_env["PGHOST"] = ep_prewarmed["endpoint"]["host"]
    prewarmed_id = ep_prewarmed["endpoint"]["id"]

    def bench(endpoint_name, endpoint_id, env):
        pg_bin.run(["pgbench", "-i", "-I", "dtGvp", f"-s{pgbench_size}"], env)
        sleep(offload_secs * 2)  # ensure LFC is offloaded after pgbench finishes
        neon_api.restart_endpoint(project_id, endpoint_id)
        sleep(prewarmed_sleep_secs)

        run_start_timestamp = utc_now_timestamp()
        t0 = timeit.default_timer()
        out = pg_bin.run_capture(["pgbench", "-c10", pgbench_duration, "-Mprepared"], env)
        run_duration = timeit.default_timer() - t0
        run_end_timestamp = utc_now_timestamp()

        stdout = Path(f"{out}.stdout").read_text()
        res = PgBenchRunResult.parse_from_stdout(
            stdout=stdout,
            run_duration=run_duration,
            run_start_timestamp=run_start_timestamp,
            run_end_timestamp=run_end_timestamp,
        )
        zenbenchmark.record_pg_bench_result(endpoint_name, res)

    with Exec(max_workers=2) as exe:
        exe.submit(bench, "normal", normal_id, normal_env)
        exe.submit(bench, "prewarmed", prewarmed_id, prewarmed_env)


def test_compare_prewarmed_read_perf(neon_compare: NeonCompare):
    env = neon_compare.env
    env.create_branch("normal")
    env.create_branch("prewarmed")
    ep_normal: Endpoint = env.endpoints.create_start("normal")
    ep_prewarmed: Endpoint = env.endpoints.create_start("prewarmed", autoprewarm=True)

    sql = [
        "CREATE EXTENSION neon",
        "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
        "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
    ]
    for ep in [ep_normal, ep_prewarmed]:
        ep.safe_psql_many(sql)
        client = ep.http_client()
        client.offload_lfc()
        ep.stop()
        ep.start()
        client.prewarm_lfc_wait()
        with neon_compare.record_duration(f"{ep.branch_name}_run_duration"):
            ep.safe_psql("SELECT count(*) from foo")