Mirror of https://github.com/neondatabase/neon.git, synced 2025-12-22 21:59:59 +00:00
LFC prewarm perftest fixes: use existing staging project (#12651)
https://github.com/neondatabase/cloud/issues/19011
- Prewarm config changes are not publicly available. Correct the test by using a pre-filled 50 GB project on staging.
- Create the neon extension with schema neon to fix the read performance tests on staging; an error example is in https://neon-github-public-dev.s3.amazonaws.com/reports/main/16483462789/index.html#suites/3d632da6dda4a70f5b4bd24904ab444c/919841e331089fc4/
- Don't create an extra endpoint in the LFC prewarm performance tests.
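As a condensed sketch of the flow the updated benchmark follows (the wrapper name run_against_staging is hypothetical; neon_api, pg_bin, and connstr_to_env come from the fixtures changed in the diff below):

import os

from fixtures.neon_api import connstr_to_env  # helper added in this commit


def run_against_staging(neon_api, pg_bin):
    # Locate the pre-created staging project, resolve its "ordinary" and
    # "prewarmed" branches, and run pgbench against connection strings
    # derived from the public API. Condensed sketch, not the full test.
    project_id = os.getenv("PROJECT_ID")
    assert project_id

    branches = {b["name"]: b["id"] for b in neon_api.get_branches(project_id)["branches"]}
    endpoints = {e["branch_id"]: e["id"] for e in neon_api.get_endpoints(project_id)["endpoints"]}

    for name in ("ordinary", "prewarmed"):
        branch_id = branches[name]
        endpoint_id = endpoints[branch_id]
        uri = neon_api.get_connection_uri(project_id, branch_id, endpoint_id)["uri"]
        pg_bin.run_capture(["pgbench", "-S", "-T60"], connstr_to_env(uri))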
.github/actionlint.yml (vendored): 2 changed lines
@@ -31,7 +31,7 @@ config-variables:
  - NEON_PROD_AWS_ACCOUNT_ID
  - PGREGRESS_PG16_PROJECT_ID
  - PGREGRESS_PG17_PROJECT_ID
  - PREWARM_PGBENCH_SIZE
  - PREWARM_PROJECT_ID
  - REMOTE_STORAGE_AZURE_CONTAINER
  - REMOTE_STORAGE_AZURE_REGION
  - SLACK_CICD_CHANNEL_ID
.github/workflows/benchmarking.yml (vendored): 2 changed lines
@@ -418,7 +418,7 @@ jobs:
      statuses: write
      id-token: write # aws-actions/configure-aws-credentials
    env:
      PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
      PROJECT_ID: ${{ vars.PREWARM_PROJECT_ID }}
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      DEFAULT_PG_VERSION: 17
      TEST_OUTPUT: /tmp/test_output
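A minimal sketch of how these workflow variables surface inside the test, mirroring the getenv calls in the test changes further down (the fallback of 3424 corresponds to the roughly 50 GB pre-filled project):

import os

# PGBENCH_SIZE and PROJECT_ID are injected by the benchmarking workflow from
# the PREWARM_PGBENCH_SIZE and PREWARM_PROJECT_ID repository variables.
pgbench_size = int(os.getenv("PGBENCH_SIZE") or "3424")  # ~50 GB pgbench scale
project_id = os.getenv("PROJECT_ID")
assert project_id  # must point at the pre-created staging project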
@@ -16,6 +16,7 @@ from typing_extensions import override
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
    Endpoint,
    NeonEnv,
    PgBin,
    PgProtocol,
@@ -129,6 +130,10 @@ class NeonCompare(PgCompare):
        # Start pg
        self._pg = self.env.endpoints.create_start("main", "main", self.tenant)

    @property
    def endpoint(self) -> Endpoint:
        return self._pg

    @property
    @override
    def pg(self) -> PgProtocol:
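The new endpoint property exposes the endpoint the NeonCompare fixture already starts, so tests can reuse it instead of creating an extra one; a minimal, hypothetical usage sketch:

def test_example(neon_compare: NeonCompare):
    # Reuse the "main" endpoint started by the fixture rather than
    # creating and starting a second endpoint.
    ep = neon_compare.endpoint
    ep.safe_psql("SELECT 1")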
@@ -79,18 +79,28 @@ class EndpointHttpClient(requests.Session):
        return json

    def prewarm_lfc(self, from_endpoint_id: str | None = None):
        """
        Prewarm LFC cache from given endpoint and wait till it finishes or errors
        """
        params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
        self.post(self.prewarm_url, params=params).raise_for_status()
        self.prewarm_lfc_wait()

    def prewarm_lfc_wait(self):
        """
        Wait till LFC prewarm returns with error or success.
        If prewarm was not requested before calling this function, it will error
        """
        statuses = "failed", "completed", "skipped"

        def prewarmed():
            json = self.prewarm_lfc_status()
            status, err = json["status"], json.get("error")
            assert status in ["failed", "completed", "skipped"], f"{status}, {err=}"
            assert status in statuses, f"{status}, {err=}"

        wait_until(prewarmed, timeout=60)
        assert self.prewarm_lfc_status()["status"] != "failed"
        res = self.prewarm_lfc_status()
        assert res["status"] != "failed", res

    def offload_lfc_status(self) -> dict[str, str]:
        res = self.get(self.offload_url)
@@ -99,17 +109,26 @@ class EndpointHttpClient(requests.Session):
        return json

    def offload_lfc(self):
        """
        Offload LFC cache to endpoint storage and wait till offload finishes or errors
        """
        self.post(self.offload_url).raise_for_status()
        self.offload_lfc_wait()

    def offload_lfc_wait(self):
        """
        Wait till LFC offload returns with error or success.
        If offload was not requested before calling this function, it will error
        """

        def offloaded():
            json = self.offload_lfc_status()
            status, err = json["status"], json.get("error")
            assert status in ["failed", "completed"], f"{status}, {err=}"

        wait_until(offloaded)
        assert self.offload_lfc_status()["status"] != "failed"
        wait_until(offloaded, timeout=60)
        res = self.offload_lfc_status()
        assert res["status"] != "failed", res

    def promote(self, promote_spec: dict[str, Any], disconnect: bool = False):
        url = f"http://localhost:{self.external_port}/promote"
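Taken together, these helpers support the offload-then-prewarm cycle the performance tests below follow; a minimal sketch, assuming ep is an already started Endpoint fixture:

client = ep.http_client()
client.offload_lfc()        # write the current LFC state to endpoint storage and wait
ep.stop()
ep.start(autoprewarm=True)  # restart the endpoint with prewarming enabled
client.prewarm_lfc_wait()   # block until prewarm reports completed/skipped, fail on error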
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
import time
from typing import TYPE_CHECKING, cast, final
@@ -13,6 +14,17 @@ if TYPE_CHECKING:
    from fixtures.pg_version import PgVersion


def connstr_to_env(connstr: str) -> dict[str, str]:
    # postgresql://neondb_owner:npg_kuv6Rqi1cB@ep-old-silence-w26pxsvz-pooler.us-east-2.aws.neon.build/neondb?sslmode=require&channel_binding=...'
    parts = re.split(r":|@|\/|\?", connstr.removeprefix("postgresql://"))
    return {
        "PGUSER": parts[0],
        "PGPASSWORD": parts[1],
        "PGHOST": parts[2],
        "PGDATABASE": parts[3],
    }


def connection_parameters_to_env(params: dict[str, str]) -> dict[str, str]:
    return {
        "PGHOST": params["host"],
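A usage sketch of connstr_to_env with the example connection string from the code comment above; the helper keeps only user, password, host, and database, and the query string after '?' is dropped:

env = connstr_to_env(
    "postgresql://neondb_owner:npg_kuv6Rqi1cB@ep-old-silence-w26pxsvz-pooler.us-east-2.aws.neon.build/neondb?sslmode=require"
)
assert env == {
    "PGUSER": "neondb_owner",
    "PGPASSWORD": "npg_kuv6Rqi1cB",
    "PGHOST": "ep-old-silence-w26pxsvz-pooler.us-east-2.aws.neon.build",
    "PGDATABASE": "neondb",
}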
@@ -2,45 +2,48 @@ from __future__ import annotations

import os
import timeit
import traceback
from concurrent.futures import ThreadPoolExecutor as Exec
from pathlib import Path
from threading import Thread
from time import sleep
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, cast

import pytest
from fixtures.benchmark_fixture import NeonBenchmarker, PgBenchRunResult
from fixtures.log_helper import log
from fixtures.neon_api import NeonAPI, connection_parameters_to_env
from fixtures.neon_api import NeonAPI, connstr_to_env

from performance.test_perf_pgbench import utc_now_timestamp

if TYPE_CHECKING:
    from fixtures.compare_fixtures import NeonCompare
    from fixtures.neon_fixtures import Endpoint, PgBin
    from fixtures.pg_version import PgVersion

    from performance.test_perf_pgbench import utc_now_timestamp

# These tests compare performance for a write-heavy and read-heavy workloads of an ordinary endpoint
# compared to the endpoint which saves its LFC and prewarms using it on startup.
# compared to the endpoint which saves its LFC and prewarms using it on startup


def test_compare_prewarmed_pgbench_perf(neon_compare: NeonCompare):
    env = neon_compare.env
    env.create_branch("normal")
    env.create_branch("prewarmed")
    pg_bin = neon_compare.pg_bin
    ep_normal: Endpoint = env.endpoints.create_start("normal")
    ep_prewarmed: Endpoint = env.endpoints.create_start("prewarmed", autoprewarm=True)
    ep_ordinary: Endpoint = neon_compare.endpoint
    ep_prewarmed: Endpoint = env.endpoints.create_start("prewarmed")

    for ep in [ep_normal, ep_prewarmed]:
    for ep in [ep_ordinary, ep_prewarmed]:
        connstr: str = ep.connstr()
        pg_bin.run(["pgbench", "-i", "-I", "dtGvp", connstr, "-s100"])
        ep.safe_psql("CREATE EXTENSION neon")
        client = ep.http_client()
        client.offload_lfc()
        ep.stop()
        ep.start()
        client.prewarm_lfc_wait()
        ep.safe_psql("CREATE SCHEMA neon; CREATE EXTENSION neon WITH SCHEMA neon")
        if ep == ep_prewarmed:
            client = ep.http_client()
            client.offload_lfc()
            ep.stop()
            ep.start(autoprewarm=True)
            client.prewarm_lfc_wait()
        else:
            ep.stop()
            ep.start()

    run_start_timestamp = utc_now_timestamp()
    t0 = timeit.default_timer()
@@ -59,6 +62,36 @@ def test_compare_prewarmed_pgbench_perf(neon_compare: NeonCompare):
    neon_compare.zenbenchmark.record_pg_bench_result(name, res)


def test_compare_prewarmed_read_perf(neon_compare: NeonCompare):
    env = neon_compare.env
    env.create_branch("prewarmed")
    ep_ordinary: Endpoint = neon_compare.endpoint
    ep_prewarmed: Endpoint = env.endpoints.create_start("prewarmed")

    sql = [
        "CREATE SCHEMA neon",
        "CREATE EXTENSION neon WITH SCHEMA neon",
        "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
        "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
    ]
    sql_check = "SELECT count(*) from foo"

    ep_ordinary.safe_psql_many(sql)
    ep_ordinary.stop()
    ep_ordinary.start()
    with neon_compare.record_duration("ordinary_run_duration"):
        ep_ordinary.safe_psql(sql_check)

    ep_prewarmed.safe_psql_many(sql)
    client = ep_prewarmed.http_client()
    client.offload_lfc()
    ep_prewarmed.stop()
    ep_prewarmed.start(autoprewarm=True)
    client.prewarm_lfc_wait()
    with neon_compare.record_duration("prewarmed_run_duration"):
        ep_prewarmed.safe_psql(sql_check)


@pytest.mark.remote_cluster
@pytest.mark.timeout(2 * 60 * 60)
def test_compare_prewarmed_pgbench_perf_benchmark(
@@ -67,67 +100,66 @@ def test_compare_prewarmed_pgbench_perf_benchmark(
    pg_version: PgVersion,
    zenbenchmark: NeonBenchmarker,
):
    name = f"Test prewarmed pgbench performance, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}"
    project = neon_api.create_project(pg_version, name)
    project_id = project["project"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)
    err = False
    try:
        benchmark_impl(pg_bin, neon_api, project, zenbenchmark)
    except Exception as e:
        err = True
        log.error(f"Caught exception: {e}")
        log.error(traceback.format_exc())
    finally:
        assert not err
        neon_api.delete_project(project_id)
    """
    Prewarm API is not public, so this test relies on a pre-created project
    with pgbench size of 3424, pgbench -i -IdtGvp -s3424. Sleeping and
    offloading constants are hardcoded to this size as well
    """
    project_id = os.getenv("PROJECT_ID")
    assert project_id

    ordinary_branch_id = ""
    prewarmed_branch_id = ""
    for branch in neon_api.get_branches(project_id)["branches"]:
        if branch["name"] == "ordinary":
            ordinary_branch_id = branch["id"]
        if branch["name"] == "prewarmed":
            prewarmed_branch_id = branch["id"]
    assert len(ordinary_branch_id) > 0
    assert len(prewarmed_branch_id) > 0

    ep_ordinary = None
    ep_prewarmed = None
    for ep in neon_api.get_endpoints(project_id)["endpoints"]:
        if ep["branch_id"] == ordinary_branch_id:
            ep_ordinary = ep
        if ep["branch_id"] == prewarmed_branch_id:
            ep_prewarmed = ep
    assert ep_ordinary
    assert ep_prewarmed
    ordinary_id = ep_ordinary["id"]
    prewarmed_id = ep_prewarmed["id"]


def benchmark_impl(
    pg_bin: PgBin, neon_api: NeonAPI, project: dict[str, Any], zenbenchmark: NeonBenchmarker
):
    pgbench_size = int(os.getenv("PGBENCH_SIZE") or "3424")  # 50GB
    offload_secs = 20
    test_duration_min = 5
    test_duration_min = 3
    pgbench_duration = f"-T{test_duration_min * 60}"
    # prewarm API is not publicly exposed. In order to test performance of a
    # fully prewarmed endpoint, wait after it restarts.
    # The number here is empirical, based on manual runs on staging
    pgbench_init_cmd = ["pgbench", "-P10", "-n", "-c10", pgbench_duration, "-Mprepared"]
    pgbench_perf_cmd = pgbench_init_cmd + ["-S"]
    prewarmed_sleep_secs = 180

    branch_id = project["branch"]["id"]
    project_id = project["project"]["id"]
    normal_env = connection_parameters_to_env(
        project["connection_uris"][0]["connection_parameters"]
    )
    normal_id = project["endpoints"][0]["id"]

    prewarmed_branch_id = neon_api.create_branch(
        project_id, "prewarmed", parent_id=branch_id, add_endpoint=False
    )["branch"]["id"]
    neon_api.wait_for_operation_to_finish(project_id)

    ep_prewarmed = neon_api.create_endpoint(
        project_id,
        prewarmed_branch_id,
        endpoint_type="read_write",
        settings={"autoprewarm": True, "offload_lfc_interval_seconds": offload_secs},
    )
    neon_api.wait_for_operation_to_finish(project_id)

    prewarmed_env = normal_env.copy()
    prewarmed_env["PGHOST"] = ep_prewarmed["endpoint"]["host"]
    prewarmed_id = ep_prewarmed["endpoint"]["id"]
    ordinary_uri = neon_api.get_connection_uri(project_id, ordinary_branch_id, ordinary_id)["uri"]
    prewarmed_uri = neon_api.get_connection_uri(project_id, prewarmed_branch_id, prewarmed_id)[
        "uri"
    ]

    def bench(endpoint_name, endpoint_id, env):
        pg_bin.run(["pgbench", "-i", "-I", "dtGvp", f"-s{pgbench_size}"], env)
        sleep(offload_secs * 2)  # ensure LFC is offloaded after pgbench finishes
        neon_api.restart_endpoint(project_id, endpoint_id)
        sleep(prewarmed_sleep_secs)
        log.info(f"Running pgbench for {pgbench_duration}s to warm up the cache")
        pg_bin.run_capture(pgbench_init_cmd, env)  # capture useful for debugging

        log.info(f"Initialized {endpoint_name}")
        if endpoint_name == "prewarmed":
            log.info(f"sleeping {offload_secs * 2} to ensure LFC is offloaded")
            sleep(offload_secs * 2)
            neon_api.restart_endpoint(project_id, endpoint_id)
            log.info(f"sleeping {prewarmed_sleep_secs} to ensure LFC is prewarmed")
            sleep(prewarmed_sleep_secs)
        else:
            neon_api.restart_endpoint(project_id, endpoint_id)

        log.info(f"Starting benchmark for {endpoint_name}")
        run_start_timestamp = utc_now_timestamp()
        t0 = timeit.default_timer()
        out = pg_bin.run_capture(["pgbench", "-c10", pgbench_duration, "-Mprepared"], env)
        out = pg_bin.run_capture(pgbench_perf_cmd, env)
        run_duration = timeit.default_timer() - t0
        run_end_timestamp = utc_now_timestamp()
@@ -140,29 +172,9 @@ def benchmark_impl(
        )
        zenbenchmark.record_pg_bench_result(endpoint_name, res)

    with Exec(max_workers=2) as exe:
        exe.submit(bench, "normal", normal_id, normal_env)
        exe.submit(bench, "prewarmed", prewarmed_id, prewarmed_env)
    prewarmed_args = ("prewarmed", prewarmed_id, connstr_to_env(prewarmed_uri))
    prewarmed_thread = Thread(target=bench, args=prewarmed_args)
    prewarmed_thread.start()


def test_compare_prewarmed_read_perf(neon_compare: NeonCompare):
    env = neon_compare.env
    env.create_branch("normal")
    env.create_branch("prewarmed")
    ep_normal: Endpoint = env.endpoints.create_start("normal")
    ep_prewarmed: Endpoint = env.endpoints.create_start("prewarmed", autoprewarm=True)

    sql = [
        "CREATE EXTENSION neon",
        "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')",
        "INSERT INTO foo SELECT FROM generate_series(1,1000000)",
    ]
    for ep in [ep_normal, ep_prewarmed]:
        ep.safe_psql_many(sql)
        client = ep.http_client()
        client.offload_lfc()
        ep.stop()
        ep.start()
        client.prewarm_lfc_wait()
        with neon_compare.record_duration(f"{ep.branch_name}_run_duration"):
            ep.safe_psql("SELECT count(*) from foo")
    bench("ordinary", ordinary_id, connstr_to_env(ordinary_uri))
    prewarmed_thread.join()