import calendar
import enum
import os
import timeit
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import pytest
from fixtures.benchmark_fixture import MetricReport, PgBenchInitResult, PgBenchRunResult
from fixtures.compare_fixtures import NeonCompare, PgCompare
from fixtures.utils import get_scale_for_db


@enum.unique
class PgBenchLoadType(enum.Enum):
    INIT = "init"
    SIMPLE_UPDATE = "simple_update"
    SELECT_ONLY = "select-only"


def utc_now_timestamp() -> int:
    return calendar.timegm(datetime.utcnow().utctimetuple())


def init_pgbench(env: PgCompare, cmdline, password: Optional[str]):
    environ: Dict[str, str] = {}
    if password is not None:
        environ["PGPASSWORD"] = password

    # Calculate timestamps and durations separately.
    # The timestamp is intended to be used for linking to Grafana and the logs,
    # while the duration is an actual metric and therefore uses float instead of int.
    start_timestamp = utc_now_timestamp()
    t0 = timeit.default_timer()
    with env.record_pageserver_writes("init.pageserver_writes"):
        out = env.pg_bin.run_capture(cmdline, env=environ)
        env.flush()

    duration = timeit.default_timer() - t0
    end_timestamp = utc_now_timestamp()

    stderr = Path(f"{out}.stderr").read_text()

    res = PgBenchInitResult.parse_from_stderr(
        stderr=stderr,
        duration=duration,
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
    )
    env.zenbenchmark.record_pg_bench_init_result("init", res)


def run_pgbench(env: PgCompare, prefix: str, cmdline, password: Optional[str]):
    environ: Dict[str, str] = {}
    if password is not None:
        environ["PGPASSWORD"] = password

    with env.record_pageserver_writes(f"{prefix}.pageserver_writes"):
        run_start_timestamp = utc_now_timestamp()
        t0 = timeit.default_timer()
        out = env.pg_bin.run_capture(cmdline, env=environ)
        run_duration = timeit.default_timer() - t0
        run_end_timestamp = utc_now_timestamp()
        env.flush()

    stdout = Path(f"{out}.stdout").read_text()

    res = PgBenchRunResult.parse_from_stdout(
        stdout=stdout,
        run_duration=run_duration,
        run_start_timestamp=run_start_timestamp,
        run_end_timestamp=run_end_timestamp,
    )
    env.zenbenchmark.record_pg_bench_result(prefix, res)


#
# Initialize a pgbench database, or run pgbench against it, depending on workload_type.
#
# The tests below run two different pgbench workloads against the same
# initialized database, and 'duration' is the time of each run. So
# the total runtime is 2 * duration, plus the time needed to initialize
# the test database.
#
# Currently, the number of connections is hardcoded at 4.
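#
# For example, with scale=10 and duration=45 the commands built below are roughly
# (the connection string is shortened to <connstr> for readability):
#
#   pgbench -s10 -i <connstr>                                  # INIT
#   pgbench -N -c4 -T45 -P2 --progress-timestamp <connstr>     # SIMPLE_UPDATE
#   pgbench -S -c4 -T45 -P2 --progress-timestamp <connstr>     # SELECT_ONLY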
def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: PgBenchLoadType):
    env.zenbenchmark.record("scale", scale, "", MetricReport.TEST_PARAM)

    password = env.pg.default_options.get("password", None)
    # Disable the statement timeout, so that slow queries show up as degraded
    # performance rather than as test failures.
    options = "-cstatement_timeout=0 " + env.pg.default_options.get("options", "")
    # Drop the password from the connection string by passing password=None,
    # and pass the password separately via PGPASSWORD instead.
    connstr = env.pg.connstr(password=None, options=options)

    if workload_type == PgBenchLoadType.INIT:
        # Run the initialization step
        init_pgbench(env, ["pgbench", f"-s{scale}", "-i", connstr], password=password)

    if workload_type == PgBenchLoadType.SIMPLE_UPDATE:
        # Run the simple-update workload
        run_pgbench(
            env,
            "simple-update",
            [
                "pgbench",
                "-N",
                "-c4",
                f"-T{duration}",
                "-P2",
                "--progress-timestamp",
                connstr,
            ],
            password=password,
        )

    if workload_type == PgBenchLoadType.SELECT_ONLY:
        # Run the SELECT-only workload
        run_pgbench(
            env,
            "select-only",
            [
                "pgbench",
                "-S",
                "-c4",
                f"-T{duration}",
                "-P2",
                "--progress-timestamp",
                connstr,
            ],
            password=password,
        )

    env.report_size()


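# Parse the duration matrix from the environment.
#
# A hypothetical example: with TEST_PG_BENCH_DURATIONS_MATRIX="30s,10m,1h" this
# returns [30, 600, 3600]; when the variable is unset it returns [default].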
def get_durations_matrix(default: int = 45) -> List[int]:
    durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
    rv = []
    for d in durations.split(","):
        d = d.strip().lower()
        if d.endswith("h"):
            duration = int(d.removesuffix("h")) * 60 * 60
        elif d.endswith("m"):
            duration = int(d.removesuffix("m")) * 60
        else:
            duration = int(d.removesuffix("s"))
        rv.append(duration)

    return rv


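# Parse the scale matrix from the environment.
#
# A hypothetical example: with TEST_PG_BENCH_SCALES_MATRIX="50,100mb,1gb" the plain
# entry is used as a raw pgbench scale factor (50), while the size-suffixed entries
# are converted to scale factors via get_scale_for_db; the resulting values depend
# on that helper's size-per-scale assumption.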
def get_scales_matrix(default: int = 10) -> List[int]:
    scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
    rv = []
    for s in scales.split(","):
        s = s.strip().lower()
        if s.endswith("mb"):
            scale = get_scale_for_db(int(s.removesuffix("mb")))
        elif s.endswith("gb"):
            scale = get_scale_for_db(int(s.removesuffix("gb")) * 1024)
        else:
            scale = int(s)
        rv.append(scale)

    return rv


# Run the pgbench tests against vanilla Postgres and neon
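#
# The scale and duration matrices come from the environment (see the helpers above).
# A hypothetical local override of both could look like:
#
#   TEST_PG_BENCH_SCALES_MATRIX="10,100mb" TEST_PG_BENCH_DURATIONS_MATRIX="30s,5m" \
#       pytest -k test_pgbench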
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int):
    run_test_pgbench(neon_with_baseline, scale, duration, PgBenchLoadType.INIT)
    run_test_pgbench(neon_with_baseline, scale, duration, PgBenchLoadType.SIMPLE_UPDATE)
    run_test_pgbench(neon_with_baseline, scale, duration, PgBenchLoadType.SELECT_ONLY)


# Run the pgbench tests, and generate a flamegraph from the run.
# This requires that the pageserver was built with the 'profiling' feature.
#
# TODO: If the profiling is cheap enough, there's no need to run the same test
# twice, with and without profiling. But for now, run it separately, so that we
# can see how much overhead the profiling adds.
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench_flamegraph(zenbenchmark, pg_bin, neon_env_builder, scale: int, duration: int):
    neon_env_builder.pageserver_config_override = """
profiling="page_requests"
"""
    env = neon_env_builder.init_start()
    env.pageserver.is_profiling_enabled_or_skip()
    env.neon_cli.create_branch("empty", "main")

    neon_compare = NeonCompare(zenbenchmark, env, pg_bin, "pgbench")
    run_test_pgbench(neon_compare, scale, duration, PgBenchLoadType.INIT)
    run_test_pgbench(neon_compare, scale, duration, PgBenchLoadType.SIMPLE_UPDATE)
    run_test_pgbench(neon_compare, scale, duration, PgBenchLoadType.SELECT_ONLY)


# The following 3 tests run against an existing database, as it was set up by the
# previous tests, and each test leaves the database in the state that the next one builds on.
# Modifying the definition order of these functions, or adding other remote tests in between,
# will alter the results.
# See the usage of the --sparse-ordering flag in the pytest invocation in the CI workflow.
#
# Run the pgbench tests against an existing Postgres cluster
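#
# Intended ordering when the tests are run with --sparse-ordering (a sketch):
#   test_pgbench_remote_init           # pgbench -i: creates and populates the tables
#   test_pgbench_remote_simple_update  # reuses the initialized tables
#   test_pgbench_remote_select_only    # reuses the tables left by the previous test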
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_pgbench_remote_init(remote_compare: PgCompare, scale: int, duration: int):
    run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.INIT)


@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_pgbench_remote_simple_update(remote_compare: PgCompare, scale: int, duration: int):
    run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.SIMPLE_UPDATE)


@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_pgbench_remote_select_only(remote_compare: PgCompare, scale: int, duration: int):
    run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.SELECT_ONLY)