This depends on a hacked version of the 'pprof-rs' crate, so it is gated behind an optional 'profiling' feature. The feature is disabled by default, but enabled for release builds in the CircleCI config; it does not currently work on macOS. The flamegraph is written to 'flamegraph.svg' in the pageserver workdir when the 'pageserver' process exits. Add a performance test that runs the perf_pgbench test with profiling enabled.
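For illustration, a minimal sketch (not part of the test file below; the helper name and 'workdir' argument are hypothetical) of how a test could verify, after the pageserver has exited, that the profile was written to the location described above:

    from pathlib import Path

    def assert_flamegraph_written(workdir: str):
        # 'flamegraph.svg' is expected in the pageserver workdir once the
        # 'pageserver' process has exited (hypothetical helper, sketch only)
        svg = Path(workdir) / "flamegraph.svg"
        assert svg.exists(), f"no flamegraph found at {svg}"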
from contextlib import closing

from fixtures.zenith_fixtures import PgBin, VanillaPostgres, ZenithEnv, profiling_supported
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
from fixtures.benchmark_fixture import PgBenchRunResult, MetricReport, ZenithBenchmarker
from fixtures.log_helper import log

from pathlib import Path

import pytest

from datetime import datetime
import calendar
import os
import timeit


def utc_now_timestamp() -> int:
    # Current UTC wall-clock time as whole seconds since the Unix epoch.
    return calendar.timegm(datetime.utcnow().utctimetuple())


def init_pgbench(env: PgCompare, cmdline):
    # Calculate timestamps and durations separately.
    # The timestamps are intended for linking to grafana and logs, whereas the
    # duration is an actual metric, so it uses a float rather than an int.
    init_start_timestamp = utc_now_timestamp()
    t0 = timeit.default_timer()
    with env.record_pageserver_writes('init.pageserver_writes'):
        env.pg_bin.run_capture(cmdline)
        env.flush()
    init_duration = timeit.default_timer() - t0
    init_end_timestamp = utc_now_timestamp()

    env.zenbenchmark.record("init.duration",
                            init_duration,
                            unit="s",
                            report=MetricReport.LOWER_IS_BETTER)
    env.zenbenchmark.record("init.start_timestamp",
                            init_start_timestamp,
                            '',
                            MetricReport.TEST_PARAM)
    env.zenbenchmark.record("init.end_timestamp", init_end_timestamp, '', MetricReport.TEST_PARAM)


def run_pgbench(env: PgCompare, prefix: str, cmdline):
    with env.record_pageserver_writes(f'{prefix}.pageserver_writes'):
        run_start_timestamp = utc_now_timestamp()
        t0 = timeit.default_timer()
        out = env.pg_bin.run_capture(cmdline)
        run_duration = timeit.default_timer() - t0
        run_end_timestamp = utc_now_timestamp()
        env.flush()

    stdout = Path(f"{out}.stdout").read_text()

    res = PgBenchRunResult.parse_from_stdout(
        stdout=stdout,
        run_duration=run_duration,
        run_start_timestamp=run_start_timestamp,
        run_end_timestamp=run_end_timestamp,
    )
    env.zenbenchmark.record_pg_bench_result(prefix, res)
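# run_pgbench() wraps a single pgbench invocation: it records the pageserver writes
# that happen during the run, times the run, and parses the captured pgbench stdout
# (the '-P' progress lines and final summary) into a PgBenchRunResult that is
# recorded under the given prefix, e.g. 'simple-update' or 'select-only' below.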


#
# Initialize a pgbench database, and run pgbench against it.
#
# This runs two different pgbench workloads against the same initialized
# database, and 'duration' is the time of each run. So the total runtime is
# 2 * duration, plus the time needed to initialize the test database.
#
# Currently, the number of connections is hardcoded at 4.
def run_test_pgbench(env: PgCompare, scale: int, duration: int):

    # Record the scale and initialize
    env.zenbenchmark.record("scale", scale, '', MetricReport.TEST_PARAM)
    init_pgbench(env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr()])

    # Run the simple-update workload ('-N' selects pgbench's built-in
    # simple-update script)
    run_pgbench(env,
                "simple-update",
                ['pgbench', '-N', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])

    # Run the SELECT-only workload
    run_pgbench(env,
                "select-only",
                ['pgbench', '-S', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])

    env.report_size()


def get_durations_matrix():
    durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default="45")
    return list(map(int, durations.split(",")))


def get_scales_matrix():
    scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default="10")
    return list(map(int, scales.split(",")))
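# Both matrices come from the environment, so CI can widen the sweep without code
# changes, e.g. TEST_PG_BENCH_SCALES_MATRIX="10,100" and
# TEST_PG_BENCH_DURATIONS_MATRIX="45,300" (illustrative values) expand to [10, 100]
# and [45, 300], and the pytest parametrization below runs every scale/duration combination.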


# Run the pgbench tests against vanilla Postgres and zenith
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench(zenith_with_baseline: PgCompare, scale: int, duration: int):
    run_test_pgbench(zenith_with_baseline, scale, duration)


# Run the pgbench tests, and generate a flamegraph from the run.
# This requires that the pageserver was built with the 'profiling' feature.
#
# TODO: If the profiling is cheap enough, there's no need to run the same test
# twice, with and without profiling. But for now, run it separately, so that we
# can see how much overhead the profiling adds.
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench_flamegraph(zenbenchmark, pg_bin, zenith_env_builder, scale: int, duration: int):
    zenith_env_builder.num_safekeepers = 1
    zenith_env_builder.pageserver_config_override = '''
profiling="page_requests"
'''
    if not profiling_supported():
        pytest.skip("pageserver was built without 'profiling' feature")

    env = zenith_env_builder.init_start()
    env.zenith_cli.create_branch("empty", "main")

    run_test_pgbench(ZenithCompare(zenbenchmark, env, pg_bin, "pgbench"), scale, duration)
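    # Note: as described at the top of this page, the flamegraph is written to
    # 'flamegraph.svg' in the pageserver workdir when the 'pageserver' process
    # exits, so it only appears once the test environment has shut down the
    # pageserver; nothing is asserted about it here.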


# Run the pgbench tests against an existing Postgres cluster
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_pgbench_remote(remote_compare: PgCompare, scale: int, duration: int):
    run_test_pgbench(remote_compare, scale, duration)