From ea13838be7173c091c3e2937d68b3fc9b4bf1c54 Mon Sep 17 00:00:00 2001 From: bojanserafimov Date: Thu, 10 Feb 2022 15:33:36 -0500 Subject: [PATCH] Add pgbench baseline test (#1204) Co-authored-by: Heikki Linnakangas --- test_runner/batch_others/test_wal_acceptor.py | 2 +- test_runner/fixtures/zenith_fixtures.py | 73 +++++++++++++++++-- test_runner/performance/test_perf_pgbench.py | 49 ++++++++++++- 3 files changed, 113 insertions(+), 11 deletions(-) diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index a020bf6339..d1edb2963d 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -325,7 +325,7 @@ class ProposerPostgres(PgProtocol): tenant_id: str, listen_addr: str, port: int): - super().__init__(host=listen_addr, port=port) + super().__init__(host=listen_addr, port=port, username='zenith_admin') self.pgdata_dir: str = pgdata_dir self.pg_bin: PgBin = pg_bin diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py index c161c577d0..142045c04d 100644 --- a/test_runner/fixtures/zenith_fixtures.py +++ b/test_runner/fixtures/zenith_fixtures.py @@ -184,6 +184,16 @@ def worker_base_port(worker_seq_no: int): return BASE_PORT + worker_seq_no * WORKER_PORT_NUM +def get_dir_size(path: str) -> int: + """Return size in bytes.""" + totalbytes = 0 + for root, dirs, files in os.walk(path): + for name in files: + totalbytes += os.path.getsize(os.path.join(root, name)) + + return totalbytes + + def can_bind(host: str, port: int) -> bool: """ Check whether a host:port is available to bind for listening @@ -230,7 +240,7 @@ class PgProtocol: def __init__(self, host: str, port: int, username: Optional[str] = None): self.host = host self.port = port - self.username = username or "zenith_admin" + self.username = username def connstr(self, *, @@ -242,10 +252,15 @@ class PgProtocol: """ username = username or self.username - res = f'host={self.host} port={self.port} user={username} dbname={dbname}' - if not password: - return res - return f'{res} password={password}' + res = f'host={self.host} port={self.port} dbname={dbname}' + + if username: + res = f'{res} user={username}' + + if password: + res = f'{res} password={password}' + + return res # autocommit=True here by default because that's what we need most of the time def connect(self, @@ -835,7 +850,7 @@ class ZenithPageserver(PgProtocol): port: PageserverPort, remote_storage: Optional[RemoteStorage] = None, enable_auth=False): - super().__init__(host='localhost', port=port.pg) + super().__init__(host='localhost', port=port.pg, username='zenith_admin') self.env = env self.running = False self.service_port = port # do not shadow PgProtocol.port which is just int @@ -973,10 +988,54 @@ def pg_bin(test_output_dir: str) -> PgBin: return PgBin(test_output_dir) +class VanillaPostgres(PgProtocol): + def __init__(self, pgdatadir: str, pg_bin: PgBin, port: int): + super().__init__(host='localhost', port=port) + self.pgdatadir = pgdatadir + self.pg_bin = pg_bin + self.running = False + self.pg_bin.run_capture(['initdb', '-D', pgdatadir]) + + def configure(self, options: List[str]) -> None: + """Append lines into postgresql.conf file.""" + assert not self.running + with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file: + conf_file.writelines(options) + + def start(self) -> None: + assert not self.running + self.running = True + self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'start']) + + def stop(self) -> None: + assert self.running + self.running = False + self.pg_bin.run_capture(['pg_ctl', '-D', self.pgdatadir, 'stop']) + + def get_subdir_size(self, subdir) -> int: + """Return size of pgdatadir subdirectory in bytes.""" + return get_dir_size(os.path.join(self.pgdatadir, subdir)) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + if self.running: + self.stop() + + +@pytest.fixture(scope='function') +def vanilla_pg(test_output_dir: str) -> Iterator[VanillaPostgres]: + pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla") + pg_bin = PgBin(test_output_dir) + with VanillaPostgres(pgdatadir, pg_bin, 5432) as vanilla_pg: + yield vanilla_pg + + class Postgres(PgProtocol): """ An object representing a running postgres daemon. """ def __init__(self, env: ZenithEnv, tenant_id: str, port: int): - super().__init__(host='localhost', port=port) + super().__init__(host='localhost', port=port, username='zenith_admin') self.env = env self.running = False diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py index 307dfb3559..724a822055 100644 --- a/test_runner/performance/test_perf_pgbench.py +++ b/test_runner/performance/test_perf_pgbench.py @@ -1,5 +1,5 @@ from contextlib import closing -from fixtures.zenith_fixtures import PgBin, ZenithEnv +from fixtures.zenith_fixtures import PgBin, VanillaPostgres, ZenithEnv from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker from fixtures.log_helper import log @@ -7,6 +7,14 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") +def pgbench_init(pg_bin: PgBin, connstr: str): + pg_bin.run_capture(['pgbench', '-s5', '-i', connstr]) + + +def pgbench_run_5000_transactions(pg_bin: PgBin, connstr: str): + pg_bin.run_capture(['pgbench', '-c1', '-t5000', connstr]) + + # # Run a very short pgbench test. # @@ -40,7 +48,7 @@ def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin: PgBin, zenbenchmark: Zeni # Initialize pgbench database, recording the time and I/O it takes with zenbenchmark.record_pageserver_writes(env.pageserver, 'pageserver_writes'): with zenbenchmark.record_duration('init'): - pg_bin.run_capture(['pgbench', '-s5', '-i', connstr]) + pgbench_init(pg_bin, connstr) # Flush the layers from memory to disk. This is included in the reported # time and I/O @@ -48,7 +56,7 @@ def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin: PgBin, zenbenchmark: Zeni # Run pgbench for 5000 transactions with zenbenchmark.record_duration('5000_xacts'): - pg_bin.run_capture(['pgbench', '-c1', '-t5000', connstr]) + pgbench_run_5000_transactions(pg_bin, connstr) # Flush the layers to disk again. This is *not' included in the reported time, # though. @@ -60,3 +68,38 @@ def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin: PgBin, zenbenchmark: Zeni timeline_size / (1024 * 1024), 'MB', report=MetricReport.LOWER_IS_BETTER) + + +def test_pgbench_baseline(vanilla_pg: VanillaPostgres, zenbenchmark: ZenithBenchmarker): + vanilla_pg.configure(['shared_buffers=1MB']) + vanilla_pg.start() + + pg_bin = vanilla_pg.pg_bin + connstr = vanilla_pg.connstr() + conn = vanilla_pg.connect() + cur = conn.cursor() + + with zenbenchmark.record_duration('init'): + pgbench_init(pg_bin, connstr) + + # This is roughly equivalent to flushing the layers from memory to disk with Zenith. + cur.execute(f"checkpoint") + + # Run pgbench for 5000 transactions + with zenbenchmark.record_duration('5000_xacts'): + pgbench_run_5000_transactions(pg_bin, connstr) + + # This is roughly equivalent to flush the layers from memory to disk with Zenith. + cur.execute(f"checkpoint") + + # Report disk space used by the repository + data_size = vanilla_pg.get_subdir_size('base') + zenbenchmark.record('data_size', + data_size / (1024 * 1024), + 'MB', + report=MetricReport.LOWER_IS_BETTER) + wal_size = vanilla_pg.get_subdir_size('pg_wal') + zenbenchmark.record('wal_size', + wal_size / (1024 * 1024), + 'MB', + report=MetricReport.LOWER_IS_BETTER)