mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-19 06:00:38 +00:00
Merge branch 'main' into bojan-get-page-tests
This commit is contained in:
@@ -28,4 +28,4 @@ def test_createuser(zenith_simple_env: ZenithEnv):
|
||||
pg2 = env.postgres.create_start('test_createuser2')
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
assert pg2.safe_psql('select current_user', username='testuser') == [('testuser', )]
|
||||
assert pg2.safe_psql('select current_user', user='testuser') == [('testuser', )]
|
||||
|
||||
@@ -19,6 +19,11 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
|
||||
copy_input = repeat_bytes(buf.read(), 5000)
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
|
||||
# PgProtocol.connect_async sets statement_timeout to 2 minutes.
|
||||
# That's not enough for this test, on a slow system in debug mode.
|
||||
await pg_conn.execute("SET statement_timeout='300s'")
|
||||
|
||||
await pg_conn.copy_to_table(table_name, source=copy_input)
|
||||
|
||||
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
from fixtures.zenith_fixtures import ZenithEnv
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin):
|
||||
env = zenith_simple_env
|
||||
env.zenith_cli.create_branch("test_pgbench", "empty")
|
||||
pg = env.postgres.create_start('test_pgbench')
|
||||
log.info("postgres is running on 'test_pgbench' branch")
|
||||
|
||||
connstr = pg.connstr()
|
||||
|
||||
pg_bin.run_capture(['pgbench', '-i', connstr])
|
||||
pg_bin.run_capture(['pgbench'] + '-c 10 -T 5 -P 1 -M prepared'.split() + [connstr])
|
||||
@@ -5,11 +5,14 @@ def test_proxy_select_1(static_proxy):
|
||||
static_proxy.safe_psql("select 1;")
|
||||
|
||||
|
||||
@pytest.mark.xfail # Proxy eats the extra connection options
|
||||
# Pass extra options to the server.
|
||||
#
|
||||
# Currently, proxy eats the extra connection options, so this fails.
|
||||
# See https://github.com/neondatabase/neon/issues/1287
|
||||
@pytest.mark.xfail
|
||||
def test_proxy_options(static_proxy):
|
||||
schema_name = "tmp_schema_1"
|
||||
with static_proxy.connect(schema=schema_name) as conn:
|
||||
with static_proxy.connect(options="-cproxytest.option=value") as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SHOW search_path;")
|
||||
search_path = cur.fetchall()[0][0]
|
||||
assert schema_name == search_path
|
||||
cur.execute("SHOW proxytest.option;")
|
||||
value = cur.fetchall()[0][0]
|
||||
assert value == 'value'
|
||||
|
||||
@@ -379,7 +379,7 @@ class ProposerPostgres(PgProtocol):
|
||||
tenant_id: uuid.UUID,
|
||||
listen_addr: str,
|
||||
port: int):
|
||||
super().__init__(host=listen_addr, port=port, username='zenith_admin')
|
||||
super().__init__(host=listen_addr, port=port, user='zenith_admin', dbname='postgres')
|
||||
|
||||
self.pgdata_dir: str = pgdata_dir
|
||||
self.pg_bin: PgBin = pg_bin
|
||||
|
||||
@@ -35,9 +35,9 @@ def test_isolation(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, capsys
|
||||
]
|
||||
|
||||
env_vars = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
'PGHOST': pg.host,
|
||||
'PGPORT': str(pg.default_options['port']),
|
||||
'PGUSER': pg.default_options['user'],
|
||||
'PGHOST': pg.default_options['host'],
|
||||
}
|
||||
|
||||
# Run the command.
|
||||
|
||||
@@ -35,9 +35,9 @@ def test_pg_regress(zenith_simple_env: ZenithEnv, test_output_dir: str, pg_bin,
|
||||
]
|
||||
|
||||
env_vars = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
'PGHOST': pg.host,
|
||||
'PGPORT': str(pg.default_options['port']),
|
||||
'PGUSER': pg.default_options['user'],
|
||||
'PGHOST': pg.default_options['host'],
|
||||
}
|
||||
|
||||
# Run the command.
|
||||
|
||||
@@ -40,9 +40,9 @@ def test_zenith_regress(zenith_simple_env: ZenithEnv, test_output_dir, pg_bin, c
|
||||
|
||||
log.info(pg_regress_command)
|
||||
env_vars = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
'PGHOST': pg.host,
|
||||
'PGPORT': str(pg.default_options['port']),
|
||||
'PGUSER': pg.default_options['user'],
|
||||
'PGHOST': pg.default_options['host'],
|
||||
}
|
||||
|
||||
# Run the command.
|
||||
|
||||
@@ -17,7 +17,7 @@ import warnings
|
||||
from contextlib import contextmanager
|
||||
|
||||
# Type-related stuff
|
||||
from typing import Iterator
|
||||
from typing import Iterator, Optional
|
||||
"""
|
||||
This file contains fixtures for micro-benchmarks.
|
||||
|
||||
@@ -51,17 +51,12 @@ in the test initialization, or measure disk usage after the test query.
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PgBenchRunResult:
|
||||
scale: int
|
||||
number_of_clients: int
|
||||
number_of_threads: int
|
||||
number_of_transactions_actually_processed: int
|
||||
latency_average: float
|
||||
latency_stddev: float
|
||||
tps_including_connection_time: float
|
||||
tps_excluding_connection_time: float
|
||||
init_duration: float
|
||||
init_start_timestamp: int
|
||||
init_end_timestamp: int
|
||||
latency_stddev: Optional[float]
|
||||
tps: float
|
||||
run_duration: float
|
||||
run_start_timestamp: int
|
||||
run_end_timestamp: int
|
||||
@@ -69,56 +64,67 @@ class PgBenchRunResult:
|
||||
# TODO progress
|
||||
|
||||
@classmethod
|
||||
def parse_from_output(
|
||||
def parse_from_stdout(
|
||||
cls,
|
||||
out: 'subprocess.CompletedProcess[str]',
|
||||
init_duration: float,
|
||||
init_start_timestamp: int,
|
||||
init_end_timestamp: int,
|
||||
stdout: str,
|
||||
run_duration: float,
|
||||
run_start_timestamp: int,
|
||||
run_end_timestamp: int,
|
||||
):
|
||||
stdout_lines = out.stdout.splitlines()
|
||||
stdout_lines = stdout.splitlines()
|
||||
|
||||
latency_stddev = None
|
||||
|
||||
# we know significant parts of these values from test input
|
||||
# but to be precise take them from output
|
||||
# scaling factor: 5
|
||||
assert "scaling factor" in stdout_lines[1]
|
||||
scale = int(stdout_lines[1].split()[-1])
|
||||
# number of clients: 1
|
||||
assert "number of clients" in stdout_lines[3]
|
||||
number_of_clients = int(stdout_lines[3].split()[-1])
|
||||
# number of threads: 1
|
||||
assert "number of threads" in stdout_lines[4]
|
||||
number_of_threads = int(stdout_lines[4].split()[-1])
|
||||
# number of transactions actually processed: 1000/1000
|
||||
assert "number of transactions actually processed" in stdout_lines[6]
|
||||
number_of_transactions_actually_processed = int(stdout_lines[6].split("/")[1])
|
||||
# latency average = 19.894 ms
|
||||
assert "latency average" in stdout_lines[7]
|
||||
latency_average = stdout_lines[7].split()[-2]
|
||||
# latency stddev = 3.387 ms
|
||||
assert "latency stddev" in stdout_lines[8]
|
||||
latency_stddev = stdout_lines[8].split()[-2]
|
||||
# tps = 50.219689 (including connections establishing)
|
||||
assert "(including connections establishing)" in stdout_lines[9]
|
||||
tps_including_connection_time = stdout_lines[9].split()[2]
|
||||
# tps = 50.264435 (excluding connections establishing)
|
||||
assert "(excluding connections establishing)" in stdout_lines[10]
|
||||
tps_excluding_connection_time = stdout_lines[10].split()[2]
|
||||
for line in stdout.splitlines():
|
||||
# scaling factor: 5
|
||||
if line.startswith("scaling factor:"):
|
||||
scale = int(line.split()[-1])
|
||||
# number of clients: 1
|
||||
if line.startswith("number of clients: "):
|
||||
number_of_clients = int(line.split()[-1])
|
||||
# number of threads: 1
|
||||
if line.startswith("number of threads: "):
|
||||
number_of_threads = int(line.split()[-1])
|
||||
# number of transactions actually processed: 1000/1000
|
||||
# OR
|
||||
# number of transactions actually processed: 1000
|
||||
if line.startswith("number of transactions actually processed"):
|
||||
if "/" in line:
|
||||
number_of_transactions_actually_processed = int(line.split("/")[1])
|
||||
else:
|
||||
number_of_transactions_actually_processed = int(line.split()[-1])
|
||||
# latency average = 19.894 ms
|
||||
if line.startswith("latency average"):
|
||||
latency_average = float(line.split()[-2])
|
||||
# latency stddev = 3.387 ms
|
||||
# (only printed with some options)
|
||||
if line.startswith("latency stddev"):
|
||||
latency_stddev = float(line.split()[-2])
|
||||
|
||||
# Get the TPS without initial connection time. The format
|
||||
# of the tps lines changed in pgbench v14, but we accept
|
||||
# either format:
|
||||
#
|
||||
# pgbench v13 and below:
|
||||
# tps = 50.219689 (including connections establishing)
|
||||
# tps = 50.264435 (excluding connections establishing)
|
||||
#
|
||||
# pgbench v14:
|
||||
# initial connection time = 3.858 ms
|
||||
# tps = 309.281539 (without initial connection time)
|
||||
if (line.startswith("tps = ") and ("(excluding connections establishing)" in line
|
||||
or "(without initial connection time)")):
|
||||
tps = float(line.split()[2])
|
||||
|
||||
return cls(
|
||||
scale=scale,
|
||||
number_of_clients=number_of_clients,
|
||||
number_of_threads=number_of_threads,
|
||||
number_of_transactions_actually_processed=number_of_transactions_actually_processed,
|
||||
latency_average=float(latency_average),
|
||||
latency_stddev=float(latency_stddev),
|
||||
tps_including_connection_time=float(tps_including_connection_time),
|
||||
tps_excluding_connection_time=float(tps_excluding_connection_time),
|
||||
init_duration=init_duration,
|
||||
init_start_timestamp=init_start_timestamp,
|
||||
init_end_timestamp=init_end_timestamp,
|
||||
latency_average=latency_average,
|
||||
latency_stddev=latency_stddev,
|
||||
tps=tps,
|
||||
run_duration=run_duration,
|
||||
run_start_timestamp=run_start_timestamp,
|
||||
run_end_timestamp=run_end_timestamp,
|
||||
@@ -187,60 +193,41 @@ class ZenithBenchmarker:
|
||||
report=MetricReport.LOWER_IS_BETTER,
|
||||
)
|
||||
|
||||
def record_pg_bench_result(self, pg_bench_result: PgBenchRunResult):
|
||||
self.record("scale", pg_bench_result.scale, '', MetricReport.TEST_PARAM)
|
||||
self.record("number_of_clients",
|
||||
def record_pg_bench_result(self, prefix: str, pg_bench_result: PgBenchRunResult):
|
||||
self.record(f"{prefix}.number_of_clients",
|
||||
pg_bench_result.number_of_clients,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
self.record("number_of_threads",
|
||||
self.record(f"{prefix}.number_of_threads",
|
||||
pg_bench_result.number_of_threads,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
self.record(
|
||||
"number_of_transactions_actually_processed",
|
||||
f"{prefix}.number_of_transactions_actually_processed",
|
||||
pg_bench_result.number_of_transactions_actually_processed,
|
||||
'',
|
||||
# that's because this is predefined by the test matrix and doesn't change across runs
|
||||
report=MetricReport.TEST_PARAM,
|
||||
)
|
||||
self.record("latency_average",
|
||||
self.record(f"{prefix}.latency_average",
|
||||
pg_bench_result.latency_average,
|
||||
unit="ms",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
self.record("latency_stddev",
|
||||
pg_bench_result.latency_stddev,
|
||||
unit="ms",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
self.record("tps_including_connection_time",
|
||||
pg_bench_result.tps_including_connection_time,
|
||||
'',
|
||||
report=MetricReport.HIGHER_IS_BETTER)
|
||||
self.record("tps_excluding_connection_time",
|
||||
pg_bench_result.tps_excluding_connection_time,
|
||||
'',
|
||||
report=MetricReport.HIGHER_IS_BETTER)
|
||||
self.record("init_duration",
|
||||
pg_bench_result.init_duration,
|
||||
unit="s",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
self.record("init_start_timestamp",
|
||||
pg_bench_result.init_start_timestamp,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
self.record("init_end_timestamp",
|
||||
pg_bench_result.init_end_timestamp,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
self.record("run_duration",
|
||||
if pg_bench_result.latency_stddev is not None:
|
||||
self.record(f"{prefix}.latency_stddev",
|
||||
pg_bench_result.latency_stddev,
|
||||
unit="ms",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
self.record(f"{prefix}.tps", pg_bench_result.tps, '', report=MetricReport.HIGHER_IS_BETTER)
|
||||
self.record(f"{prefix}.run_duration",
|
||||
pg_bench_result.run_duration,
|
||||
unit="s",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
self.record("run_start_timestamp",
|
||||
self.record(f"{prefix}.run_start_timestamp",
|
||||
pg_bench_result.run_start_timestamp,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
self.record("run_end_timestamp",
|
||||
self.record(f"{prefix}.run_end_timestamp",
|
||||
pg_bench_result.run_end_timestamp,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
@@ -259,10 +246,18 @@ class ZenithBenchmarker:
|
||||
"""
|
||||
Fetch the "cumulative # of bytes written" metric from the pageserver
|
||||
"""
|
||||
# Fetch all the exposed prometheus metrics from page server
|
||||
all_metrics = pageserver.http_client().get_metrics()
|
||||
# Use a regular expression to extract the one we're interested in
|
||||
#
|
||||
metric_name = r'pageserver_disk_io_bytes{io_operation="write"}'
|
||||
return self.get_int_counter_value(pageserver, metric_name)
|
||||
|
||||
def get_peak_mem(self, pageserver) -> int:
|
||||
"""
|
||||
Fetch the "maxrss" metric from the pageserver
|
||||
"""
|
||||
metric_name = r'pageserver_maxrss_kb'
|
||||
return self.get_int_counter_value(pageserver, metric_name)
|
||||
|
||||
def get_int_counter_value(self, pageserver, metric_name) -> int:
|
||||
"""Fetch the value of given int counter from pageserver metrics."""
|
||||
# TODO: If we start to collect more of the prometheus metrics in the
|
||||
# performance test suite like this, we should refactor this to load and
|
||||
# parse all the metrics into a more convenient structure in one go.
|
||||
@@ -270,20 +265,8 @@ class ZenithBenchmarker:
|
||||
# The metric should be an integer, as it's a number of bytes. But in general
|
||||
# all prometheus metrics are floats. So to be pedantic, read it as a float
|
||||
# and round to integer.
|
||||
matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$',
|
||||
all_metrics,
|
||||
re.MULTILINE)
|
||||
assert matches
|
||||
return int(round(float(matches.group(1))))
|
||||
|
||||
def get_peak_mem(self, pageserver) -> int:
|
||||
"""
|
||||
Fetch the "maxrss" metric from the pageserver
|
||||
"""
|
||||
# Fetch all the exposed prometheus metrics from page server
|
||||
all_metrics = pageserver.http_client().get_metrics()
|
||||
# See comment in get_io_writes()
|
||||
matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, re.MULTILINE)
|
||||
matches = re.search(fr'^{metric_name} (\S+)$', all_metrics, re.MULTILINE)
|
||||
assert matches
|
||||
return int(round(float(matches.group(1))))
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import pytest
|
||||
from contextlib import contextmanager
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, ZenithEnv
|
||||
from fixtures.zenith_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, ZenithEnv
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
|
||||
# Type-related stuff
|
||||
@@ -87,6 +87,9 @@ class ZenithCompare(PgCompare):
|
||||
def flush(self):
|
||||
self.pscur.execute(f"do_gc {self.env.initial_tenant.hex} {self.timeline} 0")
|
||||
|
||||
def compact(self):
|
||||
self.pscur.execute(f"compact {self.env.initial_tenant.hex} {self.timeline}")
|
||||
|
||||
def report_peak_memory_use(self) -> None:
|
||||
self.zenbenchmark.record("peak_mem",
|
||||
self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024,
|
||||
@@ -102,6 +105,19 @@ class ZenithCompare(PgCompare):
|
||||
'MB',
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
|
||||
total_files = self.zenbenchmark.get_int_counter_value(
|
||||
self.env.pageserver, "pageserver_num_persistent_files_created")
|
||||
total_bytes = self.zenbenchmark.get_int_counter_value(
|
||||
self.env.pageserver, "pageserver_persistent_bytes_written")
|
||||
self.zenbenchmark.record("data_uploaded",
|
||||
total_bytes / (1024 * 1024),
|
||||
"MB",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
self.zenbenchmark.record("num_files_uploaded",
|
||||
total_files,
|
||||
"",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
|
||||
def record_pageserver_writes(self, out_name):
|
||||
return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name)
|
||||
|
||||
@@ -159,6 +175,48 @@ class VanillaCompare(PgCompare):
|
||||
return self.zenbenchmark.record_duration(out_name)
|
||||
|
||||
|
||||
class RemoteCompare(PgCompare):
    """PgCompare implementation backed by a remote postgres instance.

    The pageserver-related hooks (flush, peak memory, size, pageserver
    writes) do nothing here; durations are still recorded through the
    benchmarker passed to the constructor.
    """
    def __init__(self, zenbenchmark, remote_pg: RemotePostgres):
        self._zenbenchmark = zenbenchmark
        self._pg = remote_pg

        # Keep one connection and one cursor open for the lifetime of this
        # object (useful for flushing).
        connection = self.pg.connect()
        self.conn = connection
        self.cur = connection.cursor()

    @property
    def pg(self):
        """The RemotePostgres handle given to the constructor."""
        return self._pg

    @property
    def zenbenchmark(self):
        """The benchmarker given to the constructor."""
        return self._zenbenchmark

    @property
    def pg_bin(self):
        """The pg_bin attribute of the wrapped RemotePostgres."""
        return self._pg.pg_bin

    def flush(self):
        """No-op for now."""
        # TODO: flush the remote pageserver
        pass

    def report_peak_memory_use(self) -> None:
        """No-op for now."""
        # TODO: get memory usage from remote pageserver
        pass

    def report_size(self) -> None:
        """No-op for now."""
        # TODO: get storage size from remote pageserver
        pass

    @contextmanager
    def record_pageserver_writes(self, out_name):
        """Context manager that records nothing; it only yields once."""
        yield  # Do nothing

    def record_duration(self, out_name):
        """Delegate to the benchmarker's record_duration for `out_name`."""
        return self.zenbenchmark.record_duration(out_name)
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def zenith_compare(request, zenbenchmark, pg_bin, zenith_simple_env) -> ZenithCompare:
|
||||
branch_name = request.node.name
|
||||
@@ -170,6 +228,11 @@ def vanilla_compare(zenbenchmark, vanilla_pg) -> VanillaCompare:
|
||||
return VanillaCompare(zenbenchmark, vanilla_pg)
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare:
    """Function-scoped fixture: a RemoteCompare built from the
    `zenbenchmark` and `remote_pg` fixtures."""
    comparer = RemoteCompare(zenbenchmark, remote_pg)
    return comparer
|
||||
|
||||
|
||||
@pytest.fixture(params=["vanilla_compare", "zenith_compare"], ids=["vanilla", "zenith"])
|
||||
def zenith_with_baseline(request) -> PgCompare:
|
||||
"""Parameterized fixture that helps compare zenith against vanilla postgres.
|
||||
|
||||
@@ -27,6 +27,7 @@ from dataclasses import dataclass
|
||||
|
||||
# Type-related stuff
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
from psycopg2.extensions import make_dsn, parse_dsn
|
||||
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, TypeVar, cast, Union, Tuple
|
||||
from typing_extensions import Literal
|
||||
|
||||
@@ -122,6 +123,22 @@ def pytest_configure(config):
|
||||
top_output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR)
|
||||
mkdir_if_needed(top_output_dir)
|
||||
|
||||
# Find the postgres installation.
|
||||
global pg_distrib_dir
|
||||
env_postgres_bin = os.environ.get('POSTGRES_DISTRIB_DIR')
|
||||
if env_postgres_bin:
|
||||
pg_distrib_dir = env_postgres_bin
|
||||
else:
|
||||
pg_distrib_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
|
||||
log.info(f'pg_distrib_dir is {pg_distrib_dir}')
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# When testing against a remote server, we only need the client binary.
|
||||
if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/psql')):
|
||||
raise Exception('psql not found at "{}"'.format(pg_distrib_dir))
|
||||
else:
|
||||
if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/postgres')):
|
||||
raise Exception('postgres not found at "{}"'.format(pg_distrib_dir))
|
||||
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# we are in remote env and do not have zenith binaries locally
|
||||
# this is the case for benchmarks run on self-hosted runner
|
||||
@@ -137,17 +154,6 @@ def pytest_configure(config):
|
||||
if not os.path.exists(os.path.join(zenith_binpath, 'pageserver')):
|
||||
raise Exception('zenith binaries not found at "{}"'.format(zenith_binpath))
|
||||
|
||||
# Find the postgres installation.
|
||||
global pg_distrib_dir
|
||||
env_postgres_bin = os.environ.get('POSTGRES_DISTRIB_DIR')
|
||||
if env_postgres_bin:
|
||||
pg_distrib_dir = env_postgres_bin
|
||||
else:
|
||||
pg_distrib_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
|
||||
log.info(f'pg_distrib_dir is {pg_distrib_dir}')
|
||||
if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/postgres')):
|
||||
raise Exception('postgres not found at "{}"'.format(pg_distrib_dir))
|
||||
|
||||
|
||||
def zenfixture(func: Fn) -> Fn:
|
||||
"""
|
||||
@@ -238,98 +244,69 @@ def port_distributor(worker_base_port):
|
||||
|
||||
class PgProtocol:
|
||||
""" Reusable connection logic """
|
||||
def __init__(self,
|
||||
host: str,
|
||||
port: int,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
dbname: Optional[str] = None,
|
||||
schema: Optional[str] = None):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.dbname = dbname
|
||||
self.schema = schema
|
||||
def __init__(self, **kwargs):
|
||||
self.default_options = kwargs
|
||||
|
||||
def connstr(self,
|
||||
*,
|
||||
dbname: Optional[str] = None,
|
||||
schema: Optional[str] = None,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
statement_timeout_ms: Optional[int] = None) -> str:
|
||||
def connstr(self, **kwargs) -> str:
|
||||
"""
|
||||
Build a libpq connection string for the Postgres instance.
|
||||
"""
|
||||
return str(make_dsn(**self.conn_options(**kwargs)))
|
||||
|
||||
username = username or self.username
|
||||
password = password or self.password
|
||||
dbname = dbname or self.dbname or "postgres"
|
||||
schema = schema or self.schema
|
||||
res = f'host={self.host} port={self.port} dbname={dbname}'
|
||||
def conn_options(self, **kwargs):
|
||||
conn_options = self.default_options.copy()
|
||||
if 'dsn' in kwargs:
|
||||
conn_options.update(parse_dsn(kwargs['dsn']))
|
||||
conn_options.update(kwargs)
|
||||
|
||||
if username:
|
||||
res = f'{res} user={username}'
|
||||
|
||||
if password:
|
||||
res = f'{res} password={password}'
|
||||
|
||||
if schema:
|
||||
res = f"{res} options='-c search_path={schema}'"
|
||||
|
||||
if statement_timeout_ms:
|
||||
res = f"{res} options='-c statement_timeout={statement_timeout_ms}'"
|
||||
|
||||
return res
|
||||
# Individual statement timeout in seconds. 2 minutes should be
|
||||
# enough for our tests, but if you need a longer one, you can
|
||||
# change it by calling "SET statement_timeout" after
|
||||
# connecting.
|
||||
if 'options' in conn_options:
|
||||
conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options']
|
||||
else:
|
||||
conn_options['options'] = "-cstatement_timeout=120s"
|
||||
return conn_options
|
||||
|
||||
# autocommit=True here by default because that's what we need most of the time
|
||||
def connect(
|
||||
self,
|
||||
*,
|
||||
autocommit=True,
|
||||
dbname: Optional[str] = None,
|
||||
schema: Optional[str] = None,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None,
|
||||
# individual statement timeout in seconds, 2 minutes should be enough for our tests
|
||||
statement_timeout: Optional[int] = 120
|
||||
) -> PgConnection:
|
||||
def connect(self, autocommit=True, **kwargs) -> PgConnection:
|
||||
"""
|
||||
Connect to the node.
|
||||
Returns psycopg2's connection object.
|
||||
This method passes all extra params to conn_options.
|
||||
"""
|
||||
conn = psycopg2.connect(**self.conn_options(**kwargs))
|
||||
|
||||
conn = psycopg2.connect(
|
||||
self.connstr(dbname=dbname,
|
||||
schema=schema,
|
||||
username=username,
|
||||
password=password,
|
||||
statement_timeout_ms=statement_timeout *
|
||||
1000 if statement_timeout else None))
|
||||
# WARNING: this setting affects *all* tests!
|
||||
conn.autocommit = autocommit
|
||||
return conn
|
||||
|
||||
async def connect_async(self,
|
||||
*,
|
||||
dbname: str = 'postgres',
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None) -> asyncpg.Connection:
|
||||
async def connect_async(self, **kwargs) -> asyncpg.Connection:
    """
    Connect to the node from async python.
    Returns asyncpg's connection object.

    Keyword arguments are merged over the instance defaults by
    conn_options(), then translated from the psycopg2 naming to the
    keywords that asyncpg.connect() takes.
    """
    # asyncpg takes slightly different options than psycopg2. Try
    # to convert the defaults from the psycopg2 format.
    conn_options = self.conn_options(**kwargs)

    # The psycopg2 option 'dbname' is called 'database' in asyncpg
    if 'dbname' in conn_options:
        conn_options['database'] = conn_options.pop('dbname')

    # Convert options='-c<key>=<val>' to server_settings
    if 'options' in conn_options:
        options = conn_options.pop('options')
        # Raw string so '\w' / '\S' are regex escapes, not string escapes.
        # Keys may contain dots (custom settings such as 'proxytest.option')
        # and '-c key=val' with a space after '-c' is accepted as well.
        # NOTE: values containing backslash-escaped whitespace are not handled.
        parsed = {
            match.group(1): match.group(2)
            for match in re.finditer(r'-c\s*([\w.]+)=(\S*)', options)
        }
        if parsed:
            # Merge into a copy of any server_settings supplied by the
            # caller, so that every -c option is kept (not only the last
            # one) and the caller's dict is left untouched.
            server_settings = dict(conn_options.get('server_settings') or {})
            server_settings.update(parsed)
            conn_options['server_settings'] = server_settings
    return await asyncpg.connect(**conn_options)
|
||||
|
||||
def safe_psql(self, query: str, **kwargs: Any) -> List[Any]:
|
||||
"""
|
||||
@@ -1149,10 +1126,10 @@ class ZenithPageserver(PgProtocol):
|
||||
port: PageserverPort,
|
||||
remote_storage: Optional[RemoteStorage] = None,
|
||||
config_override: Optional[str] = None):
|
||||
super().__init__(host='localhost', port=port.pg, username='zenith_admin')
|
||||
super().__init__(host='localhost', port=port.pg, user='zenith_admin')
|
||||
self.env = env
|
||||
self.running = False
|
||||
self.service_port = port # do not shadow PgProtocol.port which is just int
|
||||
self.service_port = port
|
||||
self.remote_storage = remote_storage
|
||||
self.config_override = config_override
|
||||
|
||||
@@ -1314,7 +1291,7 @@ def psbench_bin(test_output_dir):
|
||||
|
||||
class VanillaPostgres(PgProtocol):
|
||||
def __init__(self, pgdatadir: str, pg_bin: PgBin, port: int):
|
||||
super().__init__(host='localhost', port=port)
|
||||
super().__init__(host='localhost', port=port, dbname='postgres')
|
||||
self.pgdatadir = pgdatadir
|
||||
self.pg_bin = pg_bin
|
||||
self.running = False
|
||||
@@ -1356,10 +1333,57 @@ def vanilla_pg(test_output_dir: str) -> Iterator[VanillaPostgres]:
|
||||
yield vanilla_pg
|
||||
|
||||
|
||||
class RemotePostgres(PgProtocol):
    """Handle for an already-running Postgres server identified by a
    connection string.

    The connection string is parsed with parse_dsn() and the resulting
    keywords are passed to PgProtocol.__init__. Operations that would
    manage the server itself (configure/start/stop/size) raise, and the
    context-manager protocol is a no-op.
    """
    def __init__(self, pg_bin: PgBin, remote_connstr: str):
        super().__init__(**parse_dsn(remote_connstr))
        self.pg_bin = pg_bin
        # The remote server is assumed to be running already
        self.running = True

    def configure(self, options: List[str]):
        """Always raises: the remote server's configuration cannot be changed."""
        raise Exception('cannot change configuration of remote Postgres instance')

    def start(self):
        """Always raises: the remote server cannot be started from here."""
        raise Exception('cannot start a remote Postgres instance')

    def stop(self):
        """Always raises: the remote server cannot be stopped from here."""
        raise Exception('cannot stop a remote Postgres instance')

    def get_subdir_size(self, subdir) -> int:
        """Always raises: directory sizes are not available for a remote server."""
        # TODO: Could use the server's Generic File Access functions if superuser.
        # See https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-GENFILE
        raise Exception('cannot get size of a remote Postgres instance')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # do nothing
        pass
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def remote_pg(test_output_dir: str) -> Iterator[RemotePostgres]:
    """Function-scoped fixture yielding a RemotePostgres.

    The connection string is read from the BENCHMARK_CONNSTR environment
    variable; a ValueError is raised when it is not set.
    """
    pg_bin = PgBin(test_output_dir)

    remote_connstr = os.getenv("BENCHMARK_CONNSTR")
    if remote_connstr is None:
        raise ValueError("no connstr provided, use BENCHMARK_CONNSTR environment variable")

    with RemotePostgres(pg_bin, remote_connstr) as remote:
        yield remote
|
||||
|
||||
|
||||
class ZenithProxy(PgProtocol):
|
||||
def __init__(self, port: int):
|
||||
super().__init__(host="127.0.0.1", username="pytest", password="pytest", port=port)
|
||||
super().__init__(host="127.0.0.1",
|
||||
user="pytest",
|
||||
password="pytest",
|
||||
port=port,
|
||||
dbname='postgres')
|
||||
self.http_port = 7001
|
||||
self.host = "127.0.0.1"
|
||||
self.port = port
|
||||
self._popen: Optional[subprocess.Popen[bytes]] = None
|
||||
|
||||
def start_static(self, addr="127.0.0.1:5432") -> None:
|
||||
@@ -1403,13 +1427,13 @@ def static_proxy(vanilla_pg) -> Iterator[ZenithProxy]:
|
||||
class Postgres(PgProtocol):
|
||||
""" An object representing a running postgres daemon. """
|
||||
def __init__(self, env: ZenithEnv, tenant_id: uuid.UUID, port: int):
|
||||
super().__init__(host='localhost', port=port, username='zenith_admin')
|
||||
|
||||
super().__init__(host='localhost', port=port, user='zenith_admin', dbname='postgres')
|
||||
self.env = env
|
||||
self.running = False
|
||||
self.node_name: Optional[str] = None # dubious, see asserts below
|
||||
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
|
||||
self.tenant_id = tenant_id
|
||||
self.port = port
|
||||
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
|
||||
|
||||
def create(
|
||||
|
||||
@@ -2,29 +2,113 @@ from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PgBin, VanillaPostgres, ZenithEnv
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, ZenithCompare
|
||||
|
||||
from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult, MetricReport, ZenithBenchmarker
|
||||
from fixtures.log_helper import log
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
import calendar
|
||||
import os
|
||||
import timeit
|
||||
|
||||
|
||||
def utc_now_timestamp() -> int:
    """Return the current time as whole seconds since the Unix epoch."""
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # datetime instead. The resulting integer is the same.
    from datetime import timezone

    return calendar.timegm(datetime.now(timezone.utc).utctimetuple())
|
||||
|
||||
|
||||
def init_pgbench(env: PgCompare, cmdline):
|
||||
# calculate timestamps and durations separately
|
||||
# timestamp is intended to be used for linking to grafana and logs
|
||||
# duration is actually a metric and uses float instead of int for timestamp
|
||||
init_start_timestamp = utc_now_timestamp()
|
||||
t0 = timeit.default_timer()
|
||||
with env.record_pageserver_writes('init.pageserver_writes'):
|
||||
env.pg_bin.run_capture(cmdline)
|
||||
env.flush()
|
||||
init_duration = timeit.default_timer() - t0
|
||||
init_end_timestamp = utc_now_timestamp()
|
||||
|
||||
env.zenbenchmark.record("init.duration",
|
||||
init_duration,
|
||||
unit="s",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
env.zenbenchmark.record("init.start_timestamp",
|
||||
init_start_timestamp,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
env.zenbenchmark.record("init.end_timestamp", init_end_timestamp, '', MetricReport.TEST_PARAM)
|
||||
|
||||
|
||||
def run_pgbench(env: PgCompare, prefix: str, cmdline):
    """Run one pgbench workload and record its parsed result under `prefix`.

    `cmdline` is handed to `env.pg_bin.run_capture`; the captured stdout is
    read back from "<returned value>.stdout", parsed with
    `PgBenchRunResult.parse_from_stdout`, and recorded on `env.zenbenchmark`.
    """
    # NOTE(review): indentation was lost in the reviewed source; everything up
    # to and including the flush is assumed to be inside the with-block - confirm.
    with env.record_pageserver_writes(f'{prefix}.pageserver_writes'):
        started_at = utc_now_timestamp()
        timer_start = timeit.default_timer()
        capture_base = env.pg_bin.run_capture(cmdline, )
        elapsed = timeit.default_timer() - timer_start
        finished_at = utc_now_timestamp()
        env.flush()

    pgbench_output = Path(f"{capture_base}.stdout").read_text()

    parsed = PgBenchRunResult.parse_from_stdout(
        stdout=pgbench_output,
        run_duration=elapsed,
        run_start_timestamp=started_at,
        run_end_timestamp=finished_at,
    )
    env.zenbenchmark.record_pg_bench_result(prefix, parsed)
|
||||
|
||||
|
||||
#
|
||||
# Run a very short pgbench test.
|
||||
# Initialize a pgbench database, and run pgbench against it.
|
||||
#
|
||||
# Collects three metrics:
|
||||
# This runs two different pgbench workloads against the same
|
||||
# initialized database, and 'duration' is the time of each run. So
|
||||
# the total runtime is 2 * duration, plus time needed to initialize
|
||||
# the test database.
|
||||
#
|
||||
# 1. Time to initialize the pgbench database (pgbench -s5 -i)
|
||||
# 2. Time to run 5000 pgbench transactions
|
||||
# 3. Disk space used
|
||||
#
|
||||
def test_pgbench(zenith_with_baseline: PgCompare):
|
||||
env = zenith_with_baseline
|
||||
# Currently, the # of connections is hardcoded at 4
|
||||
def run_test_pgbench(env: PgCompare, scale: int, duration: int):
|
||||
|
||||
with env.record_pageserver_writes('pageserver_writes'):
|
||||
with env.record_duration('init'):
|
||||
env.pg_bin.run_capture(['pgbench', '-s5', '-i', env.pg.connstr()])
|
||||
env.flush()
|
||||
# Record the scale and initialize
|
||||
env.zenbenchmark.record("scale", scale, '', MetricReport.TEST_PARAM)
|
||||
init_pgbench(env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
|
||||
|
||||
with env.record_duration('5000_xacts'):
|
||||
env.pg_bin.run_capture(['pgbench', '-c1', '-t5000', env.pg.connstr()])
|
||||
env.flush()
|
||||
# Run simple-update workload
|
||||
run_pgbench(env,
|
||||
"simple-update",
|
||||
['pgbench', '-n', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])
|
||||
|
||||
# Run SELECT workload
|
||||
run_pgbench(env,
|
||||
"select-only",
|
||||
['pgbench', '-S', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])
|
||||
|
||||
env.report_size()
|
||||
|
||||
|
||||
def get_durations_matrix():
    """Return the list of pgbench durations used to parametrize the tests.

    The values come from the TEST_PG_BENCH_DURATIONS_MATRIX environment
    variable, a comma-separated list of integers. When the variable is unset,
    empty, or contains only blank entries, ``[45]`` is returned.

    Raises:
        ValueError: if a non-blank entry is not a valid integer.
    """
    raw = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default="45")
    # Skip blank entries so an empty variable or a trailing comma does not
    # crash with int("") while the test matrix is being built.
    durations = [int(entry) for entry in raw.split(",") if entry.strip()]
    return durations or [45]
|
||||
|
||||
|
||||
def get_scales_matrix():
    """Return the list of pgbench scale factors used to parametrize the tests.

    The values come from the TEST_PG_BENCH_SCALES_MATRIX environment variable,
    a comma-separated list of integers. When the variable is unset, empty, or
    contains only blank entries, ``[10]`` is returned.

    Raises:
        ValueError: if a non-blank entry is not a valid integer.
    """
    raw = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default="10")
    # Skip blank entries so an empty variable or a trailing comma does not
    # crash with int("") while the test matrix is being built.
    scales = [int(entry) for entry in raw.split(",") if entry.strip()]
    return scales or [10]
|
||||
|
||||
|
||||
# Run the pgbench tests against vanilla Postgres and zenith
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench(zenith_with_baseline: PgCompare, scale: int, duration: int):
    """Run the pgbench workloads for every (scale, duration) combination.

    The environment is supplied by the `zenith_with_baseline` fixture.
    """
    run_test_pgbench(env=zenith_with_baseline, scale=scale, duration=duration)
|
||||
|
||||
|
||||
# Run the pgbench tests against an existing Postgres cluster
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix())
@pytest.mark.remote_cluster
def test_pgbench_remote(remote_compare: PgCompare, scale: int, duration: int):
    """Run the pgbench workloads for every (scale, duration) combination.

    The environment is supplied by the `remote_compare` fixture, and the test
    carries the `remote_cluster` marker.
    """
    run_test_pgbench(env=remote_compare, scale=scale, duration=duration)
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
import dataclasses
|
||||
import os
|
||||
import subprocess
|
||||
from typing import List
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult, ZenithBenchmarker
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
import calendar
|
||||
import timeit
|
||||
import os
|
||||
|
||||
|
||||
def utc_now_timestamp() -> int:
    """Return the current time as whole seconds since the Unix epoch (UTC)."""
    # Imported locally because only the `datetime` class is imported at
    # module level in this file.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. An aware UTC datetime
    # yields the same UTC time tuple, so the integer result is unchanged.
    return calendar.timegm(datetime.now(timezone.utc).utctimetuple())
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PgBenchRunner:
    """Thin wrapper that invokes the pgbench binary against one connection string."""

    connstr: str  # connection string passed as pgbench's last argument
    scale: int  # value for --scale when initializing
    transactions: int  # value for --transactions when running
    pgbench_bin_path: str = "pgbench"  # executable to launch

    def invoke(self, args: List[str]) -> 'subprocess.CompletedProcess[str]':
        """Run the pgbench binary with `args`, capturing stdout/stderr as text.

        Raises:
            RuntimeError: if the process exits with a non-zero return code.
        """
        command = [self.pgbench_bin_path, *args]
        completed = subprocess.run(command, text=True, capture_output=True)
        if completed.returncode == 0:
            return completed

        raise RuntimeError(
            f"pgbench failed. stdout: {completed.stdout} stderr: {completed.stderr}")

    def init(self, vacuum: bool = True) -> 'subprocess.CompletedProcess[str]':
        """Initialize the pgbench database at the configured scale.

        When `vacuum` is false, ``--no-vacuum`` is passed first.
        """
        leading_flags = [] if vacuum else ["--no-vacuum"]
        return self.invoke([
            *leading_flags,
            f"--scale={self.scale}",
            "--initialize",
            self.connstr,
        ])

    def run(self, jobs: int = 1, clients: int = 1):
        """Run the configured number of transactions with the given jobs/clients."""
        run_args = [
            f"--transactions={self.transactions}",
            f"--jobs={jobs}",
            f"--client={clients}",
            "--progress=2",  # print progress every two seconds
            self.connstr,
        ]
        return self.invoke(run_args)
|
||||
|
||||
|
||||
@pytest.fixture
def connstr():
    """Provide the connection string from the BENCHMARK_CONNSTR environment variable.

    Raises:
        ValueError: if BENCHMARK_CONNSTR is not set.
    """
    configured = os.environ.get("BENCHMARK_CONNSTR")
    if configured is not None:
        return configured
    raise ValueError("no connstr provided, use BENCHMARK_CONNSTR environment variable")
|
||||
|
||||
|
||||
def get_transactions_matrix():
    """Return the list of transaction counts used to parametrize the tests.

    The values come from the TEST_PG_BENCH_TRANSACTIONS_MATRIX environment
    variable, a comma-separated list of integers. When the variable is unset,
    empty, or contains only blank entries, ``[10**4, 10**5]`` is returned.

    Raises:
        ValueError: if a non-blank entry is not a valid integer.
    """
    raw = os.getenv("TEST_PG_BENCH_TRANSACTIONS_MATRIX") or ""
    # Skip blank entries so an empty variable or a trailing comma does not
    # crash with int("") while the test matrix is being built.
    transactions = [int(entry) for entry in raw.split(",") if entry.strip()]
    return transactions or [10**4, 10**5]
|
||||
|
||||
|
||||
def get_scales_matrix():
    """Return the list of pgbench scale factors used to parametrize the tests.

    The values come from the TEST_PG_BENCH_SCALES_MATRIX environment variable,
    a comma-separated list of integers. When the variable is unset, empty, or
    contains only blank entries, ``[10, 20]`` is returned.

    Raises:
        ValueError: if a non-blank entry is not a valid integer.
    """
    raw = os.getenv("TEST_PG_BENCH_SCALES_MATRIX") or ""
    # Skip blank entries so an empty variable or a trailing comma does not
    # crash with int("") while the test matrix is being built.
    scales = [int(entry) for entry in raw.split(",") if entry.strip()]
    return scales or [10, 20]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("transactions", get_transactions_matrix())
@pytest.mark.remote_cluster
def test_pg_bench_remote_cluster(zenbenchmark: ZenithBenchmarker,
                                 connstr: str,
                                 scale: int,
                                 transactions: int):
    """
    Initialize and run pgbench against the cluster at `connstr`, then record
    the parsed result on `zenbenchmark`.

    Ideally the same set of tests would run both against local zenith and
    against staging, but the local tests currently depend heavily on things
    that exist only locally (zenith binaries, pageserver api, etc.).
    Keeping this as a separate test also makes it possible to run the pgbench
    workload against vanilla postgres or any other system that speaks the
    postgres protocol.

    For now this is mostly a liveness test: it stresses pageserver internals,
    so problems show up clearly in a more "real" environment.
    """
    # Use the pgbench from PG_BIN when that variable is set; otherwise rely on
    # whatever "pgbench" resolves to.
    pg_bin_dir = os.getenv("PG_BIN")
    pgbench_executable = ("pgbench" if pg_bin_dir is None else os.path.join(
        pg_bin_dir, "pgbench"))

    runner = PgBenchRunner(connstr=connstr,
                           scale=scale,
                           transactions=transactions,
                           pgbench_bin_path=pgbench_executable)

    # Timestamps and durations are measured separately on purpose:
    # the integer timestamps are meant for linking to grafana and logs, while
    # the durations are the actual metrics and are floats from a timer.
    init_started_at = utc_now_timestamp()
    init_timer_start = timeit.default_timer()
    runner.init()
    init_elapsed = timeit.default_timer() - init_timer_start
    init_finished_at = utc_now_timestamp()

    run_started_at = utc_now_timestamp()
    run_timer_start = timeit.default_timer()
    pgbench_output = runner.run()  # TODO handle failures
    run_elapsed = timeit.default_timer() - run_timer_start
    run_finished_at = utc_now_timestamp()

    parsed = PgBenchRunResult.parse_from_output(
        out=pgbench_output,
        init_duration=init_elapsed,
        init_start_timestamp=init_started_at,
        init_end_timestamp=init_finished_at,
        run_duration=run_elapsed,
        run_start_timestamp=run_started_at,
        run_end_timestamp=run_finished_at,
    )

    zenbenchmark.record_pg_bench_result(parsed)
|
||||
Reference in New Issue
Block a user