Reformat all Python files with black & isort

Author: Alexander Bayandin
Date: 2022-08-18 13:37:28 +01:00
Committed by: Alexander Bayandin
Parent: 6b2e1d9065
Commit: 4c2bb43775
84 changed files with 3282 additions and 2687 deletions
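
The hunks below are mechanical: isort regroups the imports (standard library first, then third-party, each group alphabetized and separated by a blank line), while black normalizes strings to double quotes and re-wraps long calls and signatures one argument per line with a trailing comma. A minimal sketch of how the same pass might be reproduced locally is shown below; it assumes both tools are installed, and the 100-character line length and the black-compatible isort profile are inferred from the wrapped lines in the diff rather than taken from the repository's configuration.

#!/usr/bin/env python3
"""Re-run the formatters this commit applied (sketch; see the assumptions above)."""
import subprocess

# Import ordering first, then quoting/wrapping; with the black-compatible
# profile the two tools do not conflict, so the order is not significant.
subprocess.run(["isort", "--profile", "black", "--line-length", "100", "."], check=True)
subprocess.run(["black", "--line-length", "100", "."], check=True)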


@@ -9,13 +9,6 @@
# * https://github.com/taiki-e/cargo-llvm-cov
# * https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-cov
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import Any, Dict, Iterator, Iterable, List, Optional
import argparse
import hashlib
import json
@@ -24,6 +17,12 @@ import shutil
import socket
import subprocess
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import Any, Dict, Iterable, Iterator, List, Optional
def file_mtime_or_zero(path: Path) -> int:


@@ -20,20 +20,21 @@
# For more context on how to use this, see:
# https://github.com/neondatabase/cloud/wiki/Storage-format-migration
import os
from os import path
import shutil
from pathlib import Path
import tempfile
from contextlib import closing
import psycopg2
import subprocess
import argparse
import os
import shutil
import subprocess
import tempfile
import time
import requests
import uuid
from contextlib import closing
from os import path
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast
import psycopg2
import requests
from psycopg2.extensions import connection as PgConnection
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
###############################################
### client-side utils copied from test fixtures
@@ -45,7 +46,7 @@ _global_counter = 0
def global_counter() -> int:
""" A really dumb global counter.
"""A really dumb global counter.
This is useful for giving output files a unique number, so if we run the
same command multiple times we can keep their output separate.
"""
@@ -55,7 +56,7 @@ def global_counter() -> int:
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
""" Run a process and capture its output
"""Run a process and capture its output
Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
where "cmd" is the name of the program and NNN is an incrementing
counter.
@@ -63,13 +64,13 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
Returns basepath for files with captured output.
"""
assert type(cmd) is list
base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
base = os.path.basename(cmd[0]) + "_{}".format(global_counter())
basepath = os.path.join(capture_dir, base)
stdout_filename = basepath + '.stdout'
stderr_filename = basepath + '.stderr'
stdout_filename = basepath + ".stdout"
stderr_filename = basepath + ".stderr"
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
with open(stdout_filename, "w") as stdout_f:
with open(stderr_filename, "w") as stderr_f:
print('(capturing output to "{}.stdout")'.format(base))
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
@@ -77,15 +78,16 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
class PgBin:
""" A helper class for executing postgres binaries """
"""A helper class for executing postgres binaries"""
def __init__(self, log_dir: Path, pg_distrib_dir):
self.log_dir = log_dir
self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin')
self.pg_bin_path = os.path.join(str(pg_distrib_dir), "bin")
self.env = os.environ.copy()
self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib')
self.env["LD_LIBRARY_PATH"] = os.path.join(str(pg_distrib_dir), "lib")
def _fixpath(self, command: List[str]):
if '/' not in command[0]:
if "/" not in command[0]:
command[0] = os.path.join(self.pg_bin_path, command[0])
def _build_env(self, env_add: Optional[Env]) -> Env:
@@ -106,15 +108,17 @@ class PgBin:
"""
self._fixpath(command)
print('Running command "{}"'.format(' '.join(command)))
print('Running command "{}"'.format(" ".join(command)))
env = self._build_env(env)
subprocess.run(command, env=env, cwd=cwd, check=True)
def run_capture(self,
command: List[str],
env: Optional[Env] = None,
cwd: Optional[str] = None,
**kwargs: Any) -> str:
def run_capture(
self,
command: List[str],
env: Optional[Env] = None,
cwd: Optional[str] = None,
**kwargs: Any,
) -> str:
"""
Run one of the postgres binaries, with stderr and stdout redirected to a file.
This is just like `run`, but for chatty programs. Returns basepath for files
@@ -122,35 +126,33 @@ class PgBin:
"""
self._fixpath(command)
print('Running command "{}"'.format(' '.join(command)))
print('Running command "{}"'.format(" ".join(command)))
env = self._build_env(env)
return subprocess_capture(str(self.log_dir),
command,
env=env,
cwd=cwd,
check=True,
**kwargs)
return subprocess_capture(
str(self.log_dir), command, env=env, cwd=cwd, check=True, **kwargs
)
class PgProtocol:
""" Reusable connection logic """
"""Reusable connection logic"""
def __init__(self, **kwargs):
self.default_options = kwargs
def conn_options(self, **kwargs):
conn_options = self.default_options.copy()
if 'dsn' in kwargs:
conn_options.update(parse_dsn(kwargs['dsn']))
if "dsn" in kwargs:
conn_options.update(parse_dsn(kwargs["dsn"]))
conn_options.update(kwargs)
# Individual statement timeout in seconds. 2 minutes should be
# enough for our tests, but if you need a longer, you can
# change it by calling "SET statement_timeout" after
# connecting.
if 'options' in conn_options:
conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options']
if "options" in conn_options:
conn_options["options"] = f"-cstatement_timeout=120s " + conn_options["options"]
else:
conn_options['options'] = "-cstatement_timeout=120s"
conn_options["options"] = "-cstatement_timeout=120s"
return conn_options
# autocommit=True here by default because that's what we need most of the time
@@ -194,18 +196,18 @@ class PgProtocol:
class VanillaPostgres(PgProtocol):
def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
super().__init__(host='localhost', port=port, dbname='postgres')
super().__init__(host="localhost", port=port, dbname="postgres")
self.pgdatadir = pgdatadir
self.pg_bin = pg_bin
self.running = False
if init:
self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
"""Append lines into postgresql.conf file."""
assert not self.running
with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file:
conf_file.write("\n".join(options))
def start(self, log_path: Optional[str] = None):
@@ -216,12 +218,13 @@ class VanillaPostgres(PgProtocol):
log_path = os.path.join(self.pgdatadir, "pg.log")
self.pg_bin.run_capture(
['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])
["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"]
)
def stop(self):
assert self.running
self.running = False
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])
self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"])
def __enter__(self):
return self
@@ -246,9 +249,9 @@ class NeonPageserverHttpClient(requests.Session):
res.raise_for_status()
except requests.RequestException as e:
try:
msg = res.json()['msg']
msg = res.json()["msg"]
except:
msg = ''
msg = ""
raise NeonPageserverApiException(msg) from e
def check_status(self):
@@ -265,17 +268,17 @@ class NeonPageserverHttpClient(requests.Session):
res = self.post(
f"http://{self.host}:{self.port}/v1/tenant",
json={
'new_tenant_id': new_tenant_id.hex,
"new_tenant_id": new_tenant_id.hex,
},
)
if res.status_code == 409:
if ok_if_exists:
print(f'could not create tenant: already exists for id {new_tenant_id}')
print(f"could not create tenant: already exists for id {new_tenant_id}")
else:
res.raise_for_status()
elif res.status_code == 201:
print(f'created tenant {new_tenant_id}')
print(f"created tenant {new_tenant_id}")
else:
self.verbose_error(res)
@@ -299,47 +302,55 @@ class NeonPageserverHttpClient(requests.Session):
def lsn_to_hex(num: int) -> str:
""" Convert lsn from int to standard hex notation. """
return "{:X}/{:X}".format(num >> 32, num & 0xffffffff)
"""Convert lsn from int to standard hex notation."""
return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF)
def lsn_from_hex(lsn_hex: str) -> int:
""" Convert lsn from hex notation to int. """
l, r = lsn_hex.split('/')
"""Convert lsn from hex notation to int."""
l, r = lsn_hex.split("/")
return (int(l, 16) << 32) + int(r, 16)
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID) -> int:
def remote_consistent_lsn(
pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID
) -> int:
detail = pageserver_http_client.timeline_detail(tenant, timeline)
if detail['remote'] is None:
if detail["remote"] is None:
# No remote information at all. This happens right after creating
# a timeline, before any part of it has been uploaded to remote
# storage yet.
return 0
else:
lsn_str = detail['remote']['remote_consistent_lsn']
lsn_str = detail["remote"]["remote_consistent_lsn"]
assert isinstance(lsn_str, str)
return lsn_from_hex(lsn_str)
def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int):
def wait_for_upload(
pageserver_http_client: NeonPageserverHttpClient,
tenant: uuid.UUID,
timeline: uuid.UUID,
lsn: int,
):
"""waits for local timeline upload up to specified lsn"""
for i in range(10):
current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline)
if current_lsn >= lsn:
return
print("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1))
print(
"waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1
)
)
time.sleep(1)
raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn)))
raise Exception(
"timed out while waiting for remote_consistent_lsn to reach {}, was {}".format(
lsn_to_hex(lsn), lsn_to_hex(current_lsn)
)
)
##############
@@ -399,7 +410,7 @@ def reconstruct_paths(log_dir, pg_bin, base_tar):
# Add all template0copy paths to template0
prefix = f"base/{oid}/"
if filepath.startswith(prefix):
suffix = filepath[len(prefix):]
suffix = filepath[len(prefix) :]
yield f"base/{template0_oid}/{suffix}"
elif filepath.startswith("global"):
print(f"skipping {database} global file {filepath}")
@@ -451,15 +462,17 @@ def get_rlsn(pageserver_connstr, tenant_id, timeline_id):
return last_lsn, prev_lsn
def import_timeline(args,
psql_path,
pageserver_connstr,
pageserver_http,
tenant_id,
timeline_id,
last_lsn,
prev_lsn,
tar_filename):
def import_timeline(
args,
psql_path,
pageserver_connstr,
pageserver_http,
tenant_id,
timeline_id,
last_lsn,
prev_lsn,
tar_filename,
):
# Import timelines to new pageserver
import_cmd = f"import basebackup {tenant_id} {timeline_id} {last_lsn} {last_lsn}"
full_cmd = rf"""cat {tar_filename} | {psql_path} {pageserver_connstr} -c '{import_cmd}' """
@@ -469,34 +482,30 @@ def import_timeline(args,
print(f"Running: {full_cmd}")
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename2, 'w') as stderr_f:
with open(stdout_filename, "w") as stdout_f:
with open(stderr_filename2, "w") as stderr_f:
print(f"(capturing output to {stdout_filename})")
pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
subprocess.run(full_cmd,
stdout=stdout_f,
stderr=stderr_f,
env=pg_bin._build_env(None),
shell=True,
check=True)
subprocess.run(
full_cmd,
stdout=stdout_f,
stderr=stderr_f,
env=pg_bin._build_env(None),
shell=True,
check=True,
)
print(f"Done import")
# Wait until pageserver persists the files
wait_for_upload(pageserver_http,
uuid.UUID(tenant_id),
uuid.UUID(timeline_id),
lsn_from_hex(last_lsn))
wait_for_upload(
pageserver_http, uuid.UUID(tenant_id), uuid.UUID(timeline_id), lsn_from_hex(last_lsn)
)
def export_timeline(args,
psql_path,
pageserver_connstr,
tenant_id,
timeline_id,
last_lsn,
prev_lsn,
tar_filename):
def export_timeline(
args, psql_path, pageserver_connstr, tenant_id, timeline_id, last_lsn, prev_lsn, tar_filename
):
# Choose filenames
incomplete_filename = tar_filename + ".incomplete"
stderr_filename = path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr")
@@ -507,15 +516,13 @@ def export_timeline(args,
# Run export command
print(f"Running: {cmd}")
with open(incomplete_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
with open(incomplete_filename, "w") as stdout_f:
with open(stderr_filename, "w") as stderr_f:
print(f"(capturing output to {incomplete_filename})")
pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
subprocess.run(cmd,
stdout=stdout_f,
stderr=stderr_f,
env=pg_bin._build_env(None),
check=True)
subprocess.run(
cmd, stdout=stdout_f, stderr=stderr_f, env=pg_bin._build_env(None), check=True
)
# Add missing rels
pg_bin = PgBin(args.work_dir, args.pg_distrib_dir)
@@ -551,27 +558,28 @@ def main(args: argparse.Namespace):
for timeline in timelines:
# Skip timelines we don't need to export
if args.timelines and timeline['timeline_id'] not in args.timelines:
if args.timelines and timeline["timeline_id"] not in args.timelines:
print(f"Skipping timeline {timeline['timeline_id']}")
continue
# Choose filenames
tar_filename = path.join(args.work_dir,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
tar_filename = path.join(
args.work_dir, f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar"
)
# Export timeline from old pageserver
if args.only_import is False:
last_lsn, prev_lsn = get_rlsn(
old_pageserver_connstr,
timeline['tenant_id'],
timeline['timeline_id'],
timeline["tenant_id"],
timeline["timeline_id"],
)
export_timeline(
args,
psql_path,
old_pageserver_connstr,
timeline['tenant_id'],
timeline['timeline_id'],
timeline["tenant_id"],
timeline["timeline_id"],
last_lsn,
prev_lsn,
tar_filename,
@@ -583,8 +591,8 @@ def main(args: argparse.Namespace):
psql_path,
new_pageserver_connstr,
new_http_client,
timeline['tenant_id'],
timeline['timeline_id'],
timeline["tenant_id"],
timeline["timeline_id"],
last_lsn,
prev_lsn,
tar_filename,
@@ -592,117 +600,118 @@ def main(args: argparse.Namespace):
# Re-export and compare
re_export_filename = tar_filename + ".reexport"
export_timeline(args,
psql_path,
new_pageserver_connstr,
timeline['tenant_id'],
timeline['timeline_id'],
last_lsn,
prev_lsn,
re_export_filename)
export_timeline(
args,
psql_path,
new_pageserver_connstr,
timeline["tenant_id"],
timeline["timeline_id"],
last_lsn,
prev_lsn,
re_export_filename,
)
# Check the size is the same
old_size = os.path.getsize(tar_filename),
new_size = os.path.getsize(re_export_filename),
old_size = (os.path.getsize(tar_filename),)
new_size = (os.path.getsize(re_export_filename),)
if old_size != new_size:
raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}")
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
'--tenant-id',
dest='tenants',
"--tenant-id",
dest="tenants",
required=True,
nargs='+',
help='Id of the tenant to migrate. You can pass multiple arguments',
nargs="+",
help="Id of the tenant to migrate. You can pass multiple arguments",
)
parser.add_argument(
'--timeline-id',
dest='timelines',
"--timeline-id",
dest="timelines",
required=False,
nargs='+',
help='Id of the timeline to migrate. You can pass multiple arguments',
nargs="+",
help="Id of the timeline to migrate. You can pass multiple arguments",
)
parser.add_argument(
'--from-host',
dest='old_pageserver_host',
"--from-host",
dest="old_pageserver_host",
required=True,
help='Host of the pageserver to migrate data from',
help="Host of the pageserver to migrate data from",
)
parser.add_argument(
'--from-http-port',
dest='old_pageserver_http_port',
"--from-http-port",
dest="old_pageserver_http_port",
required=False,
type=int,
default=9898,
help='HTTP port of the pageserver to migrate data from. Default: 9898',
help="HTTP port of the pageserver to migrate data from. Default: 9898",
)
parser.add_argument(
'--from-pg-port',
dest='old_pageserver_pg_port',
"--from-pg-port",
dest="old_pageserver_pg_port",
required=False,
type=int,
default=6400,
help='pg port of the pageserver to migrate data from. Default: 6400',
help="pg port of the pageserver to migrate data from. Default: 6400",
)
parser.add_argument(
'--to-host',
dest='new_pageserver_host',
"--to-host",
dest="new_pageserver_host",
required=True,
help='Host of the pageserver to migrate data to',
help="Host of the pageserver to migrate data to",
)
parser.add_argument(
'--to-http-port',
dest='new_pageserver_http_port',
"--to-http-port",
dest="new_pageserver_http_port",
required=False,
default=9898,
type=int,
help='HTTP port of the pageserver to migrate data to. Default: 9898',
help="HTTP port of the pageserver to migrate data to. Default: 9898",
)
parser.add_argument(
'--to-pg-port',
dest='new_pageserver_pg_port',
"--to-pg-port",
dest="new_pageserver_pg_port",
required=False,
default=6400,
type=int,
help='pg port of the pageserver to migrate data to. Default: 6400',
help="pg port of the pageserver to migrate data to. Default: 6400",
)
parser.add_argument(
'--ignore-tenant-exists',
dest='ok_if_exists',
"--ignore-tenant-exists",
dest="ok_if_exists",
required=False,
help=
'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.',
help="Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.",
)
parser.add_argument(
'--pg-distrib-dir',
dest='pg_distrib_dir',
"--pg-distrib-dir",
dest="pg_distrib_dir",
required=False,
default='/usr/local/',
help='Path where postgres binaries are installed. Default: /usr/local/',
default="/usr/local/",
help="Path where postgres binaries are installed. Default: /usr/local/",
)
parser.add_argument(
'--psql-path',
dest='psql_path',
"--psql-path",
dest="psql_path",
required=False,
default='/usr/local/bin/psql',
help='Path to the psql binary. Default: /usr/local/bin/psql',
default="/usr/local/bin/psql",
help="Path to the psql binary. Default: /usr/local/bin/psql",
)
parser.add_argument(
'--only-import',
dest='only_import',
"--only-import",
dest="only_import",
required=False,
default=False,
action='store_true',
help='Skip export and tenant creation part',
action="store_true",
help="Skip export and tenant creation part",
)
parser.add_argument(
'--work-dir',
dest='work_dir',
"--work-dir",
dest="work_dir",
required=True,
default=False,
help='directory where temporary tar files are stored',
help="directory where temporary tar files are stored",
)
args = parser.parse_args()
main(args)


@@ -1,31 +1,36 @@
#!/usr/bin/env python3
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
import json
from typing import Any, Dict, List, Optional, Tuple, cast
from jinja2 import Template
# skip 'input' columns. They are included in the header and just blow the table
EXCLUDE_COLUMNS = frozenset({
'scale',
'duration',
'number_of_clients',
'number_of_threads',
'init_start_timestamp',
'init_end_timestamp',
'run_start_timestamp',
'run_end_timestamp',
})
EXCLUDE_COLUMNS = frozenset(
{
"scale",
"duration",
"number_of_clients",
"number_of_threads",
"init_start_timestamp",
"init_end_timestamp",
"run_start_timestamp",
"run_end_timestamp",
}
)
KEY_EXCLUDE_FIELDS = frozenset({
'init_start_timestamp',
'init_end_timestamp',
'run_start_timestamp',
'run_end_timestamp',
})
NEGATIVE_COLOR = 'negative'
POSITIVE_COLOR = 'positive'
KEY_EXCLUDE_FIELDS = frozenset(
{
"init_start_timestamp",
"init_end_timestamp",
"run_start_timestamp",
"run_end_timestamp",
}
)
NEGATIVE_COLOR = "negative"
POSITIVE_COLOR = "positive"
EPS = 1e-6
@@ -55,75 +60,76 @@ def get_columns(values: List[Dict[Any, Any]]) -> Tuple[List[Tuple[str, str]], Li
value_columns = []
common_columns = []
for item in values:
if item['name'] in KEY_EXCLUDE_FIELDS:
if item["name"] in KEY_EXCLUDE_FIELDS:
continue
if item['report'] != 'test_param':
value_columns.append(cast(str, item['name']))
if item["report"] != "test_param":
value_columns.append(cast(str, item["name"]))
else:
common_columns.append((cast(str, item['name']), cast(str, item['value'])))
common_columns.append((cast(str, item["name"]), cast(str, item["value"])))
value_columns.sort()
common_columns.sort(key=lambda x: x[0]) # sort by name
return common_columns, value_columns
def format_ratio(ratio: float, report: str) -> Tuple[str, str]:
color = ''
sign = '+' if ratio > 0 else ''
color = ""
sign = "+" if ratio > 0 else ""
if abs(ratio) < 0.05:
return f'&nbsp({sign}{ratio:.2f})', color
return f"&nbsp({sign}{ratio:.2f})", color
if report not in {'test_param', 'higher_is_better', 'lower_is_better'}:
raise ValueError(f'Unknown report type: {report}')
if report not in {"test_param", "higher_is_better", "lower_is_better"}:
raise ValueError(f"Unknown report type: {report}")
if report == 'test_param':
return f'{ratio:.2f}', color
if report == "test_param":
return f"{ratio:.2f}", color
if ratio > 0:
if report == 'higher_is_better':
if report == "higher_is_better":
color = POSITIVE_COLOR
elif report == 'lower_is_better':
elif report == "lower_is_better":
color = NEGATIVE_COLOR
elif ratio < 0:
if report == 'higher_is_better':
if report == "higher_is_better":
color = NEGATIVE_COLOR
elif report == 'lower_is_better':
elif report == "lower_is_better":
color = POSITIVE_COLOR
return f'&nbsp({sign}{ratio:.2f})', color
return f"&nbsp({sign}{ratio:.2f})", color
def extract_value(name: str, suit_run: SuitRun) -> Optional[Dict[str, Any]]:
for item in suit_run.values['data']:
if item['name'] == name:
for item in suit_run.values["data"]:
if item["name"] == name:
return cast(Dict[str, Any], item)
return None
def get_row_values(columns: List[str], run_result: SuitRun,
prev_result: Optional[SuitRun]) -> List[RowValue]:
def get_row_values(
columns: List[str], run_result: SuitRun, prev_result: Optional[SuitRun]
) -> List[RowValue]:
row_values = []
for column in columns:
current_value = extract_value(column, run_result)
if current_value is None:
# should never happen
raise ValueError(f'{column} not found in {run_result.values}')
raise ValueError(f"{column} not found in {run_result.values}")
value = current_value["value"]
if isinstance(value, float):
value = f'{value:.2f}'
value = f"{value:.2f}"
if prev_result is None:
row_values.append(RowValue(value, '', ''))
row_values.append(RowValue(value, "", ""))
continue
prev_value = extract_value(column, prev_result)
if prev_value is None:
# this might happen when new metric is added and there is no value for it in previous run
# let this be here, TODO add proper handling when this actually happens
raise ValueError(f'{column} not found in previous result')
raise ValueError(f"{column} not found in previous result")
# adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero
ratio = (float(value) + EPS) / (float(prev_value['value']) + EPS) - 1
ratio_display, color = format_ratio(ratio, current_value['report'])
ratio = (float(value) + EPS) / (float(prev_value["value"]) + EPS) - 1
ratio_display, color = format_ratio(ratio, current_value["report"])
row_values.append(RowValue(value, color, ratio_display))
return row_values
@@ -139,8 +145,10 @@ def prepare_rows_from_runs(value_columns: List[str], runs: List[SuitRun]) -> Lis
prev_run = None
for run in runs:
rows.append(
SuiteRunTableRow(revision=run.revision,
values=get_row_values(value_columns, run, prev_run)))
SuiteRunTableRow(
revision=run.revision, values=get_row_values(value_columns, run, prev_run)
)
)
prev_run = run
return rows
@@ -152,27 +160,29 @@ def main(args: argparse.Namespace) -> None:
# we have files in form: <ctr>_<rev>.json
# fill them in the hashmap so we have grouped items for the
# same run configuration (scale, duration etc.) ordered by counter.
for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split('_')[0])):
for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split("_")[0])):
run_data = json.loads(item.read_text())
revision = run_data['revision']
revision = run_data["revision"]
for suit_result in run_data['result']:
key = "{}{}".format(run_data['platform'], suit_result['suit'])
for suit_result in run_data["result"]:
key = "{}{}".format(run_data["platform"], suit_result["suit"])
# pack total duration as a synthetic value
total_duration = suit_result['total_duration']
suit_result['data'].append({
'name': 'total_duration',
'value': total_duration,
'unit': 's',
'report': 'lower_is_better',
})
common_columns, value_columns = get_columns(suit_result['data'])
total_duration = suit_result["total_duration"]
suit_result["data"].append(
{
"name": "total_duration",
"value": total_duration,
"unit": "s",
"report": "lower_is_better",
}
)
common_columns, value_columns = get_columns(suit_result["data"])
grouped_runs.setdefault(
key,
SuitRuns(
platform=run_data['platform'],
suit=suit_result['suit'],
platform=run_data["platform"],
suit=suit_result["suit"],
common_columns=common_columns,
value_columns=value_columns,
runs=[],
@@ -184,26 +194,26 @@ def main(args: argparse.Namespace) -> None:
for result in grouped_runs.values():
suit = result.suit
context[suit] = {
'common_columns': result.common_columns,
'value_columns': result.value_columns,
'platform': result.platform,
"common_columns": result.common_columns,
"value_columns": result.value_columns,
"platform": result.platform,
# reverse the order so newest results are on top of the table
'rows': reversed(prepare_rows_from_runs(result.value_columns, result.runs)),
"rows": reversed(prepare_rows_from_runs(result.value_columns, result.runs)),
}
template = Template((Path(__file__).parent / 'perf_report_template.html').read_text())
template = Template((Path(__file__).parent / "perf_report_template.html").read_text())
Path(args.out).write_text(template.render(context=context))
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
'--input-dir',
dest='input_dir',
"--input-dir",
dest="input_dir",
required=True,
help='Directory with jsons generated by the test suite',
help="Directory with jsons generated by the test suite",
)
parser.add_argument('--out', required=True, help='Output html file path')
parser.add_argument("--out", required=True, help="Output html file path")
args = parser.parse_args()
main(args)


@@ -1,17 +1,16 @@
#!/usr/bin/env python3
from contextlib import contextmanager
import shlex
from tempfile import TemporaryDirectory
from distutils.dir_util import copy_tree
from pathlib import Path
import argparse
import os
import shlex
import shutil
import subprocess
import sys
import textwrap
from contextlib import contextmanager
from distutils.dir_util import copy_tree
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional


@@ -1,12 +1,13 @@
#!/usr/bin/env python3
import argparse
from contextlib import contextmanager
import json
import os
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
import psycopg2
import psycopg2.extras
from pathlib import Path
from datetime import datetime
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS perf_test_results (
@@ -24,15 +25,15 @@ CREATE TABLE IF NOT EXISTS perf_test_results (
def err(msg):
print(f'error: {msg}')
print(f"error: {msg}")
exit(1)
@contextmanager
def get_connection_cursor():
connstr = os.getenv('DATABASE_URL')
connstr = os.getenv("DATABASE_URL")
if not connstr:
err('DATABASE_URL environment variable is not set')
err("DATABASE_URL environment variable is not set")
with psycopg2.connect(connstr) as conn:
with conn.cursor() as cur:
yield cur
@@ -44,33 +45,35 @@ def create_table(cur):
def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int) -> int:
run_data = json.loads(data_dile.read_text())
revision = run_data['revision']
platform = run_data['platform']
revision = run_data["revision"]
platform = run_data["platform"]
run_result = run_data['result']
run_result = run_data["result"]
args_list = []
for suit_result in run_result:
suit = suit_result['suit']
total_duration = suit_result['total_duration']
suit = suit_result["suit"]
total_duration = suit_result["total_duration"]
suit_result['data'].append({
'name': 'total_duration',
'value': total_duration,
'unit': 's',
'report': 'lower_is_better',
})
suit_result["data"].append(
{
"name": "total_duration",
"value": total_duration,
"unit": "s",
"report": "lower_is_better",
}
)
for metric in suit_result['data']:
for metric in suit_result["data"]:
values = {
'suit': suit,
'revision': revision,
'platform': platform,
'metric_name': metric['name'],
'metric_value': metric['value'],
'metric_unit': metric['unit'],
'metric_report_type': metric['report'],
'recorded_at_timestamp': datetime.utcfromtimestamp(recorded_at_timestamp),
"suit": suit,
"revision": revision,
"platform": platform,
"metric_name": metric["name"],
"metric_value": metric["value"],
"metric_unit": metric["unit"],
"metric_report_type": metric["report"],
"recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp),
}
args_list.append(values)
@@ -104,13 +107,16 @@ def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int)
def main():
parser = argparse.ArgumentParser(description='Perf test result uploader. \
Database connection string should be provided via DATABASE_URL environment variable', )
parser = argparse.ArgumentParser(
description="Perf test result uploader. \
Database connection string should be provided via DATABASE_URL environment variable",
)
parser.add_argument(
'--ingest',
"--ingest",
type=Path,
help='Path to perf test result file, or directory with perf test result files')
parser.add_argument('--initdb', action='store_true', help='Initialuze database')
help="Path to perf test result file, or directory with perf test result files",
)
parser.add_argument("--initdb", action="store_true", help="Initialuze database")
args = parser.parse_args()
with get_connection_cursor() as cur:
@@ -118,19 +124,19 @@ def main():
create_table(cur)
if not args.ingest.exists():
err(f'ingest path {args.ingest} does not exist')
err(f"ingest path {args.ingest} does not exist")
if args.ingest:
if args.ingest.is_dir():
for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split('_')[0])):
recorded_at_timestamp = int(item.name.split('_')[0])
for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split("_")[0])):
recorded_at_timestamp = int(item.name.split("_")[0])
ingested = ingest_perf_test_result(cur, item, recorded_at_timestamp)
print(f'Ingested {ingested} metric values from {item}')
print(f"Ingested {ingested} metric values from {item}")
else:
recorded_at_timestamp = int(args.ingest.name.split('_')[0])
recorded_at_timestamp = int(args.ingest.name.split("_")[0])
ingested = ingest_perf_test_result(cur, args.ingest, recorded_at_timestamp)
print(f'Ingested {ingested} metric values from {args.ingest}')
print(f"Ingested {ingested} metric values from {args.ingest}")
if __name__ == '__main__':
if __name__ == "__main__":
main()


@@ -13,83 +13,90 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
# Extend compaction_period and gc_period to disable background compaction and gc.
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '10 m',
'gc_horizon': '1048576',
'checkpoint_distance': '4194304',
'compaction_period': '10 m',
'compaction_threshold': '2',
'compaction_target_size': '4194304',
})
"gc_period": "10 m",
"gc_horizon": "1048576",
"checkpoint_distance": "4194304",
"compaction_period": "10 m",
"compaction_threshold": "2",
"compaction_target_size": "4194304",
}
)
env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)")
pg_branch0 = env.postgres.create_start('main', tenant_id=tenant)
pg_branch0 = env.postgres.create_start("main", tenant_id=tenant)
branch0_cur = pg_branch0.connect().cursor()
branch0_timeline = query_scalar(branch0_cur, "SHOW neon.timeline_id")
log.info(f"b0 timeline {branch0_timeline}")
# Create table, and insert 100k rows.
branch0_lsn = query_scalar(branch0_cur, 'SELECT pg_current_wal_insert_lsn()')
branch0_lsn = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"b0 at lsn {branch0_lsn}")
branch0_cur.execute('CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)')
branch0_cur.execute('''
branch0_cur.execute("CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)")
branch0_cur.execute(
"""
INSERT INTO foo
SELECT '00112233445566778899AABBCCDDEEFF' || ':branch0:' || g
FROM generate_series(1, 100000) g
''')
lsn_100 = query_scalar(branch0_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN after 100k rows: {lsn_100}')
"""
)
lsn_100 = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 100k rows: {lsn_100}")
# Create branch1.
env.neon_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100)
pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant)
env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100)
pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant)
log.info("postgres is running on 'branch1' branch")
branch1_cur = pg_branch1.connect().cursor()
branch1_timeline = query_scalar(branch1_cur, "SHOW neon.timeline_id")
log.info(f"b1 timeline {branch1_timeline}")
branch1_lsn = query_scalar(branch1_cur, 'SELECT pg_current_wal_insert_lsn()')
branch1_lsn = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"b1 at lsn {branch1_lsn}")
# Insert 100k rows.
branch1_cur.execute('''
branch1_cur.execute(
"""
INSERT INTO foo
SELECT '00112233445566778899AABBCCDDEEFF' || ':branch1:' || g
FROM generate_series(1, 100000) g
''')
lsn_200 = query_scalar(branch1_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN after 200k rows: {lsn_200}')
"""
)
lsn_200 = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 200k rows: {lsn_200}")
# Create branch2.
env.neon_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200)
pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant)
env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200)
pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant)
log.info("postgres is running on 'branch2' branch")
branch2_cur = pg_branch2.connect().cursor()
branch2_timeline = query_scalar(branch2_cur, "SHOW neon.timeline_id")
log.info(f"b2 timeline {branch2_timeline}")
branch2_lsn = query_scalar(branch2_cur, 'SELECT pg_current_wal_insert_lsn()')
branch2_lsn = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"b2 at lsn {branch2_lsn}")
# Insert 100k rows.
branch2_cur.execute('''
branch2_cur.execute(
"""
INSERT INTO foo
SELECT '00112233445566778899AABBCCDDEEFF' || ':branch2:' || g
FROM generate_series(1, 100000) g
''')
lsn_300 = query_scalar(branch2_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN after 300k rows: {lsn_300}')
"""
)
lsn_300 = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 300k rows: {lsn_300}")
# Run compaction on branch1.
compact = f'compact {tenant.hex} {branch1_timeline} {lsn_200}'
compact = f"compact {tenant.hex} {branch1_timeline} {lsn_200}"
log.info(compact)
env.pageserver.safe_psql(compact)
assert query_scalar(branch0_cur, 'SELECT count(*) FROM foo') == 100000
assert query_scalar(branch0_cur, "SELECT count(*) FROM foo") == 100000
assert query_scalar(branch1_cur, 'SELECT count(*) FROM foo') == 200000
assert query_scalar(branch1_cur, "SELECT count(*) FROM foo") == 200000
assert query_scalar(branch2_cur, 'SELECT count(*) FROM foo') == 300000
assert query_scalar(branch2_cur, "SELECT count(*) FROM foo") == 300000


@@ -1,7 +1,8 @@
from contextlib import closing
from uuid import uuid4
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
@@ -23,41 +24,46 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
ps.safe_psql("set FOO", password=tenant_token)
ps.safe_psql("set FOO", password=management_token)
new_timeline_id = env.neon_cli.create_branch('test_pageserver_auth',
tenant_id=env.initial_tenant)
new_timeline_id = env.neon_cli.create_branch(
"test_pageserver_auth", tenant_id=env.initial_tenant
)
# tenant can create branches
tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
ancestor_timeline_id=new_timeline_id)
tenant_http_client.timeline_create(
tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id
)
# console can create branches for tenant
management_http_client.timeline_create(tenant_id=env.initial_tenant,
ancestor_timeline_id=new_timeline_id)
management_http_client.timeline_create(
tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id
)
# fail to create branch using token with different tenant_id
with pytest.raises(NeonPageserverApiException,
match='Forbidden: Tenant id mismatch. Permission denied'):
invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant,
ancestor_timeline_id=new_timeline_id)
with pytest.raises(
NeonPageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied"
):
invalid_tenant_http_client.timeline_create(
tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id
)
# create tenant using management token
management_http_client.tenant_create()
# fail to create tenant using tenant token
with pytest.raises(
NeonPageserverApiException,
match='Forbidden: Attempt to access management api with tenant scope. Permission denied'
NeonPageserverApiException,
match="Forbidden: Attempt to access management api with tenant scope. Permission denied",
):
tenant_http_client.tenant_create()
@pytest.mark.parametrize('with_safekeepers', [False, True])
@pytest.mark.parametrize("with_safekeepers", [False, True])
def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
neon_env_builder.auth_enabled = True
if with_safekeepers:
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
branch = f'test_compute_auth_to_pageserver{with_safekeepers}'
branch = f"test_compute_auth_to_pageserver{with_safekeepers}"
env.neon_cli.create_branch(branch)
pg = env.postgres.create_start(branch)
@@ -65,7 +71,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safek
with conn.cursor() as cur:
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
cur.execute('CREATE TABLE t(key int primary key, value text)')
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
cur.execute('SELECT sum(key) FROM t')
assert cur.fetchone() == (5000050000, )
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (5000050000,)


@@ -1,13 +1,13 @@
import threading
import time
from contextlib import closing, contextmanager
import psycopg2.extras
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
import time
from fixtures.neon_fixtures import Postgres
import threading
from fixtures.neon_fixtures import NeonEnvBuilder, Postgres
pytest_plugins = ("fixtures.neon_fixtures")
pytest_plugins = "fixtures.neon_fixtures"
@contextmanager
@@ -44,7 +44,8 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
with pg_cur(pg) as cur:
while not stop_event.is_set():
try:
cur.execute('''
cur.execute(
"""
select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag,
pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn) as disk_consistent_lsn_lag,
pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn) as remote_consistent_lsn_lag,
@@ -52,16 +53,19 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn)),
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn))
from backpressure_lsns();
''')
"""
)
res = cur.fetchone()
received_lsn_lag = res[0]
disk_consistent_lsn_lag = res[1]
remote_consistent_lsn_lag = res[2]
log.info(f"received_lsn_lag = {received_lsn_lag} ({res[3]}), "
f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), "
f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})")
log.info(
f"received_lsn_lag = {received_lsn_lag} ({res[3]}), "
f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), "
f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})"
)
# Since feedback from pageserver is not immediate, we should allow some lag overflow
lag_overflow = 5 * 1024 * 1024 # 5MB
@@ -71,7 +75,9 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
if max_replication_flush_lag_bytes > 0:
assert disk_consistent_lsn_lag < max_replication_flush_lag_bytes + lag_overflow
if max_replication_apply_lag_bytes > 0:
assert remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow
assert (
remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow
)
time.sleep(polling_interval)
@@ -79,7 +85,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
log.info(f"backpressure check query failed: {e}")
stop_event.set()
log.info('check thread stopped')
log.info("check thread stopped")
# This test illustrates how to tune backpressure to control the lag
@@ -94,10 +100,11 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv
def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Create a branch for us
env.neon_cli.create_branch('test_backpressure')
env.neon_cli.create_branch("test_backpressure")
pg = env.postgres.create_start('test_backpressure',
config_lines=['max_replication_write_lag=30MB'])
pg = env.postgres.create_start(
"test_backpressure", config_lines=["max_replication_write_lag=30MB"]
)
log.info("postgres is running on 'test_backpressure' branch")
# setup check thread
@@ -131,23 +138,29 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder):
rows_inserted += 100000
except Exception as e:
if check_thread.is_alive():
log.info('stopping check thread')
log.info("stopping check thread")
check_stop_event.set()
check_thread.join()
assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
assert (
False
), f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly"
else:
assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."
assert (
False
), f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work."
log.info(f"inserted {rows_inserted} rows")
if check_thread.is_alive():
log.info('stopping check thread')
log.info("stopping check thread")
check_stop_event.set()
check_thread.join()
log.info('check thread stopped')
log.info("check thread stopped")
else:
assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work."
assert (
False
), "WAL lag overflowed configured threshold. That means backpressure doesn't work."
#TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings
#TODO test_backpressure_remote_consistent_lsn_lag
# TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings
# TODO test_backpressure_remote_consistent_lsn_lag


@@ -1,5 +1,4 @@
import pytest
from fixtures.neon_fixtures import NeonEnv
@@ -15,4 +14,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv):
env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return")
with pytest.raises(Exception, match="basebackup-before-control-file"):
pg = env.postgres.create_start('test_basebackup_error')
pg = env.postgres.create_start("test_basebackup_error")


@@ -1,6 +1,7 @@
import threading
import pytest
import time
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import lsn_from_hex, query_scalar
@@ -49,55 +50,52 @@ def test_branch_and_gc(neon_simple_env: NeonEnv):
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
'gc_period': '10 m',
'gc_horizon': f'{10 * 1024 ** 3}',
"gc_period": "10 m",
"gc_horizon": f"{10 * 1024 ** 3}",
# small checkpoint distance to create more delta layer files
'checkpoint_distance': f'{1024 ** 2}',
"checkpoint_distance": f"{1024 ** 2}",
# set the target size to be large to allow the image layer to cover the whole key space
'compaction_target_size': f'{1024 ** 3}',
"compaction_target_size": f"{1024 ** 3}",
# tweak the default settings to allow quickly create image layers and L1 layers
'compaction_period': '1 s',
'compaction_threshold': '2',
'image_creation_threshold': '1',
"compaction_period": "1 s",
"compaction_threshold": "2",
"image_creation_threshold": "1",
# set PITR interval to be small, so we can do GC
'pitr_interval': '1 s'
})
"pitr_interval": "1 s",
}
)
timeline_main = env.neon_cli.create_timeline(f'test_main', tenant_id=tenant)
pg_main = env.postgres.create_start('test_main', tenant_id=tenant)
timeline_main = env.neon_cli.create_timeline(f"test_main", tenant_id=tenant)
pg_main = env.postgres.create_start("test_main", tenant_id=tenant)
main_cur = pg_main.connect().cursor()
main_cur.execute(
"CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
)
main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
lsn1 = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN1: {lsn1}')
main_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)")
lsn1 = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN1: {lsn1}")
main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
lsn2 = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN2: {lsn2}')
main_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)")
lsn2 = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN2: {lsn2}")
# Set the GC horizon so that lsn1 is inside the horizon, which means
# we can create a new branch starting from lsn1.
env.pageserver.safe_psql(
f'do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}')
f"do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}"
)
env.neon_cli.create_branch('test_branch',
'test_main',
tenant_id=tenant,
ancestor_start_lsn=lsn1)
pg_branch = env.postgres.create_start('test_branch', tenant_id=tenant)
env.neon_cli.create_branch(
"test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1
)
pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant)
branch_cur = pg_branch.connect().cursor()
branch_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)")
assert query_scalar(branch_cur, 'SELECT count(*) FROM foo') == 200000
assert query_scalar(branch_cur, "SELECT count(*) FROM foo") == 200000
# This test simulates a race condition happening when branch creation and GC are performed concurrently.
@@ -120,32 +118,31 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
tenant, _ = env.neon_cli.create_tenant(
conf={
# disable background GC
'gc_period': '10 m',
'gc_horizon': f'{10 * 1024 ** 3}',
"gc_period": "10 m",
"gc_horizon": f"{10 * 1024 ** 3}",
# small checkpoint distance to create more delta layer files
'checkpoint_distance': f'{1024 ** 2}',
"checkpoint_distance": f"{1024 ** 2}",
# set the target size to be large to allow the image layer to cover the whole key space
'compaction_target_size': f'{1024 ** 3}',
"compaction_target_size": f"{1024 ** 3}",
# tweak the default settings to allow quickly create image layers and L1 layers
'compaction_period': '1 s',
'compaction_threshold': '2',
'image_creation_threshold': '1',
"compaction_period": "1 s",
"compaction_threshold": "2",
"image_creation_threshold": "1",
# set PITR interval to be small, so we can do GC
'pitr_interval': '0 s'
})
"pitr_interval": "0 s",
}
)
b0 = env.neon_cli.create_branch('b0', tenant_id=tenant)
pg0 = env.postgres.create_start('b0', tenant_id=tenant)
res = pg0.safe_psql_many(queries=[
"CREATE TABLE t(key serial primary key)",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
"SELECT pg_current_wal_insert_lsn()",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
])
b0 = env.neon_cli.create_branch("b0", tenant_id=tenant)
pg0 = env.postgres.create_start("b0", tenant_id=tenant)
res = pg0.safe_psql_many(
queries=[
"CREATE TABLE t(key serial primary key)",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
"SELECT pg_current_wal_insert_lsn()",
"INSERT INTO t SELECT FROM generate_series(1, 100000)",
]
)
lsn = res[2][0][0]
# Use `failpoint=sleep` and `threading` to make the GC iteration triggers *before* the
@@ -166,6 +163,6 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv):
# The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC.
with pytest.raises(Exception, match="invalid branch start lsn"):
env.neon_cli.create_branch('b1', 'b0', tenant_id=tenant, ancestor_start_lsn=lsn)
env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn)
thread.join()


@@ -1,8 +1,8 @@
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.utils import print_gc_result, query_scalar
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import print_gc_result, query_scalar
#
@@ -21,8 +21,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch('test_branch_behind')
pgmain = env.postgres.create_start('test_branch_behind')
env.neon_cli.create_branch("test_branch_behind")
pgmain = env.postgres.create_start("test_branch_behind")
log.info("postgres is running on 'test_branch_behind' branch")
main_cur = pgmain.connect().cursor()
@@ -30,80 +30,86 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
timeline = query_scalar(main_cur, "SHOW neon.timeline_id")
# Create table, and insert the first 100 rows
main_cur.execute('CREATE TABLE foo (t text)')
main_cur.execute("CREATE TABLE foo (t text)")
# keep some early lsn to test branch creation on out of date lsn
gced_lsn = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
gced_lsn = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100) g
''')
lsn_a = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN after 100 rows: {lsn_a}')
"""
)
lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 100 rows: {lsn_a}")
# Insert some more rows. (This generates enough WAL to fill a few segments.)
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 200000) g
''')
lsn_b = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info(f'LSN after 200100 rows: {lsn_b}')
"""
)
lsn_b = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"LSN after 200100 rows: {lsn_b}")
# Branch at the point where only 100 rows were inserted
env.neon_cli.create_branch('test_branch_behind_hundred',
'test_branch_behind',
ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch(
"test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a
)
# Insert many more rows. This generates enough WAL to fill a few segments.
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 200000) g
''')
lsn_c = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
"""
)
lsn_c = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f'LSN after 400100 rows: {lsn_c}')
log.info(f"LSN after 400100 rows: {lsn_c}")
# Branch at the point where only 200100 rows were inserted
env.neon_cli.create_branch('test_branch_behind_more',
'test_branch_behind',
ancestor_start_lsn=lsn_b)
env.neon_cli.create_branch(
"test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b
)
pg_hundred = env.postgres.create_start('test_branch_behind_hundred')
pg_more = env.postgres.create_start('test_branch_behind_more')
pg_hundred = env.postgres.create_start("test_branch_behind_hundred")
pg_more = env.postgres.create_start("test_branch_behind_more")
# On the 'hundred' branch, we should see only 100 rows
hundred_cur = pg_hundred.connect().cursor()
assert query_scalar(hundred_cur, 'SELECT count(*) FROM foo') == 100
assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100
# On the 'more' branch, we should see 100200 rows
more_cur = pg_more.connect().cursor()
assert query_scalar(more_cur, 'SELECT count(*) FROM foo') == 200100
assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100
# All the rows are visible on the main branch
assert query_scalar(main_cur, 'SELECT count(*) FROM foo') == 400100
assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100
# Check bad lsn's for branching
# branch at segment boundary
env.neon_cli.create_branch('test_branch_segment_boundary',
'test_branch_behind',
ancestor_start_lsn="0/3000000")
pg = env.postgres.create_start('test_branch_segment_boundary')
assert pg.safe_psql('SELECT 1')[0][0] == 1
env.neon_cli.create_branch(
"test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn="0/3000000"
)
pg = env.postgres.create_start("test_branch_segment_boundary")
assert pg.safe_psql("SELECT 1")[0][0] == 1
# branch at pre-initdb lsn
with pytest.raises(Exception, match="invalid branch start lsn"):
env.neon_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42")
env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn="0/42")
# branch at pre-ancestor lsn
with pytest.raises(Exception, match="less than timeline ancestor lsn"):
env.neon_cli.create_branch('test_branch_preinitdb',
'test_branch_behind',
ancestor_start_lsn="0/42")
env.neon_cli.create_branch(
"test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn="0/42"
)
# check that we cannot create branch based on garbage collected data
with env.pageserver.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
@@ -114,13 +120,13 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
with pytest.raises(Exception, match="invalid branch start lsn"):
# this gced_lsn is pretty random, so if gc is disabled this woudln't fail
env.neon_cli.create_branch('test_branch_create_fail',
'test_branch_behind',
ancestor_start_lsn=gced_lsn)
env.neon_cli.create_branch(
"test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn
)
# check that after gc everything is still there
assert query_scalar(hundred_cur, 'SELECT count(*) FROM foo') == 100
assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100
assert query_scalar(more_cur, 'SELECT count(*) FROM foo') == 200100
assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100
assert query_scalar(main_cur, 'SELECT count(*) FROM foo') == 400100
assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100


@@ -1,10 +1,11 @@
from typing import List
import threading
import pytest
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
import time
import random
import threading
import time
from typing import List
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
from performance.test_perf_pgbench import get_scales_matrix
@@ -20,38 +21,37 @@ from performance.test_perf_pgbench import get_scales_matrix
@pytest.mark.parametrize("n_branches", [10])
@pytest.mark.parametrize("scale", get_scales_matrix(1))
@pytest.mark.parametrize("ty", ["cascade", "flat"])
def test_branching_with_pgbench(neon_simple_env: NeonEnv,
pg_bin: PgBin,
n_branches: int,
scale: int,
ty: str):
def test_branching_with_pgbench(
neon_simple_env: NeonEnv, pg_bin: PgBin, n_branches: int, scale: int, ty: str
):
env = neon_simple_env
# Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '5 s',
'gc_horizon': f'{1024 ** 2}',
'checkpoint_distance': f'{1024 ** 2}',
'compaction_target_size': f'{1024 ** 2}',
# set PITR interval to be small, so we can do GC
'pitr_interval': '5 s'
})
conf={
"gc_period": "5 s",
"gc_horizon": f"{1024 ** 2}",
"checkpoint_distance": f"{1024 ** 2}",
"compaction_target_size": f"{1024 ** 2}",
# set PITR interval to be small, so we can do GC
"pitr_interval": "5 s",
}
)
def run_pgbench(pg: Postgres):
connstr = pg.connstr()
log.info(f"Start a pgbench workload on pg {connstr}")
pg_bin.run_capture(['pgbench', '-i', f'-s{scale}', connstr])
pg_bin.run_capture(['pgbench', '-T15', connstr])
pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr])
pg_bin.run_capture(["pgbench", "-T15", connstr])
env.neon_cli.create_branch('b0', tenant_id=tenant)
env.neon_cli.create_branch("b0", tenant_id=tenant)
pgs: List[Postgres] = []
pgs.append(env.postgres.create_start('b0', tenant_id=tenant))
pgs.append(env.postgres.create_start("b0", tenant_id=tenant))
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=(pgs[0], ), daemon=True))
threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True))
threads[-1].start()
thread_limit = 4
@@ -72,18 +72,18 @@ def test_branching_with_pgbench(neon_simple_env: NeonEnv,
threads = []
if ty == "cascade":
env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(i), tenant_id=tenant)
env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(i), tenant_id=tenant)
else:
env.neon_cli.create_branch('b{}'.format(i + 1), 'b0', tenant_id=tenant)
env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant)
pgs.append(env.postgres.create_start('b{}'.format(i + 1), tenant_id=tenant))
pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant))
threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1], ), daemon=True))
threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1],), daemon=True))
threads[-1].start()
for thread in threads:
thread.join()
for pg in pgs:
res = pg.safe_psql('SELECT count(*) from pgbench_accounts')
assert res[0] == (100000 * scale, )
res = pg.safe_psql("SELECT count(*) from pgbench_accounts")
assert res[0] == (100000 * scale,)

View File

@@ -1,12 +1,12 @@
import concurrent.futures
import os
from contextlib import closing
from typing import List, Tuple
from uuid import UUID
import pytest
import concurrent.futures
from contextlib import closing
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres
from fixtures.log_helper import log
import os
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.utils import query_scalar
@@ -24,7 +24,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
tenant_id = tenant_id_uuid.hex
timeline_id = timeline_id_uuid.hex
pg = env.postgres.create_start(f'main', tenant_id=tenant_id_uuid)
pg = env.postgres.create_start(f"main", tenant_id=tenant_id_uuid)
with pg.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'")
@@ -42,7 +42,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
# Corrupt metadata file on timeline 1
(tenant1, timeline1, pg1) = tenant_timelines[1]
metadata_path = "{}/tenants/{}/timelines/{}/metadata".format(env.repo_dir, tenant1, timeline1)
print(f'overwriting metadata file at {metadata_path}')
print(f"overwriting metadata file at {metadata_path}")
f = open(metadata_path, "w")
f.write("overwritten with garbage!")
f.close()
@@ -52,17 +52,17 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
(tenant2, timeline2, pg2) = tenant_timelines[2]
timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant2, timeline2)
for filename in os.listdir(timeline_path):
if filename.startswith('00000'):
if filename.startswith("00000"):
# Looks like a layer file. Remove it
os.remove(f'{timeline_path}/{filename}')
os.remove(f"{timeline_path}/{filename}")
# Corrupt layer files file on timeline 3
(tenant3, timeline3, pg3) = tenant_timelines[3]
timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant3, timeline3)
for filename in os.listdir(timeline_path):
if filename.startswith('00000'):
if filename.startswith("00000"):
# Looks like a layer file. Corrupt it
f = open(f'{timeline_path}/{filename}', "w")
f = open(f"{timeline_path}/{filename}", "w")
f.write("overwritten with garbage!")
f.close()
@@ -77,7 +77,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder):
(tenant, timeline, pg) = tenant_timelines[n]
with pytest.raises(Exception, match="Cannot load local timeline") as err:
pg.start()
log.info(f'compute startup failed as expected: {err}')
log.info(f"compute startup failed as expected: {err}")
def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
@@ -87,9 +87,10 @@ def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv):
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(env.neon_cli.create_timeline,
f"test-create-multiple-timelines-{i}",
tenant_id) for i in range(4)
executor.submit(
env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id
)
for i in range(4)
]
for future in futures:
future.result()

View File

@@ -1,10 +1,9 @@
import time
import os
import time
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import query_scalar
@@ -13,40 +12,40 @@ from fixtures.utils import query_scalar
#
def test_clog_truncate(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_clog_truncate', 'empty')
env.neon_cli.create_branch("test_clog_truncate", "empty")
# set aggressive autovacuum to make sure that truncation will happen
config = [
'autovacuum_max_workers=10',
'autovacuum_vacuum_threshold=0',
'autovacuum_vacuum_insert_threshold=0',
'autovacuum_vacuum_cost_delay=0',
'autovacuum_vacuum_cost_limit=10000',
'autovacuum_naptime =1s',
'autovacuum_freeze_max_age=100000'
"autovacuum_max_workers=10",
"autovacuum_vacuum_threshold=0",
"autovacuum_vacuum_insert_threshold=0",
"autovacuum_vacuum_cost_delay=0",
"autovacuum_vacuum_cost_limit=10000",
"autovacuum_naptime =1s",
"autovacuum_freeze_max_age=100000",
]
pg = env.postgres.create_start('test_clog_truncate', config_lines=config)
log.info('postgres is running on test_clog_truncate branch')
pg = env.postgres.create_start("test_clog_truncate", config_lines=config)
log.info("postgres is running on test_clog_truncate branch")
# Install extension containing function needed for test
pg.safe_psql('CREATE EXTENSION neon_test_utils')
pg.safe_psql("CREATE EXTENSION neon_test_utils")
# Consume many xids to advance clog
with pg.cursor() as cur:
cur.execute('select test_consume_xids(1000*1000*10);')
log.info('xids consumed')
cur.execute("select test_consume_xids(1000*1000*10);")
log.info("xids consumed")
# call a checkpoint to trigger TruncateSubtrans
cur.execute('CHECKPOINT;')
cur.execute("CHECKPOINT;")
# ensure WAL flush
cur.execute('select txid_current()')
cur.execute("select txid_current()")
log.info(cur.fetchone())
# wait for autovacuum to truncate the pg_xact
    # XXX Is it worth adding a timeout here?
pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000')
pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000")
log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")
while os.path.isfile(pg_xact_0000_path):
@@ -55,18 +54,18 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
# checkpoint to advance latest lsn
with pg.cursor() as cur:
cur.execute('CHECKPOINT;')
lsn_after_truncation = query_scalar(cur, 'select pg_current_wal_insert_lsn()')
cur.execute("CHECKPOINT;")
lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()")
# create new branch after clog truncation and start a compute node on it
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
env.neon_cli.create_branch('test_clog_truncate_new',
'test_clog_truncate',
ancestor_start_lsn=lsn_after_truncation)
pg2 = env.postgres.create_start('test_clog_truncate_new')
log.info('postgres is running on test_clog_truncate_new branch')
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
env.neon_cli.create_branch(
"test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
)
pg2 = env.postgres.create_start("test_clog_truncate_new")
log.info("postgres is running on test_clog_truncate_new branch")
# check that new node doesn't contain truncated segment
pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000')
pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000")
log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
assert os.path.isfile(pg_xact_0000_path_new) is False

View File

@@ -1,18 +1,18 @@
from contextlib import closing
import shutil
import time
import subprocess
import os.path
import shutil
import subprocess
import time
from contextlib import closing
from cached_property import threading
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
def lsof_path() -> str:
path_output = shutil.which("lsof")
if path_output is None:
raise RuntimeError('lsof not found in PATH')
raise RuntimeError("lsof not found in PATH")
else:
return path_output
@@ -36,16 +36,18 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
path = os.path.join(env.repo_dir, "pageserver.pid")
lsof = lsof_path()
while workload_thread.is_alive():
res = subprocess.run([lsof, path],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
res = subprocess.run(
[lsof, path],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# parse the `lsof` command's output to get only the list of commands
commands = [line.split(' ')[0] for line in res.stdout.strip().split('\n')[1:]]
commands = [line.split(" ")[0] for line in res.stdout.strip().split("\n")[1:]]
if len(commands) > 0:
log.info(f"lsof commands: {commands}")
assert commands == ['pageserve']
assert commands == ["pageserve"]
time.sleep(1.0)

View File

@@ -1,7 +1,7 @@
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
#
@@ -12,19 +12,21 @@ def test_config(neon_simple_env: NeonEnv):
env.neon_cli.create_branch("test_config", "empty")
# change config
pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])
log.info('postgres is running on test_config branch')
pg = env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"])
log.info("postgres is running on test_config branch")
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('''
cur.execute(
"""
SELECT setting
FROM pg_settings
WHERE
source != 'default'
AND source != 'override'
AND name = 'log_min_messages'
''')
"""
)
# check that config change was applied
assert cur.fetchone() == ('debug1', )
assert cur.fetchone() == ("debug1",)

View File

@@ -1,34 +1,38 @@
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
from fixtures.log_helper import log
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
# Restart nodes with WAL end having specially crafted shape, like last record
# crossing segment boundary, to test decoding issues.
@pytest.mark.parametrize('wal_type',
[
'simple',
'last_wal_record_xlog_switch',
'last_wal_record_xlog_switch_ends_on_page_boundary',
'last_wal_record_crossing_segment',
'wal_record_crossing_segment_followed_by_small_one',
])
@pytest.mark.parametrize(
"wal_type",
[
"simple",
"last_wal_record_xlog_switch",
"last_wal_record_xlog_switch_ends_on_page_boundary",
"last_wal_record_crossing_segment",
"wal_record_crossing_segment_followed_by_small_one",
],
)
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_crafted_wal_end')
env.neon_cli.create_branch("test_crafted_wal_end")
pg = env.postgres.create('test_crafted_wal_end')
pg = env.postgres.create("test_crafted_wal_end")
wal_craft = WalCraft(env)
pg.config(wal_craft.postgres_config())
pg.start()
res = pg.safe_psql_many(queries=[
'CREATE TABLE keys(key int primary key)',
'INSERT INTO keys SELECT generate_series(1, 100)',
'SELECT SUM(key) FROM keys'
])
assert res[-1][0] == (5050, )
res = pg.safe_psql_many(
queries=[
"CREATE TABLE keys(key int primary key)",
"INSERT INTO keys SELECT generate_series(1, 100)",
"SELECT SUM(key) FROM keys",
]
)
assert res[-1][0] == (5050,)
wal_craft.in_existing(wal_type, pg.connstr())
@@ -39,13 +43,15 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env.pageserver.start()
log.info("Trying more queries")
res = pg.safe_psql_many(queries=[
'SELECT SUM(key) FROM keys',
'INSERT INTO keys SELECT generate_series(101, 200)',
'SELECT SUM(key) FROM keys',
])
assert res[0][0] == (5050, )
assert res[-1][0] == (20100, )
res = pg.safe_psql_many(
queries=[
"SELECT SUM(key) FROM keys",
"INSERT INTO keys SELECT generate_series(101, 200)",
"SELECT SUM(key) FROM keys",
]
)
assert res[0][0] == (5050,)
assert res[-1][0] == (20100,)
log.info("Restarting all safekeepers and pageservers (again)")
env.pageserver.stop()
@@ -54,10 +60,12 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
env.pageserver.start()
log.info("Trying more queries (again)")
res = pg.safe_psql_many(queries=[
'SELECT SUM(key) FROM keys',
'INSERT INTO keys SELECT generate_series(201, 300)',
'SELECT SUM(key) FROM keys',
])
assert res[0][0] == (20100, )
assert res[-1][0] == (45150, )
res = pg.safe_psql_many(
queries=[
"SELECT SUM(key) FROM keys",
"INSERT INTO keys SELECT generate_series(201, 300)",
"SELECT SUM(key) FROM keys",
]
)
assert res[0][0] == (20100,)
assert res[-1][0] == (45150,)

View File

@@ -1,9 +1,9 @@
import os
import pathlib
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.utils import query_scalar
@@ -12,35 +12,37 @@ from fixtures.utils import query_scalar
#
def test_createdb(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_createdb', 'empty')
env.neon_cli.create_branch("test_createdb", "empty")
pg = env.postgres.create_start('test_createdb')
pg = env.postgres.create_start("test_createdb")
log.info("postgres is running on 'test_createdb' branch")
with pg.cursor() as cur:
# Cause a 'relmapper' change in the original branch
cur.execute('VACUUM FULL pg_class')
cur.execute("VACUUM FULL pg_class")
cur.execute('CREATE DATABASE foodb')
cur.execute("CREATE DATABASE foodb")
lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()')
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start('test_createdb2')
env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start("test_createdb2")
# Test that you can connect to the new database on both branches
for db in (pg, pg2):
with db.cursor(dbname='foodb') as cur:
with db.cursor(dbname="foodb") as cur:
# Check database size in both branches
cur.execute("""
cur.execute(
"""
select pg_size_pretty(pg_database_size('foodb')),
pg_size_pretty(
sum(pg_relation_size(oid, 'main'))
+sum(pg_relation_size(oid, 'vm'))
+sum(pg_relation_size(oid, 'fsm'))
) FROM pg_class where relisshared is false
""")
"""
)
res = cur.fetchone()
assert res is not None
# check that dbsize equals sum of all relation sizes, excluding shared ones
@@ -53,48 +55,48 @@ def test_createdb(neon_simple_env: NeonEnv):
#
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch('test_dropdb', 'empty')
pg = env.postgres.create_start('test_dropdb')
env.neon_cli.create_branch("test_dropdb", "empty")
pg = env.postgres.create_start("test_dropdb")
log.info("postgres is running on 'test_dropdb' branch")
with pg.cursor() as cur:
cur.execute('CREATE DATABASE foodb')
cur.execute("CREATE DATABASE foodb")
lsn_before_drop = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()')
lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';")
with pg.cursor() as cur:
cur.execute('DROP DATABASE foodb')
cur.execute("DROP DATABASE foodb")
cur.execute('CHECKPOINT')
cur.execute("CHECKPOINT")
lsn_after_drop = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()')
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create two branches before and after database drop.
env.neon_cli.create_branch('test_before_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_before_drop)
pg_before = env.postgres.create_start('test_before_dropdb')
env.neon_cli.create_branch(
"test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
)
pg_before = env.postgres.create_start("test_before_dropdb")
env.neon_cli.create_branch('test_after_dropdb',
'test_dropdb',
ancestor_start_lsn=lsn_after_drop)
pg_after = env.postgres.create_start('test_after_dropdb')
env.neon_cli.create_branch(
"test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
)
pg_after = env.postgres.create_start("test_after_dropdb")
# Test that database exists on the branch before drop
pg_before.connect(dbname='foodb').close()
pg_before.connect(dbname="foodb").close()
# Test that database subdir exists on the branch before drop
assert pg_before.pgdata_dir
dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid)
dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid)
log.info(dbpath)
assert os.path.isdir(dbpath) == True
# Test that database subdir doesn't exist on the branch after drop
assert pg_after.pgdata_dir
dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid)
dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid)
log.info(dbpath)
assert os.path.isdir(dbpath) == False

View File

@@ -1,5 +1,5 @@
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import query_scalar
@@ -8,21 +8,21 @@ from fixtures.utils import query_scalar
#
def test_createuser(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_createuser', 'empty')
pg = env.postgres.create_start('test_createuser')
env.neon_cli.create_branch("test_createuser", "empty")
pg = env.postgres.create_start("test_createuser")
log.info("postgres is running on 'test_createuser' branch")
with pg.cursor() as cur:
# Cause a 'relmapper' change in the original branch
cur.execute('CREATE USER testuser with password %s', ('testpwd', ))
cur.execute("CREATE USER testuser with password %s", ("testpwd",))
cur.execute('CHECKPOINT')
cur.execute("CHECKPOINT")
lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()')
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
# Create a branch
env.neon_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start('test_createuser2')
env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
pg2 = env.postgres.create_start("test_createuser2")
# Test that you can connect to new branch as a new user
assert pg2.safe_psql('select current_user', user='testuser') == [('testuser', )]
assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)]

View File

@@ -1,11 +1,12 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
import pytest
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_fsm_truncate")
pg = env.postgres.create_start('test_fsm_truncate')
pg = env.postgres.create_start("test_fsm_truncate")
pg.safe_psql(
'CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;')
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"
)

View File

@@ -1,22 +1,28 @@
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, PortDistributor, VanillaPostgres
from fixtures.neon_fixtures import pg_distrib_dir
import os
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
PortDistributor,
VanillaPostgres,
pg_distrib_dir,
)
from fixtures.utils import query_scalar, subprocess_capture
num_rows = 1000
# Ensure that regular postgres can start from fullbackup
def test_fullbackup(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
port_distributor: PortDistributor):
def test_fullbackup(
neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, port_distributor: PortDistributor
):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_fullbackup')
pgmain = env.postgres.create_start('test_fullbackup')
env.neon_cli.create_branch("test_fullbackup")
pgmain = env.postgres.create_start("test_fullbackup")
log.info("postgres is running on 'test_fullbackup' branch")
with pgmain.cursor() as cur:
@@ -24,16 +30,18 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder,
# data loading may take a while, so increase statement timeout
cur.execute("SET statement_timeout='300s'")
cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g
from generate_series(1,{num_rows}) g''')
cur.execute(
f"""CREATE TABLE tbl AS SELECT 'long string to consume some space' || g
from generate_series(1,{num_rows}) g"""
)
cur.execute("CHECKPOINT")
lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()')
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
log.info(f"start_backup_lsn = {lsn}")
# Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
# PgBin sets it automatically, but here we need to pipe psql output to the tar command.
psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")}
# Get and unpack fullbackup from pageserver
restored_dir_path = env.repo_dir / "restored_datadir"
@@ -42,13 +50,14 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder,
cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
result_basepath = pg_bin.run_capture(cmd, env=psql_env)
tar_output_file = result_basepath + ".stdout"
subprocess_capture(str(env.repo_dir),
["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)])
subprocess_capture(
str(env.repo_dir), ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)]
)
# HACK
# fullbackup returns neon specific pg_control and first WAL segment
# use resetwal to overwrite it
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, 'pg_resetwal')
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal")
cmd = [pg_resetwal_path, "-D", str(restored_dir_path)]
pg_bin.run_capture(cmd, env=psql_env)
@@ -56,9 +65,11 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder,
port = port_distributor.get_port()
with VanillaPostgres(restored_dir_path, pg_bin, port, init=False) as vanilla_pg:
# TODO make port an optional argument
vanilla_pg.configure([
f"port={port}",
])
vanilla_pg.configure(
[
f"port={port}",
]
)
vanilla_pg.start()
num_rows_found = vanilla_pg.safe_psql('select count(*) from tbl;', user="cloud_admin")[0][0]
num_rows_found = vanilla_pg.safe_psql("select count(*) from tbl;", user="cloud_admin")[0][0]
assert num_rows == num_rows_found

View File

@@ -1,8 +1,8 @@
import asyncio
import random
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.utils import query_scalar
# Test configuration
@@ -24,7 +24,7 @@ async def update_table(pg: Postgres):
while updates_performed < updates_to_perform:
updates_performed += 1
id = random.randrange(1, num_rows)
row = await pg_conn.fetchrow(f'UPDATE foo SET counter = counter + 1 WHERE id = {id}')
row = await pg_conn.fetchrow(f"UPDATE foo SET counter = counter + 1 WHERE id = {id}")
# Perform aggressive GC with 0 horizon
@@ -57,24 +57,26 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_gc_aggressive", "main")
pg = env.postgres.create_start('test_gc_aggressive')
log.info('postgres is running on test_gc_aggressive branch')
pg = env.postgres.create_start("test_gc_aggressive")
log.info("postgres is running on test_gc_aggressive branch")
with pg.cursor() as cur:
timeline = query_scalar(cur, "SHOW neon.timeline_id")
# Create table, and insert the first 100 rows
cur.execute('CREATE TABLE foo (id int, counter int, t text)')
cur.execute(f'''
cur.execute("CREATE TABLE foo (id int, counter int, t text)")
cur.execute(
f"""
INSERT INTO foo
SELECT g, 0, 'long string to consume some space' || g
FROM generate_series(1, {num_rows}) g
''')
cur.execute('CREATE INDEX ON foo(id)')
"""
)
cur.execute("CREATE INDEX ON foo(id)")
asyncio.run(update_and_gc(env, pg, timeline))
cur.execute('SELECT COUNT(*), SUM(counter) FROM foo')
cur.execute("SELECT COUNT(*), SUM(counter) FROM foo")
r = cur.fetchone()
assert r is not None
assert r == (num_rows, updates_to_perform)

View File

@@ -1,17 +1,24 @@
import re
import pytest
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin, Postgres, wait_for_upload, wait_for_last_record_lsn
from fixtures.utils import lsn_from_hex
from uuid import UUID, uuid4
import os
import tarfile
import shutil
from pathlib import Path
import json
from fixtures.utils import subprocess_capture
from fixtures.log_helper import log
import os
import re
import shutil
import tarfile
from contextlib import closing
from fixtures.neon_fixtures import pg_distrib_dir
from pathlib import Path
from uuid import UUID, uuid4
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
Postgres,
pg_distrib_dir,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.utils import lsn_from_hex, subprocess_capture
@pytest.mark.timeout(600)
@@ -19,9 +26,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
# Put data in vanilla pg
vanilla_pg.start()
vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser")
vanilla_pg.safe_psql('''create table t as select 'long string to consume some space' || g
from generate_series(1,300000) g''')
assert vanilla_pg.safe_psql('select count(*) from t') == [(300000, )]
vanilla_pg.safe_psql(
"""create table t as select 'long string to consume some space' || g
from generate_series(1,300000) g"""
)
assert vanilla_pg.safe_psql("select count(*) from t") == [(300000,)]
# Take basebackup
basebackup_dir = os.path.join(test_output_dir, "basebackup")
@@ -29,15 +38,17 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
wal_tar = os.path.join(basebackup_dir, "pg_wal.tar")
os.mkdir(basebackup_dir)
vanilla_pg.safe_psql("CHECKPOINT")
pg_bin.run([
"pg_basebackup",
"-F",
"tar",
"-d",
vanilla_pg.connstr(),
"-D",
basebackup_dir,
])
pg_bin.run(
[
"pg_basebackup",
"-F",
"tar",
"-d",
vanilla_pg.connstr(),
"-D",
basebackup_dir,
]
)
# Make corrupt base tar with missing pg_control
unpacked_base = os.path.join(basebackup_dir, "unpacked-base")
@@ -45,9 +56,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
os.mkdir(unpacked_base, 0o750)
subprocess_capture(str(test_output_dir), ["tar", "-xf", base_tar, "-C", unpacked_base])
os.remove(os.path.join(unpacked_base, "global/pg_control"))
subprocess_capture(str(test_output_dir),
["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base),
cwd=unpacked_base)
subprocess_capture(
str(test_output_dir),
["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base),
cwd=unpacked_base,
)
# Get start_lsn and end_lsn
with open(os.path.join(basebackup_dir, "backup_manifest")) as f:
@@ -65,24 +78,26 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
env.pageserver.http_client().tenant_create(tenant)
def import_tar(base, wal):
env.neon_cli.raw_cli([
"timeline",
"import",
"--tenant-id",
tenant.hex,
"--timeline-id",
timeline.hex,
"--node-name",
node_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
])
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
tenant.hex,
"--timeline-id",
timeline.hex,
"--node-name",
node_name,
"--base-lsn",
start_lsn,
"--base-tarfile",
base,
"--end-lsn",
end_lsn,
"--wal-tarfile",
wal,
]
)
# Importing corrupt backup fails
with pytest.raises(Exception):
@@ -102,7 +117,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
# Check it worked
pg = env.postgres.create_start(node_name, tenant_id=tenant)
assert pg.safe_psql('select count(*) from t') == [(300000, )]
assert pg.safe_psql("select count(*) from t") == [(300000,)]
@pytest.mark.timeout(600)
@@ -111,8 +126,8 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch('test_import_from_pageserver_small')
pg = env.postgres.create_start('test_import_from_pageserver_small')
timeline = env.neon_cli.create_branch("test_import_from_pageserver_small")
pg = env.postgres.create_start("test_import_from_pageserver_small")
num_rows = 3000
lsn = _generate_data(num_rows, pg)
@@ -129,8 +144,8 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
timeline = env.neon_cli.create_branch('test_import_from_pageserver_multisegment')
pg = env.postgres.create_start('test_import_from_pageserver_multisegment')
timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment")
pg = env.postgres.create_start("test_import_from_pageserver_multisegment")
# For `test_import_from_pageserver_multisegment`, we want to make sure that the data
# is large enough to create multi-segment files. Typically, a segment file's size is
@@ -139,8 +154,9 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
num_rows = 30000000
lsn = _generate_data(num_rows, pg)
logical_size = env.pageserver.http_client().timeline_detail(
env.initial_tenant, timeline)['local']['current_logical_size']
logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[
"local"
]["current_logical_size"]
log.info(f"timeline logical size = {logical_size / (1024 ** 2)}MB")
assert logical_size > 1024**3 # = 1GB
@@ -148,7 +164,7 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne
# Check if the backup data contains multiple segment files
cnt_seg_files = 0
segfile_re = re.compile('[0-9]+\\.[0-9]+')
segfile_re = re.compile("[0-9]+\\.[0-9]+")
with tarfile.open(tar_output_file, "r") as tar_f:
for f in tar_f.getnames():
if segfile_re.search(f) is not None:
@@ -166,11 +182,13 @@ def _generate_data(num_rows: int, pg: Postgres) -> str:
with conn.cursor() as cur:
# data loading may take a while, so increase statement timeout
cur.execute("SET statement_timeout='300s'")
cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g
from generate_series(1,{num_rows}) g''')
cur.execute(
f"""CREATE TABLE tbl AS SELECT 'long string to consume some space' || g
from generate_series(1,{num_rows}) g"""
)
cur.execute("CHECKPOINT")
cur.execute('SELECT pg_current_wal_insert_lsn()')
cur.execute("SELECT pg_current_wal_insert_lsn()")
res = cur.fetchone()
assert res is not None and isinstance(res[0], str)
return res[0]
@@ -189,7 +207,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
# Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq.
# PgBin sets it automatically, but here we need to pipe psql output to the tar command.
psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")}
# Get a fullbackup from pageserver
query = f"fullbackup { env.initial_tenant.hex} {timeline.hex} {lsn}"
@@ -201,11 +219,11 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
env.postgres.stop_all()
env.pageserver.stop()
dir_to_clear = Path(env.repo_dir) / 'tenants'
dir_to_clear = Path(env.repo_dir) / "tenants"
shutil.rmtree(dir_to_clear)
os.mkdir(dir_to_clear)
#start the pageserver again
# start the pageserver again
env.pageserver.start()
# Import using another tenantid, because we use the same pageserver.
@@ -216,20 +234,22 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
node_name = "import_from_pageserver"
client = env.pageserver.http_client()
client.tenant_create(tenant)
env.neon_cli.raw_cli([
"timeline",
"import",
"--tenant-id",
tenant.hex,
"--timeline-id",
timeline.hex,
"--node-name",
node_name,
"--base-lsn",
lsn,
"--base-tarfile",
os.path.join(tar_output_file),
])
env.neon_cli.raw_cli(
[
"timeline",
"import",
"--tenant-id",
tenant.hex,
"--timeline-id",
timeline.hex,
"--node-name",
node_name,
"--base-lsn",
lsn,
"--base-tarfile",
os.path.join(tar_output_file),
]
)
# Wait for data to land in s3
wait_for_last_record_lsn(client, tenant, timeline, lsn_from_hex(lsn))
@@ -237,7 +257,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel
# Check it worked
pg = env.postgres.create_start(node_name, tenant_id=tenant)
assert pg.safe_psql('select count(*) from tbl') == [(expected_num_rows, )]
assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)]
# Take another fullbackup
query = f"fullbackup { tenant.hex} {timeline.hex} {lsn}"

View File

@@ -1,7 +1,8 @@
import time
import os
from fixtures.neon_fixtures import NeonEnvBuilder
import time
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
# This test creates a large number of tables, which causes a large catalog.
@@ -14,7 +15,7 @@ from fixtures.log_helper import log
def test_large_schema(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')
pg = env.postgres.create_start("main")
conn = pg.connect()
cur = conn.cursor()
@@ -22,7 +23,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
tables = 2 # 10 is too much for debug build
partitions = 1000
for i in range(1, tables + 1):
print(f'iteration {i} / {tables}')
print(f"iteration {i} / {tables}")
# Restart compute. Restart is actually not strictly needed.
# It is done mostly because this test originally tries to model the problem reported by Ketteq.
@@ -52,10 +53,10 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
# It's normal that it takes some time for the pageserver to
# restart, and for the connection to fail until it does. It
# should eventually recover, so retry until it succeeds.
print(f'failed: {error}')
print(f"failed: {error}")
if retries < max_retries:
retries += 1
print(f'retry {retries} / {max_retries}')
print(f"retry {retries} / {max_retries}")
time.sleep(retry_sleep)
continue
else:
@@ -67,7 +68,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
for i in range(1, tables + 1):
cur.execute(f"SELECT count(*) FROM t_{i}")
assert cur.fetchone() == (partitions, )
assert cur.fetchone() == (partitions,)
cur.execute("set enable_sort=off")
cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid")
@@ -77,6 +78,6 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder):
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id)
for filename in os.listdir(timeline_path):
if filename.startswith('00000'):
log.info(f'layer {filename} size is {os.path.getsize(timeline_path + filename)}')
if filename.startswith("00000"):
log.info(f"layer {filename} size is {os.path.getsize(timeline_path + filename)}")
assert os.path.getsize(timeline_path + filename) < 512_000_000

View File

@@ -1,13 +1,13 @@
import math
import time
from contextlib import closing
from datetime import timedelta, timezone, tzinfo
import math
from uuid import UUID
import psycopg2.extras
import psycopg2.errors
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.log_helper import log
import time
import psycopg2.errors
import psycopg2.extras
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres
from fixtures.utils import query_scalar
@@ -18,7 +18,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch('test_lsn_mapping')
new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
pgmain = env.postgres.create_start("test_lsn_mapping")
log.info("postgres is running on 'test_lsn_mapping' branch")
@@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
for i in range(1000):
cur.execute(f"INSERT INTO foo VALUES({i})")
# Get the timestamp at UTC
after_timestamp = query_scalar(cur, 'SELECT clock_timestamp()').replace(tzinfo=None)
after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None)
tbl.append([i, after_timestamp])
# Execute one more transaction with synchronous_commit enabled, to flush
@@ -47,17 +47,17 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
probe_timestamp = tbl[-1][1] + timedelta(hours=1)
result = query_scalar(
ps_cur,
f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'"
f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'",
)
assert result == 'future'
assert result == "future"
        # timestamp too far in the past
probe_timestamp = tbl[0][1] - timedelta(hours=10)
result = query_scalar(
ps_cur,
f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'"
f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'",
)
assert result == 'past'
assert result == "past"
# Probe a bunch of timestamps in the valid range
for i in range(1, len(tbl), 100):
@@ -66,14 +66,14 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
# Call get_lsn_by_timestamp to get the LSN
lsn = query_scalar(
ps_cur,
f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'"
f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'",
)
# Launch a new read-only node at that LSN, and check that only the rows
# that were supposed to be committed at that point in time are visible.
pg_here = env.postgres.create_start(branch_name='test_lsn_mapping',
node_name='test_lsn_mapping_read',
lsn=lsn)
pg_here = env.postgres.create_start(
branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn
)
assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
pg_here.stop_and_destroy()

View File

@@ -1,5 +1,5 @@
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.utils import query_scalar
@@ -11,18 +11,21 @@ from fixtures.utils import query_scalar
#
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch('test_multixact', 'empty')
pg = env.postgres.create_start('test_multixact')
env.neon_cli.create_branch("test_multixact", "empty")
pg = env.postgres.create_start("test_multixact")
log.info("postgres is running on 'test_multixact' branch")
cur = pg.connect().cursor()
cur.execute('''
cur.execute(
"""
CREATE TABLE t1(i int primary key);
INSERT INTO t1 select * from generate_series(1, 100);
''')
"""
)
next_multixact_id_old = query_scalar(cur,
'SELECT next_multixact_id FROM pg_control_checkpoint()')
next_multixact_id_old = query_scalar(
cur, "SELECT next_multixact_id FROM pg_control_checkpoint()"
)
# Lock entries using parallel connections in a round-robin fashion.
nclients = 20
@@ -40,17 +43,18 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
for i in range(5000):
conn = connections[i % nclients]
conn.commit()
conn.cursor().execute('select * from t1 for key share')
conn.cursor().execute("select * from t1 for key share")
# We have multixacts now. We can close the connections.
for c in connections:
c.close()
# force wal flush
cur.execute('checkpoint')
cur.execute("checkpoint")
cur.execute(
'SELECT next_multixact_id, pg_current_wal_insert_lsn() FROM pg_control_checkpoint()')
"SELECT next_multixact_id, pg_current_wal_insert_lsn() FROM pg_control_checkpoint()"
)
res = cur.fetchone()
assert res is not None
next_multixact_id = res[0]
@@ -60,12 +64,13 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
assert int(next_multixact_id) > int(next_multixact_id_old)
# Branch at this point
env.neon_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn)
pg_new = env.postgres.create_start('test_multixact_new')
env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn)
pg_new = env.postgres.create_start("test_multixact_new")
log.info("postgres is running on 'test_multixact_new' branch")
next_multixact_id_new = pg_new.safe_psql(
'SELECT next_multixact_id FROM pg_control_checkpoint()')[0][0]
"SELECT next_multixact_id FROM pg_control_checkpoint()"
)[0][0]
# Check that we restored pg_controlfile correctly
assert next_multixact_id_new == next_multixact_id

View File

@@ -1,21 +1,29 @@
import uuid
import requests
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
from typing import cast
import requests
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
NeonEnv,
NeonEnvBuilder,
NeonPageserverHttpClient,
)
def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClient,
env: NeonEnv,
initial_tenant: uuid.UUID):
def helper_compare_timeline_list(
pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv, initial_tenant: uuid.UUID
):
"""
Compare timelines list returned by CLI and directly via API.
Filters out timelines created by other tests.
"""
timelines_api = sorted(
map(lambda t: cast(str, t['timeline_id']),
pageserver_http_client.timeline_list(initial_tenant)))
map(
lambda t: cast(str, t["timeline_id"]),
pageserver_http_client.timeline_list(initial_tenant),
)
)
timelines_cli = env.neon_cli.list_timelines()
assert timelines_cli == env.neon_cli.list_timelines(initial_tenant)
@@ -32,12 +40,13 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a branch for us
main_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_main')
main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main")
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Create a nested branch
nested_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_nested',
'test_cli_branch_list_main')
nested_timeline_id = env.neon_cli.create_branch(
"test_cli_branch_list_nested", "test_cli_branch_list_main"
)
helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant)
# Check that all new branches are visible via CLI
@@ -49,7 +58,7 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv):
def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv):
tenants = pageserver_http_client.tenant_list()
tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants))
tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants))
res = env.neon_cli.list_tenants()
tenants_cli = sorted(map(lambda t: t.split()[0], res.stdout.splitlines()))
@@ -97,7 +106,7 @@ def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
# Connect to sk port on v4 loopback
res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status')
res = requests.get(f"http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status")
assert res.ok
# FIXME Test setup is using localhost:xx in ps config.

View File

@@ -8,15 +8,15 @@ from fixtures.neon_fixtures import NeonEnvBuilder
def test_next_xid(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')
pg = env.postgres.create_start("main")
conn = pg.connect()
cur = conn.cursor()
cur.execute('CREATE TABLE t(x integer)')
cur.execute("CREATE TABLE t(x integer)")
iterations = 32
for i in range(1, iterations + 1):
print(f'iteration {i} / {iterations}')
print(f"iteration {i} / {iterations}")
# Kill and restart the pageserver.
pg.stop()
@@ -38,10 +38,10 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
# It's normal that it takes some time for the pageserver to
# restart, and for the connection to fail until it does. It
# should eventually recover, so retry until it succeeds.
print(f'failed: {error}')
print(f"failed: {error}")
if retries < max_retries:
retries += 1
print(f'retry {retries} / {max_retries}')
print(f"retry {retries} / {max_retries}")
time.sleep(retry_sleep)
continue
else:
@@ -51,4 +51,4 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder):
conn = pg.connect()
cur = conn.cursor()
cur.execute("SELECT count(*) FROM t")
assert cur.fetchone() == (iterations, )
assert cur.fetchone() == (iterations,)

View File

@@ -1,33 +1,35 @@
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient
import pytest
def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
tenant_id, timeline_id = env.neon_cli.create_tenant()
pg = env.postgres.create_start('main', tenant_id=tenant_id)
pg = env.postgres.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
res_1 = pg.safe_psql_many(queries=[
'CREATE TABLE t(key int primary key, value text)',
'INSERT INTO t SELECT generate_series(1,100000), \'payload\'',
'SELECT sum(key) FROM t',
])
res_1 = pg.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
"SELECT sum(key) FROM t",
]
)
assert res_1[-1][0] == (5000050000, )
assert res_1[-1][0] == (5000050000,)
# TODO check detach on live instance
log.info("stopping compute")
pg.stop()
log.info("compute stopped")
pg.start()
res_2 = pg.safe_psql('SELECT sum(key) FROM t')
assert res_2[0] == (5000050000, )
res_2 = pg.safe_psql("SELECT sum(key) FROM t")
assert res_2[0] == (5000050000,)
pg.stop()
pageserver_http.tenant_detach(tenant_id)
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
@pytest.mark.parametrize("num_timelines,num_safekeepers", [(3, 1)])
def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int):
"""
Basic test:

View File

@@ -1,7 +1,7 @@
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
from fixtures.utils import print_gc_result, query_scalar
import psycopg2.extras
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import print_gc_result, query_scalar
#
@@ -19,8 +19,8 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_old_request_lsn", "main")
pg = env.postgres.create_start('test_old_request_lsn')
log.info('postgres is running on test_old_request_lsn branch')
pg = env.postgres.create_start("test_old_request_lsn")
log.info("postgres is running on test_old_request_lsn branch")
pg_conn = pg.connect()
cur = pg_conn.cursor()
@@ -33,25 +33,29 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
# Create table, and insert some rows. Make it big enough that it doesn't fit in
# shared_buffers.
cur.execute('CREATE TABLE foo (id int4 PRIMARY KEY, val int, t text)')
cur.execute('''
cur.execute("CREATE TABLE foo (id int4 PRIMARY KEY, val int, t text)")
cur.execute(
"""
INSERT INTO foo
SELECT g, 1, 'long string to consume some space' || g
FROM generate_series(1, 100000) g
''')
"""
)
# Verify that the table is larger than shared_buffers, so that the SELECT below
# will cause GetPage requests.
cur.execute('''
cur.execute(
"""
select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize
from pg_settings where name = 'shared_buffers'
''')
"""
)
row = cur.fetchone()
assert row is not None
log.info(f'shared_buffers is {row[0]}, table size {row[1]}')
log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
assert int(row[0]) < int(row[1])
cur.execute('VACUUM foo')
cur.execute("VACUUM foo")
# Make a lot of updates on a single row, generating a lot of WAL. Trigger
# garbage collections so that the page server will remove old page versions.
@@ -61,7 +65,7 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder):
print_gc_result(row)
for j in range(100):
cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;')
cur.execute("UPDATE foo SET val = val + 1 WHERE id = 1;")
# All (or at least most of) the updates should've been on the same page, so
# that we haven't had to evict any dirty pages for a long time. Now run

View File

@@ -1,54 +1,65 @@
from typing import Optional
from uuid import uuid4, UUID
import pytest
import pathlib
import os
import pathlib
import subprocess
from fixtures.utils import lsn_from_hex
from typing import Optional
from uuid import UUID, uuid4
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
DEFAULT_BRANCH_NAME,
NeonEnv,
NeonEnvBuilder,
NeonPageserverHttpClient,
NeonPageserverApiException,
wait_until,
NeonPageserverHttpClient,
neon_binpath,
pg_distrib_dir,
wait_until,
)
from fixtures.utils import lsn_from_hex
# test that we cannot override node id after init
def test_pageserver_init_node_id(neon_simple_env: NeonEnv):
repo_dir = neon_simple_env.repo_dir
pageserver_config = repo_dir / 'pageserver.toml'
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
run_pageserver = lambda args: subprocess.run([str(pageserver_bin), '-D', str(repo_dir), *args],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
pageserver_config = repo_dir / "pageserver.toml"
pageserver_bin = pathlib.Path(neon_binpath) / "pageserver"
run_pageserver = lambda args: subprocess.run(
[str(pageserver_bin), "-D", str(repo_dir), *args],
check=False,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# remove initial config
pageserver_config.unlink()
bad_init = run_pageserver(['--init', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert bad_init.returncode == 1, 'pageserver should not be able to init new config without the node id'
bad_init = run_pageserver(["--init", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'])
assert (
bad_init.returncode == 1
), "pageserver should not be able to init new config without the node id"
assert "missing id" in bad_init.stderr
assert not pageserver_config.exists(), 'config file should not be created after init error'
assert not pageserver_config.exists(), "config file should not be created after init error"
completed_init = run_pageserver(
['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert completed_init.returncode == 0, 'pageserver should be able to create a new config with the node id given'
assert pageserver_config.exists(), 'config file should be created successfully'
["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"']
)
assert (
completed_init.returncode == 0
), "pageserver should be able to create a new config with the node id given"
assert pageserver_config.exists(), "config file should be created successfully"
bad_reinit = run_pageserver(
['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"'])
assert bad_reinit.returncode == 1, 'pageserver should not be able to init new config without the node id'
["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"']
)
assert (
bad_reinit.returncode == 1
), "pageserver should not be able to init new config without the node id"
assert "already exists, cannot init it" in bad_reinit.stderr
bad_update = run_pageserver(['--update-config', '-c', 'id = 3'])
assert bad_update.returncode == 1, 'pageserver should not allow updating node id'
bad_update = run_pageserver(["--update-config", "-c", "id = 3"])
assert bad_update.returncode == 1, "pageserver should not allow updating node id"
assert "has node id already, it cannot be overridden" in bad_update.stderr
@@ -56,12 +67,12 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
client.check_status()
# check initial tenant is there
assert initial_tenant.hex in {t['id'] for t in client.tenant_list()}
assert initial_tenant.hex in {t["id"] for t in client.tenant_list()}
# create new tenant and check it is also there
tenant_id = uuid4()
client.tenant_create(tenant_id)
assert tenant_id.hex in {t['id'] for t in client.tenant_list()}
assert tenant_id.hex in {t["id"] for t in client.tenant_list()}
timelines = client.timeline_list(tenant_id)
assert len(timelines) == 0, "initial tenant should not have any timelines"
@@ -74,19 +85,21 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID):
assert len(timelines) > 0
# check it is there
assert timeline_id.hex in {b['timeline_id'] for b in client.timeline_list(tenant_id)}
assert timeline_id.hex in {b["timeline_id"] for b in client.timeline_list(tenant_id)}
for timeline in timelines:
timeline_id_str = str(timeline['timeline_id'])
timeline_details = client.timeline_detail(tenant_id=tenant_id,
timeline_id=UUID(timeline_id_str),
include_non_incremental_logical_size=True)
timeline_id_str = str(timeline["timeline_id"])
timeline_details = client.timeline_detail(
tenant_id=tenant_id,
timeline_id=UUID(timeline_id_str),
include_non_incremental_logical_size=True,
)
assert timeline_details['tenant_id'] == tenant_id.hex
assert timeline_details['timeline_id'] == timeline_id_str
assert timeline_details["tenant_id"] == tenant_id.hex
assert timeline_details["timeline_id"] == timeline_id_str
local_timeline_details = timeline_details.get('local')
local_timeline_details = timeline_details.get("local")
assert local_timeline_details is not None
assert local_timeline_details['timeline_state'] == 'Loaded'
assert local_timeline_details["timeline_state"] == "Loaded"
def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
@@ -94,32 +107,43 @@ def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv):
with env.pageserver.http_client() as client:
tenant_id, timeline_id = env.neon_cli.create_tenant()
timeline_details = client.timeline_detail(tenant_id=tenant_id,
timeline_id=timeline_id,
include_non_incremental_logical_size=True)
timeline_details = client.timeline_detail(
tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True
)
assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running'
assert (
timeline_details.get("wal_source_connstr") is None
), "Should not be able to connect to WAL streaming without PG compute node running"
assert (
timeline_details.get("last_received_msg_lsn") is None
), "Should not be able to connect to WAL streaming without PG compute node running"
assert (
timeline_details.get("last_received_msg_ts") is None
), "Should not be able to connect to WAL streaming without PG compute node running"
def expect_updated_msg_lsn(client: NeonPageserverHttpClient,
tenant_id: UUID,
timeline_id: UUID,
prev_msg_lsn: Optional[int]) -> int:
def expect_updated_msg_lsn(
client: NeonPageserverHttpClient,
tenant_id: UUID,
timeline_id: UUID,
prev_msg_lsn: Optional[int],
) -> int:
timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id)
# a successful `timeline_details` response must contain the below fields
local_timeline_details = timeline_details['local']
local_timeline_details = timeline_details["local"]
assert "wal_source_connstr" in local_timeline_details.keys()
assert "last_received_msg_lsn" in local_timeline_details.keys()
assert "last_received_msg_ts" in local_timeline_details.keys()
assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty"
assert (
local_timeline_details["last_received_msg_lsn"] is not None
), "the last received message's LSN is empty"
last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"])
assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \
f"the last received message's LSN {last_msg_lsn} hasn't been updated \
assert (
prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn
), f"the last received message's LSN {last_msg_lsn} hasn't been updated \
compared to the previous message's LSN {prev_msg_lsn}"
return last_msg_lsn
@@ -139,15 +163,19 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
# We need to wait here because it's possible that we don't have access to
# the latest WAL yet, when the `timeline_detail` API is first called.
# See: https://github.com/neondatabase/neon/issues/1768.
lsn = wait_until(number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None))
lsn = wait_until(
number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None),
)
# Make a DB modification then expect getting a new WAL receiver's data.
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
wait_until(number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn))
wait_until(
number_of_iterations=5,
interval=1,
func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn),
)
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
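The two `wait_until(...)` calls above poll `expect_updated_msg_lsn` until the WAL receiver data appears. For readers unfamiliar with the fixture, a minimal sketch of what such a retry helper can look like, assuming it simply retries the callable until it stops raising; the real `wait_until` in `fixtures.neon_fixtures` may differ in details:

import time
from typing import Callable, TypeVar

T = TypeVar("T")


def wait_until(number_of_iterations: int, interval: float, func: Callable[[], T]) -> T:
    # Call `func` up to `number_of_iterations` times, sleeping `interval` seconds
    # after each failed attempt; return the first successful result, otherwise
    # re-raise the last error.
    last_error: Exception = RuntimeError("wait_until made no attempts")
    for _ in range(number_of_iterations):
        try:
            return func()
        except Exception as e:
            last_error = e
            time.sleep(interval)
    raise last_error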

View File

@@ -9,24 +9,27 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_pageserver_catchup_while_compute_down')
env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down")
# Make shared_buffers large to ensure we won't query pageserver while it is down.
pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down',
config_lines=['shared_buffers=512MB'])
pg = env.postgres.create_start(
"test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"]
)
pg_conn = pg.connect()
cur = pg_conn.cursor()
# Create table, and insert some rows.
cur.execute('CREATE TABLE foo (t text)')
cur.execute('''
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10000) g
''')
"""
)
cur.execute("SELECT count(*) FROM foo")
assert cur.fetchone() == (10000, )
assert cur.fetchone() == (10000,)
# Stop and restart pageserver. This is a more or less graceful shutdown, although
# the page server doesn't currently have a shutdown routine so there's no difference
@@ -35,11 +38,13 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
# insert some more rows
# since pageserver is shut down, these will be only on safekeepers
cur.execute('''
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10000) g
''')
"""
)
# stop safekeepers gracefully
env.safekeepers[0].stop()
@@ -54,11 +59,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder)
env.safekeepers[2].start()
# restart compute node
pg.stop_and_destroy().create_start('test_pageserver_catchup_while_compute_down')
pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down")
# Ensure that basebackup went correct and pageserver returned all data
pg_conn = pg.connect()
cur = pg_conn.cursor()
cur.execute("SELECT count(*) FROM foo")
assert cur.fetchone() == (20000, )
assert cur.fetchone() == (20000,)
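Two black rules account for most of the churn in this hunk: string literals are normalized to double quotes, and the space before the closing parenthesis of a one-element tuple is dropped. A small sketch with the old form kept in comments; `count_foo` and `cur` are hypothetical stand-ins for the cursor code above:

def count_foo(cur) -> None:
    # before black:  cur.execute('SELECT count(*) FROM foo')
    #                assert cur.fetchone() == (20000, )
    # after black: double quotes, and no space inside the one-element tuple
    cur.execute("SELECT count(*) FROM foo")
    assert cur.fetchone() == (20000,)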

View File

@@ -1,5 +1,5 @@
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
# Test restarting page server, while safekeeper and compute node keep
@@ -7,8 +7,8 @@ from fixtures.log_helper import log
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_pageserver_restart')
pg = env.postgres.create_start('test_pageserver_restart')
env.neon_cli.create_branch("test_pageserver_restart")
pg = env.postgres.create_start("test_pageserver_restart")
pg_conn = pg.connect()
cur = pg_conn.cursor()
@@ -17,18 +17,22 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
# shared_buffers, otherwise the SELECT after restart will just return answer
# from shared_buffers without hitting the page server, which defeats the point
# of this test.
cur.execute('CREATE TABLE foo (t text)')
cur.execute('''
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
''')
"""
)
# Verify that the table is larger than shared_buffers
cur.execute('''
cur.execute(
"""
select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize
from pg_settings where name = 'shared_buffers'
''')
"""
)
row = cur.fetchone()
assert row is not None
log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
@@ -49,7 +53,7 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
cur = pg_conn.cursor()
cur.execute("SELECT count(*) FROM foo")
assert cur.fetchone() == (100000, )
assert cur.fetchone() == (100000,)
# Stop the page server by force, and restart it
env.pageserver.stop()

View File

@@ -1,7 +1,8 @@
from io import BytesIO
import asyncio
from fixtures.neon_fixtures import NeonEnv, Postgres
from io import BytesIO
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, Postgres
async def repeat_bytes(buf, repetitions: int):
@@ -13,7 +14,8 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
buf = BytesIO()
for i in range(1000):
buf.write(
f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode())
f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
)
buf.seek(0)
copy_input = repeat_bytes(buf.read(), 5000)
@@ -30,7 +32,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str)
async def parallel_load_same_table(pg: Postgres, n_parallel: int):
workers = []
for worker_id in range(n_parallel):
worker = copy_test_data_to_table(pg, worker_id, f'copytest')
worker = copy_test_data_to_table(pg, worker_id, f"copytest")
workers.append(asyncio.create_task(worker))
# await all workers
@@ -41,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int):
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
env = neon_simple_env
env.neon_cli.create_branch("test_parallel_copy", "empty")
pg = env.postgres.create_start('test_parallel_copy')
pg = env.postgres.create_start("test_parallel_copy")
log.info("postgres is running on 'test_parallel_copy' branch")
# Create test table
conn = pg.connect()
cur = conn.cursor()
cur.execute(f'CREATE TABLE copytest (i int, t text)')
cur.execute(f"CREATE TABLE copytest (i int, t text)")
# Run COPY TO to load the table with parallel connections.
asyncio.run(parallel_load_same_table(pg, n_parallel))
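The import reshuffle at the top of this file is isort at work: judging by the output in these hunks, standard-library imports come before project imports, plain `import x` lines precede `from x import y` lines within a group, and each run is alphabetized. The resulting header of this file, with grouping comments added only for illustration:

# standard library: plain imports first, then from-imports, alphabetized
import asyncio
from io import BytesIO

# project test fixtures, sorted the same way
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, Postgres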

View File

@@ -2,8 +2,8 @@ from contextlib import closing
import psycopg2.extras
from fixtures.log_helper import log
from fixtures.utils import print_gc_result, query_scalar
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import print_gc_result, query_scalar
#
@@ -14,10 +14,12 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
# Set pitr interval such that we need to keep the data
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
neon_env_builder.pageserver_config_override = (
"tenant_config={pitr_interval = '1 day', gc_horizon = 0}"
)
env = neon_env_builder.init_start()
pgmain = env.postgres.create_start('main')
pgmain = env.postgres.create_start("main")
log.info("postgres is running on 'main' branch")
main_pg_conn = pgmain.connect()
@@ -25,30 +27,32 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
timeline = query_scalar(main_cur, "SHOW neon.timeline_id")
# Create table
main_cur.execute('CREATE TABLE foo (t text)')
main_cur.execute("CREATE TABLE foo (t text)")
for i in range(10000):
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space';
''')
"""
)
if i == 99:
# keep some early lsn to test branch creation after GC
main_cur.execute('SELECT pg_current_wal_insert_lsn(), txid_current()')
main_cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()")
res = main_cur.fetchone()
assert res is not None
lsn_a = res[0]
xid_a = res[1]
log.info(f'LSN after 100 rows: {lsn_a} xid {xid_a}')
log.info(f"LSN after 100 rows: {lsn_a} xid {xid_a}")
main_cur.execute('SELECT pg_current_wal_insert_lsn(), txid_current()')
main_cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()")
res = main_cur.fetchone()
assert res is not None
debug_lsn = res[0]
debug_xid = res[1]
log.info(f'LSN after 10000 rows: {debug_lsn} xid {debug_xid}')
log.info(f"LSN after 10000 rows: {debug_lsn} xid {debug_xid}")
# run GC
with closing(env.pageserver.connect()) as psconn:
@@ -61,16 +65,16 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder):
# Branch at the point where only 100 rows were inserted
# It must have been preserved by PITR setting
env.neon_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a)
env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a)
pg_hundred = env.postgres.create_start('test_pitr_gc_hundred')
pg_hundred = env.postgres.create_start("test_pitr_gc_hundred")
# On the 'hundred' branch, we should see only 100 rows
hundred_pg_conn = pg_hundred.connect()
hundred_cur = hundred_pg_conn.cursor()
hundred_cur.execute('SELECT count(*) FROM foo')
assert hundred_cur.fetchone() == (100, )
hundred_cur.execute("SELECT count(*) FROM foo")
assert hundred_cur.fetchone() == (100,)
# All the rows are visible on the main branch
main_cur.execute('SELECT count(*) FROM foo')
assert main_cur.fetchone() == (10000, )
main_cur.execute("SELECT count(*) FROM foo")
assert main_cur.fetchone() == (10000,)
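The multi-line SQL literals in this file follow the pattern visible in the hunks above: black flips the triple quotes from ''' to """ and moves the literal onto its own lines inside the call, with the closing parenthesis on a line of its own. A self-contained sketch, where `insert_one` and `cur` are hypothetical and the SQL is the statement from the hunk above:

def insert_one(cur) -> None:
    # before black:
    #   cur.execute('''
    #       INSERT INTO foo
    #       SELECT 'long string to consume some space';
    #       ''')
    # after black: double triple-quotes, literal on its own lines, ")" on its own line
    cur.execute(
        """
        INSERT INTO foo
        SELECT 'long string to consume some space';
        """
    )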

View File

@@ -1,25 +1,26 @@
import pytest
import psycopg2
import pytest
def test_proxy_select_1(static_proxy):
static_proxy.safe_psql('select 1', options='project=generic-project-name')
static_proxy.safe_psql("select 1", options="project=generic-project-name")
def test_password_hack(static_proxy):
user = 'borat'
password = 'password'
static_proxy.safe_psql(f"create role {user} with login password '{password}'",
options='project=irrelevant')
user = "borat"
password = "password"
static_proxy.safe_psql(
f"create role {user} with login password '{password}'", options="project=irrelevant"
)
# Note the format of `magic`!
magic = f"project=irrelevant;{password}"
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
# Must also check that invalid magic won't be accepted.
with pytest.raises(psycopg2.errors.OperationalError):
magic = "broken"
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
# Pass extra options to the server.
@@ -28,8 +29,8 @@ def test_password_hack(static_proxy):
# See https://github.com/neondatabase/neon/issues/1287
@pytest.mark.xfail
def test_proxy_options(static_proxy):
with static_proxy.connect(options='-cproxytest.option=value') as conn:
with static_proxy.connect(options="-cproxytest.option=value") as conn:
with conn.cursor() as cur:
cur.execute('SHOW proxytest.option')
cur.execute("SHOW proxytest.option")
value = cur.fetchall()[0][0]
assert value == 'value'
assert value == "value"

View File

@@ -1,14 +1,11 @@
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from psycopg2.errors import UndefinedTable
from psycopg2.errors import IoError
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import query_scalar
from psycopg2.errors import IoError, UndefinedTable
pytest_plugins = ("fixtures.neon_fixtures")
pytest_plugins = "fixtures.neon_fixtures"
extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
@@ -47,13 +44,15 @@ def test_read_validation(neon_simple_env: NeonEnv):
log.info("Test table is populated, validating buffer cache")
cache_entries = query_scalar(
c,
"select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode))
c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)
)
assert cache_entries > 0, "No buffers cached for the test relation"
c.execute(
"select reltablespace, reldatabase, relfilenode from pg_buffercache where relfilenode = {}"
.format(relfilenode))
"select reltablespace, reldatabase, relfilenode from pg_buffercache where relfilenode = {}".format(
relfilenode
)
)
reln = c.fetchone()
assert reln is not None
@@ -62,21 +61,23 @@ def test_read_validation(neon_simple_env: NeonEnv):
c.execute("select clear_buffer_cache()")
cache_entries = query_scalar(
c,
"select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode))
c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)
)
assert cache_entries == 0, "Failed to clear buffer cache"
log.info("Cache is clear, reading stale page version")
c.execute(
"select lsn, lower, upper from page_header(get_raw_page_at_lsn('foo', 'main', 0, '{}'))"
.format(first[0]))
"select lsn, lower, upper from page_header(get_raw_page_at_lsn('foo', 'main', 0, '{}'))".format(
first[0]
)
)
direct_first = c.fetchone()
assert first == direct_first, "Failed fetch page at historic lsn"
cache_entries = query_scalar(
c,
"select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode))
c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)
)
assert cache_entries == 0, "relation buffers detected after invalidation"
log.info("Cache is clear, reading latest page version without cache")
@@ -88,8 +89,8 @@ def test_read_validation(neon_simple_env: NeonEnv):
assert second == direct_latest, "Failed fetch page at latest lsn"
cache_entries = query_scalar(
c,
"select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode))
c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)
)
assert cache_entries == 0, "relation buffers detected after invalidation"
log.info(
@@ -97,8 +98,10 @@ def test_read_validation(neon_simple_env: NeonEnv):
)
c.execute(
"select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))"
.format(reln[0], reln[1], reln[2], first[0]))
"select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))".format(
reln[0], reln[1], reln[2], first[0]
)
)
direct_first = c.fetchone()
assert first == direct_first, "Failed fetch page at historic lsn using oid"
@@ -107,20 +110,24 @@ def test_read_validation(neon_simple_env: NeonEnv):
)
c.execute(
"select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, NULL ))"
.format(reln[0], reln[1], reln[2]))
"select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, NULL ))".format(
reln[0], reln[1], reln[2]
)
)
direct_latest = c.fetchone()
assert second == direct_latest, "Failed fetch page at latest lsn"
c.execute('drop table foo;')
c.execute("drop table foo;")
log.info(
"Relation dropped, attempting reading stale page version without cache using relation identifiers"
)
c.execute(
"select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))"
.format(reln[0], reln[1], reln[2], first[0]))
"select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))".format(
reln[0], reln[1], reln[2], first[0]
)
)
direct_first = c.fetchone()
assert first == direct_first, "Failed fetch page at historic lsn using oid"

View File

@@ -12,81 +12,87 @@ from fixtures.utils import query_scalar
#
def test_readonly_node(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch('test_readonly_node', 'empty')
pgmain = env.postgres.create_start('test_readonly_node')
env.neon_cli.create_branch("test_readonly_node", "empty")
pgmain = env.postgres.create_start("test_readonly_node")
log.info("postgres is running on 'test_readonly_node' branch")
main_pg_conn = pgmain.connect()
main_cur = main_pg_conn.cursor()
# Create table, and insert the first 100 rows
main_cur.execute('CREATE TABLE foo (t text)')
main_cur.execute("CREATE TABLE foo (t text)")
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100) g
''')
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
lsn_a = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info('LSN after 100 rows: ' + lsn_a)
"""
)
main_cur.execute("SELECT pg_current_wal_insert_lsn()")
lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info("LSN after 100 rows: " + lsn_a)
# Insert some more rows. (This generates enough WAL to fill a few segments.)
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 200000) g
''')
lsn_b = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info('LSN after 200100 rows: ' + lsn_b)
"""
)
lsn_b = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info("LSN after 200100 rows: " + lsn_b)
# Insert many more rows. This generates enough WAL to fill a few segments.
main_cur.execute('''
main_cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 200000) g
''')
"""
)
lsn_c = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()')
log.info('LSN after 400100 rows: ' + lsn_c)
lsn_c = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()")
log.info("LSN after 400100 rows: " + lsn_c)
# Create first read-only node at the point where only 100 rows were inserted
pg_hundred = env.postgres.create_start(branch_name='test_readonly_node',
node_name='test_readonly_node_hundred',
lsn=lsn_a)
pg_hundred = env.postgres.create_start(
branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a
)
# And another at the point where 200100 rows were inserted
pg_more = env.postgres.create_start(branch_name='test_readonly_node',
node_name='test_readonly_node_more',
lsn=lsn_b)
pg_more = env.postgres.create_start(
branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b
)
# On the 'hundred' node, we should see only 100 rows
hundred_pg_conn = pg_hundred.connect()
hundred_cur = hundred_pg_conn.cursor()
hundred_cur.execute('SELECT count(*) FROM foo')
assert hundred_cur.fetchone() == (100, )
hundred_cur.execute("SELECT count(*) FROM foo")
assert hundred_cur.fetchone() == (100,)
# On the 'more' node, we should see 100200 rows
more_pg_conn = pg_more.connect()
more_cur = more_pg_conn.cursor()
more_cur.execute('SELECT count(*) FROM foo')
assert more_cur.fetchone() == (200100, )
more_cur.execute("SELECT count(*) FROM foo")
assert more_cur.fetchone() == (200100,)
# All the rows are visible on the main branch
main_cur.execute('SELECT count(*) FROM foo')
assert main_cur.fetchone() == (400100, )
main_cur.execute("SELECT count(*) FROM foo")
assert main_cur.fetchone() == (400100,)
# Check creating a node at segment boundary
pg = env.postgres.create_start(branch_name='test_readonly_node',
node_name='test_branch_segment_boundary',
lsn='0/3000000')
pg = env.postgres.create_start(
branch_name="test_readonly_node", node_name="test_branch_segment_boundary", lsn="0/3000000"
)
cur = pg.connect().cursor()
cur.execute('SELECT 1')
assert cur.fetchone() == (1, )
cur.execute("SELECT 1")
assert cur.fetchone() == (1,)
# Create node at pre-initdb lsn
with pytest.raises(Exception, match="invalid basebackup lsn"):
# compute node startup with invalid LSN should fail
env.postgres.create_start(branch_name='test_readonly_node',
node_name='test_readonly_node_preinitdb',
lsn='0/42')
env.postgres.create_start(
branch_name="test_readonly_node", node_name="test_readonly_node_preinitdb", lsn="0/42"
)

View File

@@ -1,11 +1,12 @@
import json
import os
import time
import psycopg2.extras
import json
from ast import Assert
from contextlib import closing
from fixtures.neon_fixtures import NeonEnvBuilder
import psycopg2.extras
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
#
@@ -21,13 +22,15 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
# Check if failpoints enables. Otherwise the test doesn't make sense
f = env.neon_cli.pageserver_enabled_features()
assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test"
assert (
"failpoints" in f["features"]
), "Build pageserver with --features=failpoints option to run this test"
neon_env_builder.start()
# Create a branch for us
env.neon_cli.create_branch("test_pageserver_recovery", "main")
pg = env.postgres.create_start('test_pageserver_recovery')
pg = env.postgres.create_start("test_pageserver_recovery")
log.info("postgres is running on 'test_pageserver_recovery' branch")
connstr = pg.connstr()
@@ -62,4 +65,4 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("select count(*) from foo")
assert cur.fetchone() == (100000, )
assert cur.fetchone() == (100000,)
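The three-line asserts appearing throughout these hunks are black's way of wrapping an `assert condition, message` statement that no longer fits on one line: the condition is parenthesized so the message can follow the closing parenthesis. Reproduced with the failpoints check from this file; the one-line form is kept as a comment, and `f` is given a dummy value so the snippet stands alone:

# before black (one long line):
#   assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test"
f = {"features": ["failpoints"]}
assert (
    "failpoints" in f["features"]
), "Build pageserver with --features=failpoints option to run this test"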

View File

@@ -1,14 +1,24 @@
# It's possible to run any regular test with the local fs remote storage via
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ......
import shutil, os
from pathlib import Path
import os
import shutil
import time
from pathlib import Path
from uuid import UUID
from fixtures.neon_fixtures import NeonEnvBuilder, RemoteStorageKind, assert_timeline_local, available_remote_storages, wait_until, wait_for_last_record_lsn, wait_for_upload
from fixtures.log_helper import log
from fixtures.utils import lsn_from_hex, query_scalar
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
RemoteStorageKind,
assert_timeline_local,
available_remote_storages,
wait_for_last_record_lsn,
wait_for_upload,
wait_until,
)
from fixtures.utils import lsn_from_hex, query_scalar
#
@@ -28,7 +38,7 @@ import pytest
# * queries the specific data, ensuring that it matches the one stored before
#
# The tests are done for all types of remote storage pageserver supports.
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages())
def test_remote_storage_backup_and_restore(
neon_env_builder: NeonEnvBuilder,
remote_storatge_kind: RemoteStorageKind,
@@ -39,15 +49,15 @@ def test_remote_storage_backup_and_restore(
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_remote_storage_backup_and_restore',
test_name="test_remote_storage_backup_and_restore",
)
data_id = 1
data_secret = 'very secret secret'
data_secret = "very secret secret"
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
pg = env.postgres.create_start('main')
pg = env.postgres.create_start("main")
client = env.pageserver.http_client()
@@ -58,10 +68,12 @@ def test_remote_storage_backup_and_restore(
for checkpoint_number in checkpoint_numbers:
with pg.cursor() as cur:
cur.execute(f'''
cur.execute(
f"""
CREATE TABLE t{checkpoint_number}(id int primary key, secret text);
INSERT INTO t{checkpoint_number} VALUES ({data_id}, '{data_secret}|{checkpoint_number}');
''')
"""
)
current_lsn = lsn_from_hex(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
# wait until pageserver receives that data
@@ -70,16 +82,16 @@ def test_remote_storage_backup_and_restore(
# run checkpoint manually to be sure that data landed in remote storage
env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}")
log.info(f'waiting for checkpoint {checkpoint_number} upload')
log.info(f"waiting for checkpoint {checkpoint_number} upload")
# wait until pageserver successfully uploaded a checkpoint to remote storage
wait_for_upload(client, UUID(tenant_id), UUID(timeline_id), current_lsn)
log.info(f'upload of checkpoint {checkpoint_number} is done')
log.info(f"upload of checkpoint {checkpoint_number} is done")
##### Stop the first pageserver instance, erase all its data
env.postgres.stop_all()
env.pageserver.stop()
dir_to_clear = Path(env.repo_dir) / 'tenants'
dir_to_clear = Path(env.repo_dir) / "tenants"
shutil.rmtree(dir_to_clear)
os.mkdir(dir_to_clear)
@@ -100,8 +112,8 @@ def test_remote_storage_backup_and_restore(
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
log.info("Timeline detail with active failpoint: %s", detail)
assert detail['local'] is None
assert detail['remote']['awaits_download']
assert detail["local"] is None
assert detail["remote"]["awaits_download"]
# trigger temporary download files removal
env.pageserver.stop()
@@ -110,19 +122,24 @@ def test_remote_storage_backup_and_restore(
client.tenant_attach(UUID(tenant_id))
log.info("waiting for timeline redownload")
wait_until(number_of_iterations=20,
interval=1,
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)))
wait_until(
number_of_iterations=20,
interval=1,
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)),
)
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
assert detail['local'] is not None
assert detail["local"] is not None
log.info("Timeline detail after attach completed: %s", detail)
assert lsn_from_hex(detail['local']['last_record_lsn']) >= current_lsn, 'current db Lsn should should not be less than the one stored on remote storage'
assert not detail['remote']['awaits_download']
assert (
lsn_from_hex(detail["local"]["last_record_lsn"]) >= current_lsn
), "current db Lsn should should not be less than the one stored on remote storage"
assert not detail["remote"]["awaits_download"]
pg = env.postgres.create_start('main')
pg = env.postgres.create_start("main")
with pg.cursor() as cur:
for checkpoint_number in checkpoint_numbers:
assert query_scalar(cur,
f'SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};'
) == f'{data_secret}|{checkpoint_number}'
assert (
query_scalar(cur, f"SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};")
== f"{data_secret}|{checkpoint_number}"
)

View File

@@ -1,5 +1,5 @@
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
# Test subtransactions
@@ -11,28 +11,30 @@ from fixtures.log_helper import log
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
env = neon_simple_env
env.neon_cli.create_branch("test_subxacts", "empty")
pg = env.postgres.create_start('test_subxacts')
pg = env.postgres.create_start("test_subxacts")
log.info("postgres is running on 'test_subxacts' branch")
pg_conn = pg.connect()
cur = pg_conn.cursor()
cur.execute('''
cur.execute(
"""
CREATE TABLE t1(i int, j int);
''')
"""
)
cur.execute('select pg_switch_wal();')
cur.execute("select pg_switch_wal();")
# Issue 100 transactions, with 1000 subtransactions in each.
for i in range(100):
cur.execute('begin')
cur.execute("begin")
for j in range(1000):
cur.execute(f'savepoint sp{j}')
cur.execute(f'insert into t1 values ({i}, {j})')
cur.execute('commit')
cur.execute(f"savepoint sp{j}")
cur.execute(f"insert into t1 values ({i}, {j})")
cur.execute("commit")
# force wal flush
cur.execute('checkpoint')
cur.execute("checkpoint")
# Check that we can restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)

View File

@@ -1,27 +1,28 @@
from contextlib import closing
import pytest
import psycopg2.extras
from fixtures.neon_fixtures import NeonEnvBuilder
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
def test_tenant_config(neon_env_builder: NeonEnvBuilder):
# set some non-default global config
neon_env_builder.pageserver_config_override = '''
neon_env_builder.pageserver_config_override = """
page_cache_size=444;
wait_lsn_timeout='111 s';
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}"""
env = neon_env_builder.init_start()
"""Test per tenant configuration"""
tenant, _ = env.neon_cli.create_tenant(conf={
'checkpoint_distance': '20000',
'gc_period': '30sec',
})
tenant, _ = env.neon_cli.create_tenant(
conf={
"checkpoint_distance": "20000",
"gc_period": "30sec",
}
)
env.neon_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant)
env.neon_cli.create_timeline(f"test_tenant_conf", tenant_id=tenant)
pg = env.postgres.create_start(
"test_tenant_conf",
"main",
@@ -36,7 +37,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
pscur.execute(f"show {env.initial_tenant.hex}")
res = pscur.fetchone()
assert all(
i in res.items() for i in {
i in res.items()
for i in {
"checkpoint_distance": 10000,
"compaction_target_size": 1048576,
"compaction_period": 1,
@@ -44,8 +46,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
"gc_horizon": 67108864,
"gc_period": 100,
"image_creation_threshold": 3,
"pitr_interval": 2592000
}.items())
"pitr_interval": 2592000,
}.items()
)
# check the configuration of the new tenant
with closing(env.pageserver.connect()) as psconn:
@@ -54,7 +57,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
res = pscur.fetchone()
log.info(f"res: {res}")
assert all(
i in res.items() for i in {
i in res.items()
for i in {
"checkpoint_distance": 20000,
"compaction_target_size": 1048576,
"compaction_period": 1,
@@ -62,15 +66,18 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
"gc_horizon": 67108864,
"gc_period": 30,
"image_creation_threshold": 3,
"pitr_interval": 2592000
}.items())
"pitr_interval": 2592000,
}.items()
)
# update the config and ensure that it has changed
env.neon_cli.config_tenant(tenant_id=tenant,
conf={
'checkpoint_distance': '15000',
'gc_period': '80sec',
})
env.neon_cli.config_tenant(
tenant_id=tenant,
conf={
"checkpoint_distance": "15000",
"gc_period": "80sec",
},
)
with closing(env.pageserver.connect()) as psconn:
with psconn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as pscur:
@@ -78,7 +85,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
res = pscur.fetchone()
log.info(f"after config res: {res}")
assert all(
i in res.items() for i in {
i in res.items()
for i in {
"checkpoint_distance": 15000,
"compaction_target_size": 1048576,
"compaction_period": 1,
@@ -86,8 +94,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
"gc_horizon": 67108864,
"gc_period": 80,
"image_creation_threshold": 3,
"pitr_interval": 2592000
}.items())
"pitr_interval": 2592000,
}.items()
)
# restart the pageserver and ensure that the config is still correct
env.pageserver.stop()
@@ -99,7 +108,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
res = pscur.fetchone()
log.info(f"after restart res: {res}")
assert all(
i in res.items() for i in {
i in res.items()
for i in {
"checkpoint_distance": 15000,
"compaction_target_size": 1048576,
"compaction_period": 1,
@@ -107,5 +117,6 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}'''
"gc_horizon": 67108864,
"gc_period": 80,
"image_creation_threshold": 3,
"pitr_interval": 2592000
}.items())
"pitr_interval": 2592000,
}.items()
)
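The repeated `assert all(i in res.items() for i in {...}.items())` blocks in this file are subset checks: every expected key/value pair must be present in the settings row returned by `show <tenant>`, while extra keys in the response are ignored. The same check in isolation, with `res` standing in for the RealDictCursor row and values taken from the hunk above:

expected = {"checkpoint_distance": 15000, "gc_period": 80, "pitr_interval": 2592000}
res = {"checkpoint_distance": 15000, "gc_period": 80, "pitr_interval": 2592000, "gc_horizon": 67108864}

# passes as long as every expected item appears in res; res may contain more keys
assert all(item in res.items() for item in expected.items())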

View File

@@ -1,9 +1,9 @@
import uuid
from threading import Thread
from uuid import uuid4
import uuid
import psycopg2
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException
@@ -11,7 +11,7 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiExc
def do_gc_target(env: NeonEnv, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
"""Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211"""
try:
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')
env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {timeline_id.hex} 0")
except Exception as e:
log.error("do_gc failed: %s", e)
@@ -22,8 +22,10 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
# first check for non existing tenant
tenant_id = uuid4()
with pytest.raises(expected_exception=NeonPageserverApiException,
match=f'Tenant not found for id {tenant_id.hex}'):
with pytest.raises(
expected_exception=NeonPageserverApiException,
match=f"Tenant not found for id {tenant_id.hex}",
):
pageserver_http.tenant_detach(tenant_id)
# create new nenant
@@ -32,17 +34,20 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
# assert tenant exists on disk
assert (env.repo_dir / "tenants" / tenant_id.hex).exists()
pg = env.postgres.create_start('main', tenant_id=tenant_id)
pg = env.postgres.create_start("main", tenant_id=tenant_id)
# we rely upon autocommit after each statement
pg.safe_psql_many(queries=[
'CREATE TABLE t(key int primary key, value text)',
'INSERT INTO t SELECT generate_series(1,100000), \'payload\'',
])
pg.safe_psql_many(
queries=[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
]
)
# gc should not try to even start
with pytest.raises(expected_exception=psycopg2.DatabaseError,
match='gc target timeline does not exist'):
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0')
with pytest.raises(
expected_exception=psycopg2.DatabaseError, match="gc target timeline does not exist"
):
env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {uuid4().hex} 0")
# try to concurrently run gc and detach
gc_thread = Thread(target=lambda: do_gc_target(env, tenant_id, timeline_id))
@@ -67,6 +72,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
# check that nothing is left on disk for deleted tenant
assert not (env.repo_dir / "tenants" / tenant_id.hex).exists()
with pytest.raises(expected_exception=psycopg2.DatabaseError,
match=f'Tenant {tenant_id.hex} not found'):
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')
with pytest.raises(
expected_exception=psycopg2.DatabaseError, match=f"Tenant {tenant_id.hex} not found"
):
env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {timeline_id.hex} 0")

View File

@@ -34,12 +34,14 @@ def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
@contextmanager
def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
pageserver_bin: pathlib.Path,
remote_storage_mock_path: pathlib.Path,
pg_port: int,
http_port: int,
broker: Optional[Etcd]):
def new_pageserver_helper(
new_pageserver_dir: pathlib.Path,
pageserver_bin: pathlib.Path,
remote_storage_mock_path: pathlib.Path,
pg_port: int,
http_port: int,
broker: Optional[Etcd],
):
"""
cannot use NeonPageserver yet because it depends on neon cli
which currently lacks support for multiple pageservers
@@ -47,10 +49,10 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
# actually run new pageserver
cmd = [
str(pageserver_bin),
'--workdir',
"--workdir",
str(new_pageserver_dir),
'--daemonize',
'--update-config',
"--daemonize",
"--update-config",
f"-c listen_pg_addr='localhost:{pg_port}'",
f"-c listen_http_addr='localhost:{http_port}'",
f"-c pg_distrib_dir='{pg_distrib_dir}'",
@@ -58,7 +60,9 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}",
]
if broker is not None:
cmd.append(f"-c broker_endpoints=['{broker.client_url()}']", )
cmd.append(
f"-c broker_endpoints=['{broker.client_url()}']",
)
log.info("starting new pageserver %s", cmd)
out = subprocess.check_output(cmd, text=True)
@@ -67,7 +71,7 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path,
yield
finally:
log.info("stopping new pageserver")
pid = int((new_pageserver_dir / 'pageserver.pid').read_text())
pid = int((new_pageserver_dir / "pageserver.pid").read_text())
os.kill(pid, signal.SIGQUIT)
@@ -105,7 +109,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
log.info("successfully recovered %s", inserted_ctr)
failed = False
load_ok_event.set()
log.info('load thread stopped')
log.info("load thread stopped")
def populate_branch(
@@ -123,8 +127,10 @@ def populate_branch(
cur.execute("SELECT pg_current_wal_flush_lsn()")
log.info("pg_current_wal_flush_lsn() %s", lsn_from_hex(cur.fetchone()[0]))
log.info("timeline detail %s",
ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id))
log.info(
"timeline detail %s",
ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id),
)
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
@@ -133,7 +139,7 @@ def populate_branch(
cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
if expected_sum is not None:
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (expected_sum, )
assert cur.fetchone() == (expected_sum,)
cur.execute("SELECT pg_current_wal_flush_lsn()")
current_lsn = lsn_from_hex(cur.fetchone()[0])
@@ -166,34 +172,41 @@ def check_timeline_attached(
# when load is active these checks can break because lsns are not static
# so lets check with some margin
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
lsn_from_hex(old_timeline_detail['local']['disk_consistent_lsn']),
0.03)
assert_abs_margin_ratio(
lsn_from_hex(new_timeline_detail["local"]["disk_consistent_lsn"]),
lsn_from_hex(old_timeline_detail["local"]["disk_consistent_lsn"]),
0.03,
)
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
old_current_lsn,
0.03)
assert_abs_margin_ratio(
lsn_from_hex(new_timeline_detail["local"]["disk_consistent_lsn"]), old_current_lsn, 0.03
)
def switch_pg_to_new_pageserver(env: NeonEnv,
pg: Postgres,
new_pageserver_port: int,
tenant_id: UUID,
timeline_id: UUID) -> pathlib.Path:
def switch_pg_to_new_pageserver(
env: NeonEnv, pg: Postgres, new_pageserver_port: int, tenant_id: UUID, timeline_id: UUID
) -> pathlib.Path:
pg.stop()
pg_config_file_path = pathlib.Path(pg.config_file_path())
pg_config_file_path.open('a').write(
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'")
pg_config_file_path.open("a").write(
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'"
)
pg.start()
timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant_id.hex / 'timelines' / timeline_id.hex
timeline_to_detach_local_path = (
env.repo_dir / "tenants" / tenant_id.hex / "timelines" / timeline_id.hex
)
files_before_detach = os.listdir(timeline_to_detach_local_path)
assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
but got: {files_before_detach}'
assert len(files_before_detach) >= 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
but got {files_before_detach}'
assert (
"metadata" in files_before_detach
), f"Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
but got: {files_before_detach}"
assert (
len(files_before_detach) >= 2
), f"Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
but got {files_before_detach}"
return timeline_to_detach_local_path
@@ -202,39 +215,44 @@ def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path
with pg_cur(pg) as cur:
# check that data is still there
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (sum_before_migration, )
assert cur.fetchone() == (sum_before_migration,)
# check that we can write new data
cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (sum_before_migration + 1500500, )
assert cur.fetchone() == (sum_before_migration + 1500500,)
assert not os.path.exists(old_local_path), f'After detach, local timeline dir {old_local_path} should be removed'
assert not os.path.exists(
old_local_path
), f"After detach, local timeline dir {old_local_path} should be removed"
@pytest.mark.parametrize(
'method',
"method",
[
# A minor migration involves no storage breaking changes.
# It is done by attaching the tenant to a new pageserver.
'minor',
"minor",
# A major migration involves exporting a postgres datadir
# basebackup and importing it into the new pageserver.
# This kind of migration can tolerate breaking changes
# to storage format
'major',
])
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
port_distributor: PortDistributor,
test_output_dir,
method: str,
with_load: str):
"major",
],
)
@pytest.mark.parametrize("with_load", ["with_load", "without_load"])
def test_tenant_relocation(
neon_env_builder: NeonEnvBuilder,
port_distributor: PortDistributor,
test_output_dir,
method: str,
with_load: str,
):
neon_env_builder.enable_local_fs_remote_storage()
env = neon_env_builder.init_start()
# create folder for remote storage mock
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage"
# we use two branches to check that they are both relocated
# first branch is used for load, compute for second one is used to
@@ -242,12 +260,15 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
pageserver_http = env.pageserver.http_client()
tenant_id, initial_timeline_id = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
tenant_id, initial_timeline_id = env.neon_cli.create_tenant(
UUID("74ee8b079a0e437eb0afea7d26a07209")
)
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id)
env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
pg_main = env.postgres.create_start(branch_name='test_tenant_relocation_main',
tenant_id=tenant_id)
pg_main = env.postgres.create_start(
branch_name="test_tenant_relocation_main", tenant_id=tenant_id
)
timeline_id_main, current_lsn_main = populate_branch(
pg_main,
@@ -263,8 +284,9 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
ancestor_start_lsn=lsn_to_hex(current_lsn_main),
tenant_id=tenant_id,
)
pg_second = env.postgres.create_start(branch_name='test_tenant_relocation_second',
tenant_id=tenant_id)
pg_second = env.postgres.create_start(
branch_name="test_tenant_relocation_second", tenant_id=tenant_id
)
timeline_id_second, current_lsn_second = populate_branch(
pg_second,
@@ -281,7 +303,7 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second)
timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second)
if with_load == 'with_load':
if with_load == "with_load":
# create load table
with pg_cur(pg_main) as cur:
cur.execute("CREATE TABLE load(value text)")
@@ -317,22 +339,24 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
log.info("inititalizing new pageserver")
# bootstrap second pageserver
new_pageserver_dir = env.repo_dir / 'new_pageserver'
new_pageserver_dir = env.repo_dir / "new_pageserver"
new_pageserver_dir.mkdir()
new_pageserver_pg_port = port_distributor.get_port()
new_pageserver_http_port = port_distributor.get_port()
log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver'
pageserver_bin = pathlib.Path(neon_binpath) / "pageserver"
new_pageserver_http = NeonPageserverHttpClient(port=new_pageserver_http_port, auth_token=None)
with new_pageserver_helper(new_pageserver_dir,
pageserver_bin,
remote_storage_mock_path,
new_pageserver_pg_port,
new_pageserver_http_port,
neon_env_builder.broker):
with new_pageserver_helper(
new_pageserver_dir,
pageserver_bin,
remote_storage_mock_path,
new_pageserver_pg_port,
new_pageserver_http_port,
neon_env_builder.broker,
):
# Migrate either by attaching from s3 or import/export basebackup
if method == "major":
@@ -367,13 +391,16 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# check that it shows that download is in progress
tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id)
assert tenant_status.get('has_in_progress_downloads'), tenant_status
assert tenant_status.get("has_in_progress_downloads"), tenant_status
# wait until tenant is downloaded
wait_until(number_of_iterations=10,
interval=1,
func=lambda: assert_no_in_progress_downloads_for_tenant(
new_pageserver_http, tenant_id))
wait_until(
number_of_iterations=10,
interval=1,
func=lambda: assert_no_in_progress_downloads_for_tenant(
new_pageserver_http, tenant_id
),
)
check_timeline_attached(
new_pageserver_http,
@@ -392,10 +419,10 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
)
# rewrite neon cli config to use new pageserver for basebackup to start new compute
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
cli_config_lines = (env.repo_dir / "config").read_text().splitlines()
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
(env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
(env.repo_dir / "config").write_text("\n".join(cli_config_lines))
old_local_path_main = switch_pg_to_new_pageserver(
env,
@@ -423,7 +450,8 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
# ensure that we can successfully read all relations on the new pageserver
with pg_cur(pg_second) as cur:
cur.execute('''
cur.execute(
"""
DO $$
DECLARE
r RECORD;
@@ -435,18 +463,19 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname;
END LOOP;
END$$;
''')
"""
)
if with_load == 'with_load':
if with_load == "with_load":
assert load_ok_event.wait(3)
log.info('stopping load thread')
log.info("stopping load thread")
load_stop_event.set()
load_thread.join(timeout=10)
log.info('load thread stopped')
log.info("load thread stopped")
# bring old pageserver back for clean shutdown via neon cli
# new pageserver will be shut down by the context manager
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
cli_config_lines = (env.repo_dir / "config").read_text().splitlines()
cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'"
(env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
(env.repo_dir / "config").write_text("\n".join(cli_config_lines))

View File

@@ -1,6 +1,7 @@
from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
from uuid import UUID
import time
from uuid import UUID
from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
def get_only_element(l):
@@ -47,7 +48,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
tenant, _ = env.neon_cli.create_tenant()
timeline = env.neon_cli.create_timeline(name, tenant_id=tenant)
pg = env.postgres.create_start(name, tenant_id=tenant)
assert (get_state(tenant) == "Active")
assert get_state(tenant) == "Active"
# Stop compute
pg.stop()

View File

@@ -1,15 +1,15 @@
import os
from contextlib import closing
from datetime import datetime
import os
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
import pytest
from fixtures.log_helper import log
from fixtures.metrics import parse_metrics
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.utils import lsn_to_hex
@pytest.mark.parametrize('with_safekeepers', [False, True])
@pytest.mark.parametrize("with_safekeepers", [False, True])
def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
if with_safekeepers:
neon_env_builder.num_safekeepers = 3
@@ -19,17 +19,19 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers:
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_1)
env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
tenant_id=tenant_2)
env.neon_cli.create_timeline(
f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_1
)
env.neon_cli.create_timeline(
f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_2
)
pg_tenant1 = env.postgres.create_start(
f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
f"test_tenants_normal_work_with_safekeepers{with_safekeepers}",
tenant_id=tenant_1,
)
pg_tenant2 = env.postgres.create_start(
f'test_tenants_normal_work_with_safekeepers{with_safekeepers}',
f"test_tenants_normal_work_with_safekeepers{with_safekeepers}",
tenant_id=tenant_2,
)
@@ -41,7 +43,7 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers:
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (5000050000, )
assert cur.fetchone() == (5000050000,)
def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
@@ -51,11 +53,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
tenant_1, _ = env.neon_cli.create_tenant()
tenant_2, _ = env.neon_cli.create_tenant()
timeline_1 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1)
timeline_2 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2)
timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1)
timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1)
pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2)
pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_1)
pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2)
for pg in [pg_tenant1, pg_tenant2]:
with closing(pg.connect()) as conn:
@@ -63,29 +65,28 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
cur.execute("CREATE TABLE t(key int primary key, value text)")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
cur.execute("SELECT sum(key) FROM t")
assert cur.fetchone() == (5000050000, )
assert cur.fetchone() == (5000050000,)
collected_metrics = {
"pageserver": env.pageserver.http_client().get_metrics(),
}
for sk in env.safekeepers:
collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str()
collected_metrics[f"safekeeper{sk.id}"] = sk.http_client().get_metrics_str()
for name in collected_metrics:
basepath = os.path.join(neon_env_builder.repo_dir, f'{name}.metrics')
basepath = os.path.join(neon_env_builder.repo_dir, f"{name}.metrics")
with open(basepath, 'w') as stdout_f:
with open(basepath, "w") as stdout_f:
print(collected_metrics[name], file=stdout_f, flush=True)
all_metrics = [parse_metrics(m, name) for name, m in collected_metrics.items()]
ps_metrics = all_metrics[0]
sk_metrics = all_metrics[1:]
ttids = [{
'tenant_id': tenant_1.hex, 'timeline_id': timeline_1.hex
}, {
'tenant_id': tenant_2.hex, 'timeline_id': timeline_2.hex
}]
ttids = [
{"tenant_id": tenant_1.hex, "timeline_id": timeline_1.hex},
{"tenant_id": tenant_2.hex, "timeline_id": timeline_2.hex},
]
# Test metrics per timeline
for tt in ttids:
@@ -105,7 +106,8 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
log.info(f"Checking common metrics for {metrics.name}")
log.info(
f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}")
f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}"
)
log.info(f"process_threads: {int(metrics.query_one('process_threads').value)}")
log.info(
f"process_resident_memory_bytes (MB): {metrics.query_one('process_resident_memory_bytes').value / 1024 / 1024}"

View File

@@ -12,8 +12,15 @@ from typing import List, Tuple
from uuid import UUID
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, RemoteStorageKind, available_remote_storages, wait_for_last_record_lsn, wait_for_upload
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
Postgres,
RemoteStorageKind,
available_remote_storages,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.utils import lsn_from_hex
@@ -28,7 +35,8 @@ async def tenant_workload(env: NeonEnv, pg: Postgres):
await pg_conn.execute("CREATE TABLE t(key int primary key, value text)")
for i in range(1, 100):
await pg_conn.execute(
f"INSERT INTO t SELECT {i}*1000 + g, 'payload' from generate_series(1,1000) g")
f"INSERT INTO t SELECT {i}*1000 + g, 'payload' from generate_series(1,1000) g"
)
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
@@ -46,11 +54,11 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs):
await asyncio.gather(*workers)
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages())
def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_tenants_many',
test_name="test_tenants_many",
)
env = neon_env_builder.init_start()
@@ -61,12 +69,13 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re
# Use a tiny checkpoint distance, to create a lot of layers quickly
tenant, _ = env.neon_cli.create_tenant(
conf={
'checkpoint_distance': '5000000',
})
env.neon_cli.create_timeline(f'test_tenants_many', tenant_id=tenant)
"checkpoint_distance": "5000000",
}
)
env.neon_cli.create_timeline(f"test_tenants_many", tenant_id=tenant)
pg = env.postgres.create_start(
f'test_tenants_many',
f"test_tenants_many",
tenant_id=tenant,
)
tenants_pgs.append((tenant, pg))
@@ -77,7 +86,8 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re
pageserver_http = env.pageserver.http_client()
for tenant, pg in tenants_pgs:
res = pg.safe_psql_many(
["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"])
["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"]
)
tenant_id = res[0][0][0]
timeline_id = res[1][0][0]
current_lsn = lsn_from_hex(res[2][0][0])

View File

@@ -1,6 +1,6 @@
from uuid import uuid4
import pytest
import pytest
from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until
@@ -17,44 +17,57 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
# for non existing tenant:
invalid_tenant_id = uuid4()
with pytest.raises(NeonPageserverApiException,
match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state"):
with pytest.raises(
NeonPageserverApiException,
match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state",
):
ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id)
# construct pair of branches to validate that pageserver prohibits
# deletion of ancestor timelines when they have child branches
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1",
"test_ancestor_branch_delete_parent")
leaf_timeline_id = env.neon_cli.create_branch(
"test_ancestor_branch_delete_branch1", "test_ancestor_branch_delete_parent"
)
ps_http = env.pageserver.http_client()
with pytest.raises(NeonPageserverApiException,
match="Cannot detach timeline which has child timelines"):
with pytest.raises(
NeonPageserverApiException, match="Cannot detach timeline which has child timelines"
):
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex
timeline_path = (
env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex
)
assert timeline_path.exists()
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
assert not timeline_path.exists()
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex
timeline_path = (
env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex
)
assert timeline_path.exists()
# retry deletes when compaction or gc is running in pageserver
wait_until(number_of_iterations=3,
interval=0.2,
func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id))
wait_until(
number_of_iterations=3,
interval=0.2,
func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id),
)
assert not timeline_path.exists()
# check 404
with pytest.raises(NeonPageserverApiException,
match="is not found neither locally nor remotely"):
with pytest.raises(
NeonPageserverApiException, match="is not found neither locally nor remotely"
):
ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
# FIXME leaves tenant without timelines, should we prevent deletion of root timeline?
wait_until(number_of_iterations=3,
interval=0.2,
func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id))
wait_until(
number_of_iterations=3,
interval=0.2,
func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id),
)
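The wait_until calls above retry the delete because compaction or GC may briefly hold the timeline. A minimal sketch of what such a retry helper could look like, assuming it simply re-invokes func at a fixed interval and re-raises the last error (hypothetical, not the fixture's implementation):

import time

def wait_until_sketch(number_of_iterations: int, interval: float, func):
    last_exc: Exception = RuntimeError("no attempts were made")
    for _ in range(number_of_iterations):
        try:
            return func()
        except Exception as e:  # keep retrying until the attempts run out
            last_exc = e
            time.sleep(interval)
    raise last_exc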

View File

@@ -1,25 +1,33 @@
from contextlib import closing
import math
import random
from uuid import UUID
import re
import psycopg2.extras
import psycopg2.errors
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local, wait_for_last_flush_lsn
from fixtures.log_helper import log
import time
from contextlib import closing
from uuid import UUID
import psycopg2.errors
import psycopg2.extras
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
Postgres,
assert_timeline_local,
wait_for_last_flush_lsn,
)
from fixtures.utils import get_timeline_dir_size
def test_timeline_size(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty")
client = env.pageserver.http_client()
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
'current_logical_size_non_incremental']
assert (
timeline_details["local"]["current_logical_size"]
== timeline_details["local"]["current_logical_size_non_incremental"]
)
pgmain = env.postgres.create_start("test_timeline_size")
log.info("postgres is running on 'test_timeline_size' branch")
@@ -29,32 +37,40 @@ def test_timeline_size(neon_simple_env: NeonEnv):
cur.execute("SHOW neon.timeline_id")
cur.execute("CREATE TABLE foo (t text)")
cur.execute("""
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10) g
""")
"""
)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
local_details = res["local"]
assert (
local_details["current_logical_size"]
== local_details["current_logical_size_non_incremental"]
)
cur.execute("TRUNCATE foo")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
local_details = res["local"]
assert (
local_details["current_logical_size"]
== local_details["current_logical_size_non_incremental"]
)
def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty")
client = env.pageserver.http_client()
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
'current_logical_size_non_incremental']
assert (
timeline_details["local"]["current_logical_size"]
== timeline_details["local"]["current_logical_size_non_incremental"]
)
pgmain = env.postgres.create_start("test_timeline_size")
log.info("postgres is running on 'test_timeline_size' branch")
@@ -64,32 +80,40 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
cur.execute("SHOW neon.timeline_id")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
local_details = res["local"]
assert (
local_details["current_logical_size"]
== local_details["current_logical_size_non_incremental"]
)
cur.execute('CREATE DATABASE foodb')
with closing(pgmain.connect(dbname='foodb')) as conn:
cur.execute("CREATE DATABASE foodb")
with closing(pgmain.connect(dbname="foodb")) as conn:
with conn.cursor() as cur2:
cur2.execute("CREATE TABLE foo (t text)")
cur2.execute("""
cur2.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10) g
""")
"""
)
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
local_details = res["local"]
assert (
local_details["current_logical_size"]
== local_details["current_logical_size_non_incremental"]
)
cur.execute('DROP DATABASE foodb')
cur.execute("DROP DATABASE foodb")
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
local_details = res['local']
assert local_details["current_logical_size"] == local_details[
"current_logical_size_non_incremental"]
local_details = res["local"]
assert (
local_details["current_logical_size"]
== local_details["current_logical_size_non_incremental"]
)
# wait until received_lsn_lag is 0
@@ -101,14 +125,17 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
elapsed = time.time() - started_at
if elapsed > timeout:
raise RuntimeError(
f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()")
f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()"
)
res = pgmain.safe_psql('''
res = pgmain.safe_psql(
"""
SELECT
pg_size_pretty(pg_cluster_size()),
pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) as received_lsn_lag
FROM backpressure_lsns();
''')[0]
"""
)[0]
log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}")
received_lsn_lag = res[1]
@@ -117,17 +144,19 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60
def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota")
client = env.pageserver.http_client()
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
assert res['local']["current_logical_size"] == res['local'][
"current_logical_size_non_incremental"]
assert (
res["local"]["current_logical_size"] == res["local"]["current_logical_size_non_incremental"]
)
pgmain = env.postgres.create_start(
"test_timeline_size_quota",
# Set small limit for the test
config_lines=['neon.max_cluster_size=30MB'])
config_lines=["neon.max_cluster_size=30MB"],
)
log.info("postgres is running on 'test_timeline_size_quota' branch")
with closing(pgmain.connect()) as conn:
@@ -140,19 +169,23 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
# Insert many rows. This query must fail because of space limit
try:
cur.execute('''
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
''')
"""
)
wait_for_pageserver_catchup(pgmain)
cur.execute('''
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 500000) g
''')
"""
)
# If we get here, the timeline size limit failed
log.error("Query unexpectedly succeeded")
@@ -162,17 +195,19 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
log.info(f"Query expectedly failed with: {err}")
# drop table to free space
cur.execute('DROP TABLE foo')
cur.execute("DROP TABLE foo")
wait_for_pageserver_catchup(pgmain)
# create it again and insert some rows. This query must succeed
cur.execute("CREATE TABLE foo (t text)")
cur.execute('''
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 10000) g
''')
"""
)
wait_for_pageserver_catchup(pgmain)
@@ -183,15 +218,17 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
def test_timeline_physical_size_init(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_init')
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init")
pg = env.postgres.create_start("test_timeline_physical_size_init")
pg.safe_psql_many([
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
pg.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 1000) g""",
])
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
@@ -204,15 +241,17 @@ def test_timeline_physical_size_init(neon_simple_env: NeonEnv):
def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_checkpoint')
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint")
pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint")
pg.safe_psql_many([
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
pg.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 1000) g""",
])
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
@@ -223,19 +262,23 @@ def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv):
def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder):
    # Disable background compaction as we don't want it to happen after the `get_physical_size` request
    # and before checking the expected size on disk, which would make the assertion fail
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m'}"
neon_env_builder.pageserver_config_override = (
"tenant_config={checkpoint_distance=100000, compaction_period='10m'}"
)
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_compaction')
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction")
pg = env.postgres.create_start("test_timeline_physical_size_post_compaction")
pg.safe_psql_many([
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
pg.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
])
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
@@ -247,29 +290,32 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder
def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
    # Disable background compaction and GC as we don't want them to happen after the `get_physical_size` request
    # and before checking the expected size on disk, which would make the assertion fail
neon_env_builder.pageserver_config_override = \
"tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}"
neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}"
env = neon_env_builder.init_start()
new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_gc')
new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_gc")
pg = env.postgres.create_start("test_timeline_physical_size_post_gc")
pg.safe_psql_many([
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
pg.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
])
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
pg.safe_psql("""
pg.safe_psql(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g
""")
"""
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
@@ -284,15 +330,17 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder):
def test_timeline_size_metrics(neon_simple_env: NeonEnv):
env = neon_simple_env
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_metrics')
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics")
pg = env.postgres.create_start("test_timeline_size_metrics")
pg.safe_psql_many([
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
pg.safe_psql_many(
[
"CREATE TABLE foo (t text)",
"""INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 100000) g""",
])
]
)
wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id)
env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}")
@@ -302,7 +350,8 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv):
matches = re.search(
f'^pageserver_current_physical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$',
metrics,
re.MULTILINE)
re.MULTILINE,
)
assert matches
tl_physical_size_metric = int(matches.group(1))
@@ -314,7 +363,8 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv):
matches = re.search(
f'^pageserver_current_logical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$',
metrics,
re.MULTILINE)
re.MULTILINE,
)
assert matches
tl_logical_size_metric = int(matches.group(1))
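Both re.search calls above scrape the pageserver's Prometheus text output with an anchored, MULTILINE pattern. A self-contained illustration of the same technique against a made-up metrics payload (the sample values are invented):

import re

sample = (
    'pageserver_current_physical_size{tenant_id="abc",timeline_id="def"} 8388608\n'
    'pageserver_current_logical_size{tenant_id="abc",timeline_id="def"} 4194304\n'
)
m = re.search(
    r'^pageserver_current_physical_size\{tenant_id="abc",timeline_id="def"\} (\S+)$',
    sample,
    re.MULTILINE,
)
assert m is not None and int(m.group(1)) == 8388608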
@@ -341,7 +391,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv):
def get_timeline_physical_size(timeline: UUID):
res = client.timeline_detail(tenant, timeline)
return res['local']['current_physical_size_non_incremental']
return res["local"]["current_physical_size_non_incremental"]
timeline_total_size = get_timeline_physical_size(timeline)
for i in range(10):
@@ -350,10 +400,12 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv):
timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant)
pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant)
pg.safe_psql_many([
"CREATE TABLE foo (t text)",
f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g",
])
pg.safe_psql_many(
[
"CREATE TABLE foo (t text)",
f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g",
]
)
wait_for_last_flush_lsn(env, pg, tenant, timeline)
env.pageserver.safe_psql(f"checkpoint {tenant.hex} {timeline.hex}")
@@ -362,7 +414,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv):
pg.stop()
tenant_physical_size = int(client.tenant_status(tenant_id=tenant)['current_physical_size'])
tenant_physical_size = int(client.tenant_status(tenant_id=tenant)["current_physical_size"])
assert tenant_physical_size == timeline_total_size
@@ -372,6 +424,8 @@ def assert_physical_size(env: NeonEnv, tenant_id: UUID, timeline_id: UUID):
client = env.pageserver.http_client()
res = assert_timeline_local(client, tenant_id, timeline_id)
timeline_path = env.timeline_dir(tenant_id, timeline_id)
assert res["local"]["current_physical_size"] == res["local"][
"current_physical_size_non_incremental"]
assert (
res["local"]["current_physical_size"]
== res["local"]["current_physical_size_non_incremental"]
)
assert res["local"]["current_physical_size"] == get_timeline_dir_size(timeline_path)

View File

@@ -1,7 +1,7 @@
import os
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
#
@@ -10,37 +10,37 @@ from fixtures.log_helper import log
def test_twophase(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_twophase", "empty")
pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"])
log.info("postgres is running on 'test_twophase' branch")
conn = pg.connect()
cur = conn.cursor()
cur.execute('CREATE TABLE foo (t text)')
cur.execute("CREATE TABLE foo (t text)")
# Prepare a transaction that will insert a row
cur.execute('BEGIN')
cur.execute("BEGIN")
cur.execute("INSERT INTO foo VALUES ('one')")
cur.execute("PREPARE TRANSACTION 'insert_one'")
# Prepare another transaction that will insert a row
cur.execute('BEGIN')
cur.execute("BEGIN")
cur.execute("INSERT INTO foo VALUES ('two')")
cur.execute("PREPARE TRANSACTION 'insert_two'")
# Prepare a transaction that will insert a row
cur.execute('BEGIN')
cur.execute("BEGIN")
cur.execute("INSERT INTO foo VALUES ('three')")
cur.execute("PREPARE TRANSACTION 'insert_three'")
# Prepare another transaction that will insert a row
cur.execute('BEGIN')
cur.execute("BEGIN")
cur.execute("INSERT INTO foo VALUES ('four')")
cur.execute("PREPARE TRANSACTION 'insert_four'")
    # On checkpoint, the prepared transaction state data is copied to files in
    # the pg_twophase directory and fsynced
cur.execute('CHECKPOINT')
cur.execute("CHECKPOINT")
twophase_files = os.listdir(pg.pg_twophase_dir_path())
log.info(twophase_files)
@@ -48,7 +48,7 @@ def test_twophase(neon_simple_env: NeonEnv):
cur.execute("COMMIT PREPARED 'insert_three'")
cur.execute("ROLLBACK PREPARED 'insert_four'")
cur.execute('CHECKPOINT')
cur.execute("CHECKPOINT")
twophase_files = os.listdir(pg.pg_twophase_dir_path())
log.info(twophase_files)
@@ -59,8 +59,8 @@ def test_twophase(neon_simple_env: NeonEnv):
# Start compute on the new branch
pg2 = env.postgres.create_start(
'test_twophase_prepared',
config_lines=['max_prepared_transactions=5'],
"test_twophase_prepared",
config_lines=["max_prepared_transactions=5"],
)
    # Check that we restored only the needed twophase files
@@ -76,9 +76,9 @@ def test_twophase(neon_simple_env: NeonEnv):
cur2.execute("COMMIT PREPARED 'insert_one'")
cur2.execute("ROLLBACK PREPARED 'insert_two'")
cur2.execute('SELECT * FROM foo')
assert cur2.fetchall() == [('one', ), ('three', )]
cur2.execute("SELECT * FROM foo")
assert cur2.fetchall() == [("one",), ("three",)]
# Only one committed insert is visible on the original branch
cur.execute('SELECT * FROM foo')
assert cur.fetchall() == [('three', )]
cur.execute("SELECT * FROM foo")
assert cur.fetchall() == [("three",)]

View File

@@ -1,5 +1,5 @@
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
#
@@ -10,48 +10,50 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
env = neon_simple_env
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
pg = env.postgres.create_start('test_vm_bit_clear')
pg = env.postgres.create_start("test_vm_bit_clear")
log.info("postgres is running on 'test_vm_bit_clear' branch")
pg_conn = pg.connect()
cur = pg_conn.cursor()
# Install extension containing function needed for test
cur.execute('CREATE EXTENSION neon_test_utils')
cur.execute("CREATE EXTENSION neon_test_utils")
# Create a test table and freeze it to set the VM bit.
cur.execute('CREATE TABLE vmtest_delete (id integer PRIMARY KEY)')
cur.execute('INSERT INTO vmtest_delete VALUES (1)')
cur.execute('VACUUM FREEZE vmtest_delete')
cur.execute("CREATE TABLE vmtest_delete (id integer PRIMARY KEY)")
cur.execute("INSERT INTO vmtest_delete VALUES (1)")
cur.execute("VACUUM FREEZE vmtest_delete")
cur.execute('CREATE TABLE vmtest_update (id integer PRIMARY KEY)')
cur.execute('INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g')
cur.execute('VACUUM FREEZE vmtest_update')
cur.execute("CREATE TABLE vmtest_update (id integer PRIMARY KEY)")
cur.execute("INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g")
cur.execute("VACUUM FREEZE vmtest_update")
# DELETE and UPDATE the rows.
cur.execute('DELETE FROM vmtest_delete WHERE id = 1')
cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1')
cur.execute("DELETE FROM vmtest_delete WHERE id = 1")
cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1")
# Branch at this point, to test that later
env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear")
# Clear the buffer cache, to force the VM page to be re-fetched from
# the page server
cur.execute('SELECT clear_buffer_cache()')
cur.execute("SELECT clear_buffer_cache()")
# Check that an index-only scan doesn't see the deleted row. If the
# clearing of the VM bit was not replayed correctly, this would incorrectly
    # return the deleted row.
cur.execute('''
cur.execute(
"""
set enable_seqscan=off;
set enable_indexscan=on;
set enable_bitmapscan=off;
''')
"""
)
cur.execute('SELECT * FROM vmtest_delete WHERE id = 1')
assert (cur.fetchall() == [])
cur.execute('SELECT * FROM vmtest_update WHERE id = 1')
assert (cur.fetchall() == [])
cur.execute("SELECT * FROM vmtest_delete WHERE id = 1")
assert cur.fetchall() == []
cur.execute("SELECT * FROM vmtest_update WHERE id = 1")
assert cur.fetchall() == []
cur.close()
@@ -61,19 +63,21 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
# a dirty VM page is evicted. If the VM bit was not correctly cleared by the
# earlier WAL record, the full-page image hides the problem. Starting a new
# server at the right point-in-time avoids that full-page image.
pg_new = env.postgres.create_start('test_vm_bit_clear_new')
pg_new = env.postgres.create_start("test_vm_bit_clear_new")
log.info("postgres is running on 'test_vm_bit_clear_new' branch")
pg_new_conn = pg_new.connect()
cur_new = pg_new_conn.cursor()
cur_new.execute('''
cur_new.execute(
"""
set enable_seqscan=off;
set enable_indexscan=on;
set enable_bitmapscan=off;
''')
"""
)
cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1')
assert (cur_new.fetchall() == [])
cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1')
assert (cur_new.fetchall() == [])
cur_new.execute("SELECT * FROM vmtest_delete WHERE id = 1")
assert cur_new.fetchall() == []
cur_new.execute("SELECT * FROM vmtest_update WHERE id = 1")
assert cur_new.fetchall() == []

View File

@@ -1,42 +1,59 @@
import pathlib
import pytest
import random
import time
import os
import pathlib
import random
import shutil
import signal
import subprocess
import sys
import threading
import time
import uuid
from contextlib import closing
from dataclasses import dataclass, field
from pathlib import Path
from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, available_remote_storages, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex, query_scalar
from fixtures.log_helper import log
from typing import List, Optional, Any
from typing import Any, List, Optional
from uuid import uuid4
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Etcd,
NeonEnv,
NeonEnvBuilder,
NeonPageserver,
PgBin,
PgProtocol,
PortDistributor,
Postgres,
RemoteStorageKind,
RemoteStorageUsers,
Safekeeper,
SafekeeperPort,
available_remote_storages,
neon_binpath,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.utils import get_dir_size, lsn_from_hex, lsn_to_hex, query_scalar
def wait_lsn_force_checkpoint(tenant_id: str,
timeline_id: str,
pg: Postgres,
ps: NeonPageserver,
pageserver_conn_options={}):
lsn = lsn_from_hex(pg.safe_psql('SELECT pg_current_wal_flush_lsn()')[0][0])
def wait_lsn_force_checkpoint(
tenant_id: str, timeline_id: str, pg: Postgres, ps: NeonPageserver, pageserver_conn_options={}
):
lsn = lsn_from_hex(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
log.info(f"pg_current_wal_flush_lsn is {lsn_to_hex(lsn)}, waiting for it on pageserver")
auth_token = None
if 'password' in pageserver_conn_options:
auth_token = pageserver_conn_options['password']
if "password" in pageserver_conn_options:
auth_token = pageserver_conn_options["password"]
# wait for the pageserver to catch up
wait_for_last_record_lsn(ps.http_client(auth_token=auth_token),
uuid.UUID(hex=tenant_id),
uuid.UUID(hex=timeline_id),
lsn)
wait_for_last_record_lsn(
ps.http_client(auth_token=auth_token),
uuid.UUID(hex=tenant_id),
uuid.UUID(hex=timeline_id),
lsn,
)
# force checkpoint to advance remote_consistent_lsn
with closing(ps.connect(**pageserver_conn_options)) as psconn:
@@ -44,10 +61,12 @@ def wait_lsn_force_checkpoint(tenant_id: str,
pscur.execute(f"checkpoint {tenant_id} {timeline_id}")
# ensure that remote_consistent_lsn is advanced
wait_for_upload(ps.http_client(auth_token=auth_token),
uuid.UUID(hex=tenant_id),
uuid.UUID(hex=timeline_id),
lsn)
wait_for_upload(
ps.http_client(auth_token=auth_token),
uuid.UUID(hex=tenant_id),
uuid.UUID(hex=timeline_id),
lsn,
)
@dataclass
@@ -89,7 +108,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
with env.pageserver.http_client() as pageserver_http:
timeline_details = [
pageserver_http.timeline_detail(
tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name])
tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name]
)
for branch_name in branch_names
]
# All changes visible to pageserver (last_record_lsn) should be
@@ -105,14 +125,14 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
for timeline_detail in timeline_details:
timeline_id: str = timeline_detail["timeline_id"]
local_timeline_detail = timeline_detail.get('local')
local_timeline_detail = timeline_detail.get("local")
if local_timeline_detail is None:
log.debug(f"Timeline {timeline_id} is not present locally, skipping")
continue
m = TimelineMetrics(
timeline_id=timeline_id,
last_record_lsn=lsn_from_hex(local_timeline_detail['last_record_lsn']),
last_record_lsn=lsn_from_hex(local_timeline_detail["last_record_lsn"]),
)
for sk_m in sk_metrics:
m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex, timeline_id)])
@@ -120,14 +140,20 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns):
# Invariant. May be < when transaction is in progress.
assert commit_lsn <= flush_lsn, f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
assert (
commit_lsn <= flush_lsn
), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
# We only call collect_metrics() after a transaction is confirmed by
# the compute node, which only happens after a consensus of safekeepers
# has confirmed the transaction. We assume majority consensus here.
assert (2 * sum(m.last_record_lsn <= lsn
for lsn in m.flush_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
assert (2 * sum(m.last_record_lsn <= lsn
for lsn in m.commit_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
assert (
2 * sum(m.last_record_lsn <= lsn for lsn in m.flush_lsns)
> neon_env_builder.num_safekeepers
), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
assert (
2 * sum(m.last_record_lsn <= lsn for lsn in m.commit_lsns)
> neon_env_builder.num_safekeepers
), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}"
timeline_metrics.append(m)
log.info(f"{message}: {timeline_metrics}")
return timeline_metrics
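The two assertions above encode the majority rule described in the comment: a transaction is only acknowledged once more than half of the safekeepers have flushed (and committed) it. Stripped of the test plumbing, the check reduces to the following hypothetical helper (for illustration only):

def reached_majority(acknowledged: int, num_safekeepers: int) -> bool:
    # 2 of 3 safekeepers: 2 * 2 > 3 -> True; 1 of 3: 2 * 1 > 3 -> False
    return 2 * acknowledged > num_safekeepers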
@@ -155,8 +181,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
collect_metrics("during INSERT INTO")
time.sleep(1)
except:
log.error("MetricsChecker's thread failed, the test will be failed on .stop() call",
exc_info=True)
log.error(
"MetricsChecker's thread failed, the test will be failed on .stop() call",
exc_info=True,
)
# We want to preserve traceback as well as the exception
exc_type, exc_value, exc_tb = sys.exc_info()
assert exc_type
@@ -183,7 +211,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):
# Check data for 2/3 timelines
for pg in pgs[:-1]:
res = pg.safe_psql("SELECT sum(key) FROM t")
assert res[0] == (5000050000, )
assert res[0] == (5000050000,)
final_m = collect_metrics("after SELECT")
# Assume that LSNs (a) behave similarly in all timelines; and (b) INSERT INTO alters LSN significantly.
@@ -208,8 +236,8 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = n_acceptors
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_safekeepers_restarts')
pg = env.postgres.create_start('test_safekeepers_restarts')
env.neon_cli.create_branch("test_safekeepers_restarts")
pg = env.postgres.create_start("test_safekeepers_restarts")
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
@@ -217,9 +245,9 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
cur = pg_conn.cursor()
failed_node = None
cur.execute('CREATE TABLE t(key int primary key, value text)')
cur.execute("CREATE TABLE t(key int primary key, value text)")
for i in range(n_inserts):
cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))
cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1,))
if random.random() <= fault_probability:
if failed_node is None:
@@ -228,7 +256,7 @@ def test_restarts(neon_env_builder: NeonEnvBuilder):
else:
failed_node.start()
failed_node = None
assert query_scalar(cur, 'SELECT sum(key) FROM t') == 500500
assert query_scalar(cur, "SELECT sum(key) FROM t") == 500500
# Test that safekeepers push their info to the broker and learn peer status from it
@@ -238,7 +266,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_broker", "main")
pg = env.postgres.create_start('test_broker')
pg = env.postgres.create_start("test_broker")
pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
# learn neon timeline from compute
@@ -260,9 +288,10 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
while True:
stat_after = [cli.timeline_status(tenant_id, timeline_id) for cli in clients]
if all(
lsn_from_hex(s_after.remote_consistent_lsn) > lsn_from_hex(
s_before.remote_consistent_lsn) for s_after,
s_before in zip(stat_after, stat_before)):
lsn_from_hex(s_after.remote_consistent_lsn)
> lsn_from_hex(s_before.remote_consistent_lsn)
for s_after, s_before in zip(stat_after, stat_before)
):
break
elapsed = time.time() - started_at
if elapsed > 20:
@@ -273,7 +302,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
# Test that old WAL consumed by peers and pageserver is removed from safekeepers.
@pytest.mark.parametrize('auth_enabled', [False, True])
@pytest.mark.parametrize("auth_enabled", [False, True])
def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.num_safekeepers = 2
# to advance remote_consistent_lsn
@@ -281,16 +310,18 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_safekeepers_wal_removal')
pg = env.postgres.create_start('test_safekeepers_wal_removal')
env.neon_cli.create_branch("test_safekeepers_wal_removal")
pg = env.postgres.create_start("test_safekeepers_wal_removal")
# Note: it is important to insert at least two segments, as currently
# control file is synced roughly once in segment range and WAL is not
# removed until all horizons are persisted.
pg.safe_psql_many([
'CREATE TABLE t(key int primary key, value text)',
"INSERT INTO t SELECT generate_series(1,200000), 'payload'",
])
pg.safe_psql_many(
[
"CREATE TABLE t(key int primary key, value text)",
"INSERT INTO t SELECT generate_series(1,200000), 'payload'",
]
)
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
timeline_id = pg.safe_psql("show neon.timeline_id")[0][0]
@@ -298,12 +329,12 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
# force checkpoint to advance remote_consistent_lsn
pageserver_conn_options = {}
if auth_enabled:
pageserver_conn_options['password'] = env.auth_keys.generate_tenant_token(tenant_id)
pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id)
wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options)
    # We will wait for the first segment's removal. Make sure the segments exist to begin with.
first_segments = [
os.path.join(sk.data_dir(), tenant_id, timeline_id, '000000010000000000000001')
os.path.join(sk.data_dir(), tenant_id, timeline_id, "000000010000000000000001")
for sk in env.safekeepers
]
assert all(os.path.exists(p) for p in first_segments)
@@ -312,25 +343,33 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
http_cli = env.safekeepers[0].http_client()
else:
http_cli = env.safekeepers[0].http_client(
auth_token=env.auth_keys.generate_tenant_token(tenant_id))
auth_token=env.auth_keys.generate_tenant_token(tenant_id)
)
http_cli_other = env.safekeepers[0].http_client(
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex)
)
http_cli_noauth = env.safekeepers[0].http_client()
# Pretend WAL is offloaded to s3.
if auth_enabled:
old_backup_lsn = http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
assert 'FFFFFFFF/FEFFFFFF' != old_backup_lsn
old_backup_lsn = http_cli.timeline_status(
tenant_id=tenant_id, timeline_id=timeline_id
).backup_lsn
assert "FFFFFFFF/FEFFFFFF" != old_backup_lsn
for cli in [http_cli_other, http_cli_noauth]:
with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
cli.record_safekeeper_info(tenant_id,
timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'})
assert old_backup_lsn == http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
http_cli.record_safekeeper_info(tenant_id, timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'})
assert 'FFFFFFFF/FEFFFFFF' == http_cli.timeline_status(tenant_id=tenant_id,
timeline_id=timeline_id).backup_lsn
with pytest.raises(cli.HTTPError, match="Forbidden|Unauthorized"):
cli.record_safekeeper_info(
tenant_id, timeline_id, {"backup_lsn": "FFFFFFFF/FEFFFFFF"}
)
assert (
old_backup_lsn
== http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id).backup_lsn
)
http_cli.record_safekeeper_info(tenant_id, timeline_id, {"backup_lsn": "FFFFFFFF/FEFFFFFF"})
assert (
"FFFFFFFF/FEFFFFFF"
== http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id).backup_lsn
)
# wait till first segment is removed on all safekeepers
started_at = time.time()
@@ -355,7 +394,8 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end):
elapsed = time.time() - started_at
if elapsed > 30:
raise RuntimeError(
f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded")
f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded"
)
time.sleep(0.5)
@@ -364,8 +404,9 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size):
http_cli = sk.http_client()
while True:
tli_status = http_cli.timeline_status(tenant_id, timeline_id)
sk_wal_size = get_dir_size(os.path.join(sk.data_dir(), tenant_id,
timeline_id)) / 1024 / 1024
sk_wal_size = (
get_dir_size(os.path.join(sk.data_dir(), tenant_id, timeline_id)) / 1024 / 1024
)
log.info(f"Safekeeper id={sk.id} wal_size={sk_wal_size:.2f}MB status={tli_status}")
if sk_wal_size <= target_size:
@@ -379,21 +420,21 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size):
time.sleep(0.5)
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages())
def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_safekeepers_wal_backup',
test_name="test_safekeepers_wal_backup",
)
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_safekeepers_wal_backup')
pg = env.postgres.create_start('test_safekeepers_wal_backup')
env.neon_cli.create_branch("test_safekeepers_wal_backup")
pg = env.postgres.create_start("test_safekeepers_wal_backup")
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
@@ -401,11 +442,11 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo
pg_conn = pg.connect()
cur = pg_conn.cursor()
cur.execute('create table t(key int, value text)')
cur.execute("create table t(key int, value text)")
    # Shut down each of the safekeepers in turn and fill a segment while that sk is
    # down; ensure the segment gets offloaded by the others.
offloaded_seg_end = ['0/2000000', '0/3000000', '0/4000000']
offloaded_seg_end = ["0/2000000", "0/3000000", "0/4000000"]
for victim, seg_end in zip(env.safekeepers, offloaded_seg_end):
victim.stop()
# roughly fills one segment
@@ -419,36 +460,36 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo
# put one of safekeepers down again
env.safekeepers[0].stop()
# restart postgres
pg.stop_and_destroy().create_start('test_safekeepers_wal_backup')
pg.stop_and_destroy().create_start("test_safekeepers_wal_backup")
# and ensure offloading still works
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute("insert into t select generate_series(1,250000), 'payload'")
wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], '0/5000000')
wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], "0/5000000")
@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages())
@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages())
def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind):
neon_env_builder.num_safekeepers = 3
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storatge_kind,
test_name='test_s3_wal_replay',
test_name="test_s3_wal_replay",
)
neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_s3_wal_replay')
env.neon_cli.create_branch("test_s3_wal_replay")
env.pageserver.stop()
pageserver_tenants_dir = os.path.join(env.repo_dir, 'tenants')
pageserver_fresh_copy = os.path.join(env.repo_dir, 'tenants_fresh')
pageserver_tenants_dir = os.path.join(env.repo_dir, "tenants")
pageserver_fresh_copy = os.path.join(env.repo_dir, "tenants_fresh")
log.info(f"Creating a copy of pageserver in a fresh state at {pageserver_fresh_copy}")
shutil.copytree(pageserver_tenants_dir, pageserver_fresh_copy)
env.pageserver.start()
pg = env.postgres.create_start('test_s3_wal_replay')
pg = env.postgres.create_start("test_s3_wal_replay")
# learn neon timeline from compute
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
@@ -462,7 +503,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R
cur.execute("insert into t values (1, 'payload')")
expected_sum += 1
offloaded_seg_end = ['0/3000000']
offloaded_seg_end = ["0/3000000"]
for seg_end in offloaded_seg_end:
# roughly fills two segments
cur.execute("insert into t select generate_series(1,500000), 'payload'")
@@ -476,28 +517,30 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R
# advance remote_consistent_lsn to trigger WAL trimming
# this LSN should be less than commit_lsn, so timeline will be active=true in safekeepers, to push etcd updates
env.safekeepers[0].http_client().record_safekeeper_info(
tenant_id, timeline_id, {'remote_consistent_lsn': offloaded_seg_end[-1]})
tenant_id, timeline_id, {"remote_consistent_lsn": offloaded_seg_end[-1]}
)
for sk in env.safekeepers:
# require WAL to be trimmed, so no more than one segment is left on disk
wait_wal_trim(tenant_id, timeline_id, sk, 16 * 1.5)
last_lsn = query_scalar(cur, 'SELECT pg_current_wal_flush_lsn()')
last_lsn = query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")
pageserver_lsn = env.pageserver.http_client().timeline_detail(
uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"]
uuid.UUID(tenant_id), uuid.UUID((timeline_id))
)["local"]["last_record_lsn"]
lag = lsn_from_hex(last_lsn) - lsn_from_hex(pageserver_lsn)
log.info(
f'Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb'
f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb"
)
# replace pageserver with a fresh copy
pg.stop_and_destroy()
env.pageserver.stop()
log.info(f'Removing current pageserver state at {pageserver_tenants_dir}')
log.info(f"Removing current pageserver state at {pageserver_tenants_dir}")
shutil.rmtree(pageserver_tenants_dir)
log.info(f'Copying fresh pageserver state from {pageserver_fresh_copy}')
log.info(f"Copying fresh pageserver state from {pageserver_fresh_copy}")
shutil.move(pageserver_fresh_copy, pageserver_tenants_dir)
# start pageserver and wait for replay
@@ -509,39 +552,43 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R
while True:
elapsed = time.time() - started_at
if elapsed > wait_lsn_timeout:
raise RuntimeError(f'Timed out waiting for WAL redo')
raise RuntimeError(f"Timed out waiting for WAL redo")
pageserver_lsn = env.pageserver.http_client().timeline_detail(
uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"]
uuid.UUID(tenant_id), uuid.UUID((timeline_id))
)["local"]["last_record_lsn"]
lag = lsn_from_hex(last_lsn) - lsn_from_hex(pageserver_lsn)
if time.time() > last_debug_print + 10 or lag <= 0:
last_debug_print = time.time()
log.info(f'Pageserver last_record_lsn={pageserver_lsn}; lag is {lag / 1024}kb')
log.info(f"Pageserver last_record_lsn={pageserver_lsn}; lag is {lag / 1024}kb")
if lag <= 0:
break
time.sleep(1)
log.info(f'WAL redo took {elapsed} s')
log.info(f"WAL redo took {elapsed} s")
# verify data
pg.create_start('test_s3_wal_replay')
pg.create_start("test_s3_wal_replay")
assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum
class ProposerPostgres(PgProtocol):
"""Object for running postgres without NeonEnv"""
def __init__(self,
pgdata_dir: str,
pg_bin,
timeline_id: uuid.UUID,
tenant_id: uuid.UUID,
listen_addr: str,
port: int):
super().__init__(host=listen_addr, port=port, user='cloud_admin', dbname='postgres')
def __init__(
self,
pgdata_dir: str,
pg_bin,
timeline_id: uuid.UUID,
tenant_id: uuid.UUID,
listen_addr: str,
port: int,
):
super().__init__(host=listen_addr, port=port, user="cloud_admin", dbname="postgres")
self.pgdata_dir: str = pgdata_dir
self.pg_bin: PgBin = pg_bin
@@ -551,15 +598,15 @@ class ProposerPostgres(PgProtocol):
self.port: int = port
def pg_data_dir_path(self) -> str:
""" Path to data directory """
"""Path to data directory"""
return self.pgdata_dir
def config_file_path(self) -> str:
""" Path to postgresql.conf """
return os.path.join(self.pgdata_dir, 'postgresql.conf')
"""Path to postgresql.conf"""
return os.path.join(self.pgdata_dir, "postgresql.conf")
def create_dir_config(self, safekeepers: str):
""" Create dir and config for running --sync-safekeepers """
"""Create dir and config for running --sync-safekeepers"""
pathlib.Path(self.pg_data_dir_path()).mkdir(exist_ok=True)
with open(self.config_file_path(), "w") as f:
@@ -588,36 +635,36 @@ class ProposerPostgres(PgProtocol):
}
basepath = self.pg_bin.run_capture(command, env)
stdout_filename = basepath + '.stdout'
stdout_filename = basepath + ".stdout"
with open(stdout_filename, 'r') as stdout_f:
with open(stdout_filename, "r") as stdout_f:
stdout = stdout_f.read()
return stdout.strip("\n ")
def initdb(self):
""" Run initdb """
"""Run initdb"""
args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()]
self.pg_bin.run(args)
def start(self):
""" Start postgres with pg_ctl """
"""Start postgres with pg_ctl"""
log_path = os.path.join(self.pg_data_dir_path(), "pg.log")
args = ["pg_ctl", "-D", self.pg_data_dir_path(), "-l", log_path, "-w", "start"]
self.pg_bin.run(args)
def stop(self):
""" Stop postgres with pg_ctl """
"""Stop postgres with pg_ctl"""
args = ["pg_ctl", "-D", self.pg_data_dir_path(), "-m", "immediate", "-w", "stop"]
self.pg_bin.run(args)
# insert wal in all safekeepers and run sync on proposer
def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
port_distributor: PortDistributor):
def test_sync_safekeepers(
neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, port_distributor: PortDistributor
):
# We don't really need the full environment for this test, just the
# safekeepers would be enough.
@@ -629,12 +676,9 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
# write config for proposer
pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata")
pg = ProposerPostgres(pgdata_dir,
pg_bin,
timeline_id,
tenant_id,
'127.0.0.1',
port_distributor.get_port())
pg = ProposerPostgres(
pgdata_dir, pg_bin, timeline_id, tenant_id, "127.0.0.1", port_distributor.get_port()
)
pg.create_dir_config(env.get_safekeeper_connstrs())
# valid lsn, which is not in the segment start, nor in zero segment
@@ -669,13 +713,13 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder,
assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
@pytest.mark.parametrize('auth_enabled', [False, True])
@pytest.mark.parametrize("auth_enabled", [False, True])
def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.auth_enabled = auth_enabled
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_timeline_status')
pg = env.postgres.create_start('test_timeline_status')
env.neon_cli.create_branch("test_timeline_status")
pg = env.postgres.create_start("test_timeline_status")
wa = env.safekeepers[0]
@@ -690,7 +734,8 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
wa_http_cli = wa.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
wa_http_cli.check_status()
wa_http_cli_bad = wa.http_client(
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex))
auth_token=env.auth_keys.generate_tenant_token(uuid4().hex)
)
wa_http_cli_bad.check_status()
wa_http_cli_noauth = wa.http_client()
wa_http_cli_noauth.check_status()
@@ -702,7 +747,7 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
if auth_enabled:
for cli in [wa_http_cli_bad, wa_http_cli_noauth]:
with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'):
with pytest.raises(cli.HTTPError, match="Forbidden|Unauthorized"):
cli.timeline_status(tenant_id, timeline_id)
pg.safe_psql("create table t(i int)")
@@ -720,19 +765,23 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
class SafekeeperEnv:
def __init__(self,
repo_dir: Path,
port_distributor: PortDistributor,
pg_bin: PgBin,
num_safekeepers: int = 1):
def __init__(
self,
repo_dir: Path,
port_distributor: PortDistributor,
pg_bin: PgBin,
num_safekeepers: int = 1,
):
self.repo_dir = repo_dir
self.port_distributor = port_distributor
self.broker = Etcd(datadir=os.path.join(self.repo_dir, "etcd"),
port=self.port_distributor.get_port(),
peer_port=self.port_distributor.get_port())
self.broker = Etcd(
datadir=os.path.join(self.repo_dir, "etcd"),
port=self.port_distributor.get_port(),
peer_port=self.port_distributor.get_port(),
)
self.pg_bin = pg_bin
self.num_safekeepers = num_safekeepers
self.bin_safekeeper = os.path.join(str(neon_binpath), 'safekeeper')
self.bin_safekeeper = os.path.join(str(neon_binpath), "safekeeper")
self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None
self.postgres: Optional[ProposerPostgres] = None
self.tenant_id: Optional[uuid.UUID] = None
@@ -778,23 +827,25 @@ class SafekeeperEnv:
str(i),
"--broker-endpoints",
self.broker.client_url(),
"--daemonize"
"--daemonize",
]
log.info(f'Running command "{" ".join(args)}"')
return subprocess.run(args, check=True)
def get_safekeeper_connstrs(self):
return ','.join([sk_proc.args[2] for sk_proc in self.safekeepers])
return ",".join([sk_proc.args[2] for sk_proc in self.safekeepers])
def create_postgres(self):
pgdata_dir = os.path.join(self.repo_dir, "proposer_pgdata")
pg = ProposerPostgres(pgdata_dir,
self.pg_bin,
self.timeline_id,
self.tenant_id,
"127.0.0.1",
self.port_distributor.get_port())
pg = ProposerPostgres(
pgdata_dir,
self.pg_bin,
self.timeline_id,
self.tenant_id,
"127.0.0.1",
self.port_distributor.get_port(),
)
pg.initdb()
pg.create_dir_config(self.get_safekeeper_connstrs())
return pg
@@ -811,7 +862,7 @@ class SafekeeperEnv:
return self
def __exit__(self, exc_type, exc_value, traceback):
log.info('Cleaning up all safekeeper and compute nodes')
log.info("Cleaning up all safekeeper and compute nodes")
# Stop all the nodes
if self.postgres is not None:
@@ -821,9 +872,9 @@ class SafekeeperEnv:
self.kill_safekeeper(sk_proc.args[6])
def test_safekeeper_without_pageserver(test_output_dir: str,
port_distributor: PortDistributor,
pg_bin: PgBin):
def test_safekeeper_without_pageserver(
test_output_dir: str, port_distributor: PortDistributor, pg_bin: PgBin
):
# Create the environment in the test-specific output dir
repo_dir = Path(os.path.join(test_output_dir, "repo"))
@@ -845,19 +896,19 @@ def test_safekeeper_without_pageserver(test_output_dir: str,
def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names])
return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names])
def execute_payload(pg: Postgres):
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
# we rely upon autocommit after each statement
# as waiting for acceptors happens there
cur.execute('CREATE TABLE IF NOT EXISTS t(key int, value text)')
cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)")
cur.execute("INSERT INTO t VALUES (0, 'something')")
sum_before = query_scalar(cur, 'SELECT SUM(key) FROM t')
sum_before = query_scalar(cur, "SELECT SUM(key) FROM t")
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
sum_after = query_scalar(cur, 'SELECT SUM(key) FROM t')
sum_after = query_scalar(cur, "SELECT SUM(key) FROM t")
assert sum_after == sum_before + 5000050000
def show_statuses(safekeepers: List[Safekeeper], tenant_id: str, timeline_id: str):
@@ -871,12 +922,12 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 4
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_replace_safekeeper')
env.neon_cli.create_branch("test_replace_safekeeper")
log.info("Use only first 3 safekeepers")
env.safekeepers[3].stop()
active_safekeepers = [1, 2, 3]
pg = env.postgres.create('test_replace_safekeeper')
pg = env.postgres.create("test_replace_safekeeper")
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
pg.start()
@@ -914,7 +965,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
show_statuses(env.safekeepers, tenant_id, timeline_id)
log.info("Recreate postgres to replace failed sk1 with new sk4")
pg.stop_and_destroy().create('test_replace_safekeeper')
pg.stop_and_destroy().create("test_replace_safekeeper")
active_safekeepers = [2, 3, 4]
env.safekeepers[3].start()
pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
@@ -934,16 +985,16 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
# of WAL segments.
def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
# used to calculate delta in collect_stats
last_lsn = .0
last_lsn = 0.0
# returns LSN and pg_wal size, all in MB
def collect_stats(pg: Postgres, cur, enable_logs=True):
nonlocal last_lsn
assert pg.pgdata_dir is not None
log.info('executing INSERT to generate WAL')
log.info("executing INSERT to generate WAL")
current_lsn = lsn_from_hex(query_scalar(cur, "select pg_current_wal_lsn()")) / 1024 / 1024
pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, 'pg_wal')) / 1024 / 1024
pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024
if enable_logs:
log.info(f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB")
last_lsn = current_lsn
@@ -956,15 +1007,16 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_wal_deleted_after_broadcast')
env.neon_cli.create_branch("test_wal_deleted_after_broadcast")
# Adjust checkpoint config to prevent keeping old WAL segments
pg = env.postgres.create_start(
'test_wal_deleted_after_broadcast',
config_lines=['min_wal_size=32MB', 'max_wal_size=32MB', 'log_checkpoints=on'])
"test_wal_deleted_after_broadcast",
config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"],
)
pg_conn = pg.connect()
cur = pg_conn.cursor()
cur.execute('CREATE TABLE t(key int, value text)')
cur.execute("CREATE TABLE t(key int, value text)")
collect_stats(pg, cur)
@@ -973,15 +1025,15 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder):
generate_wal(cur)
collect_stats(pg, cur)
log.info('executing checkpoint')
cur.execute('CHECKPOINT')
log.info("executing checkpoint")
cur.execute("CHECKPOINT")
wal_size_after_checkpoint = collect_stats(pg, cur)[1]
# there shouldn't be more than 2 WAL segments (but dir may have archive_status files)
assert wal_size_after_checkpoint < 16 * 2.5
@pytest.mark.parametrize('auth_enabled', [False, True])
@pytest.mark.parametrize("auth_enabled", [False, True])
def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
neon_env_builder.num_safekeepers = 1
neon_env_builder.auth_enabled = auth_enabled
@@ -989,25 +1041,25 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
    # Create two tenants: one will be deleted, the other should be preserved.
tenant_id = env.initial_tenant.hex
timeline_id_1 = env.neon_cli.create_branch('br1').hex # Active, delete explicitly
timeline_id_2 = env.neon_cli.create_branch('br2').hex # Inactive, delete explicitly
timeline_id_3 = env.neon_cli.create_branch('br3').hex # Active, delete with the tenant
timeline_id_4 = env.neon_cli.create_branch('br4').hex # Inactive, delete with the tenant
timeline_id_1 = env.neon_cli.create_branch("br1").hex # Active, delete explicitly
timeline_id_2 = env.neon_cli.create_branch("br2").hex # Inactive, delete explicitly
timeline_id_3 = env.neon_cli.create_branch("br3").hex # Active, delete with the tenant
timeline_id_4 = env.neon_cli.create_branch("br4").hex # Inactive, delete with the tenant
tenant_id_other_uuid, timeline_id_other_uuid = env.neon_cli.create_tenant()
tenant_id_other = tenant_id_other_uuid.hex
timeline_id_other = timeline_id_other_uuid.hex
# Populate branches
pg_1 = env.postgres.create_start('br1')
pg_2 = env.postgres.create_start('br2')
pg_3 = env.postgres.create_start('br3')
pg_4 = env.postgres.create_start('br4')
pg_other = env.postgres.create_start('main', tenant_id=uuid.UUID(hex=tenant_id_other))
pg_1 = env.postgres.create_start("br1")
pg_2 = env.postgres.create_start("br2")
pg_3 = env.postgres.create_start("br3")
pg_4 = env.postgres.create_start("br4")
pg_other = env.postgres.create_start("main", tenant_id=uuid.UUID(hex=tenant_id_other))
for pg in [pg_1, pg_2, pg_3, pg_4, pg_other]:
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('CREATE TABLE t(key int primary key)')
cur.execute("CREATE TABLE t(key int primary key)")
sk = env.safekeepers[0]
sk_data_dir = Path(sk.data_dir())
if not auth_enabled:
@@ -1016,7 +1068,8 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
else:
sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id))
sk_http_other = sk.http_client(
auth_token=env.auth_keys.generate_tenant_token(tenant_id_other))
auth_token=env.auth_keys.generate_tenant_token(tenant_id_other)
)
sk_http_noauth = sk.http_client()
assert (sk_data_dir / tenant_id / timeline_id_1).is_dir()
assert (sk_data_dir / tenant_id / timeline_id_2).is_dir()
@@ -1034,7 +1087,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
for pg in [pg_1, pg_3, pg_other]:
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('INSERT INTO t (key) VALUES (1)')
cur.execute("INSERT INTO t (key) VALUES (1)")
# Remove initial tenant's br1 (active)
assert sk_http.timeline_delete_force(tenant_id, timeline_id_1) == {
@@ -1049,7 +1102,8 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
# Ensure repeated deletion succeeds
assert sk_http.timeline_delete_force(tenant_id, timeline_id_1) == {
"dir_existed": False, "was_active": False
"dir_existed": False,
"was_active": False,
}
assert not (sk_data_dir / tenant_id / timeline_id_1).exists()
assert (sk_data_dir / tenant_id / timeline_id_2).is_dir()
@@ -1060,9 +1114,9 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
if auth_enabled:
# Ensure we cannot delete the other tenant
for sk_h in [sk_http, sk_http_noauth]:
with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'):
with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"):
assert sk_h.timeline_delete_force(tenant_id_other, timeline_id_other)
with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'):
with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"):
assert sk_h.tenant_delete_force(tenant_id_other)
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
@@ -1078,7 +1132,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
# Remove non-existing branch, should succeed
assert sk_http.timeline_delete_force(tenant_id, '00' * 16) == {
assert sk_http.timeline_delete_force(tenant_id, "00" * 16) == {
"dir_existed": False,
"was_active": False,
}
@@ -1107,4 +1161,4 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
sk_http_other.timeline_status(tenant_id_other, timeline_id_other)
with closing(pg_other.connect()) as conn:
with conn.cursor() as cur:
cur.execute('INSERT INTO t (key) VALUES (123)')
cur.execute("INSERT INTO t (key) VALUES (123)")

View File

@@ -1,17 +1,16 @@
import asyncio
import uuid
import asyncpg
import random
import time
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
from fixtures.log_helper import getLogger
from fixtures.utils import lsn_from_hex, lsn_to_hex
from typing import List, Optional
import uuid
from dataclasses import dataclass
from typing import List, Optional
log = getLogger('root.safekeeper_async')
import asyncpg
from fixtures.log_helper import getLogger
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
from fixtures.utils import lsn_from_hex, lsn_to_hex
log = getLogger("root.safekeeper_async")
class BankClient(object):
@@ -21,21 +20,22 @@ class BankClient(object):
self.init_amount = init_amount
async def initdb(self):
await self.conn.execute('DROP TABLE IF EXISTS bank_accs')
await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)')
await self.conn.execute("DROP TABLE IF EXISTS bank_accs")
await self.conn.execute("CREATE TABLE bank_accs(uid int primary key, amount int)")
await self.conn.execute(
'''
"""
INSERT INTO bank_accs
SELECT *, $1 FROM generate_series(0, $2)
''',
""",
self.init_amount,
self.n_accounts - 1)
await self.conn.execute('DROP TABLE IF EXISTS bank_log')
await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)')
self.n_accounts - 1,
)
await self.conn.execute("DROP TABLE IF EXISTS bank_log")
await self.conn.execute("CREATE TABLE bank_log(from_uid int, to_uid int, amount int)")
async def check_invariant(self):
row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs')
assert row['sum'] == self.n_accounts * self.init_amount
row = await self.conn.fetchrow("SELECT sum(amount) AS sum FROM bank_accs")
assert row["sum"] == self.n_accounts * self.init_amount
async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
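The invariant enforced by check_invariant above — transfers move money between accounts but never change the total — can be demonstrated with a minimal, database-free sketch (account and transfer counts are arbitrary):

import random

n_accounts, init_amount = 100, 100_000
accounts = {uid: init_amount for uid in range(n_accounts)}

for _ in range(1_000):
    from_uid, to_uid = random.sample(range(n_accounts), 2)
    amount = random.randint(1, 100)
    accounts[from_uid] -= amount
    accounts[to_uid] += amount

# Same check as check_invariant(): total money is conserved.
assert sum(accounts.values()) == n_accounts * init_amount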
@@ -45,17 +45,17 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
async with conn.transaction():
await conn.execute(
'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2',
"UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2",
amount,
to_uid,
)
await conn.execute(
'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2',
"UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2",
amount,
from_uid,
)
await conn.execute(
'INSERT INTO bank_log VALUES ($1, $2, $3)',
"INSERT INTO bank_log VALUES ($1, $2, $3)",
from_uid,
to_uid,
amount,
@@ -80,12 +80,12 @@ class WorkerStats(object):
assert all(cnt > 0 for cnt in self.counters)
progress = sum(self.counters)
log.info('All workers made {} transactions'.format(progress))
log.info("All workers made {} transactions".format(progress))
async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer):
pg_conn = await pg.connect_async()
log.debug('Started worker {}'.format(worker_id))
log.debug("Started worker {}".format(worker_id))
while stats.running:
from_uid = random.randint(0, n_accounts - 1)
@@ -95,19 +95,21 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou
await bank_transfer(pg_conn, from_uid, to_uid, amount)
stats.inc_progress(worker_id)
log.debug('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid))
log.debug("Executed transfer({}) {} => {}".format(amount, from_uid, to_uid))
log.debug('Finished worker {}'.format(worker_id))
log.debug("Finished worker {}".format(worker_id))
await pg_conn.close()
async def wait_for_lsn(safekeeper: Safekeeper,
tenant_id: str,
timeline_id: str,
wait_lsn: str,
polling_interval=1,
timeout=60):
async def wait_for_lsn(
safekeeper: Safekeeper,
tenant_id: str,
timeline_id: str,
wait_lsn: str,
polling_interval=1,
timeout=60,
):
"""
Poll flush_lsn from safekeeper until it's greater or equal than
provided wait_lsn. To do that, timeline_status is fetched from
@@ -119,7 +121,7 @@ async def wait_for_lsn(safekeeper: Safekeeper,
flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
log.info(
f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}'
f"Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}"
)
while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn):
@@ -131,22 +133,24 @@ async def wait_for_lsn(safekeeper: Safekeeper,
await asyncio.sleep(polling_interval)
flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}')
log.debug(f"safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}")
# This test will run several iterations and check progress in each of them.
# On each iteration 1 acceptor is stopped, and 2 others should allow
# background workers to execute transactions. In the end, state should remain
# consistent.
async def run_restarts_under_load(env: NeonEnv,
pg: Postgres,
acceptors: List[Safekeeper],
n_workers=10,
n_accounts=100,
init_amount=100000,
max_transfer=100,
period_time=4,
iterations=10):
async def run_restarts_under_load(
env: NeonEnv,
pg: Postgres,
acceptors: List[Safekeeper],
n_workers=10,
n_accounts=100,
init_amount=100000,
max_transfer=100,
period_time=4,
iterations=10,
):
# Set timeout for this test at 5 minutes. It should be enough for test to complete,
# taking into account that this timeout is checked only at the beginning of every iteration.
test_timeout_at = time.monotonic() + 5 * 60
@@ -166,20 +170,21 @@ async def run_restarts_under_load(env: NeonEnv,
workers.append(asyncio.create_task(worker))
for it in range(iterations):
assert time.monotonic() < test_timeout_at, 'test timed out'
assert time.monotonic() < test_timeout_at, "test timed out"
victim_idx = it % len(acceptors)
victim = acceptors[victim_idx]
victim.stop()
flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()')
flush_lsn = await pg_conn.fetchval("SELECT pg_current_wal_flush_lsn()")
flush_lsn = lsn_to_hex(flush_lsn)
log.info(f'Postgres flush_lsn {flush_lsn}')
log.info(f"Postgres flush_lsn {flush_lsn}")
pageserver_lsn = env.pageserver.http_client().timeline_detail(
uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"]
uuid.UUID(tenant_id), uuid.UUID((timeline_id))
)["local"]["last_record_lsn"]
sk_ps_lag = lsn_from_hex(flush_lsn) - lsn_from_hex(pageserver_lsn)
log.info(f'Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb')
log.info(f"Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb")
# Wait until alive safekeepers catch up with postgres
for idx, safekeeper in enumerate(acceptors):
@@ -193,7 +198,7 @@ async def run_restarts_under_load(env: NeonEnv,
victim.start()
log.info('Iterations are finished, exiting coroutines...')
log.info("Iterations are finished, exiting coroutines...")
stats.running = False
# await all workers
await asyncio.gather(*workers)
@@ -207,10 +212,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_safekeepers_restarts_under_load')
env.neon_cli.create_branch("test_safekeepers_restarts_under_load")
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start('test_safekeepers_restarts_under_load',
config_lines=['max_replication_write_lag=1MB'])
pg = env.postgres.create_start(
"test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"]
)
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers))
@@ -222,15 +228,17 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_restarts_frequent_checkpoints')
env.neon_cli.create_branch("test_restarts_frequent_checkpoints")
# Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long
pg = env.postgres.create_start('test_restarts_frequent_checkpoints',
config_lines=[
'max_replication_write_lag=1MB',
'min_wal_size=32MB',
'max_wal_size=32MB',
'log_checkpoints=on'
])
pg = env.postgres.create_start(
"test_restarts_frequent_checkpoints",
config_lines=[
"max_replication_write_lag=1MB",
"min_wal_size=32MB",
"max_wal_size=32MB",
"log_checkpoints=on",
],
)
# we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments
# are not removed before broadcasted to all safekeepers, with the help of replication slot
@@ -244,51 +252,51 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
port=env.port_distributor.get_port(),
# In these tests compute has high probability of terminating on its own
# before our stop() due to lost consensus leadership.
check_stop_result=False)
check_stop_result=False,
)
# embed current time in node name
node_name = pgdir_name or f'pg_node_{time.time()}'
return pg.create_start(branch_name=branch,
node_name=node_name,
config_lines=['log_statement=all'])
node_name = pgdir_name or f"pg_node_{time.time()}"
return pg.create_start(
branch_name=branch, node_name=node_name, config_lines=["log_statement=all"]
)
async def exec_compute_query(env: NeonEnv,
branch: str,
query: str,
pgdir_name: Optional[str] = None):
async def exec_compute_query(
env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None
):
with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg:
before_conn = time.time()
conn = await pg.connect_async()
res = await conn.fetch(query)
await conn.close()
after_conn = time.time()
log.info(f'{query} took {after_conn - before_conn}s')
log.info(f"{query} took {after_conn - before_conn}s")
return res
async def run_compute_restarts(env: NeonEnv,
queries=16,
batch_insert=10000,
branch='test_compute_restarts'):
async def run_compute_restarts(
env: NeonEnv, queries=16, batch_insert=10000, branch="test_compute_restarts"
):
cnt = 0
sum = 0
await exec_compute_query(env, branch, 'CREATE TABLE t (i int)')
await exec_compute_query(env, branch, "CREATE TABLE t (i int)")
for i in range(queries):
if i % 4 == 0:
await exec_compute_query(
env, branch, f'INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})')
env, branch, f"INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})"
)
sum += batch_insert
cnt += batch_insert
elif (i % 4 == 1) or (i % 4 == 3):
# Note that the select causes lots of FPIs and increases the probability of safekeepers
# standing at different LSNs after compute termination.
actual_sum = (await exec_compute_query(env, branch, 'SELECT SUM(i) FROM t'))[0][0]
assert actual_sum == sum, f'Expected sum={sum}, actual={actual_sum}'
actual_sum = (await exec_compute_query(env, branch, "SELECT SUM(i) FROM t"))[0][0]
assert actual_sum == sum, f"Expected sum={sum}, actual={actual_sum}"
elif i % 4 == 2:
await exec_compute_query(env, branch, 'UPDATE t SET i = i + 1')
await exec_compute_query(env, branch, "UPDATE t SET i = i + 1")
sum += cnt
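The sum/cnt bookkeeping above mirrors what the SQL does: every INSERT adds batch_insert ones, and every "UPDATE t SET i = i + 1" grows the expected total by the current row count. A database-free sketch of that arithmetic with a tiny batch size (3 is arbitrary):

batch_insert = 3
rows = []
cnt = expected_sum = 0

rows += [1] * batch_insert     # INSERT INTO t SELECT 1 FROM generate_series(1, batch_insert)
cnt += batch_insert
expected_sum += batch_insert

rows = [i + 1 for i in rows]   # UPDATE t SET i = i + 1
expected_sum += cnt

assert sum(rows) == expected_sum == 2 * batch_insert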
@@ -297,7 +305,7 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_compute_restarts')
env.neon_cli.create_branch("test_compute_restarts")
asyncio.run(run_compute_restarts(env))
@@ -315,7 +323,7 @@ class BackgroundCompute(object):
async def run(self):
if self.running:
raise Exception('BackgroundCompute is already running')
raise Exception("BackgroundCompute is already running")
self.running = True
i = 0
@@ -327,17 +335,17 @@ class BackgroundCompute(object):
res = await exec_compute_query(
self.env,
self.branch,
f'INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key',
pgdir_name=f'bgcompute{self.index}_key{verify_key}',
f"INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key",
pgdir_name=f"bgcompute{self.index}_key{verify_key}",
)
log.info(f'result: {res}')
log.info(f"result: {res}")
if len(res) != 1:
raise Exception('No result returned')
raise Exception("No result returned")
if res[0][0] != verify_key:
raise Exception('Wrong result returned')
raise Exception("Wrong result returned")
self.successful_queries.append(verify_key)
except Exception as e:
log.info(f'BackgroundCompute {self.index} query failed: {e}')
log.info(f"BackgroundCompute {self.index} query failed: {e}")
# With less sleep, there is a very big chance of not committing
# anything or only 1 xact during test run.
@@ -345,14 +353,12 @@ class BackgroundCompute(object):
self.running = False
async def run_concurrent_computes(env: NeonEnv,
num_computes=10,
run_seconds=20,
branch='test_concurrent_computes'):
async def run_concurrent_computes(
env: NeonEnv, num_computes=10, run_seconds=20, branch="test_concurrent_computes"
):
await exec_compute_query(
env,
branch,
'CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)')
env, branch, "CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)"
)
computes = [BackgroundCompute(i, env, branch) for i in range(num_computes)]
background_tasks = [asyncio.create_task(compute.run()) for compute in computes]
@@ -367,13 +373,17 @@ async def run_concurrent_computes(env: NeonEnv,
# work for some time with only one compute -- it should be able to make some xacts
TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3
initial_queries_by_0 = len(computes[0].successful_queries)
log.info(f'Waiting for another query by computes[0], '
f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s')
log.info(
f"Waiting for another query by computes[0], "
f"it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s"
)
for _ in range(10 * TIMEOUT_SECONDS):
current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0
if current_queries_by_0 >= 1:
log.info(f'Found {current_queries_by_0} successful queries '
f'by computes[0], completing the test')
log.info(
f"Found {current_queries_by_0} successful queries "
f"by computes[0], completing the test"
)
break
await asyncio.sleep(0.1)
else:
@@ -382,12 +392,14 @@ async def run_concurrent_computes(env: NeonEnv,
await asyncio.gather(background_tasks[0])
result = await exec_compute_query(env, branch, 'SELECT * FROM query_log')
result = await exec_compute_query(env, branch, "SELECT * FROM query_log")
# we should have inserted something while single compute was running
log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them '
f'by computes[0] after we started stopping the others')
log.info(
f"Executed {len(result)} queries, {current_queries_by_0} of them "
f"by computes[0] after we started stopping the others"
)
for row in result:
log.info(f'{row[0]} {row[1]} {row[2]}')
log.info(f"{row[0]} {row[1]} {row[2]}")
# ensure everything reported as committed wasn't lost
for compute in computes:
@@ -402,16 +414,15 @@ def test_concurrent_computes(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_concurrent_computes')
env.neon_cli.create_branch("test_concurrent_computes")
asyncio.run(run_concurrent_computes(env))
# Stop safekeeper and check that query cannot be executed while safekeeper is down.
# Query will insert a single row into a table.
async def check_unavailability(sk: Safekeeper,
conn: asyncpg.Connection,
key: int,
start_delay_sec: int = 2):
async def check_unavailability(
sk: Safekeeper, conn: asyncpg.Connection, key: int, start_delay_sec: int = 2
):
# shutdown one of two acceptors, that is, majority
sk.stop()
@@ -431,7 +442,7 @@ async def run_unavailability(env: NeonEnv, pg: Postgres):
conn = await pg.connect_async()
# check basic work with table
await conn.execute('CREATE TABLE t(key int primary key, value text)')
await conn.execute("CREATE TABLE t(key int primary key, value text)")
await conn.execute("INSERT INTO t values (1, 'payload')")
# stop safekeeper and check that query cannot be executed while safekeeper is down
@@ -443,7 +454,7 @@ async def run_unavailability(env: NeonEnv, pg: Postgres):
# check that we can execute queries after restart
await conn.execute("INSERT INTO t values (4, 'payload')")
result_sum = await conn.fetchval('SELECT sum(key) FROM t')
result_sum = await conn.fetchval("SELECT sum(key) FROM t")
assert result_sum == 10
@@ -452,8 +463,8 @@ def test_unavailability(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 2
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_safekeepers_unavailability')
pg = env.postgres.create_start('test_safekeepers_unavailability')
env.neon_cli.create_branch("test_safekeepers_unavailability")
pg = env.postgres.create_start("test_safekeepers_unavailability")
asyncio.run(run_unavailability(env, pg))
@@ -473,20 +484,20 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest):
if random.random() >= 0.5:
victims.append(sk)
log.info(
f'Iteration {data.iteration}: stopping {list(map(lambda sk: sk.id, victims))} safekeepers'
f"Iteration {data.iteration}: stopping {list(map(lambda sk: sk.id, victims))} safekeepers"
)
for v in victims:
v.stop()
await asyncio.sleep(1)
for v in victims:
v.start()
log.info(f'Iteration {data.iteration} finished')
log.info(f"Iteration {data.iteration} finished")
await asyncio.sleep(1)
async def run_race_conditions(env: NeonEnv, pg: Postgres):
conn = await pg.connect_async()
await conn.execute('CREATE TABLE t(key int primary key, value text)')
await conn.execute("CREATE TABLE t(key int primary key, value text)")
data = RaceConditionTest(0, False)
bg_xmas = asyncio.create_task(xmas_garland(env.safekeepers, data))
@@ -501,9 +512,9 @@ async def run_race_conditions(env: NeonEnv, pg: Postgres):
expected_sum += i
i += 1
log.info(f'Executed {i-1} queries')
log.info(f"Executed {i-1} queries")
res = await conn.fetchval('SELECT sum(key) FROM t')
res = await conn.fetchval("SELECT sum(key) FROM t")
assert res == expected_sum
data.is_stopped = True
@@ -516,8 +527,8 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_safekeepers_race_conditions')
pg = env.postgres.create_start('test_safekeepers_race_conditions')
env.neon_cli.create_branch("test_safekeepers_race_conditions")
pg = env.postgres.create_start("test_safekeepers_race_conditions")
asyncio.run(run_race_conditions(env, pg))
@@ -527,13 +538,15 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder):
async def run_wal_lagging(env: NeonEnv, pg: Postgres):
def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str:
# use ports 10, 11 and 12 to simulate unavailable safekeepers
return ','.join([
f'localhost:{sk.port.pg if active else 10 + i}'
for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk))
])
return ",".join(
[
f"localhost:{sk.port.pg if active else 10 + i}"
for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk))
]
)
conn = await pg.connect_async()
await conn.execute('CREATE TABLE t(key int primary key, value text)')
await conn.execute("CREATE TABLE t(key int primary key, value text)")
await conn.close()
pg.stop()
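For illustration, with three safekeepers whose Postgres ports are 5454, 5455 and 5456 (made-up values) and active_sk = [True, False, True], the helper above yields a connection string whose inactive entry points at an unused low port, so the compute cannot reach that safekeeper:

ports = [5454, 5455, 5456]      # hypothetical safekeeper pg ports
active_sk = [True, False, True]
guc = ",".join(
    f"localhost:{port if active else 10 + i}"
    for i, (port, active) in enumerate(zip(ports, active_sk))
)
assert guc == "localhost:5454,localhost:11,localhost:5456"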
@@ -552,7 +565,7 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
continue
pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk))
log.info(f'Iteration {it}: {active_sk}')
log.info(f"Iteration {it}: {active_sk}")
pg.start()
conn = await pg.connect_async()
@@ -569,9 +582,9 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres):
pg.start()
conn = await pg.connect_async()
log.info(f'Executed {i-1} queries')
log.info(f"Executed {i-1} queries")
res = await conn.fetchval('SELECT sum(key) FROM t')
res = await conn.fetchval("SELECT sum(key) FROM t")
assert res == expected_sum
@@ -581,7 +594,7 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()
env.neon_cli.create_branch('test_wal_lagging')
pg = env.postgres.create_start('test_wal_lagging')
env.neon_cli.create_branch("test_wal_lagging")
pg = env.postgres.create_start("test_wal_lagging")
asyncio.run(run_wal_lagging(env, pg))

View File

@@ -1,33 +1,39 @@
import os
from pathlib import Path
from fixtures.neon_fixtures import (NeonEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
pg_distrib_dir)
from fixtures.neon_fixtures import (
NeonEnvBuilder,
PgBin,
PortDistributor,
VanillaPostgres,
base_dir,
pg_distrib_dir,
)
def test_wal_restore(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
test_output_dir: Path,
port_distributor: PortDistributor):
def test_wal_restore(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
test_output_dir: Path,
port_distributor: PortDistributor,
):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
pg = env.postgres.create_start('test_wal_restore')
pg = env.postgres.create_start("test_wal_restore")
pg.safe_psql("create table t as select generate_series(1,300000)")
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
env.neon_cli.pageserver_stop()
port = port_distributor.get_port()
data_dir = test_output_dir / 'pgsql.restored'
data_dir = test_output_dir / "pgsql.restored"
with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:
pg_bin.run_capture([
os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'),
os.path.join(pg_distrib_dir, 'bin'),
str(test_output_dir / 'repo' / 'safekeepers' / 'sk1' / str(tenant_id) / '*'),
str(data_dir),
str(port)
])
pg_bin.run_capture(
[
os.path.join(base_dir, "libs/utils/scripts/restore_from_wal.sh"),
os.path.join(pg_distrib_dir, "bin"),
str(test_output_dir / "repo" / "safekeepers" / "sk1" / str(tenant_id) / "*"),
str(data_dir),
str(port),
]
)
restored.start()
assert restored.safe_psql('select count(*) from t', user='cloud_admin') == [(300000, )]
assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]

View File

@@ -1,5 +1,6 @@
import os
from pathlib import Path
import pytest
from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir
@@ -13,33 +14,33 @@ def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, caps
env.neon_cli.create_branch("test_isolation", "empty")
# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100'])
pg.safe_psql('CREATE DATABASE isolation_regression')
pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"])
pg.safe_psql("CREATE DATABASE isolation_regression")
# Create some local directories for pg_isolation_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = test_output_dir / "regress"
(runpath / "testtablespace").mkdir(parents=True)
# Compute all the file locations that pg_isolation_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/isolation')
src_path = os.path.join(base_dir, 'vendor/postgres/src/test/isolation')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'isolation_schedule')
pg_isolation_regress = os.path.join(build_path, 'pg_isolation_regress')
build_path = os.path.join(pg_distrib_dir, "build/src/test/isolation")
src_path = os.path.join(base_dir, "vendor/postgres/src/test/isolation")
bindir = os.path.join(pg_distrib_dir, "bin")
schedule = os.path.join(src_path, "isolation_schedule")
pg_isolation_regress = os.path.join(build_path, "pg_isolation_regress")
pg_isolation_regress_command = [
pg_isolation_regress,
'--use-existing',
'--bindir={}'.format(bindir),
'--dlpath={}'.format(build_path),
'--inputdir={}'.format(src_path),
'--schedule={}'.format(schedule),
"--use-existing",
"--bindir={}".format(bindir),
"--dlpath={}".format(build_path),
"--inputdir={}".format(src_path),
"--schedule={}".format(schedule),
]
env_vars = {
'PGPORT': str(pg.default_options['port']),
'PGUSER': pg.default_options['user'],
'PGHOST': pg.default_options['host'],
"PGPORT": str(pg.default_options["port"]),
"PGUSER": pg.default_options["user"],
"PGHOST": pg.default_options["host"],
}
# Run the command.

View File

@@ -1,11 +1,8 @@
import os
from pathlib import Path
from fixtures.neon_fixtures import (NeonEnv,
check_restored_datadir_content,
base_dir,
pg_distrib_dir)
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys):
@@ -13,35 +10,35 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c
env.neon_cli.create_branch("test_neon_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_neon_regress')
pg.safe_psql('CREATE DATABASE regression')
pg = env.postgres.create_start("test_neon_regress")
pg.safe_psql("CREATE DATABASE regression")
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = test_output_dir / "regress"
(runpath / "testtablespace").mkdir(parents=True)
# Compute all the file locations that pg_regress will need.
# This test runs neon specific tests
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
src_path = os.path.join(base_dir, 'test_runner/neon_regress')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'parallel_schedule')
pg_regress = os.path.join(build_path, 'pg_regress')
build_path = os.path.join(pg_distrib_dir, "build/src/test/regress")
src_path = os.path.join(base_dir, "test_runner/neon_regress")
bindir = os.path.join(pg_distrib_dir, "bin")
schedule = os.path.join(src_path, "parallel_schedule")
pg_regress = os.path.join(build_path, "pg_regress")
pg_regress_command = [
pg_regress,
'--use-existing',
'--bindir={}'.format(bindir),
'--dlpath={}'.format(build_path),
'--schedule={}'.format(schedule),
'--inputdir={}'.format(src_path),
"--use-existing",
"--bindir={}".format(bindir),
"--dlpath={}".format(build_path),
"--schedule={}".format(schedule),
"--inputdir={}".format(src_path),
]
log.info(pg_regress_command)
env_vars = {
'PGPORT': str(pg.default_options['port']),
'PGUSER': pg.default_options['user'],
'PGHOST': pg.default_options['host'],
"PGPORT": str(pg.default_options["port"]),
"PGUSER": pg.default_options["user"],
"PGHOST": pg.default_options["host"],
}
# Run the command.
@@ -51,8 +48,8 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
# checkpoint one more time to ensure that the lsn we get is the latest one
pg.safe_psql('CHECKPOINT')
lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
pg.safe_psql("CHECKPOINT")
lsn = pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0]
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)

View File

@@ -1,7 +1,8 @@
import os
import pathlib
import pytest
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir
# The pg_regress tests run for a long time, especially in debug mode,
@@ -12,34 +13,34 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
env.neon_cli.create_branch("test_pg_regress", "empty")
# Connect to postgres and create a database called "regression".
pg = env.postgres.create_start('test_pg_regress')
pg.safe_psql('CREATE DATABASE regression')
pg = env.postgres.create_start("test_pg_regress")
pg.safe_psql("CREATE DATABASE regression")
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = test_output_dir / "regress"
(runpath / "testtablespace").mkdir(parents=True)
# Compute all the file locations that pg_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
src_path = os.path.join(base_dir, 'vendor/postgres/src/test/regress')
bindir = os.path.join(pg_distrib_dir, 'bin')
schedule = os.path.join(src_path, 'parallel_schedule')
pg_regress = os.path.join(build_path, 'pg_regress')
build_path = os.path.join(pg_distrib_dir, "build/src/test/regress")
src_path = os.path.join(base_dir, "vendor/postgres/src/test/regress")
bindir = os.path.join(pg_distrib_dir, "bin")
schedule = os.path.join(src_path, "parallel_schedule")
pg_regress = os.path.join(build_path, "pg_regress")
pg_regress_command = [
pg_regress,
'--bindir=""',
'--use-existing',
'--bindir={}'.format(bindir),
'--dlpath={}'.format(build_path),
'--schedule={}'.format(schedule),
'--inputdir={}'.format(src_path),
"--use-existing",
"--bindir={}".format(bindir),
"--dlpath={}".format(build_path),
"--schedule={}".format(schedule),
"--inputdir={}".format(src_path),
]
env_vars = {
'PGPORT': str(pg.default_options['port']),
'PGUSER': pg.default_options['user'],
'PGHOST': pg.default_options['host'],
"PGPORT": str(pg.default_options["port"]),
"PGUSER": pg.default_options["user"],
"PGHOST": pg.default_options["host"],
}
# Run the command.
@@ -49,7 +50,7 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)
# checkpoint one more time to ensure that the lsn we get is the latest one
pg.safe_psql('CHECKPOINT')
pg.safe_psql("CHECKPOINT")
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)

View File

@@ -1,5 +1,7 @@
pytest_plugins = ("fixtures.neon_fixtures",
"fixtures.benchmark_fixture",
"fixtures.pg_stats",
"fixtures.compare_fixtures",
"fixtures.slow")
pytest_plugins = (
"fixtures.neon_fixtures",
"fixtures.benchmark_fixture",
"fixtures.pg_stats",
"fixtures.compare_fixtures",
"fixtures.slow",
)

View File

@@ -10,12 +10,14 @@ import warnings
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
# Type-related stuff
from typing import Iterator, Optional
import pytest
from _pytest.config import Config
from _pytest.terminal import TerminalReporter
"""
This file contains fixtures for micro-benchmarks.
@@ -112,8 +114,10 @@ class PgBenchRunResult:
# pgbench v14:
# initial connection time = 3.858 ms
# tps = 309.281539 (without initial connection time)
if (line.startswith("tps = ") and ("(excluding connections establishing)" in line
or "(without initial connection time)")):
if line.startswith("tps = ") and (
"(excluding connections establishing)" in line
or "(without initial connection time)"
):
tps = float(line.split()[2])
return cls(
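Both pgbench output variants quoted in the comments above can be exercised with hypothetical lines; note that this sketch spells out `in line` for both version-specific suffixes, which is an assumption about the intended check rather than a transcription of the code in this change:

for line in [
    "tps = 280.512345 (excluding connections establishing)",   # pgbench <= v13
    "tps = 309.281539 (without initial connection time)",      # pgbench v14
]:
    if line.startswith("tps = ") and (
        "(excluding connections establishing)" in line
        or "(without initial connection time)" in line
    ):
        print(float(line.split()[2]))  # 280.512345, then 309.281539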
@@ -154,17 +158,21 @@ class PgBenchInitResult:
last_line = stderr.splitlines()[-1]
regex = re.compile(r"done in (\d+\.\d+) s "
r"\("
r"(?:drop tables (\d+\.\d+) s)?(?:, )?"
r"(?:create tables (\d+\.\d+) s)?(?:, )?"
r"(?:client-side generate (\d+\.\d+) s)?(?:, )?"
r"(?:vacuum (\d+\.\d+) s)?(?:, )?"
r"(?:primary keys (\d+\.\d+) s)?(?:, )?"
r"\)\.")
regex = re.compile(
r"done in (\d+\.\d+) s "
r"\("
r"(?:drop tables (\d+\.\d+) s)?(?:, )?"
r"(?:create tables (\d+\.\d+) s)?(?:, )?"
r"(?:client-side generate (\d+\.\d+) s)?(?:, )?"
r"(?:vacuum (\d+\.\d+) s)?(?:, )?"
r"(?:primary keys (\d+\.\d+) s)?(?:, )?"
r"\)\."
)
if (m := regex.match(last_line)) is not None:
total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [float(v) for v in m.groups() if v is not None]
total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [
float(v) for v in m.groups() if v is not None
]
else:
raise RuntimeError(f"can't parse pgbench initialize results from `{last_line}`")
@@ -185,11 +193,11 @@ class PgBenchInitResult:
class MetricReport(str, enum.Enum): # str is a hack to make it json serializable
# this means that this is a constant test parameter
# like number of transactions, or number of clients
TEST_PARAM = 'test_param'
TEST_PARAM = "test_param"
# reporter can use it to mark test runs with higher values as improvements
HIGHER_IS_BETTER = 'higher_is_better'
HIGHER_IS_BETTER = "higher_is_better"
# the same but for lower values
LOWER_IS_BETTER = 'lower_is_better'
LOWER_IS_BETTER = "lower_is_better"
class NeonBenchmarker:
@@ -197,6 +205,7 @@ class NeonBenchmarker:
An object for recording benchmark results. This is created for each test
function by the zenbenchmark fixture
"""
def __init__(self, property_recorder):
# property recorder here is a pytest fixture provided by junitxml module
# https://docs.pytest.org/en/6.2.x/reference.html#pytest.junitxml.record_property
@@ -244,43 +253,57 @@ class NeonBenchmarker:
)
def record_pg_bench_result(self, prefix: str, pg_bench_result: PgBenchRunResult):
self.record(f"{prefix}.number_of_clients",
pg_bench_result.number_of_clients,
'',
MetricReport.TEST_PARAM)
self.record(f"{prefix}.number_of_threads",
pg_bench_result.number_of_threads,
'',
MetricReport.TEST_PARAM)
self.record(
f"{prefix}.number_of_clients",
pg_bench_result.number_of_clients,
"",
MetricReport.TEST_PARAM,
)
self.record(
f"{prefix}.number_of_threads",
pg_bench_result.number_of_threads,
"",
MetricReport.TEST_PARAM,
)
self.record(
f"{prefix}.number_of_transactions_actually_processed",
pg_bench_result.number_of_transactions_actually_processed,
'',
"",
# that's because this is predefined by test matrix and doesn't change across runs
report=MetricReport.TEST_PARAM,
)
self.record(f"{prefix}.latency_average",
pg_bench_result.latency_average,
unit="ms",
report=MetricReport.LOWER_IS_BETTER)
self.record(
f"{prefix}.latency_average",
pg_bench_result.latency_average,
unit="ms",
report=MetricReport.LOWER_IS_BETTER,
)
if pg_bench_result.latency_stddev is not None:
self.record(f"{prefix}.latency_stddev",
pg_bench_result.latency_stddev,
unit="ms",
report=MetricReport.LOWER_IS_BETTER)
self.record(f"{prefix}.tps", pg_bench_result.tps, '', report=MetricReport.HIGHER_IS_BETTER)
self.record(f"{prefix}.run_duration",
pg_bench_result.run_duration,
unit="s",
report=MetricReport.LOWER_IS_BETTER)
self.record(f"{prefix}.run_start_timestamp",
pg_bench_result.run_start_timestamp,
'',
MetricReport.TEST_PARAM)
self.record(f"{prefix}.run_end_timestamp",
pg_bench_result.run_end_timestamp,
'',
MetricReport.TEST_PARAM)
self.record(
f"{prefix}.latency_stddev",
pg_bench_result.latency_stddev,
unit="ms",
report=MetricReport.LOWER_IS_BETTER,
)
self.record(f"{prefix}.tps", pg_bench_result.tps, "", report=MetricReport.HIGHER_IS_BETTER)
self.record(
f"{prefix}.run_duration",
pg_bench_result.run_duration,
unit="s",
report=MetricReport.LOWER_IS_BETTER,
)
self.record(
f"{prefix}.run_start_timestamp",
pg_bench_result.run_start_timestamp,
"",
MetricReport.TEST_PARAM,
)
self.record(
f"{prefix}.run_end_timestamp",
pg_bench_result.run_end_timestamp,
"",
MetricReport.TEST_PARAM,
)
def record_pg_bench_init_result(self, prefix: str, result: PgBenchInitResult):
test_params = [
@@ -288,10 +311,9 @@ class NeonBenchmarker:
"end_timestamp",
]
for test_param in test_params:
self.record(f"{prefix}.{test_param}",
getattr(result, test_param),
'',
MetricReport.TEST_PARAM)
self.record(
f"{prefix}.{test_param}", getattr(result, test_param), "", MetricReport.TEST_PARAM
)
metrics = [
"duration",
@@ -303,10 +325,9 @@ class NeonBenchmarker:
]
for metric in metrics:
if (value := getattr(result, metric)) is not None:
self.record(f"{prefix}.{metric}",
value,
unit="s",
report=MetricReport.LOWER_IS_BETTER)
self.record(
f"{prefix}.{metric}", value, unit="s", report=MetricReport.LOWER_IS_BETTER
)
def get_io_writes(self, pageserver) -> int:
"""
@@ -319,7 +340,7 @@ class NeonBenchmarker:
"""
Fetch the "maxrss" metric from the pageserver
"""
metric_name = r'libmetrics_maxrss_kb'
metric_name = r"libmetrics_maxrss_kb"
return self.get_int_counter_value(pageserver, metric_name)
def get_int_counter_value(self, pageserver, metric_name) -> int:
@@ -332,7 +353,7 @@ class NeonBenchmarker:
# all prometheus metrics are floats. So to be pedantic, read it as a float
# and round to integer.
all_metrics = pageserver.http_client().get_metrics()
matches = re.search(fr'^{metric_name} (\S+)$', all_metrics, re.MULTILINE)
matches = re.search(rf"^{metric_name} (\S+)$", all_metrics, re.MULTILINE)
assert matches
return int(round(float(matches.group(1))))
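For illustration, the same regex applied to a hypothetical two-line metrics payload (the real text comes from the pageserver's /metrics endpoint):

import re

all_metrics = "libmetrics_maxrss_kb 123456.0\npageserver_created_persistent_files_total 7\n"
metric_name = "libmetrics_maxrss_kb"
matches = re.search(rf"^{metric_name} (\S+)$", all_metrics, re.MULTILINE)
assert matches is not None
print(int(round(float(matches.group(1)))))  # 123456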
@@ -358,10 +379,12 @@ class NeonBenchmarker:
yield
after = self.get_io_writes(pageserver)
self.record(metric_name,
round((after - before) / (1024 * 1024)),
"MB",
report=MetricReport.LOWER_IS_BETTER)
self.record(
metric_name,
round((after - before) / (1024 * 1024)),
"MB",
report=MetricReport.LOWER_IS_BETTER,
)
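The context-manager pattern above — sample a counter, run the wrapped block, sample again, record the delta in MB — in a self-contained sketch; the stand-in counter and `print` replace get_io_writes() and self.record():

from contextlib import contextmanager

@contextmanager
def record_delta_mb(read_counter, record):
    before = read_counter()
    yield
    record(round((read_counter() - before) / (1024 * 1024)))

counter = [0]
with record_delta_mb(lambda: counter[0], print):
    counter[0] += 5 * 1024 * 1024   # pretend the pageserver wrote 5 MB during the block
# prints 5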
@pytest.fixture(scope="function")
@@ -410,8 +433,9 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int,
result_entry = []
for _, recorded_property in test_report.user_properties:
terminalreporter.write("{}.{}: ".format(test_report.head_line,
recorded_property["name"]))
terminalreporter.write(
"{}.{}: ".format(test_report.head_line, recorded_property["name"])
)
unit = recorded_property["unit"]
value = recorded_property["value"]
if unit == "MB":
@@ -426,11 +450,13 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int,
result_entry.append(recorded_property)
result.append({
"suit": test_report.nodeid,
"total_duration": test_report.duration,
"data": result_entry,
})
result.append(
{
"suit": test_report.nodeid,
"total_duration": test_report.duration,
"data": result_entry,
}
)
out_dir = config.getoption("out_dir")
if out_dir is None:
@@ -442,6 +468,5 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int,
return
get_out_path(Path(out_dir), revision=revision).write_text(
json.dumps({
"revision": revision, "platform": platform, "result": result
}, indent=4))
json.dumps({"revision": revision, "platform": platform, "result": result}, indent=4)
)

View File

@@ -1,14 +1,14 @@
import pytest
from contextlib import contextmanager
from abc import ABC, abstractmethod
from fixtures.pg_stats import PgStatTable
from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from contextlib import contextmanager
# Type-related stuff
from typing import Dict, List
import pytest
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnv, PgBin, PgProtocol, RemotePostgres, VanillaPostgres
from fixtures.pg_stats import PgStatTable
class PgCompare(ABC):
"""Common interface of all postgres implementations, useful for benchmarks.
@@ -16,6 +16,7 @@ class PgCompare(ABC):
This class is a helper class for the neon_with_baseline fixture. See its documentation
for more details.
"""
@property
@abstractmethod
def pg(self) -> PgProtocol:
@@ -61,7 +62,7 @@ class PgCompare(ABC):
data = self._retrieve_pg_stats(pg_stats)
for k in set(init_data) & set(data):
self.zenbenchmark.record(k, data[k] - init_data[k], '', MetricReport.HIGHER_IS_BETTER)
self.zenbenchmark.record(k, data[k] - init_data[k], "", MetricReport.HIGHER_IS_BETTER)
def _retrieve_pg_stats(self, pg_stats: List[PgStatTable]) -> Dict[str, int]:
results: Dict[str, int] = {}
@@ -81,17 +82,16 @@ class PgCompare(ABC):
class NeonCompare(PgCompare):
"""PgCompare interface for the neon stack."""
def __init__(self,
zenbenchmark: NeonBenchmarker,
neon_simple_env: NeonEnv,
pg_bin: PgBin,
branch_name):
def __init__(
self, zenbenchmark: NeonBenchmarker, neon_simple_env: NeonEnv, pg_bin: PgBin, branch_name
):
self.env = neon_simple_env
self._zenbenchmark = zenbenchmark
self._pg_bin = pg_bin
# We only use one branch and one timeline
self.env.neon_cli.create_branch(branch_name, 'empty')
self.env.neon_cli.create_branch(branch_name, "empty")
self._pg = self.env.postgres.create_start(branch_name)
self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0]
@@ -118,32 +118,33 @@ class NeonCompare(PgCompare):
self.pscur.execute(f"compact {self.env.initial_tenant.hex} {self.timeline}")
def report_peak_memory_use(self) -> None:
self.zenbenchmark.record("peak_mem",
self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024,
'MB',
report=MetricReport.LOWER_IS_BETTER)
self.zenbenchmark.record(
"peak_mem",
self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024,
"MB",
report=MetricReport.LOWER_IS_BETTER,
)
def report_size(self) -> None:
timeline_size = self.zenbenchmark.get_timeline_size(self.env.repo_dir,
self.env.initial_tenant,
self.timeline)
self.zenbenchmark.record('size',
timeline_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
timeline_size = self.zenbenchmark.get_timeline_size(
self.env.repo_dir, self.env.initial_tenant, self.timeline
)
self.zenbenchmark.record(
"size", timeline_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
)
total_files = self.zenbenchmark.get_int_counter_value(
self.env.pageserver, "pageserver_created_persistent_files_total")
self.env.pageserver, "pageserver_created_persistent_files_total"
)
total_bytes = self.zenbenchmark.get_int_counter_value(
self.env.pageserver, "pageserver_written_persistent_bytes_total")
self.zenbenchmark.record("data_uploaded",
total_bytes / (1024 * 1024),
"MB",
report=MetricReport.LOWER_IS_BETTER)
self.zenbenchmark.record("num_files_uploaded",
total_files,
"",
report=MetricReport.LOWER_IS_BETTER)
self.env.pageserver, "pageserver_written_persistent_bytes_total"
)
self.zenbenchmark.record(
"data_uploaded", total_bytes / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
)
self.zenbenchmark.record(
"num_files_uploaded", total_files, "", report=MetricReport.LOWER_IS_BETTER
)
def record_pageserver_writes(self, out_name):
return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name)
@@ -154,13 +155,16 @@ class NeonCompare(PgCompare):
class VanillaCompare(PgCompare):
"""PgCompare interface for vanilla postgres."""
def __init__(self, zenbenchmark, vanilla_pg: VanillaPostgres):
self._pg = vanilla_pg
self._zenbenchmark = zenbenchmark
vanilla_pg.configure([
'shared_buffers=1MB',
'synchronous_commit=off',
])
vanilla_pg.configure(
[
"shared_buffers=1MB",
"synchronous_commit=off",
]
)
vanilla_pg.start()
# Long-lived cursor, useful for flushing
@@ -186,16 +190,14 @@ class VanillaCompare(PgCompare):
pass # TODO find something
def report_size(self) -> None:
data_size = self.pg.get_subdir_size('base')
self.zenbenchmark.record('data_size',
data_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
wal_size = self.pg.get_subdir_size('pg_wal')
self.zenbenchmark.record('wal_size',
wal_size / (1024 * 1024),
'MB',
report=MetricReport.LOWER_IS_BETTER)
data_size = self.pg.get_subdir_size("base")
self.zenbenchmark.record(
"data_size", data_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
)
wal_size = self.pg.get_subdir_size("pg_wal")
self.zenbenchmark.record(
"wal_size", wal_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
)
@contextmanager
def record_pageserver_writes(self, out_name):
@@ -207,6 +209,7 @@ class VanillaCompare(PgCompare):
class RemoteCompare(PgCompare):
"""PgCompare interface for a remote postgres instance."""
def __init__(self, zenbenchmark, remote_pg: RemotePostgres):
self._pg = remote_pg
self._zenbenchmark = zenbenchmark
@@ -247,18 +250,18 @@ class RemoteCompare(PgCompare):
return self.zenbenchmark.record_duration(out_name)
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def neon_compare(request, zenbenchmark, pg_bin, neon_simple_env) -> NeonCompare:
branch_name = request.node.name
return NeonCompare(zenbenchmark, neon_simple_env, pg_bin, branch_name)
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def vanilla_compare(zenbenchmark, vanilla_pg) -> VanillaCompare:
return VanillaCompare(zenbenchmark, vanilla_pg)
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare:
return RemoteCompare(zenbenchmark, remote_pg)

View File

@@ -1,5 +1,6 @@
import logging
import logging.config
"""
This file configures logging to use in python tests.
Logs are automatically captured and shown in their
@@ -22,20 +23,16 @@ https://docs.pytest.org/en/6.2.x/logging.html
LOGGING = {
"version": 1,
"loggers": {
"root": {
"level": "INFO"
},
"root.safekeeper_async": {
"level": "INFO" # a lot of logs on DEBUG level
}
}
"root": {"level": "INFO"},
"root.safekeeper_async": {"level": "INFO"}, # a lot of logs on DEBUG level
},
}
def getLogger(name='root') -> logging.Logger:
def getLogger(name="root") -> logging.Logger:
"""Method to get logger for tests.
Should be used to get correctly initialized logger. """
Should be used to get correctly initialized logger."""
return logging.getLogger(name)

View File

@@ -1,10 +1,10 @@
from dataclasses import dataclass
from prometheus_client.parser import text_string_to_metric_families
from prometheus_client.samples import Sample
from typing import Dict, List
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List
from fixtures.log_helper import log
from prometheus_client.parser import text_string_to_metric_families
from prometheus_client.samples import Sample
class Metrics:

File diff suppressed because it is too large

View File

@@ -18,35 +18,43 @@ class PgStatTable:
return f"SELECT {','.join(self.columns)} FROM {self.table} {self.additional_query}"
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def pg_stats_rw() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database",
["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'"),
PgStatTable(
"pg_stat_database",
["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'",
),
]
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def pg_stats_ro() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database", ["tup_returned", "tup_fetched"],
"WHERE datname='postgres'"),
PgStatTable(
"pg_stat_database", ["tup_returned", "tup_fetched"], "WHERE datname='postgres'"
),
]
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def pg_stats_wo() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_database", ["tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'"),
PgStatTable(
"pg_stat_database",
["tup_inserted", "tup_updated", "tup_deleted"],
"WHERE datname='postgres'",
),
]
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def pg_stats_wal() -> List[PgStatTable]:
return [
PgStatTable("pg_stat_wal",
["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"],
"")
PgStatTable(
"pg_stat_wal",
["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"],
"",
)
]
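For reference, the query builder shown at the top of this file renders the pg_stats_rw fixture above into a statement like the following (assembled by hand here to show the shape):

columns = ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"]
table, additional_query = "pg_stat_database", "WHERE datname='postgres'"
query = f"SELECT {','.join(columns)} FROM {table} {additional_query}"
print(query)
# SELECT tup_returned,tup_fetched,tup_inserted,tup_updated,tup_deleted FROM pg_stat_database WHERE datname='postgres'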

View File

@@ -1,4 +1,5 @@
import pytest
"""
This plugin allows tests to be marked as slow using pytest.mark.slow. By default slow
tests are excluded. They need to be specifically requested with the --runslow flag in

View File

@@ -4,20 +4,19 @@ import pathlib
import shutil
import subprocess
from pathlib import Path
from typing import Any, List, Tuple
from psycopg2.extensions import cursor
from fixtures.log_helper import log
from psycopg2.extensions import cursor
def get_self_dir() -> str:
""" Get the path to the directory where this script lives. """
"""Get the path to the directory where this script lives."""
return os.path.dirname(os.path.abspath(__file__))
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
""" Run a process and capture its output
"""Run a process and capture its output
Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr"
where "cmd" is the name of the program and NNN is an incrementing
@@ -27,14 +26,14 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
Returns basepath for files with captured output.
"""
assert type(cmd) is list
base = os.path.basename(cmd[0]) + '_{}'.format(global_counter())
base = os.path.basename(cmd[0]) + "_{}".format(global_counter())
basepath = os.path.join(capture_dir, base)
stdout_filename = basepath + '.stdout'
stderr_filename = basepath + '.stderr'
stdout_filename = basepath + ".stdout"
stderr_filename = basepath + ".stderr"
try:
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
with open(stdout_filename, "w") as stdout_f:
with open(stderr_filename, "w") as stderr_f:
log.info(f'Capturing stdout to "{base}.stdout" and stderr to "{base}.stderr"')
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
finally:
@@ -50,7 +49,7 @@ _global_counter = 0
def global_counter() -> int:
""" A really dumb global counter.
"""A really dumb global counter.
This is useful for giving output files a unique number, so if we run the
same command multiple times we can keep their output separate.
@@ -61,13 +60,13 @@ def global_counter() -> int:
def lsn_to_hex(num: int) -> str:
""" Convert lsn from int to standard hex notation. """
return "{:X}/{:X}".format(num >> 32, num & 0xffffffff)
"""Convert lsn from int to standard hex notation."""
return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF)
def lsn_from_hex(lsn_hex: str) -> int:
""" Convert lsn from hex notation to int. """
l, r = lsn_hex.split('/')
"""Convert lsn from hex notation to int."""
l, r = lsn_hex.split("/")
return (int(l, 16) << 32) + int(r, 16)
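A self-contained worked example of the two conversions above (the LSN value is arbitrary): the text form is the high and low 32-bit halves of a 64-bit byte position, printed as upper-case hex separated by a slash.

def lsn_to_hex(num: int) -> str:
    return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF)

def lsn_from_hex(lsn_hex: str) -> int:
    high, low = lsn_hex.split("/")
    return (int(high, 16) << 32) + int(low, 16)

assert lsn_from_hex("16/B374D848") == (0x16 << 32) + 0xB374D848 == 97500059720
assert lsn_to_hex(97500059720) == "16/B374D848"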
@@ -75,14 +74,16 @@ def print_gc_result(row):
log.info("GC duration {elapsed} ms".format_map(row))
log.info(
" total: {layers_total}, needed_by_cutoff {layers_needed_by_cutoff}, needed_by_pitr {layers_needed_by_pitr}"
" needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}"
.format_map(row))
" needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}".format_map(
row
)
)
def etcd_path() -> Path:
path_output = shutil.which("etcd")
if path_output is None:
raise RuntimeError('etcd not found in PATH')
raise RuntimeError("etcd not found in PATH")
else:
return Path(path_output)
@@ -145,7 +146,12 @@ def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]:
parts = f_name.split("__")
key_parts = parts[0].split("-")
lsn_parts = parts[1].split("-")
return int(key_parts[0], 16), int(key_parts[1], 16), int(lsn_parts[0], 16), int(lsn_parts[1], 16)
return (
int(key_parts[0], 16),
int(key_parts[1], 16),
int(lsn_parts[0], 16),
int(lsn_parts[1], 16),
)
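For illustration, a made-up delta-layer file name in the KEYSTART-KEYEND__LSNSTART-LSNEND form the parser above expects, split the same way by hand:

f_name = "000000067F0000000A-000000067F0000000B__00000000016B59D8-0000000001702000"
key_range, lsn_range = f_name.split("__")
key_start, key_end = (int(k, 16) for k in key_range.split("-"))
lsn_start, lsn_end = (int(lsn, 16) for lsn in lsn_range.split("-"))
assert key_start < key_end and lsn_start < lsn_end
print(hex(lsn_start), hex(lsn_end))  # 0x16b59d8 0x1702000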
def get_scale_for_db(size_mb: int) -> int:

View File

@@ -1,28 +1,26 @@
import random
import time
import statistics
import threading
import time
import timeit
import pytest
from typing import List
import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import NeonCompare
from fixtures.log_helper import log
def _record_branch_creation_durations(neon_compare: NeonCompare, durs: List[float]):
neon_compare.zenbenchmark.record("branch_creation_duration_max",
max(durs),
's',
MetricReport.LOWER_IS_BETTER)
neon_compare.zenbenchmark.record("branch_creation_duration_avg",
statistics.mean(durs),
's',
MetricReport.LOWER_IS_BETTER)
neon_compare.zenbenchmark.record("branch_creation_duration_stdev",
statistics.stdev(durs),
's',
MetricReport.LOWER_IS_BETTER)
neon_compare.zenbenchmark.record(
"branch_creation_duration_max", max(durs), "s", MetricReport.LOWER_IS_BETTER
)
neon_compare.zenbenchmark.record(
"branch_creation_duration_avg", statistics.mean(durs), "s", MetricReport.LOWER_IS_BETTER
)
neon_compare.zenbenchmark.record(
"branch_creation_duration_stdev", statistics.stdev(durs), "s", MetricReport.LOWER_IS_BETTER
)
@pytest.mark.parametrize("n_branches", [20])
@@ -37,15 +35,16 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
# Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test
tenant, _ = env.neon_cli.create_tenant(
conf={
'gc_period': '5 s',
'gc_horizon': f'{4 * 1024 ** 2}',
'checkpoint_distance': f'{2 * 1024 ** 2}',
'compaction_target_size': f'{1024 ** 2}',
'compaction_threshold': '2',
# set PITR interval to be small, so we can do GC
'pitr_interval': '5 s'
})
conf={
"gc_period": "5 s",
"gc_horizon": f"{4 * 1024 ** 2}",
"checkpoint_distance": f"{2 * 1024 ** 2}",
"compaction_target_size": f"{1024 ** 2}",
"compaction_threshold": "2",
# set PITR interval to be small, so we can do GC
"pitr_interval": "5 s",
}
)
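The f-string sizes in the tenant config above render to plain byte counts, for example:

assert f"{4 * 1024 ** 2}" == "4194304"   # gc_horizon: 4 MiB
assert f"{2 * 1024 ** 2}" == "2097152"   # checkpoint_distance: 2 MiB
assert f"{1024 ** 2}" == "1048576"       # compaction_target_size: 1 MiB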
def run_pgbench(branch: str):
log.info(f"Start a pgbench workload on branch {branch}")
@@ -53,15 +52,15 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
pg = env.postgres.create_start(branch, tenant_id=tenant)
connstr = pg.connstr()
pg_bin.run_capture(['pgbench', '-i', connstr])
pg_bin.run_capture(['pgbench', '-c10', '-T10', connstr])
pg_bin.run_capture(["pgbench", "-i", connstr])
pg_bin.run_capture(["pgbench", "-c10", "-T10", connstr])
pg.stop()
env.neon_cli.create_branch('b0', tenant_id=tenant)
env.neon_cli.create_branch("b0", tenant_id=tenant)
threads: List[threading.Thread] = []
threads.append(threading.Thread(target=run_pgbench, args=('b0', ), daemon=True))
threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True))
threads[-1].start()
branch_creation_durations = []
@@ -72,13 +71,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
p = random.randint(0, i)
timer = timeit.default_timer()
env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p), tenant_id=tenant)
env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(p), tenant_id=tenant)
dur = timeit.default_timer() - timer
log.info(f"Creating branch b{i+1} took {dur}s")
branch_creation_durations.append(dur)
threads.append(threading.Thread(target=run_pgbench, args=(f'b{i+1}', ), daemon=True))
threads.append(threading.Thread(target=run_pgbench, args=(f"b{i+1}",), daemon=True))
threads[-1].start()
for thread in threads:
@@ -92,10 +91,10 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int)
def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int):
env = neon_compare.env
env.neon_cli.create_branch('b0')
env.neon_cli.create_branch("b0")
pg = env.postgres.create_start('b0')
neon_compare.pg_bin.run_capture(['pgbench', '-i', '-s10', pg.connstr()])
pg = env.postgres.create_start("b0")
neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()])
branch_creation_durations = []
@@ -103,7 +102,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int):
# pick a random source branch
p = random.randint(0, i)
timer = timeit.default_timer()
env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p))
env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(p))
dur = timeit.default_timer() - timer
branch_creation_durations.append(dur)

View File

@@ -1,8 +1,9 @@
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
#
@@ -23,8 +24,8 @@ def test_bulk_insert(neon_with_baseline: PgCompare):
cur.execute("create table huge (i int, j int);")
# Run INSERT, recording the time and I/O it takes
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('insert'):
with env.record_pageserver_writes("pageserver_writes"):
with env.record_duration("insert"):
cur.execute("insert into huge values (generate_series(1, 5000000), 0);")
env.flush()

View File

@@ -1,7 +1,7 @@
import timeit
from fixtures.benchmark_fixture import MetricReport
import pytest
import pytest
from fixtures.benchmark_fixture import MetricReport
from fixtures.neon_fixtures import NeonEnvBuilder
# Run bulk tenant creation test.
@@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder
# 2. Average creation time per tenant
@pytest.mark.parametrize('tenants_count', [1, 5, 10])
@pytest.mark.parametrize("tenants_count", [1, 5, 10])
def test_bulk_tenant_create(
neon_env_builder: NeonEnvBuilder,
tenants_count: int,
@@ -27,22 +27,26 @@ def test_bulk_tenant_create(
start = timeit.default_timer()
tenant, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline(f'test_bulk_tenant_create_{tenants_count}_{i}',
tenant_id=tenant)
env.neon_cli.create_timeline(
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
)
# FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now?
#if use_safekeepers == 'with_sa':
# if use_safekeepers == 'with_sa':
# wa_factory.start_n_new(3)
pg_tenant = env.postgres.create_start(f'test_bulk_tenant_create_{tenants_count}_{i}',
tenant_id=tenant)
pg_tenant = env.postgres.create_start(
f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant
)
end = timeit.default_timer()
time_slices.append(end - start)
pg_tenant.stop()
zenbenchmark.record('tenant_creation_time',
sum(time_slices) / len(time_slices),
's',
report=MetricReport.LOWER_IS_BETTER)
zenbenchmark.record(
"tenant_creation_time",
sum(time_slices) / len(time_slices),
"s",
report=MetricReport.LOWER_IS_BETTER,
)

View File

@@ -6,7 +6,6 @@ from typing import List
import pytest
from fixtures.compare_fixtures import PgCompare
from fixtures.pg_stats import PgStatTable
from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix
@@ -18,85 +17,96 @@ def get_seeds_matrix(default: int = 100):
@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_rw_with_pgbench_default(neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_rw: List[PgStatTable]):
def test_compare_pg_stats_rw_with_pgbench_default(
neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_rw: List[PgStatTable],
):
env = neon_with_baseline
# initialize pgbench
env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()])
env.flush()
with env.record_pg_stats(pg_stats_rw):
env.pg_bin.run_capture(
['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
["pgbench", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()]
)
env.flush()
@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_wo_with_pgbench_simple_update(neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_wo: List[PgStatTable]):
def test_compare_pg_stats_wo_with_pgbench_simple_update(
neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_wo: List[PgStatTable],
):
env = neon_with_baseline
# initialize pgbench
env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()])
env.flush()
with env.record_pg_stats(pg_stats_wo):
env.pg_bin.run_capture(
['pgbench', '-N', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
["pgbench", "-N", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()]
)
env.flush()
@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_ro_with_pgbench_select_only(neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_ro: List[PgStatTable]):
def test_compare_pg_stats_ro_with_pgbench_select_only(
neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_ro: List[PgStatTable],
):
env = neon_with_baseline
# initialize pgbench
env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()])
env.flush()
with env.record_pg_stats(pg_stats_ro):
env.pg_bin.run_capture(
['pgbench', '-S', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
["pgbench", "-S", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()]
)
env.flush()
@pytest.mark.parametrize("seed", get_seeds_matrix())
@pytest.mark.parametrize("scale", get_scales_matrix())
@pytest.mark.parametrize("duration", get_durations_matrix(5))
def test_compare_pg_stats_wal_with_pgbench_default(neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_wal: List[PgStatTable]):
def test_compare_pg_stats_wal_with_pgbench_default(
neon_with_baseline: PgCompare,
seed: int,
scale: int,
duration: int,
pg_stats_wal: List[PgStatTable],
):
env = neon_with_baseline
# initialize pgbench
env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()])
env.flush()
with env.record_pg_stats(pg_stats_wal):
env.pg_bin.run_capture(
['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
["pgbench", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()]
)
env.flush()
@pytest.mark.parametrize("n_tables", [1, 10])
@pytest.mark.parametrize("duration", get_durations_matrix(10))
def test_compare_pg_stats_wo_with_heavy_write(neon_with_baseline: PgCompare,
n_tables: int,
duration: int,
pg_stats_wo: List[PgStatTable]):
def test_compare_pg_stats_wo_with_heavy_write(
neon_with_baseline: PgCompare, n_tables: int, duration: int, pg_stats_wo: List[PgStatTable]
):
env = neon_with_baseline
with env.pg.connect().cursor() as cur:
for i in range(n_tables):
@@ -112,8 +122,7 @@ def test_compare_pg_stats_wo_with_heavy_write(neon_with_baseline: PgCompare,
with env.record_pg_stats(pg_stats_wo):
threads = [
threading.Thread(target=start_single_table_workload, args=(i, ))
for i in range(n_tables)
threading.Thread(target=start_single_table_workload, args=(i,)) for i in range(n_tables)
]
for thread in threads:

View File

@@ -1,11 +1,12 @@
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
from io import BufferedReader, RawIOBase
from itertools import repeat
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
class CopyTestData(RawIOBase):
def __init__(self, rows: int):
@@ -29,7 +30,7 @@ class CopyTestData(RawIOBase):
# Number of bytes to read in this call
l = min(len(self.linebuf) - self.ptr, len(b))
b[:l] = self.linebuf[self.ptr:(self.ptr + l)]
b[:l] = self.linebuf[self.ptr : (self.ptr + l)]
self.ptr += l
return l
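The two reformatted lines above are the core of CopyTestData: a RawIOBase whose readinto() copies out slices of a prepared line buffer, so COPY input can be streamed without materialising it all. A self-contained sketch of the same pattern, assuming a BufferedReader wrapper and a simplified line format (the class shown here is illustrative, not the test's actual implementation):

import io


class RepeatedLines(io.RawIOBase):
    """Generate `rows` text lines lazily and serve them through readinto()."""

    def __init__(self, rows: int):
        self.rows = rows
        self.row = 0
        self.linebuf = b""
        self.ptr = 0

    def readable(self) -> bool:
        return True

    def readinto(self, b) -> int:
        if self.ptr >= len(self.linebuf):
            if self.row >= self.rows:
                return 0  # EOF
            # Simplified line format; the real test writes a wider filler string.
            self.linebuf = f"{self.row}\tsome filler text\n".encode()
            self.ptr = 0
            self.row += 1
        # Same copy-out arithmetic as in the hunk above.
        l = min(len(self.linebuf) - self.ptr, len(b))
        b[:l] = self.linebuf[self.ptr : (self.ptr + l)]
        self.ptr += l
        return l


# Wrapping in BufferedReader yields the file-like object that psycopg2's copy_from() expects.
print(io.BufferedReader(RepeatedLines(3)).read().decode())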
@@ -52,19 +53,19 @@ def test_copy(neon_with_baseline: PgCompare):
# Load data with COPY, recording the time and I/O it takes.
#
# Since there's no data in the table previously, this extends it.
with env.record_pageserver_writes('copy_extend_pageserver_writes'):
with env.record_duration('copy_extend'):
cur.copy_from(copy_test_data(1000000), 'copytest')
with env.record_pageserver_writes("copy_extend_pageserver_writes"):
with env.record_duration("copy_extend"):
cur.copy_from(copy_test_data(1000000), "copytest")
env.flush()
# Delete most rows, and VACUUM to make the space available for reuse.
with env.record_pageserver_writes('delete_pageserver_writes'):
with env.record_duration('delete'):
with env.record_pageserver_writes("delete_pageserver_writes"):
with env.record_duration("delete"):
cur.execute("delete from copytest where i % 100 <> 0;")
env.flush()
with env.record_pageserver_writes('vacuum_pageserver_writes'):
with env.record_duration('vacuum'):
with env.record_pageserver_writes("vacuum_pageserver_writes"):
with env.record_duration("vacuum"):
cur.execute("vacuum copytest")
env.flush()
@@ -72,9 +73,9 @@ def test_copy(neon_with_baseline: PgCompare):
# by the VACUUM.
#
# This will also clear all the VM bits.
with env.record_pageserver_writes('copy_reuse_pageserver_writes'):
with env.record_duration('copy_reuse'):
cur.copy_from(copy_test_data(1000000), 'copytest')
with env.record_pageserver_writes("copy_reuse_pageserver_writes"):
with env.record_duration("copy_reuse"):
cur.copy_from(copy_test_data(1000000), "copytest")
env.flush()
env.report_peak_memory_use()

View File

@@ -1,5 +1,6 @@
import pytest
from contextlib import closing
import pytest
from fixtures.compare_fixtures import PgCompare
from pytest_lazyfixture import lazy_fixture # type: ignore
@@ -11,22 +12,24 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow),
pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow),
pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
])
],
)
def test_dup_key(env: PgCompare):
# Update the same page many times, then measure read performance
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('drop table if exists t, f;')
cur.execute("drop table if exists t, f;")
cur.execute("SET synchronous_commit=off")
cur.execute("SET statement_timeout=0")
# Write many updates to the same row
with env.record_duration('write'):
with env.record_duration("write"):
cur.execute("create table t (i integer, filler text);")
cur.execute('insert into t values (0);')
cur.execute("""
cur.execute("insert into t values (0);")
cur.execute(
"""
do $$
begin
for ivar in 1..5000000 loop
@@ -38,13 +41,14 @@ begin
end loop;
end;
$$;
""")
"""
)
# Write 3-4 MB to evict t from compute cache
cur.execute('create table f (i integer);')
cur.execute(f'insert into f values (generate_series(1,100000));')
cur.execute("create table f (i integer);")
cur.execute(f"insert into f values (generate_series(1,100000));")
# Read
with env.record_duration('read'):
cur.execute('select * from t;')
with env.record_duration("read"):
cur.execute("select * from t;")
cur.fetchall()

View File

@@ -1,9 +1,10 @@
import os
from contextlib import closing
from fixtures.benchmark_fixture import MetricReport
from fixtures.neon_fixtures import NeonEnv
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
#
@@ -24,8 +25,8 @@ def test_gist_buffering_build(neon_with_baseline: PgCompare):
)
# Build the index.
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('build'):
with env.record_pageserver_writes("pageserver_writes"):
with env.record_duration("build"):
cur.execute(
"create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
)

View File

@@ -1,5 +1,6 @@
import pytest
from contextlib import closing
import pytest
from fixtures.compare_fixtures import PgCompare
from pytest_lazyfixture import lazy_fixture # type: ignore
@@ -11,27 +12,28 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow),
pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow),
pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
])
],
)
def test_hot_page(env: PgCompare):
# Update the same page many times, then measure read performance
num_writes = 1000000
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('drop table if exists t, f;')
cur.execute("drop table if exists t, f;")
# Write many updates to the same row
with env.record_duration('write'):
cur.execute('create table t (i integer);')
cur.execute('insert into t values (0);')
with env.record_duration("write"):
cur.execute("create table t (i integer);")
cur.execute("insert into t values (0);")
for i in range(num_writes):
cur.execute(f'update t set i = {i};')
cur.execute(f"update t set i = {i};")
# Write 3-4 MB to evict t from compute cache
cur.execute('create table f (i integer);')
cur.execute(f'insert into f values (generate_series(1,100000));')
cur.execute("create table f (i integer);")
cur.execute(f"insert into f values (generate_series(1,100000));")
# Read
with env.record_duration('read'):
cur.execute('select * from t;')
with env.record_duration("read"):
cur.execute("select * from t;")
cur.fetchall()

View File

@@ -1,5 +1,6 @@
import pytest
from contextlib import closing
import pytest
from fixtures.compare_fixtures import PgCompare
from pytest_lazyfixture import lazy_fixture # type: ignore
@@ -11,7 +12,8 @@ from pytest_lazyfixture import lazy_fixture # type: ignore
pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow),
pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow),
pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
])
],
)
def test_hot_table(env: PgCompare):
# Update a small table many times, then measure read performance
num_rows = 100000  # Slightly larger than shared_buffers size; TODO: validate
@@ -20,17 +22,17 @@ def test_hot_table(env: PgCompare):
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('drop table if exists t;')
cur.execute("drop table if exists t;")
# Write many updates to a small table
with env.record_duration('write'):
cur.execute('create table t (i integer primary key);')
cur.execute(f'insert into t values (generate_series(1,{num_rows}));')
with env.record_duration("write"):
cur.execute("create table t (i integer primary key);")
cur.execute(f"insert into t values (generate_series(1,{num_rows}));")
for i in range(num_writes):
cur.execute(f'update t set i = {i + num_rows} WHERE i = {i};')
cur.execute(f"update t set i = {i + num_rows} WHERE i = {i};")
# Read the table
with env.record_duration('read'):
with env.record_duration("read"):
for i in range(num_reads):
cur.execute('select * from t;')
cur.execute("select * from t;")
cur.fetchall()

View File

@@ -1,10 +1,11 @@
from io import BytesIO
import asyncio
from io import BytesIO
import asyncpg
from fixtures.neon_fixtures import NeonEnv, Postgres, PgProtocol
from fixtures.log_helper import log
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, PgProtocol, Postgres
async def repeat_bytes(buf, repetitions: int):
@@ -16,7 +17,8 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st
buf = BytesIO()
for i in range(1000):
buf.write(
f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode())
f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()
)
buf.seek(0)
copy_input = repeat_bytes(buf.read(), 5000)
@@ -28,7 +30,7 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st
async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int):
workers = []
for worker_id in range(n_parallel):
worker = copy_test_data_to_table(pg, worker_id, f'copytest_{worker_id}')
worker = copy_test_data_to_table(pg, worker_id, f"copytest_{worker_id}")
workers.append(asyncio.create_task(worker))
# await all workers
@@ -43,10 +45,10 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle
cur = conn.cursor()
for worker_id in range(n_parallel):
cur.execute(f'CREATE TABLE copytest_{worker_id} (i int, t text)')
cur.execute(f"CREATE TABLE copytest_{worker_id} (i int, t text)")
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('load'):
with env.record_pageserver_writes("pageserver_writes"):
with env.record_duration("load"):
asyncio.run(parallel_load_different_tables(env.pg, n_parallel))
env.flush()
@@ -57,7 +59,7 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle
async def parallel_load_same_table(pg: PgProtocol, n_parallel: int):
workers = []
for worker_id in range(n_parallel):
worker = copy_test_data_to_table(pg, worker_id, f'copytest')
worker = copy_test_data_to_table(pg, worker_id, f"copytest")
workers.append(asyncio.create_task(worker))
# await all workers
@@ -70,10 +72,10 @@ def test_parallel_copy_same_table(neon_with_baseline: PgCompare, n_parallel=5):
conn = env.pg.connect()
cur = conn.cursor()
cur.execute(f'CREATE TABLE copytest (i int, t text)')
cur.execute(f"CREATE TABLE copytest (i int, t text)")
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('load'):
with env.record_pageserver_writes("pageserver_writes"):
with env.record_duration("load"):
asyncio.run(parallel_load_same_table(env.pg, n_parallel))
env.flush()

View File

@@ -30,7 +30,7 @@ def init_pgbench(env: PgCompare, cmdline):
# duration is actually a metric and uses float instead of int for timestamp
start_timestamp = utc_now_timestamp()
t0 = timeit.default_timer()
with env.record_pageserver_writes('init.pageserver_writes'):
with env.record_pageserver_writes("init.pageserver_writes"):
out = env.pg_bin.run_capture(cmdline)
env.flush()
@@ -49,10 +49,12 @@ def init_pgbench(env: PgCompare, cmdline):
def run_pgbench(env: PgCompare, prefix: str, cmdline):
with env.record_pageserver_writes(f'{prefix}.pageserver_writes'):
with env.record_pageserver_writes(f"{prefix}.pageserver_writes"):
run_start_timestamp = utc_now_timestamp()
t0 = timeit.default_timer()
out = env.pg_bin.run_capture(cmdline, )
out = env.pg_bin.run_capture(
cmdline,
)
run_duration = timeit.default_timer() - t0
run_end_timestamp = utc_now_timestamp()
env.flush()
@@ -78,40 +80,45 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline):
#
# Currently, the # of connections is hardcoded at 4
def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: PgBenchLoadType):
env.zenbenchmark.record("scale", scale, '', MetricReport.TEST_PARAM)
env.zenbenchmark.record("scale", scale, "", MetricReport.TEST_PARAM)
if workload_type == PgBenchLoadType.INIT:
# Run initialize
init_pgbench(
env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr(options='-cstatement_timeout=1h')])
env, ["pgbench", f"-s{scale}", "-i", env.pg.connstr(options="-cstatement_timeout=1h")]
)
if workload_type == PgBenchLoadType.SIMPLE_UPDATE:
# Run simple-update workload
run_pgbench(env,
"simple-update",
[
'pgbench',
'-N',
'-c4',
f'-T{duration}',
'-P2',
'--progress-timestamp',
env.pg.connstr(),
])
run_pgbench(
env,
"simple-update",
[
"pgbench",
"-N",
"-c4",
f"-T{duration}",
"-P2",
"--progress-timestamp",
env.pg.connstr(),
],
)
if workload_type == PgBenchLoadType.SELECT_ONLY:
# Run SELECT workload
run_pgbench(env,
"select-only",
[
'pgbench',
'-S',
'-c4',
f'-T{duration}',
'-P2',
'--progress-timestamp',
env.pg.connstr(),
])
run_pgbench(
env,
"select-only",
[
"pgbench",
"-S",
"-c4",
f"-T{duration}",
"-P2",
"--progress-timestamp",
env.pg.connstr(),
],
)
env.report_size()
@@ -121,12 +128,12 @@ def get_durations_matrix(default: int = 45) -> List[int]:
rv = []
for d in durations.split(","):
d = d.strip().lower()
if d.endswith('h'):
duration = int(d.removesuffix('h')) * 60 * 60
elif d.endswith('m'):
duration = int(d.removesuffix('m')) * 60
if d.endswith("h"):
duration = int(d.removesuffix("h")) * 60 * 60
elif d.endswith("m"):
duration = int(d.removesuffix("m")) * 60
else:
duration = int(d.removesuffix('s'))
duration = int(d.removesuffix("s"))
rv.append(duration)
return rv
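The hunk above only changes quoting, but it is worth spelling out what get_durations_matrix() accepts: a comma-separated list where "h" and "m" suffixes are converted to seconds, while a bare number or an "s" suffix is already in seconds. A standalone sketch of that suffix handling (the function name and sample input below are illustrative; the real helper also reads its input from an environment variable with a default fallback):

from typing import List


def parse_durations(durations: str) -> List[int]:
    rv: List[int] = []
    for d in durations.split(","):
        d = d.strip().lower()
        if d.endswith("h"):
            duration = int(d.removesuffix("h")) * 60 * 60
        elif d.endswith("m"):
            duration = int(d.removesuffix("m")) * 60
        else:
            duration = int(d.removesuffix("s"))
        rv.append(duration)
    return rv


# "45" and "30s" are plain seconds, "10m" and "1h" get converted.
assert parse_durations("45,30s,10m,1h") == [45, 30, 600, 3600]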
@@ -137,10 +144,10 @@ def get_scales_matrix(default: int = 10) -> List[int]:
rv = []
for s in scales.split(","):
s = s.strip().lower()
if s.endswith('mb'):
scale = get_scale_for_db(int(s.removesuffix('mb')))
elif s.endswith('gb'):
scale = get_scale_for_db(int(s.removesuffix('gb')) * 1024)
if s.endswith("mb"):
scale = get_scale_for_db(int(s.removesuffix("mb")))
elif s.endswith("gb"):
scale = get_scale_for_db(int(s.removesuffix("gb")) * 1024)
else:
scale = int(s)
rv.append(scale)
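get_scales_matrix() mirrors the same pattern for pgbench scale factors: "mb"/"gb" suffixes are routed through get_scale_for_db(), anything else is taken as a scale factor directly. The sketch below keeps that branching; the get_scale_for_db() stand-in (roughly one scale unit per 15 MB) is an assumption made only to keep the example runnable, not the repository's actual conversion:

from typing import List


def get_scale_for_db(size_mb: int) -> int:
    # Stand-in conversion for illustration only; the real helper is defined elsewhere in the diff.
    return max(1, size_mb // 15)


def parse_scales(scales: str) -> List[int]:
    rv: List[int] = []
    for s in scales.split(","):
        s = s.strip().lower()
        if s.endswith("mb"):
            scale = get_scale_for_db(int(s.removesuffix("mb")))
        elif s.endswith("gb"):
            scale = get_scale_for_db(int(s.removesuffix("gb")) * 1024)
        else:
            scale = int(s)
        rv.append(scale)
    return rv


# "10" is used as-is, "150mb" and "1gb" go through the size-based conversion.
print(parse_scales("10,150mb,1gb"))  # e.g. [10, 10, 68] with the stand-in conversion above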
@@ -167,9 +174,9 @@ def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int):
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench_flamegraph(zenbenchmark, pg_bin, neon_env_builder, scale: int, duration: int):
neon_env_builder.num_safekeepers = 1
neon_env_builder.pageserver_config_override = '''
neon_env_builder.pageserver_config_override = """
profiling="page_requests"
'''
"""
if not profiling_supported():
pytest.skip("pageserver was built without 'profiling' feature")

View File

@@ -1,14 +1,13 @@
import os
from contextlib import closing
from fixtures.benchmark_fixture import MetricReport
from fixtures.neon_fixtures import NeonEnv
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
from fixtures.log_helper import log
import psycopg2.extras
import random
import time
from contextlib import closing
import psycopg2.extras
from fixtures.benchmark_fixture import MetricReport
from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.utils import query_scalar
@@ -43,13 +42,15 @@ def test_random_writes(neon_with_baseline: PgCompare):
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
# Create the test table
with env.record_duration('init'):
cur.execute("""
with env.record_duration("init"):
cur.execute(
"""
CREATE TABLE Big(
pk integer primary key,
count integer default 0
);
""")
"""
)
# Insert n_rows in batches to avoid query timeouts
rows_inserted = 0
@@ -62,7 +63,7 @@ def test_random_writes(neon_with_baseline: PgCompare):
# Get table size (can't be predicted because of padding and alignment)
table_size = query_scalar(cur, "SELECT pg_relation_size('Big')")
env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)
env.zenbenchmark.record("table_size", table_size, "bytes", MetricReport.TEST_PARAM)
# Decide how much to write, based on knowledge of pageserver implementation.
# Avoiding segment collisions maximizes (neon_runtime / vanilla_runtime).
@@ -72,13 +73,15 @@ def test_random_writes(neon_with_baseline: PgCompare):
# The closer this is to 250 MB, the more realistic the test is.
effective_checkpoint_distance = table_size * n_writes // n_rows
env.zenbenchmark.record("effective_checkpoint_distance",
effective_checkpoint_distance,
'bytes',
MetricReport.TEST_PARAM)
env.zenbenchmark.record(
"effective_checkpoint_distance",
effective_checkpoint_distance,
"bytes",
MetricReport.TEST_PARAM,
)
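The value being recorded here deserves a gloss: because only n_writes of the n_rows rows are touched, the write phase effectively exercises a checkpoint distance of table_size * n_writes / n_rows. A worked example with made-up numbers (the test's real values are set earlier in the function and are not part of this hunk):

# Illustrative values only.
table_size = 200 * 1024**2  # pretend pg_relation_size('Big') reported ~200 MB
n_rows = 1_000_000
n_writes = 500_000

# Same formula as in the test above.
effective_checkpoint_distance = table_size * n_writes // n_rows
print(f"{effective_checkpoint_distance / 1024**2:.0f} MB")  # 100 MB, below the ~250 MB target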
# Update random keys
with env.record_duration('run'):
with env.record_duration("run"):
for it in range(n_iterations):
for i in range(n_writes):
key = random.randint(1, n_rows)

View File

@@ -2,15 +2,16 @@
#
from contextlib import closing
from dataclasses import dataclass
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
import pytest
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.compare_fixtures import PgCompare
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
@pytest.mark.parametrize(
'rows,iters,workers',
"rows,iters,workers",
[
# The test table is large enough (3-4 MB) that it doesn't fit in the compute node
# cache, so the seqscans go to the page server. But small enough that it fits
@@ -18,31 +19,34 @@ import pytest
pytest.param(100000, 100, 0),
# Also test with a larger table, with and without parallelism
pytest.param(10000000, 1, 0),
pytest.param(10000000, 1, 4)
])
pytest.param(10000000, 1, 4),
],
)
def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int):
env = neon_with_baseline
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
cur.execute('create table t (i integer);')
cur.execute(f'insert into t values (generate_series(1,{rows}));')
cur.execute("create table t (i integer);")
cur.execute(f"insert into t values (generate_series(1,{rows}));")
# Verify that the table is larger than shared_buffers
cur.execute('''
cur.execute(
"""
select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('t') as tbl_size
from pg_settings where name = 'shared_buffers'
''')
"""
)
row = cur.fetchone()
assert row is not None
shared_buffers = row[0]
table_size = row[1]
log.info(f"shared_buffers is {shared_buffers}, table size {table_size}")
assert int(shared_buffers) < int(table_size)
env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM)
env.zenbenchmark.record("table_size", table_size, "bytes", MetricReport.TEST_PARAM)
cur.execute(f"set max_parallel_workers_per_gather = {workers}")
with env.record_duration('run'):
with env.record_duration("run"):
for i in range(iters):
cur.execute('select count(*) from t;')
cur.execute("select count(*) from t;")

View File

@@ -1,7 +1,8 @@
import pytest
from contextlib import closing
from fixtures.neon_fixtures import NeonEnvBuilder
import pytest
from fixtures.benchmark_fixture import NeonBenchmarker
from fixtures.neon_fixtures import NeonEnvBuilder
# This test sometimes runs for longer than the global 5 minute timeout.
@@ -11,15 +12,15 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
env = neon_env_builder.init_start()
# Start
env.neon_cli.create_branch('test_startup')
env.neon_cli.create_branch("test_startup")
with zenbenchmark.record_duration("startup_time"):
pg = env.postgres.create_start('test_startup')
pg = env.postgres.create_start("test_startup")
pg.safe_psql("select 1;")
# Restart
pg.stop_and_destroy()
with zenbenchmark.record_duration("restart_time"):
pg.create_start('test_startup')
pg.create_start("test_startup")
pg.safe_psql("select 1;")
# Fill up
@@ -28,8 +29,8 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
for i in range(num_tables):
cur.execute(f'create table t_{i} (i integer);')
cur.execute(f'insert into t_{i} values (generate_series(1,{num_rows}));')
cur.execute(f"create table t_{i} (i integer);")
cur.execute(f"insert into t_{i} values (generate_series(1,{num_rows}));")
# Read
with zenbenchmark.record_duration("read_time"):
@@ -42,7 +43,7 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
# Restart
pg.stop_and_destroy()
with zenbenchmark.record_duration("restart_with_data"):
pg.create_start('test_startup')
pg.create_start("test_startup")
pg.safe_psql("select 1;")
# Read

View File

@@ -10,8 +10,7 @@ from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder, PgBin
from fixtures.utils import lsn_from_hex
from performance.test_perf_pgbench import (get_durations_matrix, get_scales_matrix)
from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix
@pytest.fixture(params=["vanilla", "neon_off", "neon_on"])
@@ -30,7 +29,9 @@ def pg_compare(request) -> PgCompare:
return fixture
else:
assert len(x) == 2, f"request param ({request.param}) should have a format of \
assert (
len(x) == 2
), f"request param ({request.param}) should have a format of \
`neon_{{safekeepers_enable_fsync}}`"
# `NeonCompare` interface
@@ -70,8 +71,7 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
with env.record_duration("run_duration"):
threads = [
threading.Thread(target=start_single_table_workload, args=(i, ))
for i in range(n_tables)
threading.Thread(target=start_single_table_workload, args=(i,)) for i in range(n_tables)
]
for thread in threads:
@@ -95,12 +95,14 @@ def test_heavy_write_workload(pg_compare: PgCompare, n_tables: int, scale: int,
)
cur.execute(f"INSERT INTO t{i} (key) VALUES (0)")
workload_thread = threading.Thread(target=start_heavy_write_workload,
args=(env, n_tables, scale, num_iters))
workload_thread = threading.Thread(
target=start_heavy_write_workload, args=(env, n_tables, scale, num_iters)
)
workload_thread.start()
record_thread = threading.Thread(target=record_lsn_write_lag,
args=(env, lambda: workload_thread.is_alive()))
record_thread = threading.Thread(
target=record_lsn_write_lag, args=(env, lambda: workload_thread.is_alive())
)
record_thread.start()
record_read_latency(env, lambda: workload_thread.is_alive(), "SELECT * from t0 where key = 0")
@@ -110,14 +112,16 @@ def test_heavy_write_workload(pg_compare: PgCompare, n_tables: int, scale: int,
def start_pgbench_simple_update_workload(env: PgCompare, duration: int):
with env.record_duration("run_duration"):
env.pg_bin.run_capture([
'pgbench',
'-j10',
'-c10',
'-N',
f'-T{duration}',
env.pg.connstr(options="-csynchronous_commit=off")
])
env.pg_bin.run_capture(
[
"pgbench",
"-j10",
"-c10",
"-N",
f"-T{duration}",
env.pg.connstr(options="-csynchronous_commit=off"),
]
)
env.flush()
@@ -128,20 +132,22 @@ def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, durat
env = pg_compare
# initialize pgbench tables
env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()])
env.flush()
workload_thread = threading.Thread(target=start_pgbench_simple_update_workload,
args=(env, duration))
workload_thread = threading.Thread(
target=start_pgbench_simple_update_workload, args=(env, duration)
)
workload_thread.start()
record_thread = threading.Thread(target=record_lsn_write_lag,
args=(env, lambda: workload_thread.is_alive()))
record_thread = threading.Thread(
target=record_lsn_write_lag, args=(env, lambda: workload_thread.is_alive())
)
record_thread.start()
record_read_latency(env,
lambda: workload_thread.is_alive(),
"SELECT * from pgbench_accounts where aid = 1")
record_read_latency(
env, lambda: workload_thread.is_alive(), "SELECT * from pgbench_accounts where aid = 1"
)
workload_thread.join()
record_thread.join()
@@ -150,13 +156,15 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_even
with env.record_duration("run_duration"):
# We need to increase the statement timeout (default: 120s) because the
# initialization step can be slow with a large scale.
env.pg_bin.run_capture([
'pgbench',
f'-s{scale}',
'-i',
'-Idtg',
env.pg.connstr(options='-cstatement_timeout=600s')
])
env.pg_bin.run_capture(
[
"pgbench",
f"-s{scale}",
"-i",
"-Idtg",
env.pg.connstr(options="-cstatement_timeout=600s"),
]
)
done_event.set()
@@ -170,12 +178,14 @@ def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int):
workload_done_event = threading.Event()
workload_thread = threading.Thread(target=start_pgbench_intensive_initialization,
args=(env, scale, workload_done_event))
workload_thread = threading.Thread(
target=start_pgbench_intensive_initialization, args=(env, scale, workload_done_event)
)
workload_thread.start()
record_thread = threading.Thread(target=record_lsn_write_lag,
args=(env, lambda: not workload_done_event.is_set()))
record_thread = threading.Thread(
target=record_lsn_write_lag, args=(env, lambda: not workload_done_event.is_set())
)
record_thread.start()
record_read_latency(env, lambda: not workload_done_event.is_set(), "SELECT count(*) from foo")
@@ -195,13 +205,15 @@ def record_lsn_write_lag(env: PgCompare, run_cond: Callable[[], bool], pool_inte
cur.execute("CREATE EXTENSION neon")
while run_cond():
cur.execute('''
cur.execute(
"""
select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn),
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn)),
pg_current_wal_flush_lsn(),
received_lsn
from backpressure_lsns();
''')
"""
)
res = cur.fetchone()
lsn_write_lags.append(res[0])
@@ -220,24 +232,29 @@ def record_lsn_write_lag(env: PgCompare, run_cond: Callable[[], bool], pool_inte
time.sleep(pool_interval)
env.zenbenchmark.record("lsn_write_lag_max",
float(max(lsn_write_lags) / (1024**2)),
"MB",
MetricReport.LOWER_IS_BETTER)
env.zenbenchmark.record("lsn_write_lag_avg",
float(statistics.mean(lsn_write_lags) / (1024**2)),
"MB",
MetricReport.LOWER_IS_BETTER)
env.zenbenchmark.record("lsn_write_lag_stdev",
float(statistics.stdev(lsn_write_lags) / (1024**2)),
"MB",
MetricReport.LOWER_IS_BETTER)
env.zenbenchmark.record(
"lsn_write_lag_max",
float(max(lsn_write_lags) / (1024**2)),
"MB",
MetricReport.LOWER_IS_BETTER,
)
env.zenbenchmark.record(
"lsn_write_lag_avg",
float(statistics.mean(lsn_write_lags) / (1024**2)),
"MB",
MetricReport.LOWER_IS_BETTER,
)
env.zenbenchmark.record(
"lsn_write_lag_stdev",
float(statistics.stdev(lsn_write_lags) / (1024**2)),
"MB",
MetricReport.LOWER_IS_BETTER,
)
def record_read_latency(env: PgCompare,
run_cond: Callable[[], bool],
read_query: str,
read_interval: float = 1.0):
def record_read_latency(
env: PgCompare, run_cond: Callable[[], bool], read_query: str, read_interval: float = 1.0
):
read_latencies = []
with env.pg.connect().cursor() as cur:
@@ -256,15 +273,12 @@ def record_read_latency(env: PgCompare,
time.sleep(read_interval)
env.zenbenchmark.record("read_latency_max",
max(read_latencies),
's',
MetricReport.LOWER_IS_BETTER)
env.zenbenchmark.record("read_latency_avg",
statistics.mean(read_latencies),
's',
MetricReport.LOWER_IS_BETTER)
env.zenbenchmark.record("read_latency_stdev",
statistics.stdev(read_latencies),
's',
MetricReport.LOWER_IS_BETTER)
env.zenbenchmark.record(
"read_latency_max", max(read_latencies), "s", MetricReport.LOWER_IS_BETTER
)
env.zenbenchmark.record(
"read_latency_avg", statistics.mean(read_latencies), "s", MetricReport.LOWER_IS_BETTER
)
env.zenbenchmark.record(
"read_latency_stdev", statistics.stdev(read_latencies), "s", MetricReport.LOWER_IS_BETTER
)

View File

@@ -12,10 +12,11 @@
# Amplification problem at its finest.
import os
from contextlib import closing
from fixtures.benchmark_fixture import MetricReport
from fixtures.neon_fixtures import NeonEnv
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
def test_write_amplification(neon_with_baseline: PgCompare):
@@ -23,18 +24,20 @@ def test_write_amplification(neon_with_baseline: PgCompare):
with closing(env.pg.connect()) as conn:
with conn.cursor() as cur:
with env.record_pageserver_writes('pageserver_writes'):
with env.record_duration('run'):
with env.record_pageserver_writes("pageserver_writes"):
with env.record_duration("run"):
# NOTE: Because each iteration updates every table already created,
# the runtime and write amplification are O(n^2), where n is the
# number of iterations.
for i in range(25):
cur.execute(f'''
cur.execute(
f"""
CREATE TABLE tbl{i} AS
SELECT g as i, 'long string to consume some space' || g as t
FROM generate_series(1, 100000) g
''')
"""
)
cur.execute(f"create index on tbl{i} (i);")
for j in range(1, i):
cur.execute(f"delete from tbl{j} where i = {i}")

View File

@@ -18,10 +18,12 @@ from fixtures.utils import subprocess_capture
"python/asyncpg",
pytest.param(
"python/pg8000", # See https://github.com/neondatabase/neon/pull/2008#discussion_r912264281
marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way")),
marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way"),
),
pytest.param(
"swift/PostgresClientKit", # See https://github.com/neondatabase/neon/pull/2008#discussion_r911896592
marks=pytest.mark.xfail(reason="Neither SNI nor parameters is supported")),
marks=pytest.mark.xfail(reason="Neither SNI nor parameters is supported"),
),
"typescript/postgresql-client",
],
)
@@ -31,12 +33,14 @@ def test_pg_clients(test_output_dir: Path, remote_pg: RemotePostgres, client: st
env_file = None
with NamedTemporaryFile(mode="w", delete=False) as f:
env_file = f.name
f.write(f"""
f.write(
f"""
NEON_HOST={conn_options["host"]}
NEON_DATABASE={conn_options["dbname"]}
NEON_USER={conn_options["user"]}
NEON_PASSWORD={conn_options["password"]}
""")
"""
)
image_tag = client.lower()
docker_bin = shutil.which("docker")

View File

@@ -1,8 +1,9 @@
import pytest
import os
from fixtures.neon_fixtures import NeonEnv
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
"""
Use this test to see what happens when tests fail.
@@ -13,8 +14,9 @@ Set the environment variable RUN_BROKEN to see this test run (and fail,
and hopefully not leave any server processes behind).
"""
run_broken = pytest.mark.skipif(os.environ.get('RUN_BROKEN') is None,
reason="only used for testing the fixtures")
run_broken = pytest.mark.skipif(
os.environ.get("RUN_BROKEN") is None, reason="only used for testing the fixtures"
)
@run_broken
@@ -23,7 +25,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin):
env.neon_cli.create_branch("test_broken", "empty")
env.postgres.create_start("test_broken")
log.info('postgres is running')
log.info("postgres is running")
log.info('THIS NEXT COMMAND WILL FAIL:')
pg_bin.run('pgbench -i_am_a_broken_test'.split())
log.info("THIS NEXT COMMAND WILL FAIL:")
pg_bin.run("pgbench -i_am_a_broken_test".split())